🏡 index : ~doyle/chartered.git

author Jordan Doyle <jordan@doyle.la> 2021-08-30 22:32:56.0 +01:00:00
committer Jordan Doyle <jordan@doyle.la> 2021-08-30 23:21:31.0 +01:00:00
commit
39329a3fc67f11814a1277f842bf7b07d728bd7d [patch]
tree
29a90757fbab5bbf5526b90209011fdc81be38a8
parent
e5b8e6847ecd0d15973d6fb889efdb3a072c7f7e
download
39329a3fc67f11814a1277f842bf7b07d728bd7d.tar.gz

Clean up packfile creation



Diff

 Cargo.lock          |  25 +++++++++++++++++++++++++
 Cargo.toml          |   2 ++
 src/main.rs         | 151 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------
 src/git/codec.rs    |   6 ++++--
 src/git/mod.rs      |  35 ++++++++++++++++++++++++++++-------
 src/git/packfile.rs | 375 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------
 6 files changed, 332 insertions(+), 262 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 9fd759c..82ecddb 100644
--- a/Cargo.lock
+++ a/Cargo.lock
@@ -190,6 +190,7 @@
 "async-trait",
 "axum",
 "bytes",
 "chrono",
 "const-sha1",
 "crc",
 "env_logger",
@@ -197,6 +198,7 @@
 "format-bytes",
 "futures",
 "hex",
 "itoa",
 "sha-1",
 "thrussh",
 "thrussh-keys",
@@ -204,6 +206,19 @@
 "tokio-util",
 "tower",
 "tower-http",
]

[[package]]
name = "chrono"
version = "0.4.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73"
dependencies = [
 "libc",
 "num-integer",
 "num-traits",
 "time",
 "winapi",
]

[[package]]
@@ -1192,6 +1207,16 @@
 "libsodium-sys",
 "pkg-config",
 "vcpkg",
]

[[package]]
name = "time"
version = "0.1.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438"
dependencies = [
 "libc",
 "winapi",
]

[[package]]
diff --git a/Cargo.toml b/Cargo.toml
index b65ea86..c2a9079 100644
--- a/Cargo.toml
+++ a/Cargo.toml
@@ -23,6 +23,8 @@
sha-1 = "0.9"
const-sha1 = "0.2"
crc = "2"
chrono = "0.4"
itoa = "0.4"


format-bytes = "0.1"
diff --git a/src/main.rs b/src/main.rs
index 06cb08f..31b7a0e 100644
--- a/src/main.rs
+++ a/src/main.rs
@@ -1,26 +1,32 @@
#![deny(clippy::pedantic)]
#[allow(clippy::missing_errors_doc)]

pub mod git;

use crate::git::PktLine;
use crate::git::{
    codec::{Encoder, GitCodec},
    packfile::{Commit, CommitUserInfo, PackFileEntry, TreeItem, TreeItemKind},
    PktLine,
};

use bytes::BufMut;
use bytes::BytesMut;
use bytes::{BytesMut};
use futures::future::Future;
use git::codec::Encoder;
use git::codec::GitCodec;
use std::{fmt::Write, pin::Pin, sync::Arc};
use thrussh::server::{Auth, Session};
use thrussh::*;
use thrussh_keys::*;
use thrussh::{
    ChannelId, CryptoVec, server::{self, Auth, Session},
};
use thrussh_keys::key;
use tokio_util::codec::{Decoder, Encoder as TokioEncoder};

#[tokio::main]
#[allow(clippy::semicolon_if_nothing_returned)] // broken clippy lint
async fn main() {
    env_logger::init();

    let mut config = thrussh::server::Config::default();
    config
        .keys
        .push(thrussh_keys::key::KeyPair::generate_ed25519().unwrap());
        .push(key::KeyPair::generate_ed25519().unwrap());
    let config = Arc::new(config);
    thrussh::server::run(config, "127.0.0.1:2233", Server)
        .await
@@ -54,14 +60,16 @@
        session.data(
            channel,
            CryptoVec::from_slice(self.output_bytes.split().as_ref()),
        )
        );
    }
}

type AsyncHandlerFn = Pin<Box<dyn Future<Output = Result<(Handler, Session), <Handler as server::Handler>::Error>> + Send>>;

impl server::Handler for Handler {
    type Error = anyhow::Error;
    type FutureAuth = futures::future::Ready<Result<(Self, server::Auth), anyhow::Error>>;
    type FutureUnit = Pin<Box<dyn Future<Output = Result<(Self, Session), Self::Error>> + Send>>;
    type FutureUnit = AsyncHandlerFn;
    type FutureBool = futures::future::Ready<Result<(Self, Session, bool), anyhow::Error>>;

    fn finished_auth(self, auth: Auth) -> Self::FutureAuth {
@@ -158,45 +166,33 @@
            }

            // echo -ne "0012command=fetch\n0001000ethin-pack\n0010include-tag\n000eofs-delta\n0032want d24d8020163b5fee57c9babfd0c595b8c90ba253\n0009done\n"
            // echo -ne

            let tree_bytes = format_bytes::format_bytes!(
                b"100644 test\0{}",
                const_sha1::sha1(&const_sha1::ConstBuffer::from_slice(
                    "blob 33\0testing this is a test cool test!".as_bytes()
                ))
                .bytes()
            );

            let tree = format_bytes::format_bytes!(
                b"tree {}\0{}",
                tree_bytes.len().to_string().as_bytes(),
                tree_bytes
            );

            let tree_hash = hex::encode(sha1::Sha1::digest(&tree));

            let commit_bytes = format!(
                "tree {}
author Jordan Doyle <jordan@doyle.la> 1630244577 +0100
committer Jordan Doyle <jordan@doyle.la> 1630244577 +0100

test",
                tree_hash
            );

            let commit = format!("commit {}\0{}", commit_bytes.len(), commit_bytes);

            let commit_hash = hex::encode(sha1::Sha1::digest(commit.as_bytes()));
            let file = PackFileEntry::Blob(b"this is some text inside my cool test file!");

            let tree = PackFileEntry::Tree(vec![TreeItem {
                kind: TreeItemKind::File,
                name: "test",
                hash: file.hash()?,
            }]);

            let commit_user = CommitUserInfo {
                name: "Jordan Doyle",
                email: "jordan@doyle.la",
                time: chrono::Utc::now(),
            };

            let commit = PackFileEntry::Commit(Commit {
                tree: tree.hash()?,
                author: commit_user,
                committer: commit_user,
                message: "cool commit",
            });

            use sha1::Digest;
            println!(
                "commit hash: {} - tree hash: {} - file hash: {}",
                commit_hash,
                tree_hash,
                const_sha1::sha1(&const_sha1::ConstBuffer::from_slice(
                    "blob 33\0testing this is a test cool test!".as_bytes()
                ))
                hex::encode(&commit.hash()?),
                hex::encode(&tree.hash()?),
                hex::encode(&file.hash()?),
            );

            // echo -ne "0014command=ls-refs\n0014agent=git/2.321\n00010008peel000bsymrefs000aunborn0014ref-prefix HEAD\n0000"
@@ -207,7 +203,11 @@
            // https://shafiul.github.io/gitbook/7_the_packfile.html
            if ls_refs {
                self.write(PktLine::Data(
                    format!("{} HEAD symref-target:refs/heads/master\n", commit_hash).as_bytes(),
                    format!(
                        "{} HEAD symref-target:refs/heads/master\n",
                        hex::encode(&commit.hash()?)
                    )
                    .as_bytes(),
                ))?;
                self.write(PktLine::Flush)?;
                self.flush(&mut session, channel);
@@ -224,62 +224,15 @@

            if done {
                self.write(PktLine::Data(b"packfile\n"))?;

                {
                    let mut buf = BytesMut::new();
                    buf.put_u8(2); // sideband, 1 = msg
                    buf.extend_from_slice(b"Hello from chartered!\n");
                    self.write(PktLine::Data(buf.as_ref()))?;
                    self.flush(&mut session, channel);
                }

                // fatal: bad object 4ff484817ca2f1a10183da210a6e74f29764857d
                // error: ssh://127.0.0.1:2233/ did not send all necessary objects
                let packfile = git::packfile::PackFile::new(vec![
                    git::packfile::PackFileEntry::new(
                        git::packfile::PackFileEntryType::Commit,
                        commit_bytes.as_bytes(),
                    )?,
                    git::packfile::PackFileEntry::new(
                        git::packfile::PackFileEntryType::Tree,
                        &tree_bytes,
                    )?,
                    git::packfile::PackFileEntry::new(
                        git::packfile::PackFileEntryType::Blob,
                        b"testing this is a test cool test!",
                    )?,
                ]);

                // {
                //     let mut buf = BytesMut::new();
                //     buf.put_u8(1);
                //     git::packfile::PackFileIndex {
                //         packfile: &packfile,
                //     }
                //     .encode_to(&mut buf)?;
                //     self.write(PktLine::Data(buf.as_ref()))?;
                //     self.write(PktLine::Flush)?;
                // }

                {
                    let mut buf = BytesMut::new();
                    buf.put_u8(1); // sideband, 1 = continue
                    packfile.encode_to(&mut buf)?;
                    self.write(PktLine::Data(buf.as_ref()))?;
                }

                // {
                //     let mut buf = BytesMut::new();
                //     buf.put_u8(2); // sideband, 1 = msg
                //     buf.extend_from_slice(
                //         b"Total 3 (delta 0), reused 0 (delta 0), pack-reused 0\n",
                //     );
                //     self.write(PktLine::Data(buf.as_ref()))?;
                //     self.flush(&mut session, channel);
                // }
                self.write(PktLine::SidebandMsg(b"Hello from chartered!\n"))?;
                self.flush(&mut session, channel);

                let packfile = git::packfile::PackFile::new(vec![commit, tree, file]);
                self.write(PktLine::SidebandData(packfile))?;
                self.write(PktLine::Flush)?;
                self.flush(&mut session, channel);

                session.exit_status_request(channel, 0);
                session.eof(channel);
                session.close(channel);
diff --git a/src/git/codec.rs b/src/git/codec.rs
index fc12996..96ce563 100644
--- a/src/git/codec.rs
+++ a/src/git/codec.rs
@@ -1,3 +1,5 @@
#![allow(clippy::module_name_repetitions)]

use bytes::{Buf, Bytes, BytesMut};
use tokio_util::codec;

@@ -28,7 +30,7 @@
            return Ok(None);
        }

        let mut length_bytes = [0u8; 4];
        let mut length_bytes = [0_u8; 4];
        length_bytes.copy_from_slice(&src[..4]);
        let length = u16::from_str_radix(std::str::from_utf8(&length_bytes)?, 16)? as usize;

@@ -42,7 +44,7 @@
            return self.decode(src);
        }

        if length > 65520 || length < 4 {
        if !(4..=65520).contains(&length) {
            return Err(
                std::io::Error::new(std::io::ErrorKind::InvalidData, "protocol abuse").into(),
            );
diff --git a/src/git/mod.rs b/src/git/mod.rs
index 6844310..e06d861 100644
--- a/src/git/mod.rs
+++ a/src/git/mod.rs
@@ -1,11 +1,19 @@
pub mod codec;
pub mod packfile;

use bytes::BytesMut;
use bytes::{BufMut, BytesMut};
use std::fmt::Write;

use self::packfile::PackFile;

pub enum PktLine<'a> {
    Data(&'a [u8]),
    /// Similar to a data packet, but used during packfile sending to indicate this

    /// packet is a block of data by prepending a byte containing the u8 `1`.

    SidebandData(PackFile<'a>),
    /// Similar to a data packet, but used during packfile sending to indicate this

    /// packet is a status message by prepending a byte containing the u8 `2`.

    SidebandMsg(&'a [u8]),
    Flush,
    Delimiter,
    ResponseEnd,
@@ -16,8 +24,25 @@
        match self {
            Self::Data(data) => {
                write!(buf, "{:04x}", data.len() + 4)?;
                buf.extend_from_slice(&data);
                buf.extend_from_slice(data);
            }
            Self::SidebandData(packfile) => {
                // split the buf off so the cost of counting the bytes to put in the
                // data line prefix is just the cost of `unsplit` (an atomic decrement)
                let mut data_buf = buf.split_off(buf.len());

                data_buf.put_u8(1); // sideband, 1 = data
                packfile.encode_to(&mut data_buf)?;

                // write into the buf not the data buf so it's at the start of the msg
                write!(buf, "{:04x}", data_buf.len() + 4)?;
                buf.unsplit(data_buf);
            }
            Self::SidebandMsg(msg) => {
                write!(buf, "{:04x}", msg.len() + 4 + 1)?;
                buf.put_u8(2); // sideband, 2 = msg
                buf.extend_from_slice(msg);
            }
            Self::Flush => buf.extend_from_slice(b"0000"),
            Self::Delimiter => buf.extend_from_slice(b"0001"),
            Self::ResponseEnd => buf.extend_from_slice(b"0002"),
@@ -26,12 +51,6 @@
        Ok(())
    }
}

// impl From<PktLine<'_>> for CryptoVec {
//     fn from(val: PktLine<'_>) -> Self {
//         Self::from(val.encode())
//     }
// }

impl<'a> From<&'a str> for PktLine<'a> {
    fn from(val: &'a str) -> Self {
diff --git a/src/git/packfile.rs b/src/git/packfile.rs
index 48c06f0..befdcfc 100644
--- a/src/git/packfile.rs
+++ a/src/git/packfile.rs
@@ -1,138 +1,164 @@
use bytes::{BufMut, BytesMut};
use const_sha1::{sha1, ConstBuffer};
use flate2::{write::ZlibEncoder, Compression};
use sha1::{Digest, Sha1};
use std::convert::TryInto;
use std::io::Write as IoWrite;

// The offset/sha1[] tables are sorted by sha1[] values (this is to
// allow binary search of this table), and fanout[] table points at
// the offset/sha1[] table in a specific way (so that part of the
// latter table that covers all hashes that start with a given byte
// can be found to avoid 8 iterations of the binary search).
pub struct PackFileIndex<'a> {
    pub packfile: &'a PackFile,
use sha1::{
    digest::{generic_array::GenericArray, FixedOutputDirty},
    Digest, Sha1,
};
use std::{convert::TryInto, fmt::Write, io::Write as IoWrite};

// The packfile itself is a very simple format. There is a header, a
// series of packed objects (each with its own header and body) and
// then a checksum trailer. The first four bytes are the string 'PACK',
// which is sort of used to make sure you're getting the start of the
// packfile correctly. This is followed by a 4-byte packfile version
// number and then a 4-byte number of entries in that file.
pub struct PackFile<'a> {
    // the objects written, in order, between the header and the checksum trailer
    entries: Vec<PackFileEntry<'a>>,
}

impl<'a> PackFileIndex<'a> {
    pub fn encode_to(self, original_buf: &mut BytesMut) -> Result<(), anyhow::Error> {
        // split the buffer so we can hash only what we're currently generating at the
        // end of this function
        let mut buf = original_buf.split_off(original_buf.len());
impl<'a> PackFile<'a> {
    /// Builds a packfile that will contain exactly the given `entries`.
    ///
    /// The entries are stored as passed in; nothing is encoded until
    /// `encode_to` is called.
    #[must_use]
    pub fn new(entries: Vec<PackFileEntry<'a>>) -> Self {
        Self { entries }
    }

        buf.extend_from_slice(b"\xfftOc"); // magic header
        buf.put_u32(2); // version
    /// Number of bytes taken up by the packfile header: the `PACK` magic
    /// followed by two `u32` words (the version and the entry count).
    #[must_use]
    pub const fn header_size() -> usize {
        const MAGIC_LEN: usize = "PACK".len();
        const WORD_LEN: usize = std::mem::size_of::<u32>();

        MAGIC_LEN + WORD_LEN + WORD_LEN
    }

        // calculate total `PackFileEntry` hashes beginning with the same first byte
        let mut totals_by_first_byte = [0u32; 256];
        for entry in &self.packfile.entries {
            totals_by_first_byte[entry.uncompressed_sha1[0] as usize] += 1;
        }
    /// Number of bytes taken up by the packfile trailer, which is a single
    /// 20-byte checksum.
    #[must_use]
    pub const fn footer_size() -> usize {
        const CHECKSUM_LEN: usize = 20;

        CHECKSUM_LEN
    }

        // calculate fanout value by taking cumulative totals of first byte counts
        let mut cumulative = 0;
        for i in 0..256usize {
            cumulative += totals_by_first_byte[i];
            buf.put_u32(cumulative);
        }
    pub fn encode_to(&self, original_buf: &mut BytesMut) -> Result<(), anyhow::Error> {
        let mut buf = original_buf.split_off(original_buf.len());
        buf.reserve(Self::header_size() + Self::footer_size());

        // write all the sha hashes out, this needs to be sorted by the hash which should've
        // been done by `PackFile::new()`
        for entry in &self.packfile.entries {
            buf.extend_from_slice(&entry.uncompressed_sha1);
        }
        // header
        buf.extend_from_slice(b"PACK"); // magic header
        buf.put_u32(2); // version
        buf.put_u32(self.entries.len().try_into()?); // number of entries in the packfile

        for entry in &self.packfile.entries {
            buf.put_u32(entry.compressed_crc32);
        // body
        for entry in &self.entries {
            entry.encode_to(&mut buf)?;
        }

        let mut offset = PackFile::header_size();

        // encode offsets into the packfile
        for entry in &self.packfile.entries {
            offset += entry.compressed_data.len();

            let mut offset_be = offset.to_be();
        // footer
        buf.extend_from_slice(&sha1::Sha1::digest(&buf[..]));

            while offset_be != 0 {
                // read 7 LSBs from the `offset_be` and push them off for the next iteration
                let mut val = (offset_be & 0b1111111) as u8;
                offset_be >>= 7;
        original_buf.unsplit(buf);

                if offset_be != 0 {
                    // MSB set to 1 implies there's more offset_be bytes to come, otherwise
                    // the data starts after this byte
                    val |= 1 << 7;
                }
        Ok(())
    }
}

                buf.put_u8(val);
            }
        }
/// The fields needed to build a commit object for the packfile.
///
/// Parent commits and GPG signatures are not supported yet; the
/// corresponding fields are left commented out below.
pub struct Commit<'a> {
    /// Hash of the tree this commit points at.
    pub tree: GenericArray<u8, <Sha1 as FixedOutputDirty>::OutputSize>, // [u8; 20], but sha-1 returns a GenericArray
    // pub parent: [u8; 20],
    /// Identity and timestamp written on the `author` line.
    pub author: CommitUserInfo<'a>,
    /// Identity and timestamp written on the `committer` line.
    pub committer: CommitUserInfo<'a>,
    // pub gpgsig: &str,
    /// Commit message, written after a blank line at the end of the object.
    pub message: &'a str,
}

        // push a copy of the hash that appears at the end of the packfile
        buf.extend_from_slice(&self.packfile.hash);
impl Commit<'_> {
    fn encode_to(&self, out: &mut BytesMut) -> Result<(), anyhow::Error> {
        let mut tree_hex = [0_u8; 20 * 2];
        hex::encode_to_slice(self.tree, &mut tree_hex)?;

        // hash of the whole buffer we've just generated for the index
        let mut hasher = Sha1::new();
        hasher.update(&buf);
        let result = hasher.finalize();
        buf.extend_from_slice(result.as_ref());
        out.write_str("tree ")?;
        out.extend_from_slice(&tree_hex);
        out.write_char('\n')?;

        // put the buffer we've just generated back into the mutable buffer we were passed
        original_buf.unsplit(buf);
        writeln!(out, "author {}", self.author.encode())?;
        writeln!(out, "committer {}", self.committer.encode())?;
        write!(out, "\n{}", self.message)?;

        Ok(())
    }

    /// Length in bytes of the commit body, computed without building it.
    ///
    /// Each term corresponds to one line of the encoded commit: the `tree`
    /// line (hash as hex, hence twice its byte length), the `author` and
    /// `committer` lines, and the blank line followed by the message.
    #[must_use]
    pub fn size(&self) -> usize {
        let tree_line = "tree ".len() + (self.tree.len() * 2) + "\n".len();
        let author_line = "author ".len() + self.author.size() + "\n".len();
        let committer_line = "committer ".len() + self.committer.size() + "\n".len();
        let message_part = "\n".len() + self.message.len();

        tree_line + author_line + committer_line + message_part
    }
}

// The packfile itself is a very simple format. There is a header, a
// series of packed objects (each with its own header and body) and
// then a checksum trailer. The first four bytes is the string 'PACK',
// which is sort of used to make sure you're getting the start of the
// packfile correctly. This is followed by a 4-byte packfile version
// number and then a 4-byte number of entries in that file.
pub struct PackFile {
    entries: Vec<PackFileEntry>,
    hash: [u8; 20],
/// A person and a point in time, as used for both the author and the
/// committer of a [`Commit`].
#[derive(Copy, Clone, Debug)]
pub struct CommitUserInfo<'a> {
    /// Display name, written before the email address.
    pub name: &'a str,
    /// Email address, written between `<` and `>`.
    pub email: &'a str,
    /// Moment of the action in UTC; only its Unix timestamp is encoded.
    pub time: chrono::DateTime<chrono::Utc>,
}

impl PackFile {
    pub fn new(mut entries: Vec<PackFileEntry>) -> Self {
        entries.sort_unstable_by_key(|v| v.uncompressed_sha1[0]);
        let hash_buffer = entries.iter().fold(ConstBuffer::new(), |acc, curr| {
            acc.push_slice(&curr.uncompressed_sha1)
        });

        Self {
            entries,
            hash: sha1(&hash_buffer).bytes(),
        }
impl CommitUserInfo<'_> {
    fn encode(&self) -> String {
        // TODO: remove `format!`, `format_args!`?
        format!(
            "{} <{}> {} +0000",
            self.name,
            self.email,
            self.time.timestamp()
        )
    }

    pub const fn header_size() -> usize {
        4 + std::mem::size_of::<u32>() + std::mem::size_of::<u32>()
    /// Length in bytes of the string produced by `encode`, computed without
    /// allocating it.
    #[must_use]
    pub fn size(&self) -> usize {
        // only the digit count of the timestamp is needed, so format it into
        // a stack buffer rather than a `String`
        let mut digits = itoa::Buffer::new();
        let timestamp = digits.format(self.time.timestamp());

        // pieces listed in the order they appear in the encoded form:
        // `name <email> timestamp +0000`
        let pieces = [
            self.name.len(),
            " <".len(),
            self.email.len(),
            "> ".len(),
            timestamp.len(),
            " +0000".len(),
        ];

        pieces.iter().sum()
    }

    pub fn encode_to(&self, original_buf: &mut BytesMut) -> Result<(), anyhow::Error> {
        let mut buf = original_buf.split_off(original_buf.len());
}

        buf.extend_from_slice(b"PACK"); // magic header
        buf.put_u32(2); // version
        buf.put_u32(self.entries.len().try_into().unwrap()); // number of entries in the packfile
/// The kind of object a tree entry points at, which decides the mode string
/// written in front of the entry's name inside a tree object.
pub enum TreeItemKind {
    /// A regular, non-executable file (a blob).
    File,
    /// A sub-directory (another tree).
    Directory,
}

impl TreeItemKind {
    /// Returns the octal mode string git expects for this kind of entry.
    ///
    /// Tree objects store the mode as ASCII octal without leading zeroes, so
    /// a directory is `40000` rather than `040000` (or the previously used,
    /// invalid `0000`).
    #[must_use]
    pub const fn mode(&self) -> &'static str {
        match self {
            Self::File => "100644",
            Self::Directory => "40000",
        }
    }
}

        original_buf.unsplit(buf);
/// A single entry of a tree object: a named pointer to another object.
pub struct TreeItem<'a> {
    /// Whether the entry is a file or a directory; selects the mode string.
    pub kind: TreeItemKind,
    /// Name of the entry as written into the tree.
    pub name: &'a str,
    /// Hash of the object this entry points at, written as raw bytes.
    pub hash: GenericArray<u8, <Sha1 as FixedOutputDirty>::OutputSize>, // [u8; 20] - but we have to deal with GenericArrays
}

// Every entry is serialised as `[mode] [name]\0[hash]`.
impl TreeItem<'_> {
    /// Appends this entry to `out`: the mode string, a space, the name, a
    /// NUL byte and finally the raw hash bytes.
    fn encode_to(&self, out: &mut BytesMut) -> Result<(), anyhow::Error> {
        let Self { kind, name, hash } = self;

        write!(out, "{} {}\0", kind.mode(), name)?;
        out.extend_from_slice(hash);

        Ok(())
    }

    /// Number of bytes `encode_to` writes for this entry.
    #[must_use]
    pub fn size(&self) -> usize {
        let mode_and_space = self.kind.mode().len() + " ".len();
        let name_and_nul = self.name.len() + "\0".len();

        mode_and_space + name_and_nul + self.hash.len()
    }
}

pub enum PackFileEntryType {
pub enum PackFileEntry<'a> {
    // jordan@Jordans-MacBook-Pro-2 0d % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - f5/473259d9674ed66239766a013f96a3550374e3 | gzip -dc
    // commit 1068tree 0d586b48bc42e8591773d3d8a7223551c39d453c
    // parent c2a862612a14346ae95234f26efae1ee69b5b7a9
@@ -156,73 +182,41 @@
    // -----END PGP SIGNATURE-----
    //
    // test
    Commit,
    Commit(Commit<'a>),
    // jordan@Jordans-MacBook-Pro-2 0d % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - 0d/586b48bc42e8591773d3d8a7223551c39d453c | gzip -dc
    // tree 20940000 .cargo���CYy��Ve�������100644 .gitignore�K��_ow�]����4�n�ݺ100644 Cargo.lock�7�3-�?/��
    // kt��c0C�100644 Cargo.toml�6�&(��]\8@�SHA�]f40000 src0QW��ƅ���b[�!�S&N�100644 test�G2Y�gN�b9vj?��Ut�
    Tree,
    Tree(Vec<TreeItem<'a>>),
    // jordan@Jordans-MacBook-Pro-2 objects % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - f5/473259d9674ed66239766a013f96a3550374e3| gzip -dc
    // blob 23try and find me in .git
    Blob,
    Blob(&'a [u8]),
    // Tag,
    // OfsDelta,
    // RefDelta,
}

pub struct PackFileEntry {
    entry_type: PackFileEntryType,
    compressed_data: Vec<u8>,
    compressed_crc32: u32,
    pub uncompressed_sha1: [u8; 20],
    uncompressed_size: usize,
}

impl PackFileEntry {
    pub fn new(entry_type: PackFileEntryType, data: &[u8]) -> Result<Self, anyhow::Error> {
        let mut e = ZlibEncoder::new(Vec::new(), Compression::default());
        e.write_all(data)?;
        let compressed_data = e.finish()?;

        let compressed_crc32 = crc::Crc::<u32>::new(&crc::CRC_32_CKSUM).checksum(&compressed_data);

        Ok(Self {
            entry_type,
            compressed_data,
            compressed_crc32,
            uncompressed_sha1: sha1(&ConstBuffer::new().push_slice(data)).bytes(),
            uncompressed_size: data.len(),
        })
    }

    // fn size_of_data_be(&self) -> usize {
    //     self.uncompressed_size.to_be()
    // }

    // The object header is a series of one or more 1 byte (8 bit) hunks
    // that specify the type of object the following data is, and the size
    // of the data when expanded. Each byte is really 7 bits of data, with
    // the first bit being used to say if that hunk is the last one or not
    // before the data starts. If the first bit is a 1, you will read another
    // byte, otherwise the data starts next. The first 3 bits in the first
    // byte specifies the type of data, according to the table below.
impl PackFileEntry<'_> {
    fn write_header(&self, buf: &mut BytesMut) {
        let mut size = self.uncompressed_size;
        let mut size = self.uncompressed_size();

        // write header
        {
            let mut val = 0b10000000u8;

            val |= match self.entry_type {
                PackFileEntryType::Commit => 0b001,
                PackFileEntryType::Tree => 0b010,
                PackFileEntryType::Blob => 0b011,
                // PackFileEntryType::Tag => 0b100,
                // PackFileEntryType::OfsDelta => 0b110,
                // PackFileEntryType::RefDelta => 0b111,
            let mut val = 0b1000_0000_u8;

            val |= match self {
                Self::Commit(_) => 0b001,
                Self::Tree(_) => 0b010,
                Self::Blob(_) => 0b011,
                // Self::Tag => 0b100,
                // Self::OfsDelta => 0b110,
                // Self::RefDelta => 0b111,
            } << 4;

            // pack the 4 LSBs of the size into the header
            val |= (size & 0b1111) as u8;
            #[allow(clippy::cast_possible_truncation)] // value is masked
            {
                val |= (size & 0b1111) as u8;
            }
            size >>= 4;

            buf.put_u8(val);
@@ -231,7 +225,8 @@
        // write size bytes
        while size != 0 {
            // read 7 LSBs from the `size` and push them off for the next iteration
            let mut val = (size & 0b1111111) as u8;
            #[allow(clippy::cast_possible_truncation)] // value is masked
            let mut val = (size & 0b111_1111) as u8;
            size >>= 7;

            if size != 0 {
@@ -244,10 +239,84 @@
        }
    }

    pub fn encode_to(&self, buf: &mut BytesMut) -> Result<(), anyhow::Error> {
        self.write_header(buf);
        buf.extend_from_slice(&self.compressed_data);
    /// Writes this entry to `original_out`: first the entry header, then the
    /// zlib-compressed object body.
    ///
    /// # Errors
    ///
    /// Fails if encoding the commit or tree items fails, or if the zlib
    /// encoder reports an error.
    pub fn encode_to(&self, original_out: &mut BytesMut) -> Result<(), anyhow::Error> {
        self.write_header(original_out); // TODO: this needs space reserving for it

        let expected_len = self.uncompressed_size();

        // the data ends up getting compressed but we'll need at least this many bytes
        original_out.reserve(expected_len);

        // TODO: is there a way to stream through the `ZlibEncoder` so we don't
        // need this intermediate `BytesMut` and the compressed `Vec`?
        let mut raw = BytesMut::with_capacity(expected_len);

        match self {
            Self::Commit(commit) => commit.encode_to(&mut raw)?,
            Self::Tree(items) => {
                for item in items {
                    item.encode_to(&mut raw)?;
                }
            }
            Self::Blob(data) => raw.extend_from_slice(data),
        }

        // `uncompressed_size` must agree with what was actually written, as
        // the same value was already put into the entry header
        debug_assert_eq!(raw.len(), expected_len);

        let mut compressor = ZlibEncoder::new(Vec::new(), Compression::default());
        compressor.write_all(&raw)?;
        let compressed = compressor.finish()?;

        original_out.extend_from_slice(&compressed);

        Ok(())
    }

    /// Length in bytes of this entry's body before compression.
    ///
    /// For a tree this is the sum of the sizes of all of its items.
    #[must_use]
    pub fn uncompressed_size(&self) -> usize {
        match self {
            Self::Blob(data) => data.len(),
            Self::Tree(items) => items.iter().fold(0, |total, item| total + item.size()),
            Self::Commit(commit) => commit.size(),
        }
    }

    /// Computes the SHA-1 of this entry, hashing `"<type> <size>\0"` followed
    /// by the uncompressed body.
    ///
    /// The digest comes back as a `GenericArray` rather than `[u8; 20]`
    /// because that is what the sha-1 crate returns.
    ///
    /// # Errors
    ///
    /// Fails if writing the header or encoding the commit or tree items fails.
    pub fn hash(
        &self,
    ) -> Result<GenericArray<u8, <Sha1 as FixedOutputDirty>::OutputSize>, anyhow::Error> {
        let body_len = self.uncompressed_size();

        let type_name = match self {
            Self::Commit(_) => "commit",
            Self::Tree(_) => "tree",
            Self::Blob(_) => "blob",
        };

        // size the buffer up front: `<type> <decimal size>\0` plus the body
        let size_digits = itoa::Buffer::new().format(body_len).len();
        let prefix_len = type_name.len() + " ".len() + size_digits + "\0".len();
        let mut object = BytesMut::with_capacity(prefix_len + body_len);

        write!(object, "{} {}\0", type_name, body_len)?;

        match self {
            Self::Commit(commit) => commit.encode_to(&mut object)?,
            Self::Tree(items) => {
                for item in items {
                    item.encode_to(&mut object)?;
                }
            }
            Self::Blob(blob) => object.extend_from_slice(blob),
        }

        Ok(sha1::Sha1::digest(&object))
    }
}