~doyle/gitlab-cargo-shim.git

author    Jordan Doyle <jordan@doyle.la>  2022-10-22 14:03:41 +0000
committer Jordan Doyle <jordan@doyle.la>  2022-10-22 14:03:46 +0000
commit 3e8a188c41e07f4bcb2be8e39c8633e1c4d0f52b
tree   4c7cbbee97bd7240e0fe9828ee622a7e66a03ae6
parent e7a07bc4c883f3fa7e377b287a411869ca5875d5

Move to extracted packfile crate
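
The in-tree src/protocol module (the Git command codec, pkt-line
encoding and the low-level/high-level packfile builders) is removed in
favour of the extracted `packfile` crate (0.1.0 from crates.io).
Imports move from `crate::protocol::...` to `packfile::...`,
`GitRepository::insert` now takes the file name directly instead of via
`.into()`, and the pkt-line encoder result is wrapped in `Ok(...?)` at
its one call site. `time` is dropped as a direct dependency and `ustr`
is bumped from 0.8 to 0.9.

Downstream usage stays roughly the same. A minimal sketch, assuming the
crate keeps the module layout and signatures of the old in-tree code and
that its error types convert into `anyhow::Error` (as the changed call
sites suggest); the function name, paths and strings are illustrative:

    use bytes::BytesMut;
    use packfile::{high_level::GitRepository, low_level::PackFile, PktLine};

    fn build_index() -> Result<(), anyhow::Error> {
        // build a single-commit repository in memory
        let mut repo = GitRepository::default();
        repo.insert(&["path", "to"], "my-file", "contents".into())?;

        // the commit hash is what ls-refs advertises to the client
        let (_commit_hash, entries) = repo.commit("name", "name@example.com", "message")?;

        // encode the resulting packfile as a sideband data pkt-line
        let mut buf = BytesMut::new();
        PktLine::SidebandData(PackFile::new(&entries)).encode_to(&mut buf)?;
        Ok(())
    }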



Diff

 Cargo.lock                          |  33 +++-
 Cargo.toml                          |   5 +-
 src/git_command_handlers/fetch.rs   |  12 +-
 src/git_command_handlers/ls_refs.rs |   6 +-
 src/main.rs                         |  19 +--
 src/protocol/codec.rs               | 138 +---------------
 src/protocol/high_level.rs          | 178 +-------------------
 src/protocol/low_level.rs           | 339 +-------------------------------------
 src/protocol/mod.rs                 |   4 +-
 src/protocol/packet_line.rs         |  73 +--------
 10 files changed, 43 insertions(+), 764 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 005a48a..e8909ef 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -186,9 +186,9 @@ checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"

[[package]]
name = "bytes"
version = "1.1.0"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8"
checksum = "ec8a7b6a70fde80372154c65702f00a0f56f3e1c36abbc6c440484be248856db"

[[package]]
name = "camino"
@@ -539,12 +539,12 @@ dependencies = [
 "cargo-platform",
 "cargo_metadata",
 "clap",
 "flate2",
 "futures",
 "hex",
 "indexmap",
 "indoc",
 "itoa",
 "packfile",
 "parking_lot 0.12.1",
 "parse_link_header",
 "percent-encoding",
@@ -556,7 +556,6 @@ dependencies = [
 "shlex",
 "thrussh",
 "thrussh-keys",
 "time",
 "tokio",
 "tokio-util",
 "toml",
@@ -908,6 +907,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21326818e99cfe6ce1e524c2a805c189a99b5ae555a35d19f9a284b427d86afa"

[[package]]
name = "packfile"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3eeb3adcc3e5e1d0bf59cd1a1bc0a663497c7f89a4dc1632d8568c15da66dbef"
dependencies = [
 "bytes",
 "flate2",
 "hex",
 "indexmap",
 "itoa",
 "sha1",
 "thiserror",
 "time",
 "tokio-util",
 "tracing",
]

[[package]]
name = "parking_lot"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1499,9 +1516,9 @@ dependencies = [

[[package]]
name = "time"
version = "0.3.11"
version = "0.3.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72c91f41dcb2f096c05f0873d667dceec1087ce5bcf984ec8ffb19acddbb3217"
checksum = "d634a985c4d4238ec39cacaed2e7ae552fbd3c476b552c1deac3021b7d7eaf0c"
dependencies = [
 "libc",
 "num_threads",
@@ -1722,9 +1739,9 @@ checksum = "68b90931029ab9b034b300b797048cf23723400aa757e8a2bfb9d748102f9821"

[[package]]
name = "ustr"
version = "0.8.1"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cbd539d8973e229b9d04f15d36e6a8f8d8f85f946b366f06bb001aaed3fa9dd9"
checksum = "371436099f2980de56dc385b615696d3eabbdac9649a72b85f9d75f68474fa9c"
dependencies = [
 "ahash",
 "byteorder",
diff --git a/Cargo.toml b/Cargo.toml
index 6dfbceb..48b08ca 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -16,12 +16,12 @@ bytes = "1.1"
cargo_metadata = "0.15"
cargo-platform = "0.1"
clap = { version = "3.2", features = ["derive", "cargo"] }
flate2 = "1.0"
futures = "0.3"
hex = "0.4"
itoa = "1.0"
indexmap = "1.9"
indoc = "1.0"
packfile = "0.1"
parse_link_header = "0.3"
parking_lot = "0.12"
percent-encoding = "2.1"
@@ -35,11 +35,10 @@ tracing = "0.1"
tracing-subscriber = "0.3"
thrussh = "0.33"
thrussh-keys = "0.21"
time = "0.3"
tokio = { version = "1.17", features = ["full"] }
tokio-util = { version = "0.7", features = ["codec"] }
toml = "0.5"
url = { version = "2.2", features = ["serde"] }
urlencoding = "2.1"
ustr = "0.8"
ustr = "0.9"
uuid = { version = "1.1", features = ["v4"] }
diff --git a/src/git_command_handlers/fetch.rs b/src/git_command_handlers/fetch.rs
index 98ad613..fe72273 100644
--- a/src/git_command_handlers/fetch.rs
+++ b/src/git_command_handlers/fetch.rs
@@ -1,14 +1,12 @@
use bytes::Bytes;
use packfile::{
    low_level::{PackFile, PackFileEntry},
    PktLine,
};
use thrussh::{server::Session, ChannelId};
use tracing::instrument;

use crate::{
    protocol::{
        low_level::{PackFile, PackFileEntry},
        packet_line::PktLine,
    },
    Handler, PackageProvider, UserProvider,
};
use crate::{Handler, PackageProvider, UserProvider};

#[instrument(skip(handle, session, channel, metadata, packfile_entries), err)]
pub fn handle<U: UserProvider + PackageProvider + Send + Sync + 'static>(
diff --git a/src/git_command_handlers/ls_refs.rs b/src/git_command_handlers/ls_refs.rs
index 1ae3521..914bf7e 100644
--- a/src/git_command_handlers/ls_refs.rs
+++ b/src/git_command_handlers/ls_refs.rs
@@ -5,13 +5,11 @@
//! [lsr]: https://git-scm.com/docs/protocol-v2/2.19.0#_ls_refs

use bytes::Bytes;
use packfile::{low_level::HashOutput, PktLine};
use thrussh::{server::Session, ChannelId};
use tracing::instrument;

use crate::{
    protocol::{low_level::HashOutput, packet_line::PktLine},
    Handler, PackageProvider, UserProvider,
};
use crate::{Handler, PackageProvider, UserProvider};

#[instrument(skip(handle, session, channel, _metadata, commit_hash), err)]
pub fn handle<U: UserProvider + PackageProvider + Send + Sync + 'static>(
diff --git a/src/main.rs b/src/main.rs
index cd6cb4f..18c2eca 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -4,19 +4,12 @@
pub mod config;
pub mod git_command_handlers;
pub mod metadata;
pub mod protocol;
pub mod providers;
pub mod util;

use crate::{
    config::Args,
    metadata::{CargoConfig, CargoIndexCrateMetadata},
    protocol::{
        codec::{Encoder, GitCodec},
        high_level::GitRepository,
        low_level::{HashOutput, PackFileEntry},
        packet_line::PktLine,
    },
    providers::{gitlab::Gitlab, PackageProvider, Release, ReleaseName, User, UserProvider},
    util::get_crate_folder,
};
@@ -25,6 +18,12 @@ use bytes::{BufMut, Bytes, BytesMut};
use clap::Parser;
use futures::Future;
use indexmap::IndexMap;
use packfile::{
    codec::{Encoder, GitCodec},
    high_level::GitRepository,
    low_level::{HashOutput, PackFileEntry},
    PktLine,
};
use parking_lot::RwLock;
use std::{
    borrow::Cow, collections::HashMap, fmt::Write, net::SocketAddr, net::SocketAddrV6, pin::Pin,
@@ -179,7 +178,7 @@ impl<U: UserProvider + PackageProvider + Send + Sync + 'static> Handler<U> {
    /// Writes a Git packet line response to the buffer, this should only
    /// be used once the client opens a `shell_request`.
    fn write(&mut self, packet: PktLine<'_>) -> Result<(), anyhow::Error> {
        Encoder.encode(packet, &mut self.output_bytes)
        Ok(Encoder.encode(packet, &mut self.output_bytes)?)
    }

    /// Flushes the buffer out to the client
@@ -295,7 +294,7 @@ impl<U: UserProvider + PackageProvider + Send + Sync + 'static> Handler<U> {
        })?);

        // write config.json to the root of the repo
        packfile.insert(&[], "config.json".into(), config_json)?;
        packfile.insert(&[], "config.json", config_json)?;

        // fetch the releases for every project within the given project
        let releases_by_crate = self.fetch_releases_by_crate().await?;
@@ -326,7 +325,7 @@ impl<U: UserProvider + PackageProvider + Send + Sync + 'static> Handler<U> {
            // insert the crate version metadata into the packfile
            packfile.insert(
                &get_crate_folder(crate_name),
                Arc::clone(crate_name).into(),
                Arc::clone(crate_name),
                buffer.split().freeze(),
            )?;
        }
diff --git a/src/protocol/codec.rs b/src/protocol/codec.rs
deleted file mode 100644
index 50ef1aa..0000000
--- a/src/protocol/codec.rs
+++ /dev/null
@@ -1,138 +0,0 @@
#![allow(clippy::module_name_repetitions)]

use bytes::{Buf, Bytes, BytesMut};
use tokio_util::codec;
use tracing::instrument;

use super::packet_line::PktLine;

pub struct Encoder;

impl codec::Encoder<PktLine<'_>> for Encoder {
    type Error = anyhow::Error;

    fn encode(&mut self, item: PktLine<'_>, dst: &mut BytesMut) -> Result<(), Self::Error> {
        item.encode_to(dst)?;
        Ok(())
    }
}

#[derive(Debug, Default, PartialEq, Eq)]
pub struct GitCommand {
    pub command: Bytes,
    pub metadata: Vec<Bytes>,
}

#[derive(Default)]
pub struct GitCodec {
    command: GitCommand,
}

impl codec::Decoder for GitCodec {
    type Item = GitCommand;
    type Error = anyhow::Error;

    #[instrument(skip(self, src), err)]
    fn decode(&mut self, src: &mut bytes::BytesMut) -> Result<Option<Self::Item>, Self::Error> {
        loop {
            if src.len() < 4 {
                return Ok(None);
            }

            let mut length_bytes = [0_u8; 4];
            length_bytes.copy_from_slice(&src[..4]);
            let length = u16::from_str_radix(std::str::from_utf8(&length_bytes)?, 16)? as usize;

            if length == 0 {
                // flush
                src.advance(4);
                return Ok(Some(std::mem::take(&mut self.command)));
            } else if length == 1 || length == 2 {
                src.advance(4);
                continue;
            } else if !(4..=65520).contains(&length) {
                return Err(
                    std::io::Error::new(std::io::ErrorKind::InvalidData, "protocol abuse").into(),
                );
            }

            // not enough bytes in the buffer yet, ask for more
            if src.len() < length {
                src.reserve(length - src.len());
                return Ok(None);
            }

            // length is inclusive of the 4 bytes that makes up itself
            let mut data = src.split_to(length).freeze();
            data.advance(4);

            // strip newlines for conformity
            if data.ends_with(b"\n") {
                data.truncate(data.len() - 1);
            }

            if self.command.command.is_empty() {
                self.command.command = data;
            } else {
                self.command.metadata.push(data);
            }
        }
    }
}

#[cfg(test)]
mod test {
    use bytes::{Bytes, BytesMut};
    use std::fmt::Write;
    use tokio_util::codec::Decoder;

    #[test]
    fn decode() {
        let mut codec = super::GitCodec::default();

        let mut bytes = BytesMut::new();

        bytes.write_str("0015agent=git/2.32.0").unwrap();
        let res = codec.decode(&mut bytes).unwrap();
        assert_eq!(res, None);

        bytes.write_char('\n').unwrap();
        let res = codec.decode(&mut bytes).unwrap();
        assert_eq!(res, None);

        bytes.write_str("0000").unwrap();
        let res = codec.decode(&mut bytes).unwrap();
        assert_eq!(
            res,
            Some(super::GitCommand {
                command: Bytes::from_static(b"agent=git/2.32.0"),
                metadata: vec![],
            })
        );

        bytes.write_str("0000").unwrap();
        let res = codec.decode(&mut bytes).unwrap();
        assert_eq!(
            res,
            Some(super::GitCommand {
                command: Bytes::new(),
                metadata: vec![],
            })
        );

        bytes.write_str("0002").unwrap();
        bytes.write_str("0005a").unwrap();
        bytes.write_str("0001").unwrap();
        bytes.write_str("0005b").unwrap();
        bytes.write_str("0000").unwrap();

        let res = codec.decode(&mut bytes).unwrap();
        assert_eq!(
            res,
            Some(super::GitCommand {
                command: Bytes::from_static(b"a"),
                metadata: vec![Bytes::from_static(b"b")],
            })
        );
    }
}
diff --git a/src/protocol/high_level.rs b/src/protocol/high_level.rs
deleted file mode 100644
index 13fd764..0000000
--- a/src/protocol/high_level.rs
+++ /dev/null
@@ -1,178 +0,0 @@
//! A high-level interface for building packfiles. Wraps the `low_level` module
//! making a much easier interface for writing files and generating the root
//! commit.
//!
//! The output packfile will only have a single commit in it, which is fine
//! for our purposes because `cargo` will `git pull --force` from our Git
//! server, allowing us to ignore any history the client may have.

use crate::instrument;
use crate::util::ArcOrCowStr;
use bytes::Bytes;
use indexmap::IndexMap;

use super::low_level::{
    Commit, CommitUserInfo, HashOutput, PackFileEntry, TreeItem as LowLevelTreeItem, TreeItemKind,
};

/// The main way of interacting with the high level Packfile builder
///
/// Builds a whole packfile containing files, directories and commits - essentially
/// building out a full Git repository in memory.
#[derive(Default, Debug)]
pub struct GitRepository {
    /// A map containing all the blobs and their corresponding hashes so they're
    /// not inserted more than once for any files in the whole tree with the same
    /// content.
    packfile_entries: IndexMap<HashOutput, PackFileEntry>,
    /// An in-progress `Tree` currently being built out, the tree refers to items
    /// in `file_entries` by hash.
    tree: Tree,
}

impl GitRepository {
    /// Inserts a file into the repository, writing a file to the path
    /// `path/to/my-file` would require a `path` of `["path", "to"]`
    /// and a `file` of `"my-file"`.
    #[instrument(skip(self, file, content), err)]
    pub fn insert(
        &mut self,
        path: &[&'static str],
        file: ArcOrCowStr,
        content: Bytes,
    ) -> Result<(), anyhow::Error> {
        // we'll initialise the directory to the root of the tree, this means
        // if a path isn't specified we'll just write it to the root directory
        let mut directory = &mut self.tree;

        // loops through the parts in the path, recursing through the `directory`
        // `Tree` until we get to our target directory, creating any missing
        // directories along the way.
        for part in path {
            let tree_item = directory
                .0
                .entry((*part).into())
                .or_insert_with(|| Box::new(TreeItem::Tree(Tree::default())));

            if let TreeItem::Tree(d) = tree_item.as_mut() {
                directory = d;
            } else {
                // TODO: how should we handle this? one of items we tried to
                //  recurse into was a file.
                anyhow::bail!("attempted to use a file as a directory");
            }
        }

        // wrap the file in a Blob so it's ready for writing into the packfile, and also
        // allows us to grab the hash of the file for use in the tree
        let entry = PackFileEntry::Blob(content);
        let file_hash = entry.hash()?;

        // todo: what should we do on overwrite?
        directory
            .0
            .insert(file, Box::new(TreeItem::Blob(file_hash)));

        self.packfile_entries.insert(file_hash, entry);

        Ok(())
    }

    /// Finalises this `GitRepository` by writing a commit to the `packfile_entries`,
    /// all the files currently in the `tree`, returning all the packfile entries
    /// and also the commit hash so it can be referred to by `ls-ref`s.
    #[instrument(skip(self, name, email, message), err)]
    pub fn commit(
        mut self,
        name: &'static str,
        email: &'static str,
        message: &'static str,
    ) -> Result<(HashOutput, Vec<PackFileEntry>), anyhow::Error> {
        // gets the hash of the entire tree from the root
        let tree_hash = self
            .tree
            .into_packfile_entries(&mut self.packfile_entries)?;

        // build the commit using the given inputs
        let commit_user = CommitUserInfo {
            name,
            email,
            time: time::OffsetDateTime::now_utc(),
        };

        let commit = PackFileEntry::Commit(Commit {
            tree: tree_hash,
            author: commit_user,
            committer: commit_user,
            message,
        });

        // write the commit out to the packfile_entries
        let commit_hash = commit.hash()?;
        self.packfile_entries.insert(commit_hash, commit);

        Ok((
            commit_hash,
            self.packfile_entries.into_iter().map(|(_, v)| v).collect(),
        ))
    }
}

/// An in-progress tree builder, containing file hashes along with their names or nested trees
#[derive(Default, Debug)]
struct Tree(IndexMap<ArcOrCowStr, Box<TreeItem>>);

impl Tree {
    /// Recursively writes the the whole tree out to the given `pack_file`,
    /// the tree contains pointers to (hashes of) files contained within a
    /// directory, and pointers to other directories.
    #[instrument(skip(self, pack_file), err)]
    fn into_packfile_entries(
        self,
        pack_file: &mut IndexMap<HashOutput, PackFileEntry>,
    ) -> Result<HashOutput, anyhow::Error> {
        let mut tree = Vec::with_capacity(self.0.len());

        for (name, item) in self.0 {
            tree.push(match *item {
                TreeItem::Blob(hash) => LowLevelTreeItem {
                    kind: TreeItemKind::File,
                    sort_name: name.to_string(),
                    name,
                    hash,
                },
                TreeItem::Tree(tree) => LowLevelTreeItem {
                    kind: TreeItemKind::Directory,
                    sort_name: format!("{}/", name),
                    name,
                    // we're essentially working through our tree from the bottom up,
                    // so we can grab the hash of each directory along the way and
                    // reference it from the parent directory
                    hash: tree.into_packfile_entries(pack_file)?,
                },
            });
        }

        // we need to sort our tree alphabetically, otherwise Git will silently
        // stop parsing the rest of the tree once it comes across a non-sorted
        // tree entry.
        tree.sort_unstable_by(|a, b| a.sort_name.cmp(&b.sort_name));

        // gets the hash of the tree we've just worked on, and
        // pushes it to the packfile
        let tree = PackFileEntry::Tree(tree);
        let hash = tree.hash()?;
        pack_file.insert(hash, tree);

        Ok(hash)
    }
}

/// An item within a `Tree`, this could be a file blob or another directory.
#[derive(Debug)]
enum TreeItem {
    /// Refers to a file by hash
    Blob(HashOutput),
    /// Refers to a nested directory
    Tree(Tree),
}
diff --git a/src/protocol/low_level.rs b/src/protocol/low_level.rs
deleted file mode 100644
index 4179faa..0000000
--- a/src/protocol/low_level.rs
+++ /dev/null
@@ -1,339 +0,0 @@
use crate::util::ArcOrCowStr;
use bytes::{BufMut, Bytes, BytesMut};
use flate2::{write::ZlibEncoder, Compression};
use sha1::Digest;
use std::{
    convert::TryInto,
    fmt::{Display, Formatter, Write},
    io::Write as IoWrite,
};
use tracing::instrument;

pub type HashOutput = [u8; 20];

// The packfile itself is a very simple format. There is a header, a
// series of packed objects (each with it's own header and body) and
// then a checksum trailer. The first four bytes is the string 'PACK',
// which is sort of used to make sure you're getting the start of the
// packfile correctly. This is followed by a 4-byte packfile version
// number and then a 4-byte number of entries in that file.
pub struct PackFile<'a> {
    entries: &'a [PackFileEntry],
}

impl<'a> PackFile<'a> {
    #[must_use]
    pub fn new(entries: &'a [PackFileEntry]) -> Self {
        Self { entries }
    }

    #[must_use]
    pub const fn header_size() -> usize {
        "PACK".len() + std::mem::size_of::<u32>() + std::mem::size_of::<u32>()
    }

    #[must_use]
    pub const fn footer_size() -> usize {
        20
    }

    #[instrument(skip(self, original_buf), err)]
    pub fn encode_to(&self, original_buf: &mut BytesMut) -> Result<(), anyhow::Error> {
        let mut buf = original_buf.split_off(original_buf.len());
        buf.reserve(Self::header_size() + Self::footer_size());

        // header
        buf.extend_from_slice(b"PACK"); // magic header
        buf.put_u32(2); // version
        buf.put_u32(self.entries.len().try_into()?); // number of entries in the packfile

        // body
        for entry in self.entries {
            entry.encode_to(&mut buf)?;
        }

        // footer
        buf.extend_from_slice(&sha1::Sha1::digest(&buf[..]));

        original_buf.unsplit(buf);

        Ok(())
    }
}

#[derive(Debug, Clone)]
pub struct Commit {
    pub tree: HashOutput,
    // pub parent: [u8; 20],
    pub author: CommitUserInfo,
    pub committer: CommitUserInfo,
    // pub gpgsig: &str,
    pub message: &'static str,
}

impl Commit {
    #[instrument(skip(self, out), err)]
    fn encode_to(&self, out: &mut BytesMut) -> Result<(), anyhow::Error> {
        let mut tree_hex = [0_u8; 20 * 2];
        hex::encode_to_slice(self.tree, &mut tree_hex)?;

        out.write_str("tree ")?;
        out.extend_from_slice(&tree_hex);
        out.write_char('\n')?;

        writeln!(out, "author {}", self.author)?;
        writeln!(out, "committer {}", self.committer)?;
        write!(out, "\n{}", self.message)?;

        Ok(())
    }

    #[must_use]
    pub fn size(&self) -> usize {
        let mut len = 0;
        len += "tree ".len() + (self.tree.len() * 2) + "\n".len();
        len += "author ".len() + self.author.size() + "\n".len();
        len += "committer ".len() + self.committer.size() + "\n".len();
        len += "\n".len() + self.message.len();
        len
    }
}

#[derive(Clone, Copy, Debug)]
pub struct CommitUserInfo {
    pub name: &'static str,
    pub email: &'static str,
    pub time: time::OffsetDateTime,
}

impl Display for CommitUserInfo {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "{} <{}> {} +0000",
            self.name,
            self.email,
            self.time.unix_timestamp()
        )
    }
}

impl CommitUserInfo {
    #[must_use]
    pub fn size(&self) -> usize {
        let timestamp_len = itoa::Buffer::new().format(self.time.unix_timestamp()).len();

        self.name.len()
            + "< ".len()
            + self.email.len()
            + "> ".len()
            + timestamp_len
            + " +0000".len()
    }
}

#[derive(Debug, Copy, Clone)]
pub enum TreeItemKind {
    File,
    Directory,
}

impl TreeItemKind {
    #[must_use]
    pub const fn mode(&self) -> &'static str {
        match self {
            Self::File => "100644",
            Self::Directory => "40000",
        }
    }
}

#[derive(Debug)]
pub struct TreeItem {
    pub kind: TreeItemKind,
    pub name: ArcOrCowStr,
    pub hash: HashOutput,
    pub sort_name: String,
}

// `[mode] [name]\0[hash]`
impl TreeItem {
    #[instrument(skip(self, out), err)]
    fn encode_to(&self, out: &mut BytesMut) -> Result<(), anyhow::Error> {
        out.write_str(self.kind.mode())?;
        write!(out, " {}\0", self.name)?;
        out.extend_from_slice(&self.hash);
        Ok(())
    }

    #[must_use]
    pub fn size(&self) -> usize {
        self.kind.mode().len() + " ".len() + self.name.len() + "\0".len() + self.hash.len()
    }
}

#[derive(Debug)] // could be copy but Vec<TreeItem<'a>>
pub enum PackFileEntry {
    // jordan@Jordans-MacBook-Pro-2 0d % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - f5/473259d9674ed66239766a013f96a3550374e3 | gzip -dc
    // commit 1068tree 0d586b48bc42e8591773d3d8a7223551c39d453c
    // parent c2a862612a14346ae95234f26efae1ee69b5b7a9
    // author Jordan Doyle <jordan@doyle.la> 1630244577 +0100
    // committer Jordan Doyle <jordan@doyle.la> 1630244577 +0100
    // gpgsig -----BEGIN PGP SIGNATURE-----
    //
    // iQIzBAABCAAdFiEEMn1zof7yzaURQBGDHqa65vZtxJoFAmErjuEACgkQHqa65vZt
    // xJqhvhAAieKXnGRjT926qzozcvarC8D3TlA+Z1wVXueTAWqfusNIP0zCun/crOb2
    // tOULO+/DXVBmwu5eInAf+t/wvlnIsrzJonhVr1ZT0f0vDX6fs2vflWg4UCVEuTsZ
    // tg+aTjcibwnmViIM9XVOzhU8Au2OIqMQLyQOMWSt8NhY0W2WhBCdQvhktvK1V8W6
    // omPs04SrR39xWBDQaxsXYxq/1ZKUYXDwudvEfv14EvrxG1vWumpUVJd7Ib5w4gXX
    // fYa95DxYL720ZaiWPIYEG8FMBzSOpo6lUzY9g2/o/wKwSQZJNvpaMGCuouy8Fb+E
    // UaqC0XPxqpKG9duXPgCldUr+P7++48CF5zc358RBGz5OCNeTREsIQQo5PUO1k+wO
    // FnGOQTT8vvNOrxBgb3QgKu67RVwWDc6JnQCNpUrhUJrXMDWnYLBqo4Y+CdKGSQ4G
    // hW8V/hVTOlJZNi8bbU4v53cxh4nXiMM6NKUblUKs65ar3/2dkojwunz7r7GVZ6mG
    // QUpr9+ybG61XDqd1ad1A/B/i3WdWixTmJS3K/4uXjFjFX1f3RAk7O0gHc9I8HYOE
    // Vd8UsHzLOWAUHeaqbsd6xx3GCXF4D5D++kh9OY9Ov7CXlqbYbHd6Atg+PQ7VnqNf
    // bDqWN0Q2qcKX3k4ggtucmkkA6gP+K3+F5ANQj3AsGMQeddowC0Y=
    // =fXoH
    // -----END PGP SIGNATURE-----
    //
    // test
    Commit(Commit),
    // jordan@Jordans-MacBook-Pro-2 0d % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - 0d/586b48bc42e8591773d3d8a7223551c39d453c | gzip -dc
    // tree 20940000 .cargo���CYy��Ve�������100644 .gitignore�K��_ow�]����4�n�ݺ100644 Cargo.lock�7�3-�?/��
    // kt��c0C�100644 Cargo.toml�6�&(��]\8@�SHA�]f40000 src0QW��ƅ���b[�!�S&N�100644 test�G2Y�gN�b9vj?��Ut�
    Tree(Vec<TreeItem>),
    // jordan@Jordans-MacBook-Pro-2 objects % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - f5/473259d9674ed66239766a013f96a3550374e3| gzip -dc
    // blob 23try and find me in .git
    Blob(Bytes),
    // Tag,
    // OfsDelta,
    // RefDelta,
}

impl PackFileEntry {
    #[instrument(skip(self, buf))]
    fn write_header(&self, buf: &mut BytesMut) {
        let mut size = self.uncompressed_size();

        // write header
        {
            let mut val = 0b1000_0000_u8;

            val |= match self {
                Self::Commit(_) => 0b001,
                Self::Tree(_) => 0b010,
                Self::Blob(_) => 0b011,
                // Self::Tag => 0b100,
                // Self::OfsDelta => 0b110,
                // Self::RefDelta => 0b111,
            } << 4;

            // pack the 4 LSBs of the size into the header
            #[allow(clippy::cast_possible_truncation)] // value is masked
            {
                val |= (size & 0b1111) as u8;
            }
            size >>= 4;

            buf.put_u8(val);
        }

        // write size bytes
        while size != 0 {
            // read 7 LSBs from the `size` and push them off for the next iteration
            #[allow(clippy::cast_possible_truncation)] // value is masked
            let mut val = (size & 0b111_1111) as u8;
            size >>= 7;

            if size != 0 {
                // MSB set to 1 implies there's more size bytes to come, otherwise
                // the data starts after this byte
                val |= 1 << 7;
            }

            buf.put_u8(val);
        }
    }

    #[instrument(skip(self, original_out), err)]
    pub fn encode_to(&self, original_out: &mut BytesMut) -> Result<(), anyhow::Error> {
        self.write_header(original_out); // TODO: this needs space reserving for it

        // todo is there a way to stream through the zlibencoder so we don't have to
        // have this intermediate bytesmut and vec?
        let mut out = BytesMut::new();

        let size = self.uncompressed_size();
        original_out.reserve(size);
        // the data ends up getting compressed but we'll need at least this many bytes
        out.reserve(size);

        match self {
            Self::Commit(commit) => {
                commit.encode_to(&mut out)?;
            }
            Self::Tree(items) => {
                for item in items {
                    item.encode_to(&mut out)?;
                }
            }
            Self::Blob(data) => {
                out.extend_from_slice(data);
            }
        }

        debug_assert_eq!(out.len(), size);

        let mut e = ZlibEncoder::new(Vec::new(), Compression::default());
        e.write_all(&out)?;
        let compressed_data = e.finish()?;

        original_out.extend_from_slice(&compressed_data);

        Ok(())
    }

    #[instrument(skip(self))]
    #[must_use]
    pub fn uncompressed_size(&self) -> usize {
        match self {
            Self::Commit(commit) => commit.size(),
            Self::Tree(items) => items.iter().map(TreeItem::size).sum(),
            Self::Blob(data) => data.len(),
        }
    }

    // wen const generics for RustCrypto? :-(
    #[instrument(skip(self), err)]
    pub fn hash(&self) -> Result<HashOutput, anyhow::Error> {
        let size = self.uncompressed_size();

        let file_prefix = match self {
            Self::Commit(_) => "commit",
            Self::Tree(_) => "tree",
            Self::Blob(_) => "blob",
        };

        let size_len = itoa::Buffer::new().format(size).len();

        let mut out =
            BytesMut::with_capacity(file_prefix.len() + " ".len() + size_len + "\n".len() + size);

        write!(out, "{} {}\0", file_prefix, size)?;
        match self {
            Self::Commit(commit) => {
                commit.encode_to(&mut out)?;
            }
            Self::Tree(items) => {
                for item in items {
                    item.encode_to(&mut out)?;
                }
            }
            Self::Blob(blob) => {
                out.extend_from_slice(blob);
            }
        }

        Ok(sha1::Sha1::digest(&out).into())
    }
}
diff --git a/src/protocol/mod.rs b/src/protocol/mod.rs
deleted file mode 100644
index 5e11618..0000000
--- a/src/protocol/mod.rs
+++ /dev/null
@@ -1,4 +0,0 @@
pub mod codec;
pub mod high_level;
pub mod low_level;
pub mod packet_line;
diff --git a/src/protocol/packet_line.rs b/src/protocol/packet_line.rs
deleted file mode 100644
index 46dd236..0000000
--- a/src/protocol/packet_line.rs
+++ /dev/null
@@ -1,73 +0,0 @@
use bytes::{BufMut, BytesMut};
use std::fmt::Write;
use tracing::instrument;

use super::low_level::PackFile;

/// Every packet sent to the client from us should be a `PktLine`.
pub enum PktLine<'a> {
    Data(&'a [u8]),
    /// Similar to a data packet, but used during packfile sending to indicate this
    /// packet is a block of data by appending a byte containing the u8 `1`.
    SidebandData(PackFile<'a>),
    /// Similar to a data packet, but used during packfile sending to indicate this
    /// packet is a status message by appending a byte containing the u8 `2`.
    SidebandMsg(&'a [u8]),
    Flush,
    Delimiter,
    ResponseEnd,
}

impl PktLine<'_> {
    #[instrument(skip(self, buf), err)]
    pub fn encode_to(&self, buf: &mut BytesMut) -> Result<(), anyhow::Error> {
        match self {
            Self::Data(data) => {
                write!(buf, "{:04x}", data.len() + 4)?;
                buf.extend_from_slice(data);
            }
            Self::SidebandData(packfile) => {
                // split the buf off so the cost of counting the bytes to put in the
                // data line prefix is just the cost of `unsplit` (an atomic decrement)
                let mut data_buf = buf.split_off(buf.len());

                data_buf.put_u8(1); // sideband, 1 = data
                packfile.encode_to(&mut data_buf)?;

                // write into the buf not the data buf so it's at the start of the msg
                write!(buf, "{:04x}", data_buf.len() + 4)?;
                buf.unsplit(data_buf);
            }
            Self::SidebandMsg(msg) => {
                write!(buf, "{:04x}", msg.len() + 4 + 1)?;
                buf.put_u8(2); // sideband, 2 = msg
                buf.extend_from_slice(msg);
            }
            Self::Flush => buf.extend_from_slice(b"0000"),
            Self::Delimiter => buf.extend_from_slice(b"0001"),
            Self::ResponseEnd => buf.extend_from_slice(b"0002"),
        }

        Ok(())
    }
}

impl<'a> From<&'a str> for PktLine<'a> {
    fn from(val: &'a str) -> Self {
        PktLine::Data(val.as_bytes())
    }
}

#[cfg(test)]
mod test {
    use bytes::BytesMut;

    #[test]
    fn test_pkt_line() {
        let mut buffer = BytesMut::new();
        super::PktLine::Data(b"agent=git/2.32.0\n")
            .encode_to(&mut buffer)
            .unwrap();
        assert_eq!(buffer.as_ref(), b"0015agent=git/2.32.0\n");
    }
}