🏡 index : ~doyle/packfile.git

author Jordan Doyle <jordan@doyle.la> 2022-10-22 13:43:31.0 +00:00:00
committer Jordan Doyle <jordan@doyle.la> 2022-10-22 13:45:26.0 +00:00:00
commit
85a1dd96c94028d9ddcb9d129c97b55bed8f79f9 [patch]
tree
c3221335fc20cc2abba1fd664a64b56c8a03586d
download
85a1dd96c94028d9ddcb9d129c97b55bed8f79f9.tar.gz

Initial commit



Diff

 .github/dependabot.yml           |  12 +-
 .github/workflows/audit.yml      |  16 ++-
 .github/workflows/audit_cron.yml |  14 ++-
 .github/workflows/ci.yml         |  62 +++++++-
 .gitignore                       |   3 +-
 Cargo.toml                       |  27 +++-
 LICENSE                          |  13 +-
 src/codec.rs                     | 151 +++++++++++++++++-
 src/error.rs                     |  28 +++-
 src/high_level.rs                | 180 +++++++++++++++++++++-
 src/lib.rs                       |  30 +++-
 src/low_level.rs                 | 351 ++++++++++++++++++++++++++++++++++++++++-
 src/packet_line.rs               |  78 +++++++++-
 src/util.rs                      |  59 +++++++-
 14 files changed, 1024 insertions(+)

diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 0000000..0dad033
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,12 @@
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates

# Version of the Dependabot configuration syntax used by this file.
version: 2

updates:
  # Check the crate's Cargo dependencies, using the manifest in the repository
  # root, once a month.
  - package-ecosystem: "cargo"
    directory: "/"
    schedule:
      interval: "monthly"
diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml
new file mode 100644
index 0000000..f7302be
--- /dev/null
+++ b/.github/workflows/audit.yml
@@ -0,0 +1,16 @@
name: Security audit

# Re-run the audit whenever a dependency manifest or lockfile changes.
on:
  push:
    paths:
      - '**/Cargo.toml'
      - '**/Cargo.lock'

jobs:
  security_audit:
    runs-on: ubuntu-latest
    steps:
      # Use the same checkout action version as the other workflows in this
      # repository.
      - uses: actions/checkout@v2
      - uses: actions-rs/audit-check@v1
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/audit_cron.yml b/.github/workflows/audit_cron.yml
new file mode 100644
index 0000000..56de683
--- /dev/null
+++ b/.github/workflows/audit_cron.yml
@@ -0,0 +1,14 @@
name: Security audit (cron)

# Scheduled counterpart of the push-triggered audit: the cron expression
# below fires once a day at 00:00.
on:
  schedule:
    - cron: '0 0 * * *'

jobs:
  audit:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      # The audit action is handed the workflow token via its `token` input.
      - uses: actions-rs/audit-check@v1
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..ead4831
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,62 @@
# Runs the standard Rust checks on every push and pull request.
on: [push, pull_request]

name: CI

jobs:
  # `cargo check`: fast type-check of the crate.
  check:
    name: Check
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: actions-rs/toolchain@v1
        with:
          profile: minimal
          toolchain: stable
          override: true
      - uses: actions-rs/cargo@v1
        with:
          command: check

  # `cargo test`: unit tests and doctests.
  test:
    name: Test Suite
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: actions-rs/toolchain@v1
        with:
          profile: minimal
          toolchain: stable
          override: true
      - uses: actions-rs/cargo@v1
        with:
          command: test

  # `cargo fmt --check`: fails when the sources are not rustfmt-clean.
  fmt:
    name: Rustfmt
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: actions-rs/toolchain@v1
        with:
          profile: minimal
          toolchain: stable
          override: true
      - run: rustup component add rustfmt
      - uses: actions-rs/cargo@v1
        with:
          command: fmt
          # Without `--check`, rustfmt rewrites the files in the CI checkout and
          # exits successfully, so formatting drift could never fail this job.
          args: --all -- --check

  # `cargo clippy`: lints (the crate itself denies `clippy::pedantic`).
  clippy:
    name: Clippy
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: actions-rs/toolchain@v1
        with:
          profile: minimal
          toolchain: stable
          override: true
      - run: rustup component add clippy
      - uses: actions-rs/cargo@v1
        with:
          command: clippy
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..91b8835
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
# Cargo build output directory
/target
# The lockfile is not tracked for this crate
/Cargo.lock
# Editor/IDE project metadata
.idea/
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..854af54
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,27 @@
[package]
name = "packfile"
authors = ["Jordan Doyle <jordan@doyle.la>"]
description = "A simple library providing utilities to generate Git Packfiles in memory and send them to clients"
version = "0.1.0"
edition = "2021"
license = "WTFPL"
keywords = ["git", "packfile", "in-memory", "protocol"]
categories = ["development-tools"]
# CI configuration is not part of the published package
exclude = ["/.github"]

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
bytes = "1.2"
flate2 = "1.0"
hex = "0.4"
indexmap = "1.9"
itoa = "1.0"
sha1 = "0.10"
thiserror = "1.0"
time = "0.3.15"
# Optional: only needed for the `codec` module, which `src/lib.rs` gates
# behind `#[cfg(feature = "tokio-util")]`.
tokio-util = { version = "0.7", features = ["codec"], optional = true }
tracing = "0.1"

[features]
# The codec is enabled unless the consumer opts out of default features.
default = ["tokio-util"]
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..8b1a9d8
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,13 @@
           DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
                   Version 2, December 2004

Copyright (C) 2004 Sam Hocevar <sam@hocevar.net>

Everyone is permitted to copy and distribute verbatim or modified
copies of this license document, and changing it is allowed as long
as the name is changed.

           DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
  TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION

 0. You just DO WHAT THE FUCK YOU WANT TO.
diff --git a/src/codec.rs b/src/codec.rs
new file mode 100644
index 0000000..f0dd2ed
--- /dev/null
+++ b/src/codec.rs
@@ -0,0 +1,151 @@
//! A [`tokio_util::codec`] implementation for the [Git wire format].
//!
//! [Git wire format]: https://git-scm.com/docs/protocol-v2

#![allow(clippy::module_name_repetitions)]

use std::ops::RangeInclusive;

use bytes::{Buf, Bytes, BytesMut};
use tokio_util::codec;
use tracing::instrument;

use crate::{packet_line::PktLine, Error};

const ALLOWED_PACKET_LENGTH: RangeInclusive<usize> = 4..=65520;

/// Stateless [`tokio_util::codec::Encoder`] that serialises outgoing
/// [`PktLine`]s onto the wire.
pub struct Encoder;

impl codec::Encoder<PktLine<'_>> for Encoder {
    type Error = Error;

    /// Appends the wire representation of `item` to `dst`.
    ///
    /// # Errors
    ///
    /// Forwards whatever error [`PktLine::encode_to`] reports.
    fn encode(&mut self, item: PktLine<'_>, dst: &mut BytesMut) -> Result<(), Self::Error> {
        item.encode_to(dst)
    }
}

/// One request decoded by [`GitCodec`]: all the data packets that were
/// received before a flush packet.
#[derive(Debug, Default, PartialEq, Eq)]
pub struct GitCommand {
    /// Payload of the first data packet stored while this field was still
    /// empty, with a single trailing newline removed.
    pub command: Bytes,
    /// Payloads of every data packet received after `command` was set, in
    /// arrival order, each with a single trailing newline removed.
    pub metadata: Vec<Bytes>,
}

/// A [`tokio_util::codec::Decoder`] that groups incoming packet lines into
/// [`GitCommand`]s.
#[derive(Default)]
pub struct GitCodec {
    /// The command being accumulated; it is handed out (and reset to its
    /// default) once a flush packet is decoded.
    command: GitCommand,
}

impl codec::Decoder for GitCodec {
    type Item = GitCommand;
    type Error = Error;

    #[instrument(skip(self, src), err)]
    fn decode(&mut self, src: &mut bytes::BytesMut) -> Result<Option<Self::Item>, Self::Error> {
        loop {
            if src.len() < 4 {
                return Ok(None);
            }

            let mut length_bytes = [0_u8; 4];
            length_bytes.copy_from_slice(&src[..4]);
            let length = u16::from_str_radix(
                std::str::from_utf8(&length_bytes).map_err(Error::ParseLengthBytes)?,
                16,
            )
            .map_err(Error::ParseLengthAsHex)? as usize;

            if length == 0 {
                // flush
                src.advance(4);
                return Ok(Some(std::mem::take(&mut self.command)));
            } else if length == 1 || length == 2 {
                src.advance(4);
                continue;
            } else if !ALLOWED_PACKET_LENGTH.contains(&length) {
                return Err(Error::PacketLengthExceedsSpec(
                    ALLOWED_PACKET_LENGTH,
                    length,
                ));
            }

            // not enough bytes in the buffer yet, ask for more
            if src.len() < length {
                src.reserve(length - src.len());
                return Ok(None);
            }

            // length is inclusive of the 4 bytes that makes up itself
            let mut data = src.split_to(length).freeze();
            data.advance(4);

            // strip newlines for conformity
            if data.ends_with(b"\n") {
                data.truncate(data.len() - 1);
            }

            if self.command.command.is_empty() {
                self.command.command = data;
            } else {
                self.command.metadata.push(data);
            }
        }
    }
}

#[cfg(test)]
mod test {
    use bytes::{Bytes, BytesMut};
    use std::fmt::Write;
    use tokio_util::codec::Decoder;

    /// Feeds the decoder a byte stream piece by piece and checks what it
    /// yields after each step.
    #[test]
    fn decode() {
        let mut codec = super::GitCodec::default();

        let mut bytes = BytesMut::new();

        // the prefix announces 0x15 bytes but only 0x14 have arrived so far
        bytes.write_str("0015agent=git/2.32.0").unwrap();
        let res = codec.decode(&mut bytes).unwrap();
        assert_eq!(res, None);

        // the packet is now complete, but no flush has been seen yet
        bytes.write_char('\n').unwrap();
        let res = codec.decode(&mut bytes).unwrap();
        assert_eq!(res, None);

        // the flush releases the accumulated command, newline stripped
        bytes.write_str("0000").unwrap();
        let res = codec.decode(&mut bytes).unwrap();
        assert_eq!(
            res,
            Some(super::GitCommand {
                command: Bytes::from_static(b"agent=git/2.32.0"),
                metadata: vec![],
            })
        );

        // a flush with nothing accumulated yields an empty command
        bytes.write_str("0000").unwrap();
        let res = codec.decode(&mut bytes).unwrap();
        assert_eq!(
            res,
            Some(super::GitCommand {
                command: Bytes::new(),
                metadata: vec![],
            })
        );

        // `0001`/`0002` packets are skipped; the second data packet lands in
        // `metadata`
        bytes.write_str("0002").unwrap();
        bytes.write_str("0005a").unwrap();
        bytes.write_str("0001").unwrap();
        bytes.write_str("0005b").unwrap();
        bytes.write_str("0000").unwrap();

        let res = codec.decode(&mut bytes).unwrap();
        assert_eq!(
            res,
            Some(super::GitCommand {
                command: Bytes::from_static(b"a"),
                metadata: vec![Bytes::from_static(b"b")],
            })
        );
    }
}
diff --git a/src/error.rs b/src/error.rs
new file mode 100644
index 0000000..10cf135
--- /dev/null
+++ b/src/error.rs
@@ -0,0 +1,28 @@
use std::ops::RangeInclusive;
use thiserror::Error;

/// Every error this crate can return.
#[derive(Error, Debug)]
pub enum Error {
    /// A formatted write into an output buffer failed.
    #[error("Failed to write formatted string to buffer: {0}")]
    BufferWrite(#[from] std::fmt::Error),
    /// A path component given to the high-level builder names an existing
    /// entry that is not a directory; carries that component.
    #[error("{0} is not a directory")]
    NotDirectory(&'static str),
    /// The 4-byte packet length prefix was not valid UTF-8.
    #[cfg(feature = "tokio-util")]
    #[error("Failed to parse utf-8 encoded prefix: {0}")]
    ParseLengthBytes(std::str::Utf8Error),
    /// The 4-byte packet length prefix was not valid hexadecimal.
    #[cfg(feature = "tokio-util")]
    #[error("Failed to parse length from hex string: {0}")]
    ParseLengthAsHex(std::num::ParseIntError),
    /// Feeding an object's bytes to the zlib encoder failed.
    #[error("Failed to write bytes to compress to zlib: {0}")]
    CompressWrite(std::io::Error),
    /// Finishing the zlib stream failed.
    #[error("Failed to compress packfile with zlib: {0}")]
    Compress(std::io::Error),
    /// Hex-encoding a commit's tree hash failed.
    #[error("Failed to encode tree hash to hex: {0}")]
    EncodeTreeHash(hex::FromHexError),
    /// The number of packfile entries does not fit into a `u32`.
    #[error("Entries in packfile exceeds a u32: {0}")]
    EntriesExceedsU32(std::num::TryFromIntError),
    /// A packet length fell outside the allowed range; carries the allowed
    /// range and the offending length.
    #[error("Packet length is not in the range {0:?} as defined by the spec, got {1}")]
    PacketLengthExceedsSpec(RangeInclusive<usize>, usize),
    /// A generic I/O error.
    #[error("I/O error: {0}")]
    Io(#[from] std::io::Error),
}
diff --git a/src/high_level.rs b/src/high_level.rs
new file mode 100644
index 0000000..b06a1e6
--- /dev/null
+++ b/src/high_level.rs
@@ -0,0 +1,180 @@
//! A high-level interface for building packfiles. Wraps the `low_level` module
//! making a much easier interface for writing files and generating the root
//! commit.
//!
//! The output packfile will only have a single commit in it, which is fine
//! for our purposes because `cargo` will `git pull --force` from our Git
//! server, allowing us to ignore any history the client may have.

use bytes::Bytes;
use indexmap::IndexMap;
use tracing::instrument;

use crate::{
    low_level::{
        Commit, CommitUserInfo, HashOutput, PackFileEntry, TreeItem as LowLevelTreeItem,
        TreeItemKind,
    },
    util::ArcOrCowStr,
    Error,
};

/// The main way of interacting with the high level Packfile builder
///
/// Builds a whole packfile containing files, directories and commits - essentially
/// building out a full Git repository in memory.
#[derive(Default, Debug)]
pub struct GitRepository {
    /// A map containing all the packfile entries keyed by their hash, so that
    /// blobs are not inserted more than once for any files in the whole tree
    /// with the same content.
    packfile_entries: IndexMap<HashOutput, PackFileEntry>,
    /// An in-progress `Tree` currently being built out, the tree refers to items
    /// in `packfile_entries` by hash.
    tree: Tree,
}

impl GitRepository {
    /// Inserts a file into the repository, writing a file to the path
    /// `path/to/my-file` would require a `path` of `["path", "to"]`
    /// and a `file` of `"my-file"`.
    #[instrument(skip(self, file, content), err)]
    pub fn insert(
        &mut self,
        path: &[&'static str],
        file: impl Into<ArcOrCowStr>,
        content: Bytes,
    ) -> Result<(), Error> {
        // we'll initialise the directory to the root of the tree, this means
        // if a path isn't specified we'll just write it to the root directory
        let mut directory = &mut self.tree;

        // loops through the parts in the path, recursing through the `directory`
        // `Tree` until we get to our target directory, creating any missing
        // directories along the way.
        for part in path {
            let tree_item = directory
                .0
                .entry((*part).into())
                .or_insert_with(|| Box::new(TreeItem::Tree(Tree::default())));

            if let TreeItem::Tree(d) = tree_item.as_mut() {
                directory = d;
            } else {
                return Err(Error::NotDirectory(part));
            }
        }

        // wrap the file in a Blob so it's ready for writing into the packfile, and also
        // allows us to grab the hash of the file for use in the tree
        let entry = PackFileEntry::Blob(content);
        let file_hash = entry.hash()?;

        // todo: what should we do on overwrite?
        directory
            .0
            .insert(file.into(), Box::new(TreeItem::Blob(file_hash)));

        self.packfile_entries.insert(file_hash, entry);

        Ok(())
    }

    /// Consumes the repository, turning the in-progress tree into tree
    /// entries and adding a single commit that points at the root tree.
    ///
    /// Returns the hash of that commit (so it can be referred to by
    /// `ls-ref`s) together with every packfile entry, in insertion order.
    /// `name` and `email` are used for both author and committer, stamped
    /// with the current UTC time.
    ///
    /// # Errors
    ///
    /// Forwards any error raised while hashing the trees or the commit.
    #[instrument(skip(self, name, email, message), err)]
    pub fn commit(
        mut self,
        name: &'static str,
        email: &'static str,
        message: &'static str,
    ) -> Result<(HashOutput, Vec<PackFileEntry>), Error> {
        // flatten the directory structure first, the commit needs the hash of
        // the root tree
        let tree = self
            .tree
            .into_packfile_entries(&mut self.packfile_entries)?;

        // the same identity and timestamp is used for author and committer
        let signature = CommitUserInfo {
            name,
            email,
            time: time::OffsetDateTime::now_utc(),
        };

        let commit = PackFileEntry::Commit(Commit {
            tree,
            author: signature,
            committer: signature,
            message,
        });
        let commit_hash = commit.hash()?;
        self.packfile_entries.insert(commit_hash, commit);

        // callers only need the entries themselves, not the hashes they were
        // deduplicated by
        let entries: Vec<PackFileEntry> = self.packfile_entries.into_values().collect();

        Ok((commit_hash, entries))
    }
}

/// An in-progress tree builder (one directory), mapping each entry's name to
/// either a file hash or a nested tree.
#[derive(Default, Debug)]
struct Tree(IndexMap<ArcOrCowStr, Box<TreeItem>>);

impl Tree {
    /// Recursively writes the the whole tree out to the given `pack_file`,
    /// the tree contains pointers to (hashes of) files contained within a
    /// directory, and pointers to other directories.
    #[instrument(skip(self, pack_file), err)]
    fn into_packfile_entries(
        self,
        pack_file: &mut IndexMap<HashOutput, PackFileEntry>,
    ) -> Result<HashOutput, Error> {
        let mut tree = Vec::with_capacity(self.0.len());

        for (name, item) in self.0 {
            tree.push(match *item {
                TreeItem::Blob(hash) => LowLevelTreeItem {
                    kind: TreeItemKind::File,
                    sort_name: name.to_string(),
                    name,
                    hash,
                },
                TreeItem::Tree(tree) => LowLevelTreeItem {
                    kind: TreeItemKind::Directory,
                    sort_name: format!("{}/", name),
                    name,
                    // we're essentially working through our tree from the bottom up,
                    // so we can grab the hash of each directory along the way and
                    // reference it from the parent directory
                    hash: tree.into_packfile_entries(pack_file)?,
                },
            });
        }

        // we need to sort our tree alphabetically, otherwise Git will silently
        // stop parsing the rest of the tree once it comes across a non-sorted
        // tree entry.
        tree.sort_unstable_by(|a, b| a.sort_name.cmp(&b.sort_name));

        // gets the hash of the tree we've just worked on, and
        // pushes it to the packfile
        let tree = PackFileEntry::Tree(tree);
        let hash = tree.hash()?;
        pack_file.insert(hash, tree);

        Ok(hash)
    }
}

/// An item within a `Tree`, this could be a file blob or another directory.
#[derive(Debug)]
enum TreeItem {
    /// Refers to a file by the hash of its blob entry
    Blob(HashOutput),
    /// Refers to a nested directory, which is still being built and has no
    /// hash yet
    Tree(Tree),
}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..3ec4e32
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,30 @@
#![deny(clippy::pedantic)]
//! `packfile` is a simple library providing utilities to generate [Git Packfiles] in memory.
//!
//! Usage:
//!
//! ```rust
//! # use packfile::{high_level::GitRepository, low_level::PackFile};
//! #
//! let mut repo = GitRepository::default();
//! repo.insert(&["path", "to"], "file.txt", "hello world!".into()).unwrap();
//! let (_commit_hash, entries) =
//!     repo.commit("Linus Torvalds", "torvalds@example.com", "Some commit message").unwrap();
//!
//! let _packfile = PackFile::new(&entries);
//! // ... packfile can then be encoded within a [`SidebandData`] to send the data to a client
//! ```
//!
//! [Git Packfiles]: https://git-scm.com/book/en/v2/Git-Internals-Packfiles
//! [`SidebandData`]: crate::PktLine::SidebandData

#[cfg(feature = "tokio-util")]
pub mod codec;
mod error;
pub mod high_level;
pub mod low_level;
mod packet_line;
mod util;

pub use error::Error;
pub use packet_line::PktLine;
diff --git a/src/low_level.rs b/src/low_level.rs
new file mode 100644
index 0000000..f9ca68d
--- /dev/null
+++ b/src/low_level.rs
@@ -0,0 +1,351 @@
//! A low-level Git packfile builder.
//!
//! This implementation requires the caller to push directories to the packfile manually, in the
//! order that Git expects.

use std::{
    convert::TryInto,
    fmt::{Display, Formatter, Write},
    io::Write as IoWrite,
};

use bytes::{BufMut, Bytes, BytesMut};
use flate2::{write::ZlibEncoder, Compression};
use sha1::Digest;
use tracing::instrument;

use crate::{util::ArcOrCowStr, Error};

pub type HashOutput = [u8; 20];

/// The packfile itself is a very simple format. There is a header, a
/// series of packed objects (each with its own header and body) and
/// then a checksum trailer. The first four bytes is the string 'PACK',
/// which is sort of used to make sure you're getting the start of the
/// packfile correctly. This is followed by a 4-byte packfile version
/// number and then a 4-byte number of entries in that file.
pub struct PackFile<'a> {
    /// The objects to write into the pack, in the order given.
    entries: &'a [PackFileEntry],
}

impl<'a> PackFile<'a> {
    #[must_use]
    pub fn new(entries: &'a [PackFileEntry]) -> Self {
        Self { entries }
    }

    #[must_use]
    pub const fn header_size() -> usize {
        "PACK".len() + std::mem::size_of::<u32>() + std::mem::size_of::<u32>()
    }

    #[must_use]
    pub const fn footer_size() -> usize {
        20
    }

    #[instrument(skip(self, original_buf), err)]
    pub fn encode_to(&self, original_buf: &mut BytesMut) -> Result<(), Error> {
        let mut buf = original_buf.split_off(original_buf.len());
        buf.reserve(Self::header_size() + Self::footer_size());

        // header
        buf.extend_from_slice(b"PACK"); // magic header
        buf.put_u32(2); // version
        buf.put_u32(
            self.entries
                .len()
                .try_into()
                .map_err(Error::EntriesExceedsU32)?,
        ); // number of entries in the packfile

        // body
        for entry in self.entries {
            entry.encode_to(&mut buf)?;
        }

        // footer
        buf.extend_from_slice(&sha1::Sha1::digest(&buf[..]));

        original_buf.unsplit(buf);

        Ok(())
    }
}

/// The contents of a commit object. Parents and signatures are not
/// supported (see the commented-out fields).
#[derive(Debug, Clone)]
pub struct Commit {
    /// Hash of the root tree this commit points at.
    pub tree: HashOutput,
    // pub parent: [u8; 20],
    /// Identity and timestamp written on the `author` line.
    pub author: CommitUserInfo,
    /// Identity and timestamp written on the `committer` line.
    pub committer: CommitUserInfo,
    // pub gpgsig: &str,
    /// The commit message, written verbatim after a blank line.
    pub message: &'static str,
}

impl Commit {
    #[instrument(skip(self, out), err)]
    fn encode_to(&self, out: &mut BytesMut) -> Result<(), Error> {
        let mut tree_hex = [0_u8; 20 * 2];
        hex::encode_to_slice(self.tree, &mut tree_hex).map_err(Error::EncodeTreeHash)?;

        out.write_str("tree ")?;
        out.extend_from_slice(&tree_hex);
        out.write_char('\n')?;

        writeln!(out, "author {}", self.author)?;
        writeln!(out, "committer {}", self.committer)?;
        write!(out, "\n{}", self.message)?;

        Ok(())
    }

    #[must_use]
    pub fn size(&self) -> usize {
        let mut len = 0;
        len += "tree ".len() + (self.tree.len() * 2) + "\n".len();
        len += "author ".len() + self.author.size() + "\n".len();
        len += "committer ".len() + self.committer.size() + "\n".len();
        len += "\n".len() + self.message.len();
        len
    }
}

/// The identity and timestamp attached to a commit as its author or
/// committer.
#[derive(Clone, Copy, Debug)]
pub struct CommitUserInfo {
    /// Display name of the person.
    pub name: &'static str,
    /// Email address, written between angle brackets.
    pub email: &'static str,
    /// Moment of the action; only its Unix timestamp is written out.
    pub time: time::OffsetDateTime,
}

impl Display for CommitUserInfo {
    /// Formats as `name <email> unix-timestamp +0000`; the zone offset is
    /// always written as `+0000`.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self { name, email, time } = self;

        write!(f, "{} <{}> {} +0000", name, email, time.unix_timestamp())
    }
}

impl CommitUserInfo {
    /// Number of bytes the `Display` output of this value takes up, computed
    /// without building the string.
    #[must_use]
    pub fn size(&self) -> usize {
        let mut digits = itoa::Buffer::new();
        let timestamp = digits.format(self.time.unix_timestamp());

        // mirrors the pieces of `{name} <{email}> {timestamp} +0000`
        [
            self.name.len(),
            " <".len(),
            self.email.len(),
            "> ".len(),
            timestamp.len(),
            " +0000".len(),
        ]
        .iter()
        .sum()
    }
}

/// The kinds of entry a tree can contain.
#[derive(Debug, Copy, Clone)]
pub enum TreeItemKind {
    /// A file entry, written with mode `100644`.
    File,
    /// A nested tree, written with mode `40000`.
    Directory,
}

impl TreeItemKind {
    /// The mode string written in front of an entry of this kind.
    #[must_use]
    pub const fn mode(&self) -> &'static str {
        match *self {
            Self::Directory => "40000",
            Self::File => "100644",
        }
    }
}

/// A single entry of a tree object.
#[derive(Debug)]
pub struct TreeItem {
    /// Whether the entry is a file or a directory; decides the mode written.
    pub kind: TreeItemKind,
    /// Name of the entry as it is written into the tree.
    pub name: ArcOrCowStr,
    /// Hash of the blob or tree this entry points at.
    pub hash: HashOutput,
    /// Key the high-level builder orders a tree's entries by; it sets this
    /// to the name, with a trailing `/` appended for directories.
    pub sort_name: String,
}

// `[mode] [name]\0[hash]`
impl TreeItem {
    /// Appends this entry to `out` as the mode, a space, the name, a NUL byte
    /// and finally the raw (not hex encoded) hash.
    #[instrument(skip(self, out), err)]
    fn encode_to(&self, out: &mut BytesMut) -> Result<(), Error> {
        write!(out, "{} {}\0", self.kind.mode(), self.name)?;
        out.extend_from_slice(&self.hash);

        Ok(())
    }

    /// Number of bytes `encode_to` writes for this entry.
    #[must_use]
    pub fn size(&self) -> usize {
        let Self {
            kind, name, hash, ..
        } = self;

        kind.mode().len() + " ".len() + name.len() + "\0".len() + hash.len()
    }
}

/// A single object stored in a packfile. The plain comments above each
/// variant show a real object of that kind, decompressed from a repository's
/// object store.
#[derive(Debug)] // could be copy but Vec<TreeItem<'a>>
pub enum PackFileEntry {
    // jordan@Jordans-MacBook-Pro-2 0d % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - f5/473259d9674ed66239766a013f96a3550374e3 | gzip -dc
    // commit 1068tree 0d586b48bc42e8591773d3d8a7223551c39d453c
    // parent c2a862612a14346ae95234f26efae1ee69b5b7a9
    // author Jordan Doyle <jordan@doyle.la> 1630244577 +0100
    // committer Jordan Doyle <jordan@doyle.la> 1630244577 +0100
    // gpgsig -----BEGIN PGP SIGNATURE-----
    //
    // iQIzBAABCAAdFiEEMn1zof7yzaURQBGDHqa65vZtxJoFAmErjuEACgkQHqa65vZt
    // xJqhvhAAieKXnGRjT926qzozcvarC8D3TlA+Z1wVXueTAWqfusNIP0zCun/crOb2
    // tOULO+/DXVBmwu5eInAf+t/wvlnIsrzJonhVr1ZT0f0vDX6fs2vflWg4UCVEuTsZ
    // tg+aTjcibwnmViIM9XVOzhU8Au2OIqMQLyQOMWSt8NhY0W2WhBCdQvhktvK1V8W6
    // omPs04SrR39xWBDQaxsXYxq/1ZKUYXDwudvEfv14EvrxG1vWumpUVJd7Ib5w4gXX
    // fYa95DxYL720ZaiWPIYEG8FMBzSOpo6lUzY9g2/o/wKwSQZJNvpaMGCuouy8Fb+E
    // UaqC0XPxqpKG9duXPgCldUr+P7++48CF5zc358RBGz5OCNeTREsIQQo5PUO1k+wO
    // FnGOQTT8vvNOrxBgb3QgKu67RVwWDc6JnQCNpUrhUJrXMDWnYLBqo4Y+CdKGSQ4G
    // hW8V/hVTOlJZNi8bbU4v53cxh4nXiMM6NKUblUKs65ar3/2dkojwunz7r7GVZ6mG
    // QUpr9+ybG61XDqd1ad1A/B/i3WdWixTmJS3K/4uXjFjFX1f3RAk7O0gHc9I8HYOE
    // Vd8UsHzLOWAUHeaqbsd6xx3GCXF4D5D++kh9OY9Ov7CXlqbYbHd6Atg+PQ7VnqNf
    // bDqWN0Q2qcKX3k4ggtucmkkA6gP+K3+F5ANQj3AsGMQeddowC0Y=
    // =fXoH
    // -----END PGP SIGNATURE-----
    //
    // test
    /// A commit object.
    Commit(Commit),
    // jordan@Jordans-MacBook-Pro-2 0d % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - 0d/586b48bc42e8591773d3d8a7223551c39d453c | gzip -dc
    // tree 20940000 .cargo���CYy��Ve�������100644 .gitignore�K��_ow�]����4�n�ݺ100644 Cargo.lock�7�3-�?/��
    // kt��c0C�100644 Cargo.toml�6�&(��]\8@�SHA�]f40000 src0QW��ƅ���b[�!�S&N�100644 test�G2Y�gN�b9vj?��Ut�
    /// A tree (directory listing) made up of its entries.
    Tree(Vec<TreeItem>),
    // jordan@Jordans-MacBook-Pro-2 objects % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - f5/473259d9674ed66239766a013f96a3550374e3| gzip -dc
    // blob 23try and find me in .git
    /// A blob holding a file's raw content.
    Blob(Bytes),
    // Tag,
    // OfsDelta,
    // RefDelta,
}

impl PackFileEntry {
    /// Writes the variable-length object header that precedes every entry's
    /// compressed data.
    ///
    /// The first byte holds a continuation flag in its most significant bit,
    /// the 3-bit object type and the 4 least significant bits of the
    /// uncompressed size. As long as size bits remain, further bytes follow,
    /// each carrying a continuation flag and the next 7 bits of the size.
    ///
    /// The continuation flag is only set on a byte when another size byte
    /// really follows it. Setting it unconditionally on the first byte makes a
    /// reader treat the first byte of the compressed data as part of the size
    /// for any object smaller than 16 bytes.
    #[instrument(skip(self, buf))]
    fn write_header(&self, buf: &mut BytesMut) {
        let size = self.uncompressed_size();

        let kind: u8 = match self {
            Self::Commit(_) => 0b001,
            Self::Tree(_) => 0b010,
            Self::Blob(_) => 0b011,
            // Self::Tag => 0b100,
            // Self::OfsDelta => 0b110,
            // Self::RefDelta => 0b111,
        };

        // first byte: object type in bits 4-6 and the 4 LSBs of the size
        #[allow(clippy::cast_possible_truncation)] // value is masked
        let mut pending = (kind << 4) | (size & 0b1111) as u8;
        let mut remaining = size >> 4;

        // `pending` is always the byte that has been built but not written yet,
        // it can only be flagged as "more to come" once we know more size bits
        // are left
        while remaining != 0 {
            buf.put_u8(pending | 0b1000_0000);

            // take the next 7 LSBs of the size
            #[allow(clippy::cast_possible_truncation)] // value is masked
            let next = (remaining & 0b111_1111) as u8;
            pending = next;
            remaining >>= 7;
        }

        // the last byte of the header has its MSB clear, the data starts
        // right after it
        buf.put_u8(pending);
    }

    #[instrument(skip(self, original_out), err)]
    pub fn encode_to(&self, original_out: &mut BytesMut) -> Result<(), Error> {
        self.write_header(original_out); // TODO: this needs space reserving for it

        // todo is there a way to stream through the zlibencoder so we don't have to
        // have this intermediate bytesmut and vec?
        let mut out = BytesMut::new();

        let size = self.uncompressed_size();
        original_out.reserve(size);
        // the data ends up getting compressed but we'll need at least this many bytes
        out.reserve(size);

        match self {
            Self::Commit(commit) => {
                commit.encode_to(&mut out)?;
            }
            Self::Tree(items) => {
                for item in items {
                    item.encode_to(&mut out)?;
                }
            }
            Self::Blob(data) => {
                out.extend_from_slice(data);
            }
        }

        debug_assert_eq!(out.len(), size);

        let mut e = ZlibEncoder::new(Vec::new(), Compression::default());
        e.write_all(&out).map_err(Error::CompressWrite)?;
        let compressed_data = e.finish().map_err(Error::Compress)?;

        original_out.extend_from_slice(&compressed_data);

        Ok(())
    }

    /// Number of bytes this object's data takes up before compression, which
    /// is also the size recorded in its header and hashed into its id.
    #[instrument(skip(self))]
    #[must_use]
    pub fn uncompressed_size(&self) -> usize {
        match self {
            Self::Blob(data) => data.len(),
            Self::Tree(items) => items.iter().fold(0, |total, item| total + item.size()),
            Self::Commit(commit) => commit.size(),
        }
    }

    /// Computes the SHA-1 object id of this entry: the digest of
    /// `<type> <size>\0` followed by the uncompressed object data.
    ///
    /// # Errors
    ///
    /// Fails when serialising the object fails.
    // wen const generics for RustCrypto? :-(
    #[instrument(skip(self), err)]
    pub fn hash(&self) -> Result<HashOutput, Error> {
        let size = self.uncompressed_size();

        let kind = match self {
            Self::Commit(_) => "commit",
            Self::Tree(_) => "tree",
            Self::Blob(_) => "blob",
        };

        // allocate exactly once: "<kind> <size>\0" plus the object itself
        let size_digits = itoa::Buffer::new().format(size).len();
        let capacity = kind.len() + " ".len() + size_digits + "\0".len() + size;
        let mut object = BytesMut::with_capacity(capacity);

        write!(object, "{} {}\0", kind, size)?;
        match self {
            Self::Commit(commit) => commit.encode_to(&mut object)?,
            Self::Tree(items) => items
                .iter()
                .try_for_each(|item| item.encode_to(&mut object))?,
            Self::Blob(blob) => object.extend_from_slice(blob),
        }

        Ok(sha1::Sha1::digest(&object).into())
    }
}
diff --git a/src/packet_line.rs b/src/packet_line.rs
new file mode 100644
index 0000000..5556760
--- /dev/null
+++ b/src/packet_line.rs
@@ -0,0 +1,78 @@
use std::fmt::Write;

use bytes::{BufMut, BytesMut};
use tracing::instrument;

use crate::{low_level::PackFile, Error};

/// A wrapper containing every possible type of message that can be sent to a Git client.
pub enum PktLine<'a> {
    /// General data sent to a client, generally a UTF-8 encoded string.
    Data(&'a [u8]),
    /// Similar to a data packet, but used during packfile sending to indicate this
    /// packet is a block of data by prefixing the payload with a byte containing
    /// the u8 `1`.
    SidebandData(PackFile<'a>),
    /// Similar to a data packet, but used during packfile sending to indicate this
    /// packet is a status message by prefixing the payload with a byte containing
    /// the u8 `2`.
    SidebandMsg(&'a [u8]),
    /// Indicates the end of a response (written as `0000`).
    Flush,
    /// Separates sections of a response (written as `0001`).
    Delimiter,
    /// Indicates the end of the response, allowing the client to send another
    /// request (written as `0002`).
    ResponseEnd,
}

impl PktLine<'_> {
    #[instrument(skip(self, buf), err)]
    pub fn encode_to(&self, buf: &mut BytesMut) -> Result<(), Error> {
        match self {
            Self::Data(data) => {
                write!(buf, "{:04x}", data.len() + 4)?;
                buf.extend_from_slice(data);
            }
            Self::SidebandData(packfile) => {
                // split the buf off so the cost of counting the bytes to put in the
                // data line prefix is just the cost of `unsplit` (an atomic decrement)
                let mut data_buf = buf.split_off(buf.len());

                data_buf.put_u8(1); // sideband, 1 = data
                packfile.encode_to(&mut data_buf)?;

                // write into the buf not the data buf so it's at the start of the msg
                write!(buf, "{:04x}", data_buf.len() + 4)?;
                buf.unsplit(data_buf);
            }
            Self::SidebandMsg(msg) => {
                write!(buf, "{:04x}", msg.len() + 4 + 1)?;
                buf.put_u8(2); // sideband, 2 = msg
                buf.extend_from_slice(msg);
            }
            Self::Flush => buf.extend_from_slice(b"0000"),
            Self::Delimiter => buf.extend_from_slice(b"0001"),
            Self::ResponseEnd => buf.extend_from_slice(b"0002"),
        }

        Ok(())
    }
}

impl<'a> From<&'a str> for PktLine<'a> {
    fn from(val: &'a str) -> Self {
        PktLine::Data(val.as_bytes())
    }
}

#[cfg(test)]
mod test {
    use bytes::BytesMut;

    /// A data packet is written as its length in hex followed by the payload;
    /// the length counts the 4 prefix characters too (17 + 4 = 0x15).
    #[test]
    fn test_pkt_line() {
        let mut buffer = BytesMut::new();
        super::PktLine::Data(b"agent=git/2.32.0\n")
            .encode_to(&mut buffer)
            .unwrap();
        assert_eq!(buffer.as_ref(), b"0015agent=git/2.32.0\n");
    }
}
diff --git a/src/util.rs b/src/util.rs
new file mode 100644
index 0000000..7375df2
--- /dev/null
+++ b/src/util.rs
@@ -0,0 +1,59 @@
use std::{
    borrow::Cow,
    fmt::{Display, Formatter},
    ops::Deref,
    sync::Arc,
};

/// A string that is either shared (`Arc<str>`) or static/owned
/// (`Cow<'static, str>`).
///
/// Equality and hashing only look at the string's content, never at which
/// variant holds it. The high-level builder uses this type as the key for
/// directory entries, so the same name stored as an `Arc` and as a `Cow`
/// must land on the same key; otherwise a directory could end up with two
/// entries of the same name.
#[derive(Debug)]
pub enum ArcOrCowStr {
    /// A reference counted string.
    Arc(Arc<str>),
    /// A borrowed `'static` string or an owned `String`.
    Cow(Cow<'static, str>),
}

impl PartialEq for ArcOrCowStr {
    /// Compares the string contents, regardless of variant.
    fn eq(&self, other: &Self) -> bool {
        **self == **other
    }
}

impl Eq for ArcOrCowStr {}

impl std::hash::Hash for ArcOrCowStr {
    /// Hashes the string content only, keeping `Hash` consistent with the
    /// content-based `PartialEq`.
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        std::hash::Hash::hash(&**self, state);
    }
}

impl From<Arc<str>> for ArcOrCowStr {
    fn from(v: Arc<str>) -> Self {
        Self::Arc(v)
    }
}

impl From<Cow<'static, str>> for ArcOrCowStr {
    fn from(v: Cow<'static, str>) -> Self {
        Self::Cow(v)
    }
}

impl From<&'static str> for ArcOrCowStr {
    fn from(v: &'static str) -> Self {
        Self::Cow(Cow::Borrowed(v))
    }
}

impl From<String> for ArcOrCowStr {
    fn from(v: String) -> Self {
        Self::Cow(Cow::Owned(v))
    }
}

impl AsRef<str> for ArcOrCowStr {
    /// Borrows the underlying string, whichever variant holds it.
    fn as_ref(&self) -> &str {
        match self {
            Self::Arc(v) => v.as_ref(),
            Self::Cow(v) => v.as_ref(),
        }
    }
}

impl Deref for ArcOrCowStr {
    type Target = str;

    fn deref(&self) -> &Self::Target {
        self.as_ref()
    }
}

impl Display for ArcOrCowStr {
    /// Displays exactly like the underlying `str`.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        std::fmt::Display::fmt(&**self, f)
    }
}