🏡 index : ~doyle/chartered.git

author Jordan Doyle <jordan@doyle.la> 2021-10-07 23:18:27 +01:00
committer Jordan Doyle <jordan@doyle.la> 2021-10-07 23:45:36 +01:00
commit    63abc7f6dac6236b93b97c54cdab2f50d9e57b76 [patch]
tree      6700213ee7b7dc270c5446363fc281a50efe6e0d
parent    e60c01fc33eb15184f0ceeb001811272723d3526
download  63abc7f6dac6236b93b97c54cdab2f50d9e57b76.tar.gz

Introduce high-level packfile generator
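
The new high-level generator wraps the existing low-level packfile builder so callers no longer juggle tree items, blob entries, and hashes by hand. A minimal usage sketch, pieced together from the diff below (the paths and literal strings are illustrative only, not part of the change):

    use crate::git::packfile::{high_level::GitRepository, low_level::PackFile};

    // Build an in-memory repository; intermediate tree objects are
    // created on demand as paths are inserted.
    let mut repo = GitRepository::default();
    repo.insert(vec![], "config.json".to_string(), b"{}");
    repo.insert(
        vec!["cr".to_string(), "at".to_string()],
        "crate-name".to_string(),
        br#"{"vers":"1.0.0"}"#,
    );

    // Commit the accumulated entries and hand them to the low-level
    // encoder, ready to be written out to the client.
    let (commit_hash, entries) = repo.commit("computer", "john@computer.no", "Update crates");
    eprintln!("commit hash: {}", hex::encode(&commit_hash));
    let packfile = PackFile::new(entries);

Object hashing and encoding still live in low_level.rs; GitRepository only tracks the directory structure and keeps entries deduplicated by hash.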



Diff

 Cargo.lock                                   |   2 ++
 chartered-db/Cargo.toml                      |   9 +++++++--
 chartered-git/Cargo.toml                     |   2 ++
 chartered-db/src/lib.rs                      |  15 ++++++++++++++-
 chartered-fs/src/lib.rs                      |   2 +-
 chartered-git/src/generators.rs              |  41 +++++++++++++++++++++++++++++++++++++++++
 chartered-git/src/main.rs                    | 163 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------
 chartered-git/src/git/codec.rs               |   2 +-
 chartered-git/src/git/mod.rs                 |   2 +-
 chartered-git/src/git/packfile.rs            | 326 --------------------------------------------------------------------------------
 chartered-git/src/git/packfile/high_level.rs | 122 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 chartered-git/src/git/packfile/low_level.rs  | 326 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 chartered-git/src/git/packfile/mod.rs        |   2 ++
 13 files changed, 575 insertions(+), 439 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 3a4329c..15f876d 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -303,6 +303,7 @@
 "format-bytes",
 "futures",
 "hex",
 "indexmap",
 "indoc",
 "itoa",
 "log",
@@ -314,6 +315,7 @@
 "thrussh-keys",
 "tokio",
 "tokio-util",
 "url",
]

[[package]]
diff --git a/chartered-db/Cargo.toml b/chartered-db/Cargo.toml
index efe6d99..d4583a6 100644
--- a/chartered-db/Cargo.toml
+++ b/chartered-db/Cargo.toml
@@ -12,14 +12,14 @@
base64 = "0.13"
bitflags = "1"
chrono = "0.4"
diesel = { version = "1", features = ["sqlite", "postgres", "r2d2", "chrono"] }
diesel = { version = "1", features = ["r2d2", "chrono"] }
diesel_logger = "0.1"
diesel_migrations = "1.4"
displaydoc = "0.2"
hex = "0.4"
http = "0.2"
itertools = "0.10"
libsqlite3-sys = { version = "*", features = ["bundled"] } # https://github.com/rusqlite/rusqlite/issues/914
libsqlite3-sys = { version = "*", features = ["bundled"], optional = true } # https://github.com/rusqlite/rusqlite/issues/914
option_set = "0.1"
rand = "0.8"
reqwest = "0.11"
@@ -30,3 +30,8 @@
uuid = "0.8"
dotenv = "0.15"
thrussh-keys = "0.21"

[features]
sqlite = ["libsqlite3-sys", "diesel/sqlite"]
postgres = ["diesel/postgres"]

diff --git a/chartered-git/Cargo.toml b/chartered-git/Cargo.toml
index 3f3c3bb..2258169 100644
--- a/chartered-git/Cargo.toml
+++ b/chartered-git/Cargo.toml
@@ -20,6 +20,7 @@
format-bytes = "0.2"
futures = "0.3"
hex = "0.4"
indexmap = "1"
indoc = "1.0"
itoa = "0.4"
log = "0.4"
@@ -31,3 +32,4 @@
thrussh-keys = "0.21"
tokio = { version = "1", features = ["full"] }
tokio-util = { version = "0.6", features = ["codec"] }
url = "2"
diff --git a/chartered-db/src/lib.rs b/chartered-db/src/lib.rs
index 22cf0a1..02fe455 100644
--- a/chartered-db/src/lib.rs
+++ b/chartered-db/src/lib.rs
@@ -53,7 +53,20 @@
use std::sync::Arc;
use thiserror::Error;

pub type ConnectionPool = Arc<Pool<ConnectionManager<LoggingConnection<diesel::SqliteConnection>>>>;
#[cfg(feature = "sqlite")]
pub type Connection = diesel::SqliteConnection;

#[cfg(feature = "postgres")]
pub type Connection = diesel::PostgresConnection;

#[cfg(not(any(feature = "sqlite", feature = "postgres")))]
compile_error!(
    "At least one database backend must be enabled using `--features [sqlite|postgres]`"
);
#[cfg(not(any(feature = "sqlite", feature = "postgres")))]
pub type Connection = unimplemented!();

pub type ConnectionPool = Arc<Pool<ConnectionManager<LoggingConnection<Connection>>>>;
pub type Result<T> = std::result::Result<T, Error>;

embed_migrations!();
diff --git a/chartered-fs/src/lib.rs b/chartered-fs/src/lib.rs
index 309e238..cc6d99a 100644
--- a/chartered-fs/src/lib.rs
+++ b/chartered-fs/src/lib.rs
@@ -152,7 +152,7 @@
    #[allow(clippy::pedantic)]
    async fn parse_filesystem() {
        // panic!("{:#?}", FS::from_str("s3://10.0.64.101:9000/my-bucket/my-location"));
        FS::from_str("file:///tmp/chartered");
        // FS::from_str("file:///tmp/chartered");
    }

    #[tokio::test]
diff --git a/chartered-git/src/generators.rs b/chartered-git/src/generators.rs
new file mode 100644
index 0000000..cc987ba 100644
--- /dev/null
+++ b/chartered-git/src/generators.rs
@@ -1,0 +1,41 @@
use serde::Serialize;
use url::Url;

#[derive(Serialize, Debug, Clone)]
pub struct CargoConfig {
    pub dl: String,
    pub api: String,
}

impl CargoConfig {
    pub fn new(base: url::Url, api_key: &str, organisation: &str) -> Self {
        let base = format!("{}a/{}/o/{}", base, api_key, organisation);

        Self {
            dl: format!("{}/api/v1/crates", base),
            api: base,
        }
    }
}

#[cfg(test)]
mod test {
    use super::CargoConfig;

    #[test]
    fn test_cargo_config() {
        let conf = CargoConfig::new(
            url::Url::parse("https://127.0.0.1:1234").unwrap(),
            "my-api-key",
            "my-organisation",
        );
        assert_eq!(
            conf.dl.to_string(),
            "https://127.0.0.1:1234/a/my-api-key/o/my-organisation/api/v1/crates"
        );
        assert_eq!(
            conf.api.to_string(),
            "https://127.0.0.1:1234/a/my-api-key/o/my-organisation"
        );
    }
}
diff --git a/chartered-git/src/main.rs b/chartered-git/src/main.rs
index 982c545..e982f05 100644
--- a/chartered-git/src/main.rs
+++ b/chartered-git/src/main.rs
@@ -1,11 +1,18 @@
#![deny(clippy::pedantic)]
mod generators;
#[allow(clippy::missing_errors_doc)]
pub mod git;

use crate::git::{
    codec::{Encoder, GitCodec},
    packfile::{Commit, CommitUserInfo, PackFileEntry, TreeItem, TreeItemKind},
    PktLine,
use crate::{
    generators::CargoConfig,
    git::{
        codec::{Encoder, GitCodec},
        packfile::{
            high_level::GitRepository,
            low_level::{Commit, CommitUserInfo, PackFile, PackFileEntry, TreeItem, TreeItemKind},
        },
        PktLine,
    },
};

use bytes::BytesMut;
@@ -20,6 +27,7 @@
};
use thrussh_keys::{key, PublicKeyBase64};
use tokio_util::codec::{Decoder, Encoder as TokioEncoder};
use url::Url;

#[tokio::main]
#[allow(clippy::semicolon_if_nothing_returned)] // broken clippy lint
@@ -56,8 +64,7 @@
            input_bytes: BytesMut::default(),
            output_bytes: BytesMut::default(),
            db: self.db.clone(),
            user: None,
            user_ssh_key: None,
            authed: None,
            organisation: None,
        }
    }
@@ -69,11 +76,15 @@
    input_bytes: BytesMut,
    output_bytes: BytesMut,
    db: chartered_db::ConnectionPool,
    user: Option<chartered_db::users::User>,
    user_ssh_key: Option<Arc<chartered_db::users::UserSshKey>>,
    organisation: Option<String>,
    authed: Option<Authed>,
}

struct Authed {
    user: chartered_db::users::User,
    auth_key: String,
}

impl Handler {
    fn write(&mut self, packet: PktLine<'_>) -> Result<(), anyhow::Error> {
        Encoder {}.encode(packet, &mut self.output_bytes)
@@ -86,9 +97,9 @@
        );
    }

    fn user(&self) -> Result<&chartered_db::users::User, anyhow::Error> {
        match self.user {
            Some(ref user) => Ok(user),
    fn authed(&self) -> Result<&Authed, anyhow::Error> {
        match self.authed {
            Some(ref authed) => Ok(authed),
            None => anyhow::bail!("user not set after auth"),
        }
    }
@@ -97,13 +108,6 @@
        match self.organisation {
            Some(ref org) => Ok(org.as_str()),
            None => anyhow::bail!("org not set after auth"),
        }
    }

    fn user_ssh_key(&self) -> Result<&Arc<chartered_db::users::UserSshKey>, anyhow::Error> {
        match self.user_ssh_key {
            Some(ref ssh_key) => Ok(ssh_key),
            None => anyhow::bail!("user not set after auth"),
        }
    }
}
@@ -131,7 +135,7 @@

    fn shell_request(mut self, channel: ChannelId, mut session: Session) -> Self::FutureUnit {
        Box::pin(async move {
            let username = self.user()?.username.clone(); // todo
            let username = self.authed()?.user.username.clone(); // todo
            write!(&mut self.output_bytes, "Hi there, {}! You've successfully authenticated, but chartered does not provide shell access.\r\n", username)?;
            self.flush(&mut session, channel);
            session.close(channel);
@@ -201,7 +205,7 @@
        let public_key = key.public_key_bytes();

        Box::pin(async move {
            let (ssh_key, login_user) =
            let (ssh_key, user) =
                match chartered_db::users::User::find_by_ssh_key(self.db.clone(), public_key)
                    .await?
                {
@@ -213,9 +217,14 @@
            if let Err(e) = ssh_key.clone().update_last_used(self.db.clone()).await {
                warn!("Failed to update last used key: {:?}", e);
            }

            let auth_key = ssh_key
                .clone()
                .get_or_insert_session(self.db.clone(), self.ip.map(|v| v.to_string()))
                .await?
                .session_key;

            self.user = Some(login_user);
            self.user_ssh_key = Some(ssh_key);
            self.authed = Some(Authed { user, auth_key });

            self.finished_auth(server::Auth::Accept).await
        })
@@ -269,63 +278,31 @@
                }
            }

            let authed = self.authed()?;
            let org_name = self.org_name()?;

            if !ls_refs && !fetch && !done {
                return Ok((self, session));
            }

            // echo -ne "0012command=fetch\n0001000ethin-pack\n0010include-tag\n000eofs-delta\n0032want d24d8020163b5fee57c9babfd0c595b8c90ba253\n0009done\n"

            let mut pack_file_entries = Vec::new();
            let mut root_tree = Vec::new();

            // TODO: key should be cached
            let config = format!(
                r#"{{"dl":"http://127.0.0.1:8888/a/{key}/o/{organisation}/api/v1/crates","api":"http://127.0.0.1:8888/a/{key}/o/{organisation}"}}"#,
                key = self
                    .user_ssh_key()?
                    .clone()
                    .get_or_insert_session(self.db.clone(), self.ip.map(|v| v.to_string()))
                    .await?
                    .session_key,
                organisation = self.org_name()?,
            let mut packfile = GitRepository::default();

            let config = CargoConfig::new(
                Url::parse("http://127.0.0.1:8888/")?,
                &authed.auth_key,
                org_name,
            );
            let config_file = PackFileEntry::Blob(config.as_bytes());

            root_tree.push(TreeItem {
                kind: TreeItemKind::File,
                name: "config.json",
                hash: config_file.hash()?,
            });
            pack_file_entries.push(config_file);
            let config = serde_json::to_vec(&config)?;
            packfile.insert(vec![], "config.json".to_string(), &config);

            // todo: the whole tree needs caching and then we can filter in code rather than at
            //  the database
            let tree = fetch_tree(
                self.db.clone(),
                self.user()?.id,
                self.org_name()?.to_string(),
            )
            .await;
            build_tree(&mut root_tree, &mut pack_file_entries, &tree)?;

            let root_tree = PackFileEntry::Tree(root_tree);
            let root_tree_hash = root_tree.hash()?;
            pack_file_entries.push(root_tree);

            let commit_user = CommitUserInfo {
                name: "Jordan Doyle",
                email: "jordan@doyle.la",
                time: chrono::Utc.ymd(2021, 9, 8).and_hms(17, 46, 1),
            };
            let commit = PackFileEntry::Commit(Commit {
                tree: root_tree_hash,
                author: commit_user,
                committer: commit_user,
                message: "Most recent crates",
            });
            let commit_hash = commit.hash()?;
            pack_file_entries.push(commit);
            let tree = fetch_tree(self.db.clone(), authed.user.id, org_name.to_string()).await;
            build_tree(&mut packfile, &tree)?;

            let (commit_hash, packfile_entries) =
                packfile.commit("computer", "john@computer.no", "Update crates");

            eprintln!("commit hash: {}", hex::encode(&commit_hash));

            // echo -ne "0014command=ls-refs\n0014agent=git/2.321\n00010009peel\n000csymrefs\n000bunborn\n0014ref-prefix HEAD\n0019ref-prefix refs/HEAD\n001eref-prefix refs/tags/HEAD\n001fref-prefix refs/heads/HEAD\n0021ref-prefix refs/remotes/HEAD\n0026ref-prefix refs/remotes/HEAD/HEAD\n001aref-prefix refs/tags/\n0000"
@@ -358,7 +335,7 @@
                self.write(PktLine::SidebandMsg(b"Hello from chartered!\n"))?;
                self.flush(&mut session, channel);

                let packfile = git::packfile::PackFile::new(pack_file_entries);
                let packfile = PackFile::new(packfile_entries);
                self.write(PktLine::SidebandData(packfile))?;
                self.write(PktLine::Flush)?;
                self.flush(&mut session, channel);
@@ -427,51 +404,23 @@
}

fn build_tree<'a>(
    root_tree: &mut Vec<TreeItem<'a>>,
    pack_file_entries: &mut Vec<PackFileEntry<'a>>,
    packfile: &mut GitRepository<'a>,
    tree: &'a TwoCharTree<TwoCharTree<BTreeMap<String, String>>>,
) -> Result<(), anyhow::Error> {
    root_tree.reserve(tree.len());
    pack_file_entries.reserve(tree.iter().map(|(_, v)| 1 + v.len()).sum::<usize>() + tree.len());

    for (first_level_dir, second_level_dirs) in tree.iter() {
        let mut first_level_tree = Vec::with_capacity(second_level_dirs.len());
        let first_level_dir = std::str::from_utf8(first_level_dir)?;

        for (second_level_dir, crates) in second_level_dirs.iter() {
            let mut second_level_tree = Vec::with_capacity(crates.len());
            let second_level_dir = std::str::from_utf8(second_level_dir)?;

            for (crate_name, versions_def) in crates.iter() {
                let file = PackFileEntry::Blob(versions_def.as_ref());
                let file_hash = file.hash()?;
                pack_file_entries.push(file);

                second_level_tree.push(TreeItem {
                    kind: TreeItemKind::File,
                    name: crate_name,
                    hash: file_hash,
                });
                packfile.insert(
                    vec![first_level_dir.to_string(), second_level_dir.to_string()],
                    crate_name.to_string(),
                    versions_def.as_ref(),
                );
            }

            let second_level_tree = PackFileEntry::Tree(second_level_tree);
            let second_level_tree_hash = second_level_tree.hash()?;
            pack_file_entries.push(second_level_tree);

            first_level_tree.push(TreeItem {
                kind: TreeItemKind::Directory,
                name: std::str::from_utf8(second_level_dir)?,
                hash: second_level_tree_hash,
            });
        }

        let first_level_tree = PackFileEntry::Tree(first_level_tree);
        let first_level_tree_hash = first_level_tree.hash()?;
        pack_file_entries.push(first_level_tree);

        root_tree.push(TreeItem {
            kind: TreeItemKind::Directory,
            name: std::str::from_utf8(first_level_dir)?,
            hash: first_level_tree_hash,
        });
    }

    Ok(())
diff --git a/chartered-git/src/git/codec.rs b/chartered-git/src/git/codec.rs
index 6cba5e0..5d86ea8 100644
--- a/chartered-git/src/git/codec.rs
+++ b/chartered-git/src/git/codec.rs
@@ -107,7 +107,7 @@
        assert_eq!(
            res,
            Some(super::GitCommand {
                command: Bytes::from_static(b"agent=git/2.32.0\n"),
                command: Bytes::from_static(b"agent=git/2.32.0"),
                metadata: vec![],
            })
        );
diff --git a/chartered-git/src/git/mod.rs b/chartered-git/src/git/mod.rs
index e06d861..06c3a1d 100644
--- a/chartered-git/src/git/mod.rs
+++ b/chartered-git/src/git/mod.rs
@@ -1,10 +1,10 @@
pub mod codec;
pub mod packfile;

use bytes::{BufMut, BytesMut};
use std::fmt::Write;

use self::packfile::PackFile;
use self::packfile::low_level::PackFile;

pub enum PktLine<'a> {
    Data(&'a [u8]),
diff --git a/chartered-git/src/git/packfile.rs b/chartered-git/src/git/packfile.rs
deleted file mode 100644
index 464a9d0..0000000 100644
--- a/chartered-git/src/git/packfile.rs
+++ /dev/null
@@ -1,326 +1,0 @@
use bytes::{BufMut, BytesMut};
use flate2::{write::ZlibEncoder, Compression};
use sha1::{
    digest::{generic_array::GenericArray, FixedOutputDirty},
    Digest, Sha1,
};
use std::{convert::TryInto, fmt::Write, io::Write as IoWrite};

// The packfile itself is a very simple format. There is a header, a
// series of packed objects (each with it's own header and body) and
// then a checksum trailer. The first four bytes is the string 'PACK',
// which is sort of used to make sure you're getting the start of the
// packfile correctly. This is followed by a 4-byte packfile version
// number and then a 4-byte number of entries in that file.
pub struct PackFile<'a> {
    entries: Vec<PackFileEntry<'a>>,
}

impl<'a> PackFile<'a> {
    #[must_use]
    pub fn new(entries: Vec<PackFileEntry<'a>>) -> Self {
        Self { entries }
    }

    #[must_use]
    pub const fn header_size() -> usize {
        "PACK".len() + std::mem::size_of::<u32>() + std::mem::size_of::<u32>()
    }

    #[must_use]
    pub const fn footer_size() -> usize {
        20
    }

    pub fn encode_to(&self, original_buf: &mut BytesMut) -> Result<(), anyhow::Error> {
        let mut buf = original_buf.split_off(original_buf.len());
        buf.reserve(Self::header_size() + Self::footer_size());

        // header
        buf.extend_from_slice(b"PACK"); // magic header
        buf.put_u32(2); // version
        buf.put_u32(self.entries.len().try_into()?); // number of entries in the packfile

        // body
        for entry in &self.entries {
            entry.encode_to(&mut buf)?;
        }

        // footer
        buf.extend_from_slice(&sha1::Sha1::digest(&buf[..]));

        original_buf.unsplit(buf);

        Ok(())
    }
}

#[derive(Debug)]
pub struct Commit<'a> {
    pub tree: GenericArray<u8, <Sha1 as FixedOutputDirty>::OutputSize>, // [u8; 20], but sha-1 returns a GenericArray
    // pub parent: [u8; 20],
    pub author: CommitUserInfo<'a>,
    pub committer: CommitUserInfo<'a>,
    // pub gpgsig: &str,
    pub message: &'a str,
}

impl Commit<'_> {
    fn encode_to(&self, out: &mut BytesMut) -> Result<(), anyhow::Error> {
        let mut tree_hex = [0_u8; 20 * 2];
        hex::encode_to_slice(self.tree, &mut tree_hex)?;

        out.write_str("tree ")?;
        out.extend_from_slice(&tree_hex);
        out.write_char('\n')?;

        writeln!(out, "author {}", self.author.encode())?;
        writeln!(out, "committer {}", self.committer.encode())?;
        write!(out, "\n{}", self.message)?;

        Ok(())
    }

    #[must_use]
    pub fn size(&self) -> usize {
        let mut len = 0;
        len += "tree ".len() + (self.tree.len() * 2) + "\n".len();
        len += "author ".len() + self.author.size() + "\n".len();
        len += "committer ".len() + self.committer.size() + "\n".len();
        len += "\n".len() + self.message.len();
        len
    }
}

#[derive(Copy, Clone, Debug)]
pub struct CommitUserInfo<'a> {
    pub name: &'a str,
    pub email: &'a str,
    pub time: chrono::DateTime<chrono::Utc>,
}

impl CommitUserInfo<'_> {
    fn encode(&self) -> String {
        // TODO: remove `format!`, `format_args!`?
        format!(
            "{} <{}> {} +0000",
            self.name,
            self.email,
            self.time.timestamp()
        )
    }

    #[must_use]
    pub fn size(&self) -> usize {
        let timestamp_len = itoa::Buffer::new().format(self.time.timestamp()).len();

        self.name.len()
            + "< ".len()
            + self.email.len()
            + "> ".len()
            + timestamp_len
            + " +0000".len()
    }
}

#[derive(Debug)]
pub enum TreeItemKind {
    File,
    Directory,
}

impl TreeItemKind {
    #[must_use]
    pub const fn mode(&self) -> &'static str {
        match self {
            Self::File => "100644",
            Self::Directory => "40000",
        }
    }
}

#[derive(Debug)]
pub struct TreeItem<'a> {
    pub kind: TreeItemKind,
    pub name: &'a str,
    pub hash: GenericArray<u8, <Sha1 as FixedOutputDirty>::OutputSize>, // [u8; 20] - but we have to deal with GenericArrays
}

// `[mode] [name]\0[hash]`
impl TreeItem<'_> {
    fn encode_to(&self, out: &mut BytesMut) -> Result<(), anyhow::Error> {
        out.write_str(self.kind.mode())?;
        write!(out, " {}\0", self.name)?;
        out.extend_from_slice(&self.hash);
        Ok(())
    }

    #[must_use]
    pub fn size(&self) -> usize {
        self.kind.mode().len() + " ".len() + self.name.len() + "\0".len() + self.hash.len()
    }
}

#[derive(Debug)]
pub enum PackFileEntry<'a> {
    // jordan@Jordans-MacBook-Pro-2 0d % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - f5/473259d9674ed66239766a013f96a3550374e3 | gzip -dc
    // commit 1068tree 0d586b48bc42e8591773d3d8a7223551c39d453c
    // parent c2a862612a14346ae95234f26efae1ee69b5b7a9
    // author Jordan Doyle <jordan@doyle.la> 1630244577 +0100
    // committer Jordan Doyle <jordan@doyle.la> 1630244577 +0100
    // gpgsig -----BEGIN PGP SIGNATURE-----
    //
    // iQIzBAABCAAdFiEEMn1zof7yzaURQBGDHqa65vZtxJoFAmErjuEACgkQHqa65vZt
    // xJqhvhAAieKXnGRjT926qzozcvarC8D3TlA+Z1wVXueTAWqfusNIP0zCun/crOb2
    // tOULO+/DXVBmwu5eInAf+t/wvlnIsrzJonhVr1ZT0f0vDX6fs2vflWg4UCVEuTsZ
    // tg+aTjcibwnmViIM9XVOzhU8Au2OIqMQLyQOMWSt8NhY0W2WhBCdQvhktvK1V8W6
    // omPs04SrR39xWBDQaxsXYxq/1ZKUYXDwudvEfv14EvrxG1vWumpUVJd7Ib5w4gXX
    // fYa95DxYL720ZaiWPIYEG8FMBzSOpo6lUzY9g2/o/wKwSQZJNvpaMGCuouy8Fb+E
    // UaqC0XPxqpKG9duXPgCldUr+P7++48CF5zc358RBGz5OCNeTREsIQQo5PUO1k+wO
    // FnGOQTT8vvNOrxBgb3QgKu67RVwWDc6JnQCNpUrhUJrXMDWnYLBqo4Y+CdKGSQ4G
    // hW8V/hVTOlJZNi8bbU4v53cxh4nXiMM6NKUblUKs65ar3/2dkojwunz7r7GVZ6mG
    // QUpr9+ybG61XDqd1ad1A/B/i3WdWixTmJS3K/4uXjFjFX1f3RAk7O0gHc9I8HYOE
    // Vd8UsHzLOWAUHeaqbsd6xx3GCXF4D5D++kh9OY9Ov7CXlqbYbHd6Atg+PQ7VnqNf
    // bDqWN0Q2qcKX3k4ggtucmkkA6gP+K3+F5ANQj3AsGMQeddowC0Y=
    // =fXoH
    // -----END PGP SIGNATURE-----
    //
    // test
    Commit(Commit<'a>),
    // jordan@Jordans-MacBook-Pro-2 0d % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - 0d/586b48bc42e8591773d3d8a7223551c39d453c | gzip -dc
    // tree 20940000 .cargo���CYy��Ve�������100644 .gitignore�K��_ow�]����4�n�ݺ100644 Cargo.lock�7�3-�?/��
    // kt��c0C�100644 Cargo.toml�6�&(��]\8@�SHA�]f40000 src0QW��ƅ���b[�!�S&N�100644 test�G2Y�gN�b9vj?��Ut�
    Tree(Vec<TreeItem<'a>>),
    // jordan@Jordans-MacBook-Pro-2 objects % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - f5/473259d9674ed66239766a013f96a3550374e3| gzip -dc
    // blob 23try and find me in .git
    Blob(&'a [u8]),
    // Tag,
    // OfsDelta,
    // RefDelta,
}

impl PackFileEntry<'_> {
    fn write_header(&self, buf: &mut BytesMut) {
        let mut size = self.uncompressed_size();

        // write header
        {
            let mut val = 0b1000_0000_u8;

            val |= match self {
                Self::Commit(_) => 0b001,
                Self::Tree(_) => 0b010,
                Self::Blob(_) => 0b011,
                // Self::Tag => 0b100,
                // Self::OfsDelta => 0b110,
                // Self::RefDelta => 0b111,
            } << 4;

            // pack the 4 LSBs of the size into the header
            #[allow(clippy::cast_possible_truncation)] // value is masked
            {
                val |= (size & 0b1111) as u8;
            }
            size >>= 4;

            buf.put_u8(val);
        }

        // write size bytes
        while size != 0 {
            // read 7 LSBs from the `size` and push them off for the next iteration
            #[allow(clippy::cast_possible_truncation)] // value is masked
            let mut val = (size & 0b111_1111) as u8;
            size >>= 7;

            if size != 0 {
                // MSB set to 1 implies there's more size bytes to come, otherwise
                // the data starts after this byte
                val |= 1 << 7;
            }

            buf.put_u8(val);
        }
    }

    pub fn encode_to(&self, original_out: &mut BytesMut) -> Result<(), anyhow::Error> {
        self.write_header(original_out); // TODO: this needs space reserving for it

        // todo is there a way to stream through the zlibencoder so we don't have to
        // have this intermediate bytesmut and vec?
        let mut out = BytesMut::new();

        let size = self.uncompressed_size();
        original_out.reserve(size);
        // the data ends up getting compressed but we'll need at least this many bytes
        out.reserve(size);

        match self {
            Self::Commit(commit) => {
                commit.encode_to(&mut out)?;
            }
            Self::Tree(items) => {
                for item in items {
                    item.encode_to(&mut out)?;
                }
            }
            Self::Blob(data) => {
                out.extend_from_slice(data);
            }
        }

        debug_assert_eq!(out.len(), size);

        let mut e = ZlibEncoder::new(Vec::new(), Compression::default());
        e.write_all(&out)?;
        let compressed_data = e.finish()?;

        original_out.extend_from_slice(&compressed_data);

        Ok(())
    }

    #[must_use]
    pub fn uncompressed_size(&self) -> usize {
        match self {
            Self::Commit(commit) => commit.size(),
            Self::Tree(items) => items.iter().map(TreeItem::size).sum(),
            Self::Blob(data) => data.len(),
        }
    }

    // wen const generics for RustCrypto? :-(
    pub fn hash(
        &self,
    ) -> Result<GenericArray<u8, <Sha1 as FixedOutputDirty>::OutputSize>, anyhow::Error> {
        let size = self.uncompressed_size();

        let file_prefix = match self {
            Self::Commit(_) => "commit",
            Self::Tree(_) => "tree",
            Self::Blob(_) => "blob",
        };

        let size_len = itoa::Buffer::new().format(size).len();

        let mut out =
            BytesMut::with_capacity(file_prefix.len() + " ".len() + size_len + "\n".len() + size);

        write!(out, "{} {}\0", file_prefix, size)?;
        match self {
            Self::Commit(commit) => {
                commit.encode_to(&mut out)?;
            }
            Self::Tree(items) => {
                for item in items {
                    item.encode_to(&mut out)?;
                }
            }
            Self::Blob(blob) => {
                out.extend_from_slice(blob);
            }
        }

        Ok(sha1::Sha1::digest(&out))
    }
}
diff --git a/chartered-git/src/git/packfile/high_level.rs b/chartered-git/src/git/packfile/high_level.rs
new file mode 100644
index 0000000..b2af3e2 100644
--- /dev/null
+++ b/chartered-git/src/git/packfile/high_level.rs
@@ -1,0 +1,122 @@
use indexmap::IndexMap;

use super::low_level::{
    Commit, CommitUserInfo, HashOutput, PackFileEntry, TreeItem as LowLevelTreeItem, TreeItemKind,
};

#[derive(Default, Debug)]
pub struct Directory(IndexMap<String, Box<TreeItem>>);

impl Directory {
    fn into_packfile_entries<'a>(
        &'a self,
        pack_file: &mut IndexMap<HashOutput, PackFileEntry<'a>>,
    ) -> HashOutput {
        let mut tree = Vec::with_capacity(self.0.len());

        for (name, item) in &self.0 {
            tree.push(match item.as_ref() {
                TreeItem::Blob(hash) => LowLevelTreeItem {
                    kind: TreeItemKind::File,
                    name: &name,
                    hash: *hash,
                },
                TreeItem::Directory(dir) => LowLevelTreeItem {
                    kind: TreeItemKind::Directory,
                    name: &name,
                    hash: dir.into_packfile_entries(pack_file),
                },
            })
        }

        let tree = PackFileEntry::Tree(tree);
        let hash = tree.hash().unwrap();
        pack_file.insert(hash, tree);

        hash
    }
}

#[derive(Debug)]
pub enum TreeItem {
    Blob(HashOutput),
    Directory(Directory),
}

#[derive(Default, Debug)]
pub struct GitRepository<'a> {
    file_entries: IndexMap<HashOutput, PackFileEntry<'a>>,
    tree: Directory,
}

impl<'a> GitRepository<'a> {
    pub fn insert(&mut self, path: Vec<String>, file: String, content: &'a [u8]) {
        let mut directory = &mut self.tree;

        for part in path {
            let tree_item = directory
                .0
                .entry(part)
                .or_insert_with(|| Box::new(TreeItem::Directory(Directory::default())));

            if let TreeItem::Directory(d) = tree_item.as_mut() {
                directory = d;
            } else {
                panic!("one of the path items was a blob");
            }
        }

        let entry = PackFileEntry::Blob(content);

        // todo: handle overwriting error
        let file_hash = entry.hash().unwrap();
        directory
            .0
            .insert(file, Box::new(TreeItem::Blob(file_hash)));
        self.file_entries.insert(file_hash, entry);
    }

    pub fn commit(
        &'a mut self,
        name: &'static str,
        email: &'static str,
        message: &'static str,
    ) -> (HashOutput, Vec<PackFileEntry<'a>>) {
        let tree_hash = self.tree.into_packfile_entries(&mut self.file_entries);

        let commit_user = CommitUserInfo {
            name,
            email,
            time: chrono::Utc::now(),
        };

        let commit = PackFileEntry::Commit(Commit {
            tree: tree_hash,
            author: commit_user,
            committer: commit_user,
            message,
        });

        let commit_hash = commit.hash().unwrap();
        self.file_entries.insert(commit_hash, commit);

        // TODO: make PackFileEntry copy and remove this clone
        (commit_hash, self.file_entries.values().cloned().collect())
    }
}

#[cfg(test)]
mod test {
    #[test]
    fn test() {
        let mut x = super::GitRepository::default();
        // x.insert(vec![], "a".to_string(), "nerd".as_ref());
        x.insert(
            vec!["a".to_string(), "b".to_string()],
            "c".to_string(),
            "nerd".as_ref(),
        );
        x.insert(vec![], "b".to_string(), "nerd".as_ref());
        panic!("{:#?}", x);
    }
}
diff --git a/chartered-git/src/git/packfile/low_level.rs b/chartered-git/src/git/packfile/low_level.rs
new file mode 100644
index 0000000..2440925 100644
--- /dev/null
+++ b/chartered-git/src/git/packfile/low_level.rs
@@ -1,0 +1,326 @@
use bytes::{BufMut, BytesMut};
use flate2::{write::ZlibEncoder, Compression};
use sha1::{
    digest::{generic_array::GenericArray, FixedOutputDirty},
    Digest, Sha1,
};
use std::{convert::TryInto, fmt::Write, io::Write as IoWrite};

pub type HashOutput = GenericArray<u8, <Sha1 as FixedOutputDirty>::OutputSize>; // [u8; 20], but sha-1 returns a GenericArray

// The packfile itself is a very simple format. There is a header, a
// series of packed objects (each with it's own header and body) and
// then a checksum trailer. The first four bytes is the string 'PACK',
// which is sort of used to make sure you're getting the start of the
// packfile correctly. This is followed by a 4-byte packfile version
// number and then a 4-byte number of entries in that file.
pub struct PackFile<'a> {
    entries: Vec<PackFileEntry<'a>>,
}

impl<'a> PackFile<'a> {
    #[must_use]
    pub fn new(entries: Vec<PackFileEntry<'a>>) -> Self {
        Self { entries }
    }

    #[must_use]
    pub const fn header_size() -> usize {
        "PACK".len() + std::mem::size_of::<u32>() + std::mem::size_of::<u32>()
    }

    #[must_use]
    pub const fn footer_size() -> usize {
        20
    }

    pub fn encode_to(&self, original_buf: &mut BytesMut) -> Result<(), anyhow::Error> {
        let mut buf = original_buf.split_off(original_buf.len());
        buf.reserve(Self::header_size() + Self::footer_size());

        // header
        buf.extend_from_slice(b"PACK"); // magic header
        buf.put_u32(2); // version
        buf.put_u32(self.entries.len().try_into()?); // number of entries in the packfile

        // body
        for entry in &self.entries {
            entry.encode_to(&mut buf)?;
        }

        // footer
        buf.extend_from_slice(&sha1::Sha1::digest(&buf[..]));

        original_buf.unsplit(buf);

        Ok(())
    }
}

#[derive(Debug, Clone, Copy)]
pub struct Commit<'a> {
    pub tree: HashOutput,
    // pub parent: [u8; 20],
    pub author: CommitUserInfo<'a>,
    pub committer: CommitUserInfo<'a>,
    // pub gpgsig: &str,
    pub message: &'a str,
}

impl Commit<'_> {
    fn encode_to(&self, out: &mut BytesMut) -> Result<(), anyhow::Error> {
        let mut tree_hex = [0_u8; 20 * 2];
        hex::encode_to_slice(self.tree, &mut tree_hex)?;

        out.write_str("tree ")?;
        out.extend_from_slice(&tree_hex);
        out.write_char('\n')?;

        writeln!(out, "author {}", self.author.encode())?;
        writeln!(out, "committer {}", self.committer.encode())?;
        write!(out, "\n{}", self.message)?;

        Ok(())
    }

    #[must_use]
    pub fn size(&self) -> usize {
        let mut len = 0;
        len += "tree ".len() + (self.tree.len() * 2) + "\n".len();
        len += "author ".len() + self.author.size() + "\n".len();
        len += "committer ".len() + self.committer.size() + "\n".len();
        len += "\n".len() + self.message.len();
        len
    }
}

#[derive(Copy, Clone, Debug)]
pub struct CommitUserInfo<'a> {
    pub name: &'a str,
    pub email: &'a str,
    pub time: chrono::DateTime<chrono::Utc>,
}

impl CommitUserInfo<'_> {
    fn encode(&self) -> String {
        // TODO: remove `format!`, `format_args!`?
        format!(
            "{} <{}> {} +0000",
            self.name,
            self.email,
            self.time.timestamp()
        )
    }

    #[must_use]
    pub fn size(&self) -> usize {
        let timestamp_len = itoa::Buffer::new().format(self.time.timestamp()).len();

        self.name.len()
            + "< ".len()
            + self.email.len()
            + "> ".len()
            + timestamp_len
            + " +0000".len()
    }
}

#[derive(Debug, Copy, Clone)]
pub enum TreeItemKind {
    File,
    Directory,
}

impl TreeItemKind {
    #[must_use]
    pub const fn mode(&self) -> &'static str {
        match self {
            Self::File => "100644",
            Self::Directory => "40000",
        }
    }
}

#[derive(Debug, Copy, Clone)]
pub struct TreeItem<'a> {
    pub kind: TreeItemKind,
    pub name: &'a str,
    pub hash: HashOutput,
}

// `[mode] [name]\0[hash]`
impl TreeItem<'_> {
    fn encode_to(&self, out: &mut BytesMut) -> Result<(), anyhow::Error> {
        out.write_str(self.kind.mode())?;
        write!(out, " {}\0", self.name)?;
        out.extend_from_slice(&self.hash);
        Ok(())
    }

    #[must_use]
    pub fn size(&self) -> usize {
        self.kind.mode().len() + " ".len() + self.name.len() + "\0".len() + self.hash.len()
    }
}

#[derive(Debug, Clone)] // could be copy but Vec<TreeItem<'a>>
pub enum PackFileEntry<'a> {
    // jordan@Jordans-MacBook-Pro-2 0d % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - f5/473259d9674ed66239766a013f96a3550374e3 | gzip -dc
    // commit 1068tree 0d586b48bc42e8591773d3d8a7223551c39d453c
    // parent c2a862612a14346ae95234f26efae1ee69b5b7a9
    // author Jordan Doyle <jordan@doyle.la> 1630244577 +0100
    // committer Jordan Doyle <jordan@doyle.la> 1630244577 +0100
    // gpgsig -----BEGIN PGP SIGNATURE-----
    //
    // iQIzBAABCAAdFiEEMn1zof7yzaURQBGDHqa65vZtxJoFAmErjuEACgkQHqa65vZt
    // xJqhvhAAieKXnGRjT926qzozcvarC8D3TlA+Z1wVXueTAWqfusNIP0zCun/crOb2
    // tOULO+/DXVBmwu5eInAf+t/wvlnIsrzJonhVr1ZT0f0vDX6fs2vflWg4UCVEuTsZ
    // tg+aTjcibwnmViIM9XVOzhU8Au2OIqMQLyQOMWSt8NhY0W2WhBCdQvhktvK1V8W6
    // omPs04SrR39xWBDQaxsXYxq/1ZKUYXDwudvEfv14EvrxG1vWumpUVJd7Ib5w4gXX
    // fYa95DxYL720ZaiWPIYEG8FMBzSOpo6lUzY9g2/o/wKwSQZJNvpaMGCuouy8Fb+E
    // UaqC0XPxqpKG9duXPgCldUr+P7++48CF5zc358RBGz5OCNeTREsIQQo5PUO1k+wO
    // FnGOQTT8vvNOrxBgb3QgKu67RVwWDc6JnQCNpUrhUJrXMDWnYLBqo4Y+CdKGSQ4G
    // hW8V/hVTOlJZNi8bbU4v53cxh4nXiMM6NKUblUKs65ar3/2dkojwunz7r7GVZ6mG
    // QUpr9+ybG61XDqd1ad1A/B/i3WdWixTmJS3K/4uXjFjFX1f3RAk7O0gHc9I8HYOE
    // Vd8UsHzLOWAUHeaqbsd6xx3GCXF4D5D++kh9OY9Ov7CXlqbYbHd6Atg+PQ7VnqNf
    // bDqWN0Q2qcKX3k4ggtucmkkA6gP+K3+F5ANQj3AsGMQeddowC0Y=
    // =fXoH
    // -----END PGP SIGNATURE-----
    //
    // test
    Commit(Commit<'a>),
    // jordan@Jordans-MacBook-Pro-2 0d % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - 0d/586b48bc42e8591773d3d8a7223551c39d453c | gzip -dc
    // tree 20940000 .cargo���CYy��Ve�������100644 .gitignore�K��_ow�]����4�n�ݺ100644 Cargo.lock�7�3-�?/��
    // kt��c0C�100644 Cargo.toml�6�&(��]\8@�SHA�]f40000 src0QW��ƅ���b[�!�S&N�100644 test�G2Y�gN�b9vj?��Ut�
    Tree(Vec<TreeItem<'a>>),
    // jordan@Jordans-MacBook-Pro-2 objects % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - f5/473259d9674ed66239766a013f96a3550374e3| gzip -dc
    // blob 23try and find me in .git
    Blob(&'a [u8]),
    // Tag,
    // OfsDelta,
    // RefDelta,
}

impl PackFileEntry<'_> {
    fn write_header(&self, buf: &mut BytesMut) {
        let mut size = self.uncompressed_size();

        // write header
        {
            let mut val = 0b1000_0000_u8;

            val |= match self {
                Self::Commit(_) => 0b001,
                Self::Tree(_) => 0b010,
                Self::Blob(_) => 0b011,
                // Self::Tag => 0b100,
                // Self::OfsDelta => 0b110,
                // Self::RefDelta => 0b111,
            } << 4;

            // pack the 4 LSBs of the size into the header
            #[allow(clippy::cast_possible_truncation)] // value is masked
            {
                val |= (size & 0b1111) as u8;
            }
            size >>= 4;

            buf.put_u8(val);
        }

        // write size bytes
        while size != 0 {
            // read 7 LSBs from the `size` and push them off for the next iteration
            #[allow(clippy::cast_possible_truncation)] // value is masked
            let mut val = (size & 0b111_1111) as u8;
            size >>= 7;

            if size != 0 {
                // MSB set to 1 implies there's more size bytes to come, otherwise
                // the data starts after this byte
                val |= 1 << 7;
            }

            buf.put_u8(val);
        }
    }

    pub fn encode_to(&self, original_out: &mut BytesMut) -> Result<(), anyhow::Error> {
        self.write_header(original_out); // TODO: this needs space reserving for it

        // todo is there a way to stream through the zlibencoder so we don't have to
        // have this intermediate bytesmut and vec?
        let mut out = BytesMut::new();

        let size = self.uncompressed_size();
        original_out.reserve(size);
        // the data ends up getting compressed but we'll need at least this many bytes
        out.reserve(size);

        match self {
            Self::Commit(commit) => {
                commit.encode_to(&mut out)?;
            }
            Self::Tree(items) => {
                for item in items {
                    item.encode_to(&mut out)?;
                }
            }
            Self::Blob(data) => {
                out.extend_from_slice(data);
            }
        }

        debug_assert_eq!(out.len(), size);

        let mut e = ZlibEncoder::new(Vec::new(), Compression::default());
        e.write_all(&out)?;
        let compressed_data = e.finish()?;

        original_out.extend_from_slice(&compressed_data);

        Ok(())
    }

    #[must_use]
    pub fn uncompressed_size(&self) -> usize {
        match self {
            Self::Commit(commit) => commit.size(),
            Self::Tree(items) => items.iter().map(TreeItem::size).sum(),
            Self::Blob(data) => data.len(),
        }
    }

    // wen const generics for RustCrypto? :-(
    pub fn hash(&self) -> Result<HashOutput, anyhow::Error> {
        let size = self.uncompressed_size();

        let file_prefix = match self {
            Self::Commit(_) => "commit",
            Self::Tree(_) => "tree",
            Self::Blob(_) => "blob",
        };

        let size_len = itoa::Buffer::new().format(size).len();

        let mut out =
            BytesMut::with_capacity(file_prefix.len() + " ".len() + size_len + "\n".len() + size);

        write!(out, "{} {}\0", file_prefix, size)?;
        match self {
            Self::Commit(commit) => {
                commit.encode_to(&mut out)?;
            }
            Self::Tree(items) => {
                for item in items {
                    item.encode_to(&mut out)?;
                }
            }
            Self::Blob(blob) => {
                out.extend_from_slice(blob);
            }
        }

        Ok(sha1::Sha1::digest(&out))
    }
}
diff --git a/chartered-git/src/git/packfile/mod.rs b/chartered-git/src/git/packfile/mod.rs
new file mode 100644
index 0000000..a70e0a8 100644
--- /dev/null
+++ b/chartered-git/src/git/packfile/mod.rs
@@ -1,0 +1,2 @@
pub mod high_level;
pub mod low_level;