//! A high-level interface for building packfiles. Wraps the `low_level` module //! making a much easier interface for writing files and generating the root //! commit. //! //! The output packfile will only have a single commit in it, which is fine //! for our purposes because `cargo` will `git pull --force` from our Git //! server, allowing us to ignore any history the client may have. use bytes::Bytes; use indexmap::IndexMap; use crate::{ low_level::{ Commit, CommitUserInfo, HashOutput, PackFileEntry, TreeItem as LowLevelTreeItem, TreeItemKind, }, util::ArcOrCowStr, Error, }; /// The main way of interacting with the high level Packfile builder /// /// Builds a whole packfile containing files, directories and commits - essentially /// building out a full Git repository in memory. #[derive(Default, Debug)] pub struct GitRepository { /// A map containing all the blobs and their corresponding hashes so they're /// not inserted more than once for any files in the whole tree with the same /// content. packfile_entries: IndexMap, /// An in-progress `Tree` currently being built out, the tree refers to items /// in `file_entries` by hash. tree: Tree, } impl GitRepository { /// Inserts a file into the repository, writing a file to the path /// `path/to/my-file` would require a `path` of `["path", "to"]` /// and a `file` of `"my-file"`. #[cfg_attr( feature = "tracing", tracing::instrument(skip(self, file, content), err) )] pub fn insert( &mut self, path: &[&'static str], file: impl Into, content: Bytes, ) -> Result<(), Error> { // we'll initialise the directory to the root of the tree, this means // if a path isn't specified we'll just write it to the root directory let mut directory = &mut self.tree; // loops through the parts in the path, recursing through the `directory` // `Tree` until we get to our target directory, creating any missing // directories along the way. for part in path { let tree_item = directory .0 .entry((*part).into()) .or_insert_with(|| Box::new(TreeItem::Tree(Tree::default()))); if let TreeItem::Tree(d) = tree_item.as_mut() { directory = d; } else { return Err(Error::NotDirectory(part)); } } // wrap the file in a Blob so it's ready for writing into the packfile, and also // allows us to grab the hash of the file for use in the tree let entry = PackFileEntry::Blob(content); let file_hash = entry.hash()?; // todo: what should we do on overwrite? directory .0 .insert(file.into(), Box::new(TreeItem::Blob(file_hash))); self.packfile_entries.insert(file_hash, entry); Ok(()) } /// Finalises this `GitRepository` by writing a commit to the `packfile_entries`, /// all the files currently in the `tree`, returning all the packfile entries /// and also the commit hash so it can be referred to by `ls-ref`s. #[cfg_attr( feature = "tracing", tracing::instrument(skip(self, name, email, message), err) )] pub fn commit( mut self, name: &'static str, email: &'static str, message: &'static str, ) -> Result<(HashOutput, Vec), Error> { // gets the hash of the entire tree from the root let tree_hash = self .tree .into_packfile_entries(&mut self.packfile_entries)?; // build the commit using the given inputs let commit_user = CommitUserInfo { name, email, time: time::OffsetDateTime::UNIX_EPOCH, }; let commit = PackFileEntry::Commit(Commit { tree: tree_hash, author: commit_user, committer: commit_user, message, }); // write the commit out to the packfile_entries let commit_hash = commit.hash()?; self.packfile_entries.insert(commit_hash, commit); Ok(( commit_hash, self.packfile_entries.into_iter().map(|(_, v)| v).collect(), )) } } /// An in-progress tree builder, containing file hashes along with their names or nested trees #[derive(Default, Debug)] struct Tree(IndexMap>); impl Tree { /// Recursively writes the the whole tree out to the given `pack_file`, /// the tree contains pointers to (hashes of) files contained within a /// directory, and pointers to other directories. #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, pack_file), err))] fn into_packfile_entries( self, pack_file: &mut IndexMap, ) -> Result { let mut tree = Vec::with_capacity(self.0.len()); for (name, item) in self.0 { tree.push(match *item { TreeItem::Blob(hash) => LowLevelTreeItem { kind: TreeItemKind::File, sort_name: name.to_string(), name, hash, }, TreeItem::Tree(tree) => LowLevelTreeItem { kind: TreeItemKind::Directory, sort_name: format!("{name}/"), name, // we're essentially working through our tree from the bottom up, // so we can grab the hash of each directory along the way and // reference it from the parent directory hash: tree.into_packfile_entries(pack_file)?, }, }); } // we need to sort our tree alphabetically, otherwise Git will silently // stop parsing the rest of the tree once it comes across a non-sorted // tree entry. tree.sort_unstable_by(|a, b| a.sort_name.cmp(&b.sort_name)); // gets the hash of the tree we've just worked on, and // pushes it to the packfile let tree = PackFileEntry::Tree(tree); let hash = tree.hash()?; pack_file.insert(hash, tree); Ok(hash) } } /// An item within a `Tree`, this could be a file blob or another directory. #[derive(Debug)] enum TreeItem { /// Refers to a file by hash Blob(HashOutput), /// Refers to a nested directory Tree(Tree), } #[cfg(test)] mod test { use crate::{high_level::GitRepository, low_level::PackFile}; use bytes::{Bytes, BytesMut}; #[test] fn deterministic() { let mut repo = GitRepository::default(); repo.insert(&["a", "b"], "c.txt", Bytes::from("hello world!")) .unwrap(); repo.insert(&["c", "d"], "c.txt", Bytes::from("test")) .unwrap(); let (hash, packfile) = repo .commit("me", "me@example.com", "initial commit") .unwrap(); assert_eq!( hex::encode(hash), "6ba08bda5731edfb2a0a00e602d1dd4bbd9d341c" ); insta::assert_debug_snapshot!(packfile); } #[test] fn git_verify_pack() { let mut repo = GitRepository::default(); repo.insert(&[], "c.txt", Bytes::from(vec![0; 256])) .unwrap(); repo.insert(&["e", "f"], "c.txt", Bytes::from("hiya")) .unwrap(); repo.insert(&["c", "d"], "c.txt", Bytes::from("hello world!")) .unwrap(); let (_hash, packfile) = repo .commit("me", "me@example.com", "initial commit") .unwrap(); let mut output = BytesMut::new(); PackFile::new(&packfile).encode_to(&mut output).unwrap(); let stdout = crate::test::verify_pack_file(output.freeze()); insta::with_settings!({filters => vec![ (r"/(.*)/example.pack", "/path/to/example.pack") ]}, { insta::assert_snapshot!(stdout); }); } }