author | Jordan Doyle <jordan@doyle.la> | 2021-10-21 23:20:00.0 +01:00:00 |
---|---|---|
committer | Jordan Doyle <jordan@doyle.la> | 2021-10-21 23:28:05.0 +01:00:00 |
commit | f14b529055d2ae5757331495e367f0125b5db2db [patch] |
|
tree | 62d1f33b64587b13c28b9a72254637c56006364b |
|
parent | f559c415698411451ecb5ade58a67bf298b7eb6a |
|
download | f14b529055d2ae5757331495e367f0125b5db2db.tar.gz |
Document chartered-git
Diff
chartered-git/src/generators.rs | 1 + chartered-git/src/main.rs | 26 ++++++++++++++++++++++++++ chartered-git/src/tree.rs | 14 +++++++++++++- chartered-git/src/git/mod.rs | 1 + chartered-git/src/git/packfile/high_level.rs | 151 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------- 5 files changed, 142 insertions(+), 51 deletions(-) diff --git a/chartered-git/src/generators.rs b/chartered-git/src/generators.rs index cd7b615..0574ae0 100644 --- a/chartered-git/src/generators.rs +++ a/chartered-git/src/generators.rs @@ -1,5 +1,6 @@ use serde::Serialize; /// The `config.json` file to write to the repository. #[derive(Serialize, Debug, Clone)] pub struct CargoConfig { pub dl: String, diff --git a/chartered-git/src/main.rs b/chartered-git/src/main.rs index d7d4675..3c98252 100644 --- a/chartered-git/src/main.rs +++ a/chartered-git/src/main.rs @@ -201,6 +201,12 @@ }.instrument(tracing::info_span!(parent: span, "shell request"))) } /// Initially when setting up the SSH connection, the remote Git client will send us an /// exec request (instead of the usual shell request that is sent when invoking `ssh`). /// /// The client will set `git-upload-pack` as the requested executable to run and also /// sends the path that was appended to the end of the connection string defined in /// cargo. fn exec_request( mut self, channel: ChannelId, @@ -213,21 +219,29 @@ Ok(data) => data, Err(e) => return Box::pin(futures::future::err(e.into())), }; // parses the given args in the same fashion as a POSIX shell let args = shlex::split(data); Box::pin(async move { debug!("exec {:?}", args); // if the client didn't send `GIT_PROTOCOL=version=2` as an environment // variable when connecting, we'll just close the connection if !self.is_git_protocol_v2 { anyhow::bail!("not git protocol v2"); } let mut args = args.into_iter().flat_map(Vec::into_iter); // check the executable requested to be ran is the `git-upload-pack` we // expect. 
we're not actually going to execute this, but we'll pretend // to be it instead in `data`. if args.next().as_deref() != Some("git-upload-pack") { anyhow::bail!("not git-upload-pack"); } // parse the requested organisation from the given path (the argument // given to `git-upload-pack`) if let Some(org) = args.next().filter(|v| v.as_str() != "/") { let org = org .trim_start_matches('/') @@ -243,6 +257,7 @@ session.close(channel); } // preamble, sending our capabilities and what have you self.write(PktLine::Data(b"version 2\n"))?; self.write(PktLine::Data(b"agent=chartered/0.1.0\n"))?; // TODO: clap::crate_name!()/clap::crate_version!() self.write(PktLine::Data(b"ls-refs=unborn\n"))?; @@ -265,6 +280,9 @@ Box::pin(futures::future::ready(Ok((self, session)))) } /// User is attempting to connect via pubkey, we'll lookup the key in the /// user database and create a new session if it exists, otherwise we'll /// reject the authentication attempt. fn auth_publickey(mut self, _username: &str, key: &key::PublicKey) -> Self::FutureAuth { let span = self.span.clone(); let public_key = key.public_key_bytes(); @@ -339,7 +357,10 @@ let authed = self.authed()?; let org_name = self.org_name()?; // start building the packfile we're going to send to the user let mut packfile = GitRepository::default(); // write the config.json to the root of the repository let config = CargoConfig::new( &Url::parse("http://127.0.0.1:8888/")?, &authed.auth_key, @@ -347,12 +368,17 @@ ); let config = serde_json::to_vec(&config)?; packfile.insert(ArrayVec::<_, 0>::new(), "config.json", &config)?; // build the tree of all the crates the user has access to, then write them // to the in-memory repository. 
// todo: the whole tree needs caching and then we can filter in code rather than at // the database let tree = Tree::build(self.db.clone(), authed.user.id, org_name.to_string()).await; tree.write_to_packfile(&mut packfile)?; // finalises the git repository, creating a commit and fetching the finalised // packfile and commit hash to return in `ls-refs` calls. let (commit_hash, packfile_entries) = packfile.commit("computer", "john@computer.no", "Update crates")?; diff --git a/chartered-git/src/tree.rs b/chartered-git/src/tree.rs index 10df911..e227c9e 100644 --- a/chartered-git/src/tree.rs +++ a/chartered-git/src/tree.rs @@ -1,5 +1,6 @@ //! Generates the Git folder/file tree that's returned back to the user //! containing the config & crate manifests. //! containing the config & crate manifests. Only contains crates that //! the user has access to. use crate::git::packfile::high_level::GitRepository; use arrayvec::ArrayVec; @@ -19,6 +20,8 @@ } impl Tree { /// Grabs all the crates that the user has access to and writes out the manifests to /// `self.crates`. pub async fn build(db: chartered_db::ConnectionPool, user_id: i32, org_name: String) -> Self { let mut crates = BTreeMap::new(); @@ -26,8 +29,11 @@ .await .unwrap() { // the manifest we'll be returning to the user let mut file = String::new(); // loop over all versions for the crate, serialising each version to json // and writing them to the manifest split by newline. for version in versions { let cksum = version.checksum.clone(); let yanked = version.yanked; @@ -43,12 +49,14 @@ file.push('\n'); } // insert the crate into `self.crates` crates.insert(crate_def.name, file); } Self { crates } } /// Writes all the crate manifests from `self.crates` out to the given `GitRepository`. pub fn write_to_packfile<'a>( &'a self, repo: &mut GitRepository<'a>, @@ -62,6 +70,10 @@ } } /// Crates with a total of 1, 2 or 3 characters in the name are written out to directories named /// 1, 2 or 3 respectively as per the cargo spec. 
Anything else we'll build out a normal tree for /// using the first four characters of the crate name, 2 for the first directory and the other 2 /// for the second. fn get_crate_folder(crate_name: &str) -> ArrayVec<&str, 2> { let mut folders = ArrayVec::new(); diff --git a/chartered-git/src/git/mod.rs b/chartered-git/src/git/mod.rs index 06c3a1d..888bd7d 100644 --- a/chartered-git/src/git/mod.rs +++ a/chartered-git/src/git/mod.rs @@ -6,6 +6,7 @@ use self::packfile::low_level::PackFile; /// Every packet sent to the client from us should be a `PktLine`. pub enum PktLine<'a> { Data(&'a [u8]), /// Similar to a data packet, but used during packfile sending to indicate this diff --git a/chartered-git/src/git/packfile/high_level.rs b/chartered-git/src/git/packfile/high_level.rs index 62dd4f6..6a49550 100644 --- a/chartered-git/src/git/packfile/high_level.rs +++ a/chartered-git/src/git/packfile/high_level.rs @@ -1,97 +1,93 @@ //! A high-level interface for building packfiles. Wraps the `low_level` module //! making a much easier interface for writing files and generating the root //! commit. //! //! The output packfile will only have a single commit in it, which is fine //! for our purposes because `cargo` will `git pull --force` from our Git //! server, allowing us to ignore any history the client may have. 
use arrayvec::ArrayVec; use indexmap::IndexMap; use super::low_level::{ Commit, CommitUserInfo, HashOutput, PackFileEntry, TreeItem as LowLevelTreeItem, TreeItemKind, }; #[derive(Default, Debug)] pub struct Directory<'a>(IndexMap<&'a str, Box<TreeItem<'a>>>); impl<'a> Directory<'a> { fn to_packfile_entries( &self, pack_file: &mut IndexMap<HashOutput, PackFileEntry<'a>>, ) -> Result<HashOutput, anyhow::Error> { let mut tree = Vec::with_capacity(self.0.len()); for (name, item) in &self.0 { tree.push(match item.as_ref() { TreeItem::Blob(hash) => LowLevelTreeItem { kind: TreeItemKind::File, name, hash: *hash, }, TreeItem::Directory(dir) => LowLevelTreeItem { kind: TreeItemKind::Directory, name, hash: dir.to_packfile_entries(pack_file)?, }, }); } let tree = PackFileEntry::Tree(tree); let hash = tree.hash()?; pack_file.insert(hash, tree); Ok(hash) } } #[derive(Debug)] pub enum TreeItem<'a> { Blob(HashOutput), Directory(Directory<'a>), } /// The main way of interacting with the high level Packfile builder /// /// Builds a whole packfile containing files, directories and commits - essentially /// building out a full Git repository in memory. #[derive(Default, Debug)] pub struct GitRepository<'a> { file_entries: IndexMap<HashOutput, PackFileEntry<'a>>, tree: Directory<'a>, /// A map containing all the blobs and their corresponding hashes so they're /// not inserted more than once for any files in the whole tree with the same /// content. packfile_entries: IndexMap<HashOutput, PackFileEntry<'a>>, /// An in-progress `Tree` currently being built out, the tree refers to items /// in `file_entries` by hash. tree: Tree<'a>, } impl<'a> GitRepository<'a> { /// Inserts a file into the repository, writing a file to the path /// `path/to/my-file` would require a `path` of `["path", "to"]` /// and a `file` of `"my-file"`. 
pub fn insert<const N: usize>( &mut self, path: ArrayVec<&'a str, N>, file: &'a str, content: &'a [u8], ) -> Result<(), anyhow::Error> { // we'll initialise the directory to the root of the tree, this means // if a path isn't specified we'll just write it to the root directory let mut directory = &mut self.tree; // loops through the parts in the path, recursing through the `directory` // `Tree` until we get to our target directory, creating any missing // directories along the way. for part in path { let tree_item = directory .0 .entry(part) .or_insert_with(|| Box::new(TreeItem::Directory(Directory::default()))); .or_insert_with(|| Box::new(TreeItem::Tree(Tree::default()))); if let TreeItem::Directory(d) = tree_item.as_mut() { if let TreeItem::Tree(d) = tree_item.as_mut() { directory = d; } else { anyhow::bail!("one of the path items was a blob"); // TODO: how should we handle this? one of items we tried to // recurse into was a directory. anyhow::bail!("attempted to use a file as a directory"); } } // wrap the file in a Blob so it's ready for writing into the packfile, and also // allows us to grab the hash of the file for use in the tree let entry = PackFileEntry::Blob(content); // todo: handle overwriting error let file_hash = entry.hash()?; // todo: what should we do on overwrite? directory .0 .insert(file, Box::new(TreeItem::Blob(file_hash))); self.file_entries.insert(file_hash, entry); self.packfile_entries.insert(file_hash, entry); Ok(()) } /// Finalises this `GitRepository` by writing a commit to the `packfile_entries`, /// all the files currently in the `tree`, returning all the packfile entries /// and also the commit hash so it can be referred to by `ls-ref`s. 
pub fn commit( &'a mut self, name: &'static str, email: &'static str, message: &'static str, ) -> Result<(HashOutput, Vec<PackFileEntry<'a>>), anyhow::Error> { let tree_hash = self.tree.to_packfile_entries(&mut self.file_entries)?; // gets the hash of the entire tree from the root let tree_hash = self.tree.to_packfile_entries(&mut self.packfile_entries)?; // build the commit using the given inputs let commit_user = CommitUserInfo { name, email, @@ -105,10 +101,65 @@ message, }); // write the commit out to the packfile_entries let commit_hash = commit.hash()?; self.file_entries.insert(commit_hash, commit); self.packfile_entries.insert(commit_hash, commit); // TODO: make PackFileEntry copy and remove this clone Ok((commit_hash, self.file_entries.values().cloned().collect())) Ok(( commit_hash, self.packfile_entries.values().cloned().collect(), )) } } /// An in-progress tree builder, containing file hashes along with their names or nested trees #[derive(Default, Debug)] struct Tree<'a>(IndexMap<&'a str, Box<TreeItem<'a>>>); impl<'a> Tree<'a> { /// Recursively writes the whole tree out to the given `pack_file`, /// the tree contains pointers to (hashes of) files contained within a /// directory, and pointers to other directories. 
fn to_packfile_entries( &self, pack_file: &mut IndexMap<HashOutput, PackFileEntry<'a>>, ) -> Result<HashOutput, anyhow::Error> { let mut tree = Vec::with_capacity(self.0.len()); for (name, item) in &self.0 { tree.push(match item.as_ref() { TreeItem::Blob(hash) => LowLevelTreeItem { kind: TreeItemKind::File, name, hash: *hash, }, TreeItem::Tree(tree) => LowLevelTreeItem { kind: TreeItemKind::Directory, name, // we're essentially working through our tree from the bottom up, // so we can grab the hash of each directory along the way and // reference it from the parent directory hash: tree.to_packfile_entries(pack_file)?, }, }); } // gets the hash of the tree we've just worked on, and // pushes it to the packfile let tree = PackFileEntry::Tree(tree); let hash = tree.hash()?; pack_file.insert(hash, tree); Ok(hash) } } /// An item within a `Tree`, this could be a file blob or another directory. #[derive(Debug)] enum TreeItem<'a> { /// Refers to a file by hash Blob(HashOutput), /// Refers to a nested directory Tree(Tree<'a>), }