From f14b529055d2ae5757331495e367f0125b5db2db Mon Sep 17 00:00:00 2001 From: Jordan Doyle Date: Thu, 21 Oct 2021 23:20:00 +0100 Subject: [PATCH] Document chartered-git --- chartered-git/src/generators.rs | 1 + chartered-git/src/main.rs | 26 ++++++++++++++++++++++++++ chartered-git/src/tree.rs | 14 +++++++++++++- chartered-git/src/git/mod.rs | 1 + chartered-git/src/git/packfile/high_level.rs | 151 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------- 5 files changed, 142 insertions(+), 51 deletions(-) diff --git a/chartered-git/src/generators.rs b/chartered-git/src/generators.rs index cd7b615..0574ae0 100644 --- a/chartered-git/src/generators.rs +++ a/chartered-git/src/generators.rs @@ -1,5 +1,6 @@ use serde::Serialize; +/// The `config.json` file to write to the repository. #[derive(Serialize, Debug, Clone)] pub struct CargoConfig { pub dl: String, diff --git a/chartered-git/src/main.rs b/chartered-git/src/main.rs index d7d4675..3c98252 100644 --- a/chartered-git/src/main.rs +++ a/chartered-git/src/main.rs @@ -201,6 +201,12 @@ }.instrument(tracing::info_span!(parent: span, "shell request"))) } + /// Initially when setting up the SSH connection, the remote Git client will send us an + /// exec request (instead of the usual shell request that is sent when invoking `ssh`). + /// + /// The client will set `git-upload-pack` as the requested executable to run and also + /// sends the path that was appended to the end of the connection string defined in + /// cargo. 
fn exec_request( mut self, channel: ChannelId, @@ -213,21 +219,29 @@ Ok(data) => data, Err(e) => return Box::pin(futures::future::err(e.into())), }; + // parses the given args in the same fashion as a POSIX shell let args = shlex::split(data); Box::pin(async move { debug!("exec {:?}", args); + // if the client didn't send `GIT_PROTOCOL=version=2` as an environment + // variable when connecting, we'll just close the connection if !self.is_git_protocol_v2 { anyhow::bail!("not git protocol v2"); } let mut args = args.into_iter().flat_map(Vec::into_iter); + // check the executable requested to be run is the `git-upload-pack` we + // expect. we're not actually going to execute this, but we'll pretend + // to be it instead in `data`. if args.next().as_deref() != Some("git-upload-pack") { anyhow::bail!("not git-upload-pack"); } + // parse the requested organisation from the given path (the argument + // given to `git-upload-pack`) if let Some(org) = args.next().filter(|v| v.as_str() != "/") { let org = org .trim_start_matches('/') @@ -243,6 +257,7 @@ session.close(channel); } + // preamble, sending our capabilities and what have you self.write(PktLine::Data(b"version 2\n"))?; self.write(PktLine::Data(b"agent=chartered/0.1.0\n"))?; // TODO: clap::crate_name!()/clap::crate_version!() self.write(PktLine::Data(b"ls-refs=unborn\n"))?; @@ -265,6 +280,9 @@ Box::pin(futures::future::ready(Ok((self, session)))) } + /// User is attempting to connect via pubkey, we'll lookup the key in the + /// user database and create a new session if it exists, otherwise we'll + /// reject the authentication attempt. 
fn auth_publickey(mut self, _username: &str, key: &key::PublicKey) -> Self::FutureAuth { let span = self.span.clone(); let public_key = key.public_key_bytes(); @@ -339,7 +357,10 @@ let authed = self.authed()?; let org_name = self.org_name()?; + // start building the packfile we're going to send to the user let mut packfile = GitRepository::default(); + + // write the config.json to the root of the repository let config = CargoConfig::new( &Url::parse("http://127.0.0.1:8888/")?, &authed.auth_key, @@ -347,12 +368,17 @@ ); let config = serde_json::to_vec(&config)?; packfile.insert(ArrayVec::<_, 0>::new(), "config.json", &config)?; + + // build the tree of all the crates the user has access to, then write them + // to the in-memory repository. // todo: the whole tree needs caching and then we can filter in code rather than at // the database let tree = Tree::build(self.db.clone(), authed.user.id, org_name.to_string()).await; tree.write_to_packfile(&mut packfile)?; + // finalises the git repository, creating a commit and fetching the finalised + // packfile and commit hash to return in `ls-refs` calls. let (commit_hash, packfile_entries) = packfile.commit("computer", "john@computer.no", "Update crates")?; diff --git a/chartered-git/src/tree.rs b/chartered-git/src/tree.rs index 10df911..e227c9e 100644 --- a/chartered-git/src/tree.rs +++ a/chartered-git/src/tree.rs @@ -1,5 +1,6 @@ //! Generates the Git folder/file tree that's returned back to the user -//! containing the config & crate manifests. +//! containing the config & crate manifests. Only contains crates that +//! the user has access to. use crate::git::packfile::high_level::GitRepository; use arrayvec::ArrayVec; @@ -19,6 +20,8 @@ } impl Tree { + /// Grabs all the crates that the user has access to and writes out the manifests to + /// `self.crates`. 
pub async fn build(db: chartered_db::ConnectionPool, user_id: i32, org_name: String) -> Self { let mut crates = BTreeMap::new(); @@ -26,8 +29,11 @@ .await .unwrap() { + // the manifest we'll be returning to the user let mut file = String::new(); + // loop over all versions for the crate, serialising each version to json + // and writing them to the manifest split by newline. for version in versions { let cksum = version.checksum.clone(); let yanked = version.yanked; @@ -43,12 +49,14 @@ file.push('\n'); } + // insert the crate into `self.crates` crates.insert(crate_def.name, file); } Self { crates } } + /// Writes all the crate manifests from `self.crates` out to the given `GitRepository`. pub fn write_to_packfile<'a>( &'a self, repo: &mut GitRepository<'a>, @@ -62,6 +70,10 @@ } } +/// Crates with a total of 1, 2 or 3 characters in the name are written out to directories named +/// 1, 2 or 3 respectively as per the cargo spec. Anything else we'll build out a normal tree for +/// using the first four characters of the crate name, 2 for the first directory and the other 2 +/// for the second. fn get_crate_folder(crate_name: &str) -> ArrayVec<&str, 2> { let mut folders = ArrayVec::new(); diff --git a/chartered-git/src/git/mod.rs b/chartered-git/src/git/mod.rs index 06c3a1d..888bd7d 100644 --- a/chartered-git/src/git/mod.rs +++ a/chartered-git/src/git/mod.rs @@ -6,6 +6,7 @@ use self::packfile::low_level::PackFile; +/// Every packet sent to the client from us should be a `PktLine`. pub enum PktLine<'a> { Data(&'a [u8]), /// Similar to a data packet, but used during packfile sending to indicate this diff --git a/chartered-git/src/git/packfile/high_level.rs b/chartered-git/src/git/packfile/high_level.rs index 62dd4f6..6a49550 100644 --- a/chartered-git/src/git/packfile/high_level.rs +++ a/chartered-git/src/git/packfile/high_level.rs @@ -1,97 +1,93 @@ +//! A high-level interface for building packfiles. Wraps the `low_level` module +//! 
making a much easier interface for writing files and generating the root +//! commit. +//! +//! The output packfile will only have a single commit in it, which is fine +//! for our purposes because `cargo` will `git pull --force` from our Git +//! server, allowing us to ignore any history the client may have. + use arrayvec::ArrayVec; use indexmap::IndexMap; use super::low_level::{ Commit, CommitUserInfo, HashOutput, PackFileEntry, TreeItem as LowLevelTreeItem, TreeItemKind, }; - -#[derive(Default, Debug)] -pub struct Directory<'a>(IndexMap<&'a str, Box>>); - -impl<'a> Directory<'a> { - fn to_packfile_entries( - &self, - pack_file: &mut IndexMap>, - ) -> Result { - let mut tree = Vec::with_capacity(self.0.len()); - - for (name, item) in &self.0 { - tree.push(match item.as_ref() { - TreeItem::Blob(hash) => LowLevelTreeItem { - kind: TreeItemKind::File, - name, - hash: *hash, - }, - TreeItem::Directory(dir) => LowLevelTreeItem { - kind: TreeItemKind::Directory, - name, - hash: dir.to_packfile_entries(pack_file)?, - }, - }); - } - - let tree = PackFileEntry::Tree(tree); - let hash = tree.hash()?; - pack_file.insert(hash, tree); - - Ok(hash) - } -} - -#[derive(Debug)] -pub enum TreeItem<'a> { - Blob(HashOutput), - Directory(Directory<'a>), -} +/// The main way of interacting with the high level Packfile builder +/// +/// Builds a whole packfile containing files, directories and commits - essentially +/// building out a full Git repository in memory. #[derive(Default, Debug)] pub struct GitRepository<'a> { - file_entries: IndexMap>, - tree: Directory<'a>, + /// A map containing all the blobs and their corresponding hashes so they're + /// not inserted more than once for any files in the whole tree with the same + /// content. + packfile_entries: IndexMap>, + /// An in-progress `Tree` currently being built out, the tree refers to items + /// in `packfile_entries` by hash. 
+ tree: Tree<'a>, } impl<'a> GitRepository<'a> { + /// Inserts a file into the repository, writing a file to the path + /// `path/to/my-file` would require a `path` of `["path", "to"]` + /// and a `file` of `"my-file"`. pub fn insert( &mut self, path: ArrayVec<&'a str, N>, file: &'a str, content: &'a [u8], ) -> Result<(), anyhow::Error> { + // we'll initialise the directory to the root of the tree, this means + // if a path isn't specified we'll just write it to the root directory let mut directory = &mut self.tree; + // loops through the parts in the path, recursing through the `directory` + // `Tree` until we get to our target directory, creating any missing + // directories along the way. for part in path { let tree_item = directory .0 .entry(part) - .or_insert_with(|| Box::new(TreeItem::Directory(Directory::default()))); + .or_insert_with(|| Box::new(TreeItem::Tree(Tree::default()))); - if let TreeItem::Directory(d) = tree_item.as_mut() { + if let TreeItem::Tree(d) = tree_item.as_mut() { directory = d; } else { - anyhow::bail!("one of the path items was a blob"); + // TODO: how should we handle this? one of the items we tried to + // recurse into was a file. + anyhow::bail!("attempted to use a file as a directory"); } } + // wrap the file in a Blob so it's ready for writing into the packfile, and also + // allows us to grab the hash of the file for use in the tree let entry = PackFileEntry::Blob(content); - - // todo: handle overwriting error let file_hash = entry.hash()?; + + // todo: what should we do on overwrite? directory .0 .insert(file, Box::new(TreeItem::Blob(file_hash))); - self.file_entries.insert(file_hash, entry); + self.packfile_entries.insert(file_hash, entry); + Ok(()) } + /// Finalises this `GitRepository` by writing a commit to the `packfile_entries`, + /// all the files currently in the `tree`, returning all the packfile entries + /// and also the commit hash so it can be referred to by `ls-refs`. 
pub fn commit( &'a mut self, name: &'static str, email: &'static str, message: &'static str, ) -> Result<(HashOutput, Vec>), anyhow::Error> { - let tree_hash = self.tree.to_packfile_entries(&mut self.file_entries)?; + // gets the hash of the entire tree from the root + let tree_hash = self.tree.to_packfile_entries(&mut self.packfile_entries)?; + // build the commit using the given inputs let commit_user = CommitUserInfo { name, email, @@ -105,10 +101,65 @@ message, }); + // write the commit out to the packfile_entries let commit_hash = commit.hash()?; - self.file_entries.insert(commit_hash, commit); + self.packfile_entries.insert(commit_hash, commit); // TODO: make PackFileEntry copy and remove this clone - Ok((commit_hash, self.file_entries.values().cloned().collect())) + Ok(( + commit_hash, + self.packfile_entries.values().cloned().collect(), + )) + } +} + +/// An in-progress tree builder, containing file hashes along with their names or nested trees +#[derive(Default, Debug)] +struct Tree<'a>(IndexMap<&'a str, Box>>); + +impl<'a> Tree<'a> { + /// Recursively writes the whole tree out to the given `pack_file`, + /// the tree contains pointers to (hashes of) files contained within a + /// directory, and pointers to other directories. 
+ fn to_packfile_entries( + &self, + pack_file: &mut IndexMap>, + ) -> Result { + let mut tree = Vec::with_capacity(self.0.len()); + + for (name, item) in &self.0 { + tree.push(match item.as_ref() { + TreeItem::Blob(hash) => LowLevelTreeItem { + kind: TreeItemKind::File, + name, + hash: *hash, + }, + TreeItem::Tree(tree) => LowLevelTreeItem { + kind: TreeItemKind::Directory, + name, + // we're essentially working through our tree from the bottom up, + // so we can grab the hash of each directory along the way and + // reference it from the parent directory + hash: tree.to_packfile_entries(pack_file)?, + }, + }); + } + + // gets the hash of the tree we've just worked on, and + // pushes it to the packfile + let tree = PackFileEntry::Tree(tree); + let hash = tree.hash()?; + pack_file.insert(hash, tree); + + Ok(hash) } +} + +/// An item within a `Tree`, this could be a file blob or another directory. +#[derive(Debug)] +enum TreeItem<'a> { + /// Refers to a file by hash + Blob(HashOutput), + /// Refers to a nested directory + Tree(Tree<'a>), } -- rgit 0.1.3