🏡 index : ~doyle/chartered.git

author Jordan Doyle <jordan@doyle.la> 2021-10-21 23:20:00.0 +01:00:00
committer Jordan Doyle <jordan@doyle.la> 2021-10-21 23:28:05.0 +01:00:00
commit
f14b529055d2ae5757331495e367f0125b5db2db [patch]
tree
62d1f33b64587b13c28b9a72254637c56006364b
parent
f559c415698411451ecb5ade58a67bf298b7eb6a
download
f14b529055d2ae5757331495e367f0125b5db2db.tar.gz

Document chartered-git



Diff

 chartered-git/src/generators.rs              |   1 +
 chartered-git/src/main.rs                    |  26 ++++++++++++++++++++++++++
 chartered-git/src/tree.rs                    |  14 +++++++++++++-
 chartered-git/src/git/mod.rs                 |   1 +
 chartered-git/src/git/packfile/high_level.rs | 151 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------
 5 files changed, 142 insertions(+), 51 deletions(-)

diff --git a/chartered-git/src/generators.rs b/chartered-git/src/generators.rs
index cd7b615..0574ae0 100644
--- a/chartered-git/src/generators.rs
+++ a/chartered-git/src/generators.rs
@@ -1,5 +1,6 @@
use serde::Serialize;

/// The `config.json` file to write to the repository.

#[derive(Serialize, Debug, Clone)]
pub struct CargoConfig {
    pub dl: String,
diff --git a/chartered-git/src/main.rs b/chartered-git/src/main.rs
index d7d4675..3c98252 100644
--- a/chartered-git/src/main.rs
+++ a/chartered-git/src/main.rs
@@ -201,6 +201,12 @@
        }.instrument(tracing::info_span!(parent: span, "shell request")))
    }

    /// Initially when setting up the SSH connection, the remote Git client will send us an

    /// exec request (instead of the usual shell request that is sent when invoking `ssh`).

    ///

    /// The client will set `git-upload-pack` as the requested executable to run and also

    /// sends the path that was appended to the end of the connection string defined in

    /// cargo.

    fn exec_request(
        mut self,
        channel: ChannelId,
@@ -213,21 +219,29 @@
            Ok(data) => data,
            Err(e) => return Box::pin(futures::future::err(e.into())),
        };
        // parses the given args in the same fashion as a POSIX shell
        let args = shlex::split(data);

        Box::pin(async move {
            debug!("exec {:?}", args);

            // if the client didn't send `GIT_PROTOCOL=version=2` as an environment
            // variable when connecting, we'll just close the connection
            if !self.is_git_protocol_v2 {
                anyhow::bail!("not git protocol v2");
            }

            let mut args = args.into_iter().flat_map(Vec::into_iter);

            // check the executable requested to be run is the `git-upload-pack` we
            // expect. we're not actually going to execute this, but we'll pretend
            // to be it instead in `data`.
            if args.next().as_deref() != Some("git-upload-pack") {
                anyhow::bail!("not git-upload-pack");
            }

            // parse the requested organisation from the given path (the argument
            // given to `git-upload-pack`)
            if let Some(org) = args.next().filter(|v| v.as_str() != "/") {
                let org = org
                    .trim_start_matches('/')
@@ -243,6 +257,7 @@
                session.close(channel);
            }

            // preamble, sending our capabilities and what have you
            self.write(PktLine::Data(b"version 2\n"))?;
            self.write(PktLine::Data(b"agent=chartered/0.1.0\n"))?; // TODO: clap::crate_name!()/clap::crate_version!()
            self.write(PktLine::Data(b"ls-refs=unborn\n"))?;
@@ -265,6 +280,9 @@
        Box::pin(futures::future::ready(Ok((self, session))))
    }

    /// User is attempting to connect via pubkey, we'll lookup the key in the

    /// user database and create a new session if it exists, otherwise we'll

    /// reject the authentication attempt.

    fn auth_publickey(mut self, _username: &str, key: &key::PublicKey) -> Self::FutureAuth {
        let span = self.span.clone();
        let public_key = key.public_key_bytes();
@@ -339,7 +357,10 @@
                    let authed = self.authed()?;
                    let org_name = self.org_name()?;

                    // start building the packfile we're going to send to the user
                    let mut packfile = GitRepository::default();

                    // write the config.json to the root of the repository
                    let config = CargoConfig::new(
                        &Url::parse("http://127.0.0.1:8888/")?,
                        &authed.auth_key,
@@ -347,12 +368,17 @@
                    );
                    let config = serde_json::to_vec(&config)?;
                    packfile.insert(ArrayVec::<_, 0>::new(), "config.json", &config)?;

                    // build the tree of all the crates the user has access to, then write them
                    // to the in-memory repository.
                    // todo: the whole tree needs caching and then we can filter in code rather than at
                    //  the database
                    let tree =
                        Tree::build(self.db.clone(), authed.user.id, org_name.to_string()).await;
                    tree.write_to_packfile(&mut packfile)?;

                    // finalises the git repository, creating a commit and fetching the finalised
                    // packfile and commit hash to return in `ls-refs` calls.
                    let (commit_hash, packfile_entries) =
                        packfile.commit("computer", "john@computer.no", "Update crates")?;

diff --git a/chartered-git/src/tree.rs b/chartered-git/src/tree.rs
index 10df911..e227c9e 100644
--- a/chartered-git/src/tree.rs
+++ a/chartered-git/src/tree.rs
@@ -1,5 +1,6 @@
//! Generates the Git folder/file tree that's returned back to the user

//! containing the config & crate manifests.

//! containing the config & crate manifests. Only contains crates that

//! the user has access to.


use crate::git::packfile::high_level::GitRepository;
use arrayvec::ArrayVec;
@@ -19,6 +20,8 @@
}

impl Tree {
    /// Grabs all the crates that the user has access to and writes out the manifests to

    /// `self.crates`.

    pub async fn build(db: chartered_db::ConnectionPool, user_id: i32, org_name: String) -> Self {
        let mut crates = BTreeMap::new();

@@ -26,8 +29,11 @@
            .await
            .unwrap()
        {
            // the manifest we'll be returning to the user
            let mut file = String::new();

            // loop over all versions for the crate, serialising each version to json
            // and writing them to the manifest split by newline.
            for version in versions {
                let cksum = version.checksum.clone();
                let yanked = version.yanked;
@@ -43,12 +49,14 @@
                file.push('\n');
            }

            // insert the crate into `self.crates`
            crates.insert(crate_def.name, file);
        }

        Self { crates }
    }

    /// Writes all the crate manifests from `self.crates` out to the given `GitRepository`.

    pub fn write_to_packfile<'a>(
        &'a self,
        repo: &mut GitRepository<'a>,
@@ -62,6 +70,10 @@
    }
}

/// Crates with a total of 1, 2 or 3 characters in the name are written out to directories named

/// 1, 2 or 3 respectively as per the cargo spec. Anything else we'll build out a normal tree for

/// using the first four characters of the crate name, 2 for the first directory and the other 2

/// for the second.

fn get_crate_folder(crate_name: &str) -> ArrayVec<&str, 2> {
    let mut folders = ArrayVec::new();

diff --git a/chartered-git/src/git/mod.rs b/chartered-git/src/git/mod.rs
index 06c3a1d..888bd7d 100644
--- a/chartered-git/src/git/mod.rs
+++ a/chartered-git/src/git/mod.rs
@@ -6,6 +6,7 @@

use self::packfile::low_level::PackFile;

/// Every packet sent to the client from us should be a `PktLine`.

pub enum PktLine<'a> {
    Data(&'a [u8]),
    /// Similar to a data packet, but used during packfile sending to indicate this

diff --git a/chartered-git/src/git/packfile/high_level.rs b/chartered-git/src/git/packfile/high_level.rs
index 62dd4f6..6a49550 100644
--- a/chartered-git/src/git/packfile/high_level.rs
+++ a/chartered-git/src/git/packfile/high_level.rs
@@ -1,97 +1,93 @@
//! A high-level interface for building packfiles. Wraps the `low_level` module

//! making a much easier interface for writing files and generating the root

//! commit.

//!

//! The output packfile will only have a single commit in it, which is fine

//! for our purposes because `cargo` will `git pull --force` from our Git

//! server, allowing us to ignore any history the client may have.


use arrayvec::ArrayVec;
use indexmap::IndexMap;

use super::low_level::{
    Commit, CommitUserInfo, HashOutput, PackFileEntry, TreeItem as LowLevelTreeItem, TreeItemKind,
};

/// A directory in the in-memory tree: maps each entry's name to the boxed
/// `TreeItem` (a blob hash or a nested directory) stored under that name.
#[derive(Default, Debug)]
pub struct Directory<'a>(IndexMap<&'a str, Box<TreeItem<'a>>>);

impl<'a> Directory<'a> {
    /// Recursively writes this directory, and every directory nested inside
    /// it, into `pack_file` as `PackFileEntry::Tree` entries keyed by hash.
    ///
    /// Returns the hash of the tree entry created for this directory so the
    /// caller can reference it.
    ///
    /// # Errors
    ///
    /// Propagates any error from hashing a tree entry or from writing a
    /// nested directory.
    fn to_packfile_entries(
        &self,
        pack_file: &mut IndexMap<HashOutput, PackFileEntry<'a>>,
    ) -> Result<HashOutput, anyhow::Error> {
        // one low-level tree item per entry in this directory
        let mut tree = Vec::with_capacity(self.0.len());

        for (name, item) in &self.0 {
            tree.push(match item.as_ref() {
                // blobs already carry their hash, so they're referenced directly
                TreeItem::Blob(hash) => LowLevelTreeItem {
                    kind: TreeItemKind::File,
                    name,
                    hash: *hash,
                },
                // nested directories are written out first so that their hash
                // is available to reference from this (the parent) directory
                TreeItem::Directory(dir) => LowLevelTreeItem {
                    kind: TreeItemKind::Directory,
                    name,
                    hash: dir.to_packfile_entries(pack_file)?,
                },
            });
        }

        // hash the tree we've just built and store it in the packfile
        let tree = PackFileEntry::Tree(tree);
        let hash = tree.hash()?;
        pack_file.insert(hash, tree);

        Ok(hash)
    }
}

/// An item within a `Directory`: either a file blob or another directory.
#[derive(Debug)]
pub enum TreeItem<'a> {
    /// Refers to a file by the hash of its blob
    Blob(HashOutput),
    /// A nested directory
    Directory(Directory<'a>),
}

/// The main way of interacting with the high level Packfile builder

///

/// Builds a whole packfile containing files, directories and commits - essentially

/// building out a full Git repository in memory.

#[derive(Default, Debug)]
pub struct GitRepository<'a> {
    file_entries: IndexMap<HashOutput, PackFileEntry<'a>>,
    tree: Directory<'a>,
    /// A map containing all the blobs and their corresponding hashes so they're

    /// not inserted more than once for any files in the whole tree with the same

    /// content.

    packfile_entries: IndexMap<HashOutput, PackFileEntry<'a>>,
    /// An in-progress `Tree` currently being built out, the tree refers to items

    /// in `packfile_entries` by hash.

    tree: Tree<'a>,
}

impl<'a> GitRepository<'a> {
    /// Inserts a file into the repository, writing a file to the path

    /// `path/to/my-file` would require a `path` of `["path", "to"]`

    /// and a `file` of `"my-file"`.

    pub fn insert<const N: usize>(
        &mut self,
        path: ArrayVec<&'a str, N>,
        file: &'a str,
        content: &'a [u8],
    ) -> Result<(), anyhow::Error> {
        // we'll initialise the directory to the root of the tree, this means
        // if a path isn't specified we'll just write it to the root directory
        let mut directory = &mut self.tree;

        // loops through the parts in the path, recursing through the `directory`
        // `Tree` until we get to our target directory, creating any missing
        // directories along the way.
        for part in path {
            let tree_item = directory
                .0
                .entry(part)
                .or_insert_with(|| Box::new(TreeItem::Directory(Directory::default())));
                .or_insert_with(|| Box::new(TreeItem::Tree(Tree::default())));

            if let TreeItem::Directory(d) = tree_item.as_mut() {
            if let TreeItem::Tree(d) = tree_item.as_mut() {
                directory = d;
            } else {
                anyhow::bail!("one of the path items was a blob");
                // TODO: how should we handle this? one of the items we tried to
                //  recurse into was a file rather than a directory.
                anyhow::bail!("attempted to use a file as a directory");
            }
        }

        // wrap the file in a Blob so it's ready for writing into the packfile, and also
        // allows us to grab the hash of the file for use in the tree
        let entry = PackFileEntry::Blob(content);

        // todo: handle overwriting error
        let file_hash = entry.hash()?;

        // todo: what should we do on overwrite?
        directory
            .0
            .insert(file, Box::new(TreeItem::Blob(file_hash)));
        self.file_entries.insert(file_hash, entry);

        self.packfile_entries.insert(file_hash, entry);

        Ok(())
    }

    /// Finalises this `GitRepository` by writing a commit to the `packfile_entries`,

    /// all the files currently in the `tree`, returning all the packfile entries

    /// and also the commit hash so it can be referred to by `ls-refs`.

    pub fn commit(
        &'a mut self,
        name: &'static str,
        email: &'static str,
        message: &'static str,
    ) -> Result<(HashOutput, Vec<PackFileEntry<'a>>), anyhow::Error> {
        let tree_hash = self.tree.to_packfile_entries(&mut self.file_entries)?;
        // gets the hash of the entire tree from the root
        let tree_hash = self.tree.to_packfile_entries(&mut self.packfile_entries)?;

        // build the commit using the given inputs
        let commit_user = CommitUserInfo {
            name,
            email,
@@ -105,10 +101,65 @@
            message,
        });

        // write the commit out to the packfile_entries
        let commit_hash = commit.hash()?;
        self.file_entries.insert(commit_hash, commit);
        self.packfile_entries.insert(commit_hash, commit);

        // TODO: make PackFileEntry copy and remove this clone
        Ok((commit_hash, self.file_entries.values().cloned().collect()))
        Ok((
            commit_hash,
            self.packfile_entries.values().cloned().collect(),
        ))
    }
}

/// An in-progress tree builder, mapping each entry's name to either the hash
/// of a file blob or a nested tree.
#[derive(Default, Debug)]
struct Tree<'a>(IndexMap<&'a str, Box<TreeItem<'a>>>);

impl<'a> Tree<'a> {
    /// Recursively writes the whole tree out to the given `pack_file`.
    ///
    /// The tree contains pointers to (hashes of) files contained within a
    /// directory, and pointers to other directories. Returns the hash of the
    /// tree entry written for `self` so the caller can reference it.
    ///
    /// # Errors
    ///
    /// Propagates any error from hashing a tree entry or from writing a
    /// nested tree.
    fn to_packfile_entries(
        &self,
        pack_file: &mut IndexMap<HashOutput, PackFileEntry<'a>>,
    ) -> Result<HashOutput, anyhow::Error> {
        // one low-level tree item per entry in this tree
        let mut tree = Vec::with_capacity(self.0.len());

        for (name, item) in &self.0 {
            tree.push(match item.as_ref() {
                // blobs already carry their hash, so they're referenced directly
                TreeItem::Blob(hash) => LowLevelTreeItem {
                    kind: TreeItemKind::File,
                    name,
                    hash: *hash,
                },
                TreeItem::Tree(tree) => LowLevelTreeItem {
                    kind: TreeItemKind::Directory,
                    name,
                    // we're essentially working through our tree from the bottom up,
                    // so we can grab the hash of each directory along the way and
                    // reference it from the parent directory
                    hash: tree.to_packfile_entries(pack_file)?,
                },
            });
        }

        // gets the hash of the tree we've just worked on, and
        // pushes it to the packfile
        let tree = PackFileEntry::Tree(tree);
        let hash = tree.hash()?;
        pack_file.insert(hash, tree);

        Ok(hash)
    }
}

/// An item within a `Tree`: either a file blob or another (nested) directory.
#[derive(Debug)]
enum TreeItem<'a> {
    /// Refers to a file by the hash of its blob
    Blob(HashOutput),
    /// Refers to a nested directory, held as its own `Tree`
    Tree(Tree<'a>),
}