From f93f6915900f1a87300364c079737e7640f4b2ef Mon Sep 17 00:00:00 2001 From: Jordan Doyle Date: Sat, 22 Oct 2022 15:21:45 +0100 Subject: [PATCH] Use packfile crate extracted from gitlab-cargo-shim --- Cargo.lock | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ chartered-git/Cargo.toml | 2 ++ chartered-git/src/main.rs | 39 +++++++++++++++------------------------ chartered-git/src/tree.rs | 26 +++++++++++++------------- chartered-git/src/command_handlers/fetch.rs | 16 +++++++--------- chartered-git/src/command_handlers/ls_refs.rs | 6 ++---- chartered-git/src/git/codec.rs | 138 -------------------------------------------------------------------------------- chartered-git/src/git/mod.rs | 74 -------------------------------------------------------------------------- chartered-git/src/git/packfile/high_level.rs | 183 -------------------------------------------------------------------------------- chartered-git/src/git/packfile/low_level.rs | 327 -------------------------------------------------------------------------------- chartered-git/src/git/packfile/mod.rs | 2 -- chartered-web/src/endpoints/web_api/organisations/info.rs | 2 +- 12 files changed, 119 insertions(+), 786 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3a2c800..22203ca 100644 --- a/Cargo.lock +++ a/Cargo.lock @@ -32,6 +32,17 @@ ] [[package]] +name = "ahash" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +dependencies = [ + "getrandom", + "once_cell", + "version_check", +] + +[[package]] name = "aho-corasick" version = "0.7.18" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -118,7 +129,7 @@ "http", "hyper", "ring", - "time 0.3.14", + "time 0.3.15", "tokio", "tower", "tracing", @@ -251,7 +262,7 @@ "percent-encoding", "regex", "ring", - "time 0.3.14", + "time 0.3.15", "tracing", ] @@ -378,7 +389,7 @@ "itoa", "num-integer", "ryu", - "time 0.3.14", + "time 0.3.15", ] [[package]] @@ -704,6 +715,7 @@ "indexmap", "indoc", "itoa", + "packfile", "serde", "serde_json", "sha-1", @@ -716,6 +728,7 @@ "tracing", "tracing-subscriber", "url", + "ustr", ] [[package]] @@ -973,7 +986,7 @@ "hashbrown", "lock_api", "once_cell", - "parking_lot_core", + "parking_lot_core 0.9.3", ] [[package]] @@ -1301,7 +1314,7 @@ "futures-timer", "no-std-compat", "nonzero_ext", - "parking_lot", + "parking_lot 0.12.1", "quanta", "rand", "smallvec", @@ -2021,6 +2034,35 @@ version = "6.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ff7415e9ae3fff1225851df9e0d9e4e5479f947619774677a63572e55e80eff" + +[[package]] +name = "packfile" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3eeb3adcc3e5e1d0bf59cd1a1bc0a663497c7f89a4dc1632d8568c15da66dbef" +dependencies = [ + "bytes", + "flate2", + "hex", + "indexmap", + "itoa", + "sha1", + "thiserror", + "time 0.3.15", + "tokio-util", + "tracing", +] + +[[package]] +name = "parking_lot" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" +dependencies = [ + "instant", + "lock_api", + "parking_lot_core 0.8.5", +] [[package]] name = "parking_lot" @@ -2029,7 +2071,21 @@ checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" dependencies = [ "lock_api", - "parking_lot_core", + "parking_lot_core 0.9.3", +] + +[[package]] +name = "parking_lot_core" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d76e8e1493bcac0d2766c42737f34458f1c8c50c0d23bcb24ea953affb273216" +dependencies = [ + "cfg-if", + "instant", + "libc", + "redox_syscall", + "smallvec", + "winapi", ] [[package]] @@ -2204,7 +2260,7 @@ checksum = "51de85fb3fb6524929c8a2eb85e6b6d363de4e8c48f9e2c2eac4944abc181c93" dependencies = [ "log", - "parking_lot", + "parking_lot 0.12.1", "scheduled-thread-pool", ] @@ -2436,7 +2492,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "977a7519bff143a44f842fd07e80ad1329295bd71686457f18e496736f4bf9bf" dependencies = [ - "parking_lot", + "parking_lot 0.12.1", ] [[package]] @@ -2815,9 +2871,9 @@ [[package]] name = "time" -version = "0.3.14" +version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c3f9a28b618c3a6b9251b6908e9c99e04b9e5c02e6581ccbb67d59c34ef7f9b" +checksum = "d634a985c4d4238ec39cacaed2e7ae552fbd3c476b552c1deac3021b7d7eaf0c" dependencies = [ "libc", "num_threads", @@ -2851,7 +2907,7 @@ "mio", "num_cpus", "once_cell", - "parking_lot", + "parking_lot 0.12.1", "pin-project-lite", "signal-hook-registry", "socket2", @@ -3116,6 +3172,18 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68b90931029ab9b034b300b797048cf23723400aa757e8a2bfb9d748102f9821" + +[[package]] +name = "ustr" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "371436099f2980de56dc385b615696d3eabbdac9649a72b85f9d75f68474fa9c" +dependencies = [ + "ahash", + "byteorder", + "lazy_static", + "parking_lot 0.11.2", +] [[package]] name = "uuid" diff --git a/chartered-git/Cargo.toml b/chartered-git/Cargo.toml index d7b2317..6f5aa6e 100644 --- a/chartered-git/Cargo.toml +++ a/chartered-git/Cargo.toml @@ -24,6 +24,7 @@ indexmap = "1" indoc = "1.0" itoa = "1" +packfile = "0.1" serde = { version = "1", features = ["derive"] } serde_json = "1" shlex = "1" @@ -36,6 +37,7 @@ tracing = "0.1" tracing-subscriber = "0.3" url = "2" +ustr = "0.9" [features] sqlite = ["chartered-db/sqlite"] diff --git a/chartered-git/src/main.rs b/chartered-git/src/main.rs index 35c70ff..0c8be2f 100644 --- a/chartered-git/src/main.rs +++ a/chartered-git/src/main.rs @@ -5,24 +5,17 @@ mod generators; mod tree; -#[allow(clippy::missing_errors_doc)] -pub mod git; - -use crate::{ - generators::CargoConfig, - git::{ - codec::{Encoder, GitCodec}, - packfile::high_level::GitRepository, - PktLine, - }, - tree::Tree, -}; +use crate::{generators::CargoConfig, tree::Tree}; -use arrayvec::ArrayVec; use bytes::BytesMut; use chartered_db::server_private_key::ServerPrivateKey; use clap::Parser; use futures::future::Future; +use packfile::{ + codec::{Encoder, GitCodec}, + high_level::GitRepository, + PktLine, +}; use std::{fmt::Write, path::PathBuf, pin::Pin, sync::Arc}; use thrussh::{ server::{self, Auth, Session}, @@ -85,7 +78,7 @@ let server = Server { db, - config: Arc::new(config), + config: Box::leak(Box::new(config)), }; info!("SSH server listening on {}", bind_address); @@ -98,7 +91,7 @@ #[derive(Clone)] struct Server { db: chartered_db::ConnectionPool, - config: Arc, + config: &'static config::Config, } impl server::Server for Server { @@ -113,7 +106,7 @@ Handler { ip, span, - config: self.config.clone(), + config: self.config, codec: GitCodec::default(), input_bytes: BytesMut::default(), output_bytes: BytesMut::default(), @@ -129,7 +122,7 @@ ip: Option, span: tracing::Span, codec: GitCodec, - config: Arc, + config: &'static config::Config, input_bytes: BytesMut, output_bytes: BytesMut, db: chartered_db::ConnectionPool, @@ -145,7 +138,7 @@ impl Handler { fn write(&mut self, packet: PktLine<'_>) -> Result<(), anyhow::Error> { - Encoder {}.encode(packet, &mut self.output_bytes) + Ok(Encoder.encode(packet, &mut self.output_bytes)?) } fn flush(&mut self, session: &mut Session, channel: ChannelId) { @@ -275,7 +268,7 @@ let config = CargoConfig::new(&self.config.web_base_uri, &authed.auth_key, org_name); let config = serde_json::to_vec(&config)?; - packfile.insert(ArrayVec::<_, 0>::new(), "config.json", &config)?; + packfile.insert(&[], "config.json", config.into())?; // build the tree of all the crates the user has access to, then write them // to the in-memory repository. @@ -284,15 +277,13 @@ let tree = Tree::build(self.db.clone(), authed.user.id, org_name.to_string()).await; tree.write_to_packfile(&mut packfile)?; - - let config = self.config.clone(); // finalises the git repository, creating a commit and fetching the finalised // packfile and commit hash to return in `ls-refs` calls. let (commit_hash, packfile_entries) = packfile.commit( - &config.committer.name, - &config.committer.email, - &config.committer.message, + &self.config.committer.name, + &self.config.committer.email, + &self.config.committer.message, )?; match frame.command.as_ref() { diff --git a/chartered-git/src/tree.rs b/chartered-git/src/tree.rs index ac3a223..53847f9 100644 --- a/chartered-git/src/tree.rs +++ a/chartered-git/src/tree.rs @@ -1,11 +1,14 @@ //! Generates the Git folder/file tree that's returned back to the user //! containing the config & crate manifests. Only contains crates that //! the user has access to. -use crate::git::packfile::high_level::GitRepository; +use std::{collections::BTreeMap, sync::Arc}; + use arrayvec::ArrayVec; +use bytes::Bytes; use chartered_db::crates::Crate; -use std::collections::BTreeMap; +use packfile::high_level::GitRepository; +use ustr::ustr; #[derive(serde::Serialize)] pub struct CrateFileEntry<'a> { @@ -16,7 +19,7 @@ } pub struct Tree { - crates: BTreeMap, + crates: BTreeMap, Bytes>, } impl Tree { @@ -50,20 +53,17 @@ } // insert the crate into `self.crates` - crates.insert(crate_def.name, file); + crates.insert(crate_def.name.into(), file.into()); } Self { crates } } /// Writes all the crate manifests from `self.crates` out to the given `GitRepository`. - pub fn write_to_packfile<'a>( - &'a self, - repo: &mut GitRepository<'a>, - ) -> Result<(), anyhow::Error> { + pub fn write_to_packfile(&self, repo: &mut GitRepository) -> Result<(), anyhow::Error> { for (name, content) in &self.crates { let crate_folder = get_crate_folder(name); - repo.insert(crate_folder, name, content.as_bytes())?; + repo.insert(&crate_folder, name.clone(), content.clone())?; } Ok(()) @@ -74,7 +74,7 @@ /// 1, 2 or 3 respectively as per the cargo spec. Anything else we'll build out a normal tree for /// using the frist four characters of the crate name, 2 for the first directory and the other 2 /// for the second. -fn get_crate_folder(crate_name: &str) -> ArrayVec<&str, 2> { +fn get_crate_folder(crate_name: &str) -> ArrayVec<&'static str, 2> { let mut folders = ArrayVec::new(); match crate_name.len() { @@ -83,11 +83,11 @@ 2 => folders.push("2"), 3 => { folders.push("3"); - folders.push(&crate_name[..1]); + folders.push(ustr(&crate_name[..1]).as_str()); } _ => { - folders.push(&crate_name[..2]); - folders.push(&crate_name[2..4]); + folders.push(ustr(&crate_name[..2]).as_str()); + folders.push(ustr(&crate_name[2..4]).as_str()); } } diff --git a/chartered-git/src/command_handlers/fetch.rs b/chartered-git/src/command_handlers/fetch.rs index aa5cfec..60bde07 100644 --- a/chartered-git/src/command_handlers/fetch.rs +++ a/chartered-git/src/command_handlers/fetch.rs @@ -1,20 +1,18 @@ use bytes::Bytes; +use packfile::{ + low_level::{PackFile, PackFileEntry}, + PktLine, +}; use thrussh::{server::Session, ChannelId}; -use crate::{ - git::{ - packfile::low_level::{PackFile, PackFileEntry}, - PktLine, - }, - Handler, -}; +use crate::Handler; pub(crate) fn handle( handle: &mut Handler, session: &mut Session, channel: ChannelId, metadata: Vec, - packfile_entries: Vec>, + packfile_entries: Vec, ) -> Result<(), anyhow::Error> { // the client sending us `done` in the metadata means they know there's no negotiation // required for which commits we need to send, they just want us to send whatever we @@ -37,7 +35,7 @@ handle.flush(session, channel); // send the complete packfile - let packfile = PackFile::new(packfile_entries); + let packfile = PackFile::new(&packfile_entries); handle.write(PktLine::SidebandData(packfile))?; handle.write(PktLine::Flush)?; handle.flush(session, channel); diff --git a/chartered-git/src/command_handlers/ls_refs.rs b/chartered-git/src/command_handlers/ls_refs.rs index 34f757b..2ab79f8 100644 --- a/chartered-git/src/command_handlers/ls_refs.rs +++ a/chartered-git/src/command_handlers/ls_refs.rs @@ -5,12 +5,10 @@ //! [lsr]: https://git-scm.com/docs/protocol-v2/2.19.0#_ls_refs use bytes::Bytes; +use packfile::{low_level::HashOutput, PktLine}; use thrussh::{server::Session, ChannelId}; -use crate::{ - git::{packfile::low_level::HashOutput, PktLine}, - Handler, -}; +use crate::Handler; pub(crate) fn handle( handle: &mut Handler, diff --git a/chartered-git/src/git/codec.rs b/chartered-git/src/git/codec.rs deleted file mode 100644 index 9f5cda7..0000000 100644 --- a/chartered-git/src/git/codec.rs +++ /dev/null @@ -1,138 +1,0 @@ -#![allow(clippy::module_name_repetitions)] - -use bytes::{Buf, Bytes, BytesMut}; -use tokio_util::codec; - -use super::PktLine; - -pub struct Encoder { - // buf: BytesMut, -} - -impl codec::Encoder> for Encoder { - type Error = anyhow::Error; - - fn encode(&mut self, item: PktLine<'_>, dst: &mut BytesMut) -> Result<(), Self::Error> { - item.encode_to(dst)?; - Ok(()) - } -} - -#[derive(Debug, Default, PartialEq, Eq)] -pub struct GitCommand { - pub command: Bytes, - pub metadata: Vec, -} - -#[derive(Default)] -pub struct GitCodec { - command: GitCommand, -} - -impl codec::Decoder for GitCodec { - type Item = GitCommand; - type Error = anyhow::Error; - - fn decode(&mut self, src: &mut bytes::BytesMut) -> Result, Self::Error> { - loop { - if src.len() < 4 { - return Ok(None); - } - - let mut length_bytes = [0_u8; 4]; - length_bytes.copy_from_slice(&src[..4]); - let length = u16::from_str_radix(std::str::from_utf8(&length_bytes)?, 16)? as usize; - - if length == 0 { - // flush - src.advance(4); - return Ok(Some(std::mem::take(&mut self.command))); - } else if length == 1 || length == 2 { - src.advance(4); - continue; - } else if !(4..=65520).contains(&length) { - return Err( - std::io::Error::new(std::io::ErrorKind::InvalidData, "protocol abuse").into(), - ); - } - - // not enough bytes in the buffer yet, ask for more - if src.len() < length { - src.reserve(length - src.len()); - return Ok(None); - } - - // length is inclusive of the 4 bytes that makes up itself - let mut data = src.split_to(length).freeze(); - data.advance(4); - - // strip newlines for conformity - if data.ends_with(b"\n") { - data.truncate(data.len() - 1); - } - - if self.command.command.is_empty() { - self.command.command = data; - } else { - self.command.metadata.push(data); - } - } - } -} - -#[cfg(test)] -mod test { - use bytes::{Bytes, BytesMut}; - use std::fmt::Write; - use tokio_util::codec::Decoder; - - #[test] - fn decode() { - let mut codec = super::GitCodec::default(); - - let mut bytes = BytesMut::new(); - - bytes.write_str("0015agent=git/2.32.0").unwrap(); - let res = codec.decode(&mut bytes).unwrap(); - assert_eq!(res, None); - - bytes.write_char('\n').unwrap(); - let res = codec.decode(&mut bytes).unwrap(); - assert_eq!(res, None); - - bytes.write_str("0000").unwrap(); - let res = codec.decode(&mut bytes).unwrap(); - assert_eq!( - res, - Some(super::GitCommand { - command: Bytes::from_static(b"agent=git/2.32.0"), - metadata: vec![], - }) - ); - - bytes.write_str("0000").unwrap(); - let res = codec.decode(&mut bytes).unwrap(); - assert_eq!( - res, - Some(super::GitCommand { - command: Bytes::new(), - metadata: vec![], - }) - ); - - bytes.write_str("0002").unwrap(); - bytes.write_str("0005a").unwrap(); - bytes.write_str("0001").unwrap(); - bytes.write_str("0005b").unwrap(); - bytes.write_str("0000").unwrap(); - - let res = codec.decode(&mut bytes).unwrap(); - assert_eq!( - res, - Some(super::GitCommand { - command: Bytes::from_static(b"a"), - metadata: vec![Bytes::from_static(b"b")], - }) - ); - } -} diff --git a/chartered-git/src/git/mod.rs b/chartered-git/src/git/mod.rs deleted file mode 100644 index 888bd7d..0000000 100644 --- a/chartered-git/src/git/mod.rs +++ /dev/null @@ -1,74 +1,0 @@ -pub mod codec; -pub mod packfile; - -use bytes::{BufMut, BytesMut}; -use std::fmt::Write; - -use self::packfile::low_level::PackFile; - -/// Every packet sent to the client from us should be a `PktLine`. -pub enum PktLine<'a> { - Data(&'a [u8]), - /// Similar to a data packet, but used during packfile sending to indicate this - /// packet is a block of data by appending a byte containing the u8 `1`. - SidebandData(PackFile<'a>), - /// Similar to a data packet, but used during packfile sending to indicate this - /// packet is a status message by appending a byte containing the u8 `2`. - SidebandMsg(&'a [u8]), - Flush, - Delimiter, - ResponseEnd, -} - -impl PktLine<'_> { - pub fn encode_to(&self, buf: &mut BytesMut) -> Result<(), anyhow::Error> { - match self { - Self::Data(data) => { - write!(buf, "{:04x}", data.len() + 4)?; - buf.extend_from_slice(data); - } - Self::SidebandData(packfile) => { - // split the buf off so the cost of counting the bytes to put in the - // data line prefix is just the cost of `unsplit` (an atomic decrement) - let mut data_buf = buf.split_off(buf.len()); - - data_buf.put_u8(1); // sideband, 1 = data - packfile.encode_to(&mut data_buf)?; - - // write into the buf not the data buf so it's at the start of the msg - write!(buf, "{:04x}", data_buf.len() + 4)?; - buf.unsplit(data_buf); - } - Self::SidebandMsg(msg) => { - write!(buf, "{:04x}", msg.len() + 4 + 1)?; - buf.put_u8(2); // sideband, 2 = msg - buf.extend_from_slice(msg); - } - Self::Flush => buf.extend_from_slice(b"0000"), - Self::Delimiter => buf.extend_from_slice(b"0001"), - Self::ResponseEnd => buf.extend_from_slice(b"0002"), - } - - Ok(()) - } -} - -impl<'a> From<&'a str> for PktLine<'a> { - fn from(val: &'a str) -> Self { - PktLine::Data(val.as_bytes()) - } -} - -#[cfg(test)] -mod test { - use bytes::BytesMut; - - #[test] - fn test_pkt_line() { - let mut buffer = BytesMut::new(); - super::PktLine::Data(b"agent=git/2.32.0\n") - .encode_to(&mut buffer) - .unwrap(); - assert_eq!(buffer.as_ref(), b"0015agent=git/2.32.0\n"); - } -} diff --git a/chartered-git/src/git/packfile/high_level.rs b/chartered-git/src/git/packfile/high_level.rs deleted file mode 100644 index 0b0d984..0000000 100644 --- a/chartered-git/src/git/packfile/high_level.rs +++ /dev/null @@ -1,183 +1,0 @@ -//! A high-level interface for building packfiles. Wraps the `low_level` module -//! making a much easier interface for writing files and generating the root -//! commit. -//! -//! The output packfile will only have a single commit in it, which is fine -//! for our purposes because `cargo` will `git pull --force` from our Git -//! server, allowing us to ignore any history the client may have. - -use std::borrow::Cow; - -use arrayvec::ArrayVec; -use indexmap::IndexMap; - -use super::low_level::{ - Commit, CommitUserInfo, HashOutput, PackFileEntry, TreeItem as LowLevelTreeItem, TreeItemKind, -}; - -/// The main way of interacting with the high level Packfile builder -/// -/// Builds a whole packfile containing files, directories and commits - essentially -/// building out a full Git repository in memory. -#[derive(Debug)] -pub struct GitRepository<'a> { - /// A map containing all the blobs and their corresponding hashes so they're - /// not inserted more than once for any files in the whole tree with the same - /// content. - packfile_entries: IndexMap>, - /// An in-progress `Tree` currently being built out, the tree refers to items - /// in `file_entries` by hash. - tree: Tree<'a>, -} - -impl Default for GitRepository<'_> { - fn default() -> Self { - Self { - packfile_entries: IndexMap::new(), - tree: Tree::default(), - } - } -} - -impl<'a> GitRepository<'a> { - /// Inserts a file into the repository, writing a file to the path - /// `path/to/my-file` would require a `path` of `["path", "to"]` - /// and a `file` of `"my-file"`. - pub fn insert( - &mut self, - path: ArrayVec<&'a str, N>, - file: &'a str, - content: &'a [u8], - ) -> Result<(), anyhow::Error> { - // we'll initialise the directory to the root of the tree, this means - // if a path isn't specified we'll just write it to the root directory - let mut directory = &mut self.tree; - - // loops through the parts in the path, recursing through the `directory` - // `Tree` until we get to our target directory, creating any missing - // directories along the way. - for part in path { - let tree_item = directory - .0 - .entry(part) - .or_insert_with(|| Box::new(TreeItem::Tree(Tree::default()))); - - if let TreeItem::Tree(d) = tree_item.as_mut() { - directory = d; - } else { - // TODO: how should we handle this? one of items we tried to - // recurse into was a directory. - anyhow::bail!("attempted to use a file as a directory"); - } - } - - // wrap the file in a Blob so it's ready for writing into the packfile, and also - // allows us to grab the hash of the file for use in the tree - let entry = PackFileEntry::Blob(content); - let file_hash = entry.hash()?; - - // todo: what should we do on overwrite? - directory - .0 - .insert(file, Box::new(TreeItem::Blob(file_hash))); - - self.packfile_entries.insert(file_hash, entry); - - Ok(()) - } - - /// Finalises this `GitRepository` by writing a commit to the `packfile_entries`, - /// all the files currently in the `tree`, returning all the packfile entries - /// and also the commit hash so it can be referred to by `ls-ref`s. - pub fn commit( - &'a mut self, - name: &'a str, - email: &'a str, - message: &'a str, - ) -> Result<(HashOutput, Vec>), anyhow::Error> { - // gets the hash of the entire tree from the root - let tree_hash = self.tree.to_packfile_entries(&mut self.packfile_entries)?; - - // build the commit using the given inputs - let commit_user = CommitUserInfo { - name, - email, - time: chrono::Utc::now(), - }; - - let commit = PackFileEntry::Commit(Commit { - tree: tree_hash, - author: commit_user, - committer: commit_user, - message, - }); - - // write the commit out to the packfile_entries - let commit_hash = commit.hash()?; - self.packfile_entries.insert(commit_hash, commit); - - // TODO: make PackFileEntry copy and remove this clone - Ok(( - commit_hash, - self.packfile_entries.values().cloned().collect(), - )) - } -} - -/// An in-progress tree builder, containing file hashes along with their names or nested trees -#[derive(Default, Debug)] -struct Tree<'a>(IndexMap<&'a str, Box>>); - -impl<'a> Tree<'a> { - /// Recursively writes the the whole tree out to the given `pack_file`, - /// the tree contains pointers to (hashes of) files contained within a - /// directory, and pointers to other directories. - fn to_packfile_entries( - &self, - pack_file: &mut IndexMap>, - ) -> Result { - let mut tree = Vec::with_capacity(self.0.len()); - - for (name, item) in &self.0 { - tree.push(match item.as_ref() { - TreeItem::Blob(hash) => LowLevelTreeItem { - kind: TreeItemKind::File, - name, - sort_name: Cow::Borrowed(name), - hash: *hash, - }, - TreeItem::Tree(tree) => LowLevelTreeItem { - kind: TreeItemKind::Directory, - name, - sort_name: Cow::Owned(format!("{}/", name)), - // we're essentially working through our tree from the bottom up, - // so we can grab the hash of each directory along the way and - // reference it from the parent directory - hash: tree.to_packfile_entries(pack_file)?, - }, - }); - } - - // we need to sort our tree alphabetically, otherwise Git will silently - // stop parsing the rest of the tree once it comes across an unsorted - // tree entry. - tree.sort_unstable_by(|a, b| a.sort_name.cmp(&b.sort_name)); - - // gets the hash of the tree we've just worked on, and - // pushes it to the packfile - let tree = PackFileEntry::Tree(tree); - let hash = tree.hash()?; - pack_file.insert(hash, tree); - - Ok(hash) - } -} - -/// An item within a `Tree`, this could be a file blob or another directory. -#[derive(Debug)] -enum TreeItem<'a> { - /// Refers to a file by hash - Blob(HashOutput), - /// Refers to a nested directory - Tree(Tree<'a>), -} diff --git a/chartered-git/src/git/packfile/low_level.rs b/chartered-git/src/git/packfile/low_level.rs deleted file mode 100644 index d1cb6f8..0000000 100644 --- a/chartered-git/src/git/packfile/low_level.rs +++ /dev/null @@ -1,327 +1,0 @@ -use bytes::{BufMut, BytesMut}; -use flate2::{write::ZlibEncoder, Compression}; -use sha1::{ - digest::{generic_array::GenericArray, OutputSizeUser}, - Digest, Sha1, -}; -use std::{borrow::Cow, convert::TryInto, fmt::Write, io::Write as IoWrite}; - -pub type HashOutput = GenericArray::OutputSize>; // [u8; 20], but sha-1 returns a GenericArray - -// The packfile itself is a very simple format. There is a header, a -// series of packed objects (each with it's own header and body) and -// then a checksum trailer. The first four bytes is the string 'PACK', -// which is sort of used to make sure you're getting the start of the -// packfile correctly. This is followed by a 4-byte packfile version -// number and then a 4-byte number of entries in that file. -pub struct PackFile<'a> { - entries: Vec>, -} - -impl<'a> PackFile<'a> { - #[must_use] - pub fn new(entries: Vec>) -> Self { - Self { entries } - } - - #[must_use] - pub const fn header_size() -> usize { - "PACK".len() + std::mem::size_of::() + std::mem::size_of::() - } - - #[must_use] - pub const fn footer_size() -> usize { - 20 - } - - pub fn encode_to(&self, original_buf: &mut BytesMut) -> Result<(), anyhow::Error> { - let mut buf = original_buf.split_off(original_buf.len()); - buf.reserve(Self::header_size() + Self::footer_size()); - - // header - buf.extend_from_slice(b"PACK"); // magic header - buf.put_u32(2); // version - buf.put_u32(self.entries.len().try_into()?); // number of entries in the packfile - - // body - for entry in &self.entries { - entry.encode_to(&mut buf)?; - } - - // footer - buf.extend_from_slice(&sha1::Sha1::digest(&buf[..])); - - original_buf.unsplit(buf); - - Ok(()) - } -} - -#[derive(Debug, Clone, Copy)] -pub struct Commit<'a> { - pub tree: HashOutput, - // pub parent: [u8; 20], - pub author: CommitUserInfo<'a>, - pub committer: CommitUserInfo<'a>, - // pub gpgsig: &str, - pub message: &'a str, -} - -impl Commit<'_> { - fn encode_to(&self, out: &mut BytesMut) -> Result<(), anyhow::Error> { - let mut tree_hex = [0_u8; 20 * 2]; - hex::encode_to_slice(self.tree, &mut tree_hex)?; - - out.write_str("tree ")?; - out.extend_from_slice(&tree_hex); - out.write_char('\n')?; - - writeln!(out, "author {}", self.author.encode())?; - writeln!(out, "committer {}", self.committer.encode())?; - write!(out, "\n{}", self.message)?; - - Ok(()) - } - - #[must_use] - pub fn size(&self) -> usize { - let mut len = 0; - len += "tree ".len() + (self.tree.len() * 2) + "\n".len(); - len += "author ".len() + self.author.size() + "\n".len(); - len += "committer ".len() + self.committer.size() + "\n".len(); - len += "\n".len() + self.message.len(); - len - } -} - -#[derive(Copy, Clone, Debug)] -pub struct CommitUserInfo<'a> { - pub name: &'a str, - pub email: &'a str, - pub time: chrono::DateTime, -} - -impl CommitUserInfo<'_> { - fn encode(&self) -> String { - // TODO: remove `format!`, `format_args!`? - format!( - "{} <{}> {} +0000", - self.name, - self.email, - self.time.timestamp() - ) - } - - #[must_use] - pub fn size(&self) -> usize { - let timestamp_len = itoa::Buffer::new().format(self.time.timestamp()).len(); - - self.name.len() - + "< ".len() - + self.email.len() - + "> ".len() - + timestamp_len - + " +0000".len() - } -} - -#[derive(Debug, Copy, Clone)] -pub enum TreeItemKind { - File, - Directory, -} - -impl TreeItemKind { - #[must_use] - pub const fn mode(&self) -> &'static str { - match self { - Self::File => "100644", - Self::Directory => "40000", - } - } -} - -#[derive(Debug, Clone)] -pub struct TreeItem<'a> { - pub kind: TreeItemKind, - pub name: &'a str, - pub sort_name: Cow<'a, str>, - pub hash: HashOutput, -} - -// `[mode] [name]\0[hash]` -impl TreeItem<'_> { - fn encode_to(&self, out: &mut BytesMut) -> Result<(), anyhow::Error> { - out.write_str(self.kind.mode())?; - write!(out, " {}\0", self.name)?; - out.extend_from_slice(&self.hash); - Ok(()) - } - - #[must_use] - pub fn size(&self) -> usize { - self.kind.mode().len() + " ".len() + self.name.len() + "\0".len() + self.hash.len() - } -} - -#[derive(Debug, Clone)] // could be copy but Vec> -pub enum PackFileEntry<'a> { - // jordan@Jordans-MacBook-Pro-2 0d % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - f5/473259d9674ed66239766a013f96a3550374e3 | gzip -dc - // commit 1068tree 0d586b48bc42e8591773d3d8a7223551c39d453c - // parent c2a862612a14346ae95234f26efae1ee69b5b7a9 - // author Jordan Doyle 1630244577 +0100 - // committer Jordan Doyle 1630244577 +0100 - // gpgsig -----BEGIN PGP SIGNATURE----- - // - // iQIzBAABCAAdFiEEMn1zof7yzaURQBGDHqa65vZtxJoFAmErjuEACgkQHqa65vZt - // xJqhvhAAieKXnGRjT926qzozcvarC8D3TlA+Z1wVXueTAWqfusNIP0zCun/crOb2 - // tOULO+/DXVBmwu5eInAf+t/wvlnIsrzJonhVr1ZT0f0vDX6fs2vflWg4UCVEuTsZ - // tg+aTjcibwnmViIM9XVOzhU8Au2OIqMQLyQOMWSt8NhY0W2WhBCdQvhktvK1V8W6 - // omPs04SrR39xWBDQaxsXYxq/1ZKUYXDwudvEfv14EvrxG1vWumpUVJd7Ib5w4gXX - // fYa95DxYL720ZaiWPIYEG8FMBzSOpo6lUzY9g2/o/wKwSQZJNvpaMGCuouy8Fb+E - // UaqC0XPxqpKG9duXPgCldUr+P7++48CF5zc358RBGz5OCNeTREsIQQo5PUO1k+wO - // FnGOQTT8vvNOrxBgb3QgKu67RVwWDc6JnQCNpUrhUJrXMDWnYLBqo4Y+CdKGSQ4G - // hW8V/hVTOlJZNi8bbU4v53cxh4nXiMM6NKUblUKs65ar3/2dkojwunz7r7GVZ6mG - // QUpr9+ybG61XDqd1ad1A/B/i3WdWixTmJS3K/4uXjFjFX1f3RAk7O0gHc9I8HYOE - // Vd8UsHzLOWAUHeaqbsd6xx3GCXF4D5D++kh9OY9Ov7CXlqbYbHd6Atg+PQ7VnqNf - // bDqWN0Q2qcKX3k4ggtucmkkA6gP+K3+F5ANQj3AsGMQeddowC0Y= - // =fXoH - // -----END PGP SIGNATURE----- - // - // test - Commit(Commit<'a>), - // jordan@Jordans-MacBook-Pro-2 0d % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - 0d/586b48bc42e8591773d3d8a7223551c39d453c | gzip -dc - // tree 20940000 .cargo���CYy��Ve�������100644 .gitignore�K��_ow�]����4�n�ݺ100644 Cargo.lock�7�3-�?/�� - // kt��c0C�100644 Cargo.toml�6�&(��]\8@�SHA�]f40000 src0QW��ƅ���b[�!�S&N�100644 test�G2Y�gN�b9vj?��Ut� - Tree(Vec>), - // jordan@Jordans-MacBook-Pro-2 objects % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - f5/473259d9674ed66239766a013f96a3550374e3| gzip -dc - // blob 23try and find me in .git - Blob(&'a [u8]), - // Tag, - // OfsDelta, - // RefDelta, -} - -impl PackFileEntry<'_> { - fn write_header(&self, buf: &mut BytesMut) { - let mut size = self.uncompressed_size(); - - // write header - { - let mut val = 0b1000_0000_u8; - - val |= match self { - Self::Commit(_) => 0b001, - Self::Tree(_) => 0b010, - Self::Blob(_) => 0b011, - // Self::Tag => 0b100, - // Self::OfsDelta => 0b110, - // Self::RefDelta => 0b111, - } << 4; - - // pack the 4 LSBs of the size into the header - #[allow(clippy::cast_possible_truncation)] // value is masked - { - val |= (size & 0b1111) as u8; - } - size >>= 4; - - buf.put_u8(val); - } - - // write size bytes - while size != 0 { - // read 7 LSBs from the `size` and push them off for the next iteration - #[allow(clippy::cast_possible_truncation)] // value is masked - let mut val = (size & 0b111_1111) as u8; - size >>= 7; - - if size != 0 { - // MSB set to 1 implies there's more size bytes to come, otherwise - // the data starts after this byte - val |= 1 << 7; - } - - buf.put_u8(val); - } - } - - pub fn encode_to(&self, original_out: &mut BytesMut) -> Result<(), anyhow::Error> { - self.write_header(original_out); // TODO: this needs space reserving for it - - // todo is there a way to stream through the zlibencoder so we don't have to - // have this intermediate bytesmut and vec? - let mut out = BytesMut::new(); - - let size = self.uncompressed_size(); - original_out.reserve(size); - // the data ends up getting compressed but we'll need at least this many bytes - out.reserve(size); - - match self { - Self::Commit(commit) => { - commit.encode_to(&mut out)?; - } - Self::Tree(items) => { - for item in items { - item.encode_to(&mut out)?; - } - } - Self::Blob(data) => { - out.extend_from_slice(data); - } - } - - debug_assert_eq!(out.len(), size); - - let mut e = ZlibEncoder::new(Vec::new(), Compression::default()); - e.write_all(&out)?; - let compressed_data = e.finish()?; - - original_out.extend_from_slice(&compressed_data); - - Ok(()) - } - - #[must_use] - pub fn uncompressed_size(&self) -> usize { - match self { - Self::Commit(commit) => commit.size(), - Self::Tree(items) => items.iter().map(TreeItem::size).sum(), - Self::Blob(data) => data.len(), - } - } - - // wen const generics for RustCrypto? :-( - pub fn hash(&self) -> Result { - let size = self.uncompressed_size(); - - let file_prefix = match self { - Self::Commit(_) => "commit", - Self::Tree(_) => "tree", - Self::Blob(_) => "blob", - }; - - let size_len = itoa::Buffer::new().format(size).len(); - - let mut out = - BytesMut::with_capacity(file_prefix.len() + " ".len() + size_len + "\n".len() + size); - - write!(out, "{} {}\0", file_prefix, size)?; - match self { - Self::Commit(commit) => { - commit.encode_to(&mut out)?; - } - Self::Tree(items) => { - for item in items { - item.encode_to(&mut out)?; - } - } - Self::Blob(blob) => { - out.extend_from_slice(blob); - } - } - - Ok(sha1::Sha1::digest(&out)) - } -} diff --git a/chartered-git/src/git/packfile/mod.rs b/chartered-git/src/git/packfile/mod.rs deleted file mode 100644 index a70e0a8..0000000 100644 --- a/chartered-git/src/git/packfile/mod.rs +++ /dev/null @@ -1,2 +1,0 @@ -pub mod high_level; -pub mod low_level; diff --git a/chartered-web/src/endpoints/web_api/organisations/info.rs b/chartered-web/src/endpoints/web_api/organisations/info.rs index 003e68c..e0e968c 100644 --- a/chartered-web/src/endpoints/web_api/organisations/info.rs +++ a/chartered-web/src/endpoints/web_api/organisations/info.rs @@ -48,7 +48,7 @@ uuid: user.uuid.0, display_name: user.display_name().to_string(), picture_url: user.picture_url, - permissions: can_manage_users.then(|| perms), + permissions: can_manage_users.then_some(perms), }) .collect(), public: organisation.organisation().public, -- rgit 0.1.3