From 63abc7f6dac6236b93b97c54cdab2f50d9e57b76 Mon Sep 17 00:00:00 2001 From: Jordan Doyle Date: Thu, 07 Oct 2021 23:18:27 +0100 Subject: [PATCH] Introduce high-level packfile generator --- Cargo.lock | 2 ++ chartered-db/Cargo.toml | 9 +++++++-- chartered-git/Cargo.toml | 2 ++ chartered-db/src/lib.rs | 15 ++++++++++++++- chartered-fs/src/lib.rs | 2 +- chartered-git/src/generators.rs | 41 +++++++++++++++++++++++++++++++++++++++++ chartered-git/src/main.rs | 163 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------- chartered-git/src/git/codec.rs | 2 +- chartered-git/src/git/mod.rs | 2 +- chartered-git/src/git/packfile.rs | 326 -------------------------------------------------------------------------------- chartered-git/src/git/packfile/high_level.rs | 122 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ chartered-git/src/git/packfile/low_level.rs | 326 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ chartered-git/src/git/packfile/mod.rs | 2 ++ 13 files changed, 575 insertions(+), 439 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3a4329c..15f876d 100644 --- a/Cargo.lock +++ a/Cargo.lock @@ -303,6 +303,7 @@ "format-bytes", "futures", "hex", + "indexmap", "indoc", "itoa", "log", @@ -314,6 +315,7 @@ "thrussh-keys", "tokio", "tokio-util", + "url", ] [[package]] diff --git a/chartered-db/Cargo.toml b/chartered-db/Cargo.toml index efe6d99..d4583a6 100644 --- a/chartered-db/Cargo.toml +++ a/chartered-db/Cargo.toml @@ -12,14 +12,14 @@ base64 = "0.13" bitflags = "1" chrono = "0.4" -diesel = { version = "1", features = ["sqlite", "postgres", "r2d2", "chrono"] } +diesel = { version = "1", features = ["r2d2", "chrono"] } diesel_logger = "0.1" diesel_migrations = "1.4" displaydoc = "0.2" hex = "0.4" http = "0.2" itertools = "0.10" -libsqlite3-sys = { version = "*", features = ["bundled"] } # https://github.com/rusqlite/rusqlite/issues/914 +libsqlite3-sys = { version = "*", features = ["bundled"], optional = true } # https://github.com/rusqlite/rusqlite/issues/914 option_set = "0.1" rand = "0.8" reqwest = "0.11" @@ -30,3 +30,8 @@ uuid = "0.8" dotenv = "0.15" thrussh-keys = "0.21" + +[features] +sqlite = ["libsqlite3-sys", "diesel/sqlite"] +postgres = ["diesel/postgres"] + diff --git a/chartered-git/Cargo.toml b/chartered-git/Cargo.toml index 3f3c3bb..2258169 100644 --- a/chartered-git/Cargo.toml +++ a/chartered-git/Cargo.toml @@ -20,6 +20,7 @@ format-bytes = "0.2" futures = "0.3" hex = "0.4" +indexmap = "1" indoc = "1.0" itoa = "0.4" log = "0.4" @@ -31,3 +32,4 @@ thrussh-keys = "0.21" tokio = { version = "1", features = ["full"] } tokio-util = { version = "0.6", features = ["codec"] } +url = "2" diff --git a/chartered-db/src/lib.rs b/chartered-db/src/lib.rs index 22cf0a1..02fe455 100644 --- a/chartered-db/src/lib.rs +++ a/chartered-db/src/lib.rs @@ -53,7 +53,20 @@ use std::sync::Arc; use thiserror::Error; -pub type ConnectionPool = Arc>>>; +#[cfg(feature = "sqlite")] +pub type Connection = diesel::SqliteConnection; + +#[cfg(feature = "postgres")] +pub type Connection = diesel::PostgresConnection; + +#[cfg(not(any(feature = "sqlite", feature = "postgres")))] +compile_error!( + "At least one database backend must be enabled using `--features [sqlite|postgres]`" +); +#[cfg(not(any(feature = "sqlite", feature = "postgres")))] +pub type Connection = unimplemented!(); + +pub type ConnectionPool = Arc>>>; pub type Result = std::result::Result; embed_migrations!(); diff --git a/chartered-fs/src/lib.rs b/chartered-fs/src/lib.rs index 309e238..cc6d99a 100644 --- a/chartered-fs/src/lib.rs +++ a/chartered-fs/src/lib.rs @@ -152,7 +152,7 @@ #[allow(clippy::pedantic)] async fn parse_filesystem() { // panic!("{:#?}", FS::from_str("s3://10.0.64.101:9000/my-bucket/my-location")); - FS::from_str("file:///tmp/chartered"); + // FS::from_str("file:///tmp/chartered"); } #[tokio::test] diff --git a/chartered-git/src/generators.rs b/chartered-git/src/generators.rs new file mode 100644 index 0000000..cc987ba 100644 --- /dev/null +++ a/chartered-git/src/generators.rs @@ -1,0 +1,41 @@ +use serde::Serialize; +use url::Url; + +#[derive(Serialize, Debug, Clone)] +pub struct CargoConfig { + pub dl: String, + pub api: String, +} + +impl CargoConfig { + pub fn new(base: url::Url, api_key: &str, organisation: &str) -> Self { + let base = format!("{}a/{}/o/{}", base, api_key, organisation); + + Self { + dl: format!("{}/api/v1/crates", base), + api: base, + } + } +} + +#[cfg(test)] +mod test { + use super::CargoConfig; + + #[test] + fn test_cargo_config() { + let conf = CargoConfig::new( + url::Url::parse("https://127.0.0.1:1234").unwrap(), + "my-api-key", + "my-organisation", + ); + assert_eq!( + conf.dl.to_string(), + "https://127.0.0.1:1234/a/my-api-key/o/my-organisation/api/v1/crates" + ); + assert_eq!( + conf.api.to_string(), + "https://127.0.0.1:1234/a/my-api-key/o/my-organisation" + ); + } +} diff --git a/chartered-git/src/main.rs b/chartered-git/src/main.rs index 982c545..e982f05 100644 --- a/chartered-git/src/main.rs +++ a/chartered-git/src/main.rs @@ -1,11 +1,18 @@ #![deny(clippy::pedantic)] +mod generators; #[allow(clippy::missing_errors_doc)] pub mod git; -use crate::git::{ - codec::{Encoder, GitCodec}, - packfile::{Commit, CommitUserInfo, PackFileEntry, TreeItem, TreeItemKind}, - PktLine, +use crate::{ + generators::CargoConfig, + git::{ + codec::{Encoder, GitCodec}, + packfile::{ + high_level::GitRepository, + low_level::{Commit, CommitUserInfo, PackFile, PackFileEntry, TreeItem, TreeItemKind}, + }, + PktLine, + }, }; use bytes::BytesMut; @@ -20,6 +27,7 @@ }; use thrussh_keys::{key, PublicKeyBase64}; use tokio_util::codec::{Decoder, Encoder as TokioEncoder}; +use url::Url; #[tokio::main] #[allow(clippy::semicolon_if_nothing_returned)] // broken clippy lint @@ -56,8 +64,7 @@ input_bytes: BytesMut::default(), output_bytes: BytesMut::default(), db: self.db.clone(), - user: None, - user_ssh_key: None, + authed: None, organisation: None, } } @@ -69,11 +76,15 @@ input_bytes: BytesMut, output_bytes: BytesMut, db: chartered_db::ConnectionPool, - user: Option, - user_ssh_key: Option>, organisation: Option, + authed: Option, } +struct Authed { + user: chartered_db::users::User, + auth_key: String, +} + impl Handler { fn write(&mut self, packet: PktLine<'_>) -> Result<(), anyhow::Error> { Encoder {}.encode(packet, &mut self.output_bytes) @@ -86,9 +97,9 @@ ); } - fn user(&self) -> Result<&chartered_db::users::User, anyhow::Error> { - match self.user { - Some(ref user) => Ok(user), + fn authed(&self) -> Result<&Authed, anyhow::Error> { + match self.authed { + Some(ref authed) => Ok(authed), None => anyhow::bail!("user not set after auth"), } } @@ -97,13 +108,6 @@ match self.organisation { Some(ref org) => Ok(org.as_str()), None => anyhow::bail!("org not set after auth"), - } - } - - fn user_ssh_key(&self) -> Result<&Arc, anyhow::Error> { - match self.user_ssh_key { - Some(ref ssh_key) => Ok(ssh_key), - None => anyhow::bail!("user not set after auth"), } } } @@ -131,7 +135,7 @@ fn shell_request(mut self, channel: ChannelId, mut session: Session) -> Self::FutureUnit { Box::pin(async move { - let username = self.user()?.username.clone(); // todo + let username = self.authed()?.user.username.clone(); // todo write!(&mut self.output_bytes, "Hi there, {}! You've successfully authenticated, but chartered does not provide shell access.\r\n", username)?; self.flush(&mut session, channel); session.close(channel); @@ -201,7 +205,7 @@ let public_key = key.public_key_bytes(); Box::pin(async move { - let (ssh_key, login_user) = + let (ssh_key, user) = match chartered_db::users::User::find_by_ssh_key(self.db.clone(), public_key) .await? { @@ -213,9 +217,14 @@ if let Err(e) = ssh_key.clone().update_last_used(self.db.clone()).await { warn!("Failed to update last used key: {:?}", e); } + + let auth_key = ssh_key + .clone() + .get_or_insert_session(self.db.clone(), self.ip.map(|v| v.to_string())) + .await? + .session_key; - self.user = Some(login_user); - self.user_ssh_key = Some(ssh_key); + self.authed = Some(Authed { user, auth_key }); self.finished_auth(server::Auth::Accept).await }) @@ -269,63 +278,31 @@ } } + let authed = self.authed()?; + let org_name = self.org_name()?; + if !ls_refs && !fetch && !done { return Ok((self, session)); } - // echo -ne "0012command=fetch\n0001000ethin-pack\n0010include-tag\n000eofs-delta\n0032want d24d8020163b5fee57c9babfd0c595b8c90ba253\n0009done\n" - - let mut pack_file_entries = Vec::new(); - let mut root_tree = Vec::new(); - - // TODO: key should be cached - let config = format!( - r#"{{"dl":"http://127.0.0.1:8888/a/{key}/o/{organisation}/api/v1/crates","api":"http://127.0.0.1:8888/a/{key}/o/{organisation}"}}"#, - key = self - .user_ssh_key()? - .clone() - .get_or_insert_session(self.db.clone(), self.ip.map(|v| v.to_string())) - .await? - .session_key, - organisation = self.org_name()?, + let mut packfile = GitRepository::default(); + + let config = CargoConfig::new( + Url::parse("http://127.0.0.1:8888/")?, + &authed.auth_key, + org_name, ); - let config_file = PackFileEntry::Blob(config.as_bytes()); - - root_tree.push(TreeItem { - kind: TreeItemKind::File, - name: "config.json", - hash: config_file.hash()?, - }); - pack_file_entries.push(config_file); + let config = serde_json::to_vec(&config)?; + packfile.insert(vec![], "config.json".to_string(), &config); // todo: the whole tree needs caching and then we can filter in code rather than at // the database - let tree = fetch_tree( - self.db.clone(), - self.user()?.id, - self.org_name()?.to_string(), - ) - .await; - build_tree(&mut root_tree, &mut pack_file_entries, &tree)?; - - let root_tree = PackFileEntry::Tree(root_tree); - let root_tree_hash = root_tree.hash()?; - pack_file_entries.push(root_tree); - - let commit_user = CommitUserInfo { - name: "Jordan Doyle", - email: "jordan@doyle.la", - time: chrono::Utc.ymd(2021, 9, 8).and_hms(17, 46, 1), - }; - let commit = PackFileEntry::Commit(Commit { - tree: root_tree_hash, - author: commit_user, - committer: commit_user, - message: "Most recent crates", - }); - let commit_hash = commit.hash()?; - pack_file_entries.push(commit); + let tree = fetch_tree(self.db.clone(), authed.user.id, org_name.to_string()).await; + build_tree(&mut packfile, &tree)?; + let (commit_hash, packfile_entries) = + packfile.commit("computer", "john@computer.no", "Update crates"); + eprintln!("commit hash: {}", hex::encode(&commit_hash)); // echo -ne "0014command=ls-refs\n0014agent=git/2.321\n00010009peel\n000csymrefs\n000bunborn\n0014ref-prefix HEAD\n0019ref-prefix refs/HEAD\n001eref-prefix refs/tags/HEAD\n001fref-prefix refs/heads/HEAD\n0021ref-prefix refs/remotes/HEAD\n0026ref-prefix refs/remotes/HEAD/HEAD\n001aref-prefix refs/tags/\n0000" @@ -358,7 +335,7 @@ self.write(PktLine::SidebandMsg(b"Hello from chartered!\n"))?; self.flush(&mut session, channel); - let packfile = git::packfile::PackFile::new(pack_file_entries); + let packfile = PackFile::new(packfile_entries); self.write(PktLine::SidebandData(packfile))?; self.write(PktLine::Flush)?; self.flush(&mut session, channel); @@ -427,51 +404,23 @@ } fn build_tree<'a>( - root_tree: &mut Vec>, - pack_file_entries: &mut Vec>, + packfile: &mut GitRepository<'a>, tree: &'a TwoCharTree>>, ) -> Result<(), anyhow::Error> { - root_tree.reserve(tree.len()); - pack_file_entries.reserve(tree.iter().map(|(_, v)| 1 + v.len()).sum::() + tree.len()); - for (first_level_dir, second_level_dirs) in tree.iter() { - let mut first_level_tree = Vec::with_capacity(second_level_dirs.len()); + let first_level_dir = std::str::from_utf8(first_level_dir)?; for (second_level_dir, crates) in second_level_dirs.iter() { - let mut second_level_tree = Vec::with_capacity(crates.len()); + let second_level_dir = std::str::from_utf8(second_level_dir)?; for (crate_name, versions_def) in crates.iter() { - let file = PackFileEntry::Blob(versions_def.as_ref()); - let file_hash = file.hash()?; - pack_file_entries.push(file); - - second_level_tree.push(TreeItem { - kind: TreeItemKind::File, - name: crate_name, - hash: file_hash, - }); + packfile.insert( + vec![first_level_dir.to_string(), second_level_dir.to_string()], + crate_name.to_string(), + versions_def.as_ref(), + ); } - - let second_level_tree = PackFileEntry::Tree(second_level_tree); - let second_level_tree_hash = second_level_tree.hash()?; - pack_file_entries.push(second_level_tree); - - first_level_tree.push(TreeItem { - kind: TreeItemKind::Directory, - name: std::str::from_utf8(second_level_dir)?, - hash: second_level_tree_hash, - }); } - - let first_level_tree = PackFileEntry::Tree(first_level_tree); - let first_level_tree_hash = first_level_tree.hash()?; - pack_file_entries.push(first_level_tree); - - root_tree.push(TreeItem { - kind: TreeItemKind::Directory, - name: std::str::from_utf8(first_level_dir)?, - hash: first_level_tree_hash, - }); } Ok(()) diff --git a/chartered-git/src/git/codec.rs b/chartered-git/src/git/codec.rs index 6cba5e0..5d86ea8 100644 --- a/chartered-git/src/git/codec.rs +++ a/chartered-git/src/git/codec.rs @@ -107,7 +107,7 @@ assert_eq!( res, Some(super::GitCommand { - command: Bytes::from_static(b"agent=git/2.32.0\n"), + command: Bytes::from_static(b"agent=git/2.32.0"), metadata: vec![], }) ); diff --git a/chartered-git/src/git/mod.rs b/chartered-git/src/git/mod.rs index e06d861..06c3a1d 100644 --- a/chartered-git/src/git/mod.rs +++ a/chartered-git/src/git/mod.rs @@ -1,10 +1,10 @@ pub mod codec; pub mod packfile; use bytes::{BufMut, BytesMut}; use std::fmt::Write; -use self::packfile::PackFile; +use self::packfile::low_level::PackFile; pub enum PktLine<'a> { Data(&'a [u8]), diff --git a/chartered-git/src/git/packfile.rs b/chartered-git/src/git/packfile.rs deleted file mode 100644 index 464a9d0..0000000 100644 --- a/chartered-git/src/git/packfile.rs +++ /dev/null @@ -1,326 +1,0 @@ -use bytes::{BufMut, BytesMut}; -use flate2::{write::ZlibEncoder, Compression}; -use sha1::{ - digest::{generic_array::GenericArray, FixedOutputDirty}, - Digest, Sha1, -}; -use std::{convert::TryInto, fmt::Write, io::Write as IoWrite}; - -// The packfile itself is a very simple format. There is a header, a -// series of packed objects (each with it's own header and body) and -// then a checksum trailer. The first four bytes is the string 'PACK', -// which is sort of used to make sure you're getting the start of the -// packfile correctly. This is followed by a 4-byte packfile version -// number and then a 4-byte number of entries in that file. -pub struct PackFile<'a> { - entries: Vec>, -} - -impl<'a> PackFile<'a> { - #[must_use] - pub fn new(entries: Vec>) -> Self { - Self { entries } - } - - #[must_use] - pub const fn header_size() -> usize { - "PACK".len() + std::mem::size_of::() + std::mem::size_of::() - } - - #[must_use] - pub const fn footer_size() -> usize { - 20 - } - - pub fn encode_to(&self, original_buf: &mut BytesMut) -> Result<(), anyhow::Error> { - let mut buf = original_buf.split_off(original_buf.len()); - buf.reserve(Self::header_size() + Self::footer_size()); - - // header - buf.extend_from_slice(b"PACK"); // magic header - buf.put_u32(2); // version - buf.put_u32(self.entries.len().try_into()?); // number of entries in the packfile - - // body - for entry in &self.entries { - entry.encode_to(&mut buf)?; - } - - // footer - buf.extend_from_slice(&sha1::Sha1::digest(&buf[..])); - - original_buf.unsplit(buf); - - Ok(()) - } -} - -#[derive(Debug)] -pub struct Commit<'a> { - pub tree: GenericArray::OutputSize>, // [u8; 20], but sha-1 returns a GenericArray - // pub parent: [u8; 20], - pub author: CommitUserInfo<'a>, - pub committer: CommitUserInfo<'a>, - // pub gpgsig: &str, - pub message: &'a str, -} - -impl Commit<'_> { - fn encode_to(&self, out: &mut BytesMut) -> Result<(), anyhow::Error> { - let mut tree_hex = [0_u8; 20 * 2]; - hex::encode_to_slice(self.tree, &mut tree_hex)?; - - out.write_str("tree ")?; - out.extend_from_slice(&tree_hex); - out.write_char('\n')?; - - writeln!(out, "author {}", self.author.encode())?; - writeln!(out, "committer {}", self.committer.encode())?; - write!(out, "\n{}", self.message)?; - - Ok(()) - } - - #[must_use] - pub fn size(&self) -> usize { - let mut len = 0; - len += "tree ".len() + (self.tree.len() * 2) + "\n".len(); - len += "author ".len() + self.author.size() + "\n".len(); - len += "committer ".len() + self.committer.size() + "\n".len(); - len += "\n".len() + self.message.len(); - len - } -} - -#[derive(Copy, Clone, Debug)] -pub struct CommitUserInfo<'a> { - pub name: &'a str, - pub email: &'a str, - pub time: chrono::DateTime, -} - -impl CommitUserInfo<'_> { - fn encode(&self) -> String { - // TODO: remove `format!`, `format_args!`? - format!( - "{} <{}> {} +0000", - self.name, - self.email, - self.time.timestamp() - ) - } - - #[must_use] - pub fn size(&self) -> usize { - let timestamp_len = itoa::Buffer::new().format(self.time.timestamp()).len(); - - self.name.len() - + "< ".len() - + self.email.len() - + "> ".len() - + timestamp_len - + " +0000".len() - } -} - -#[derive(Debug)] -pub enum TreeItemKind { - File, - Directory, -} - -impl TreeItemKind { - #[must_use] - pub const fn mode(&self) -> &'static str { - match self { - Self::File => "100644", - Self::Directory => "40000", - } - } -} - -#[derive(Debug)] -pub struct TreeItem<'a> { - pub kind: TreeItemKind, - pub name: &'a str, - pub hash: GenericArray::OutputSize>, // [u8; 20] - but we have to deal with GenericArrays -} - -// `[mode] [name]\0[hash]` -impl TreeItem<'_> { - fn encode_to(&self, out: &mut BytesMut) -> Result<(), anyhow::Error> { - out.write_str(self.kind.mode())?; - write!(out, " {}\0", self.name)?; - out.extend_from_slice(&self.hash); - Ok(()) - } - - #[must_use] - pub fn size(&self) -> usize { - self.kind.mode().len() + " ".len() + self.name.len() + "\0".len() + self.hash.len() - } -} - -#[derive(Debug)] -pub enum PackFileEntry<'a> { - // jordan@Jordans-MacBook-Pro-2 0d % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - f5/473259d9674ed66239766a013f96a3550374e3 | gzip -dc - // commit 1068tree 0d586b48bc42e8591773d3d8a7223551c39d453c - // parent c2a862612a14346ae95234f26efae1ee69b5b7a9 - // author Jordan Doyle 1630244577 +0100 - // committer Jordan Doyle 1630244577 +0100 - // gpgsig -----BEGIN PGP SIGNATURE----- - // - // iQIzBAABCAAdFiEEMn1zof7yzaURQBGDHqa65vZtxJoFAmErjuEACgkQHqa65vZt - // xJqhvhAAieKXnGRjT926qzozcvarC8D3TlA+Z1wVXueTAWqfusNIP0zCun/crOb2 - // tOULO+/DXVBmwu5eInAf+t/wvlnIsrzJonhVr1ZT0f0vDX6fs2vflWg4UCVEuTsZ - // tg+aTjcibwnmViIM9XVOzhU8Au2OIqMQLyQOMWSt8NhY0W2WhBCdQvhktvK1V8W6 - // omPs04SrR39xWBDQaxsXYxq/1ZKUYXDwudvEfv14EvrxG1vWumpUVJd7Ib5w4gXX - // fYa95DxYL720ZaiWPIYEG8FMBzSOpo6lUzY9g2/o/wKwSQZJNvpaMGCuouy8Fb+E - // UaqC0XPxqpKG9duXPgCldUr+P7++48CF5zc358RBGz5OCNeTREsIQQo5PUO1k+wO - // FnGOQTT8vvNOrxBgb3QgKu67RVwWDc6JnQCNpUrhUJrXMDWnYLBqo4Y+CdKGSQ4G - // hW8V/hVTOlJZNi8bbU4v53cxh4nXiMM6NKUblUKs65ar3/2dkojwunz7r7GVZ6mG - // QUpr9+ybG61XDqd1ad1A/B/i3WdWixTmJS3K/4uXjFjFX1f3RAk7O0gHc9I8HYOE - // Vd8UsHzLOWAUHeaqbsd6xx3GCXF4D5D++kh9OY9Ov7CXlqbYbHd6Atg+PQ7VnqNf - // bDqWN0Q2qcKX3k4ggtucmkkA6gP+K3+F5ANQj3AsGMQeddowC0Y= - // =fXoH - // -----END PGP SIGNATURE----- - // - // test - Commit(Commit<'a>), - // jordan@Jordans-MacBook-Pro-2 0d % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - 0d/586b48bc42e8591773d3d8a7223551c39d453c | gzip -dc - // tree 20940000 .cargo���CYy��Ve�������100644 .gitignore�K��_ow�]����4�n�ݺ100644 Cargo.lock�7�3-�?/�� - // kt��c0C�100644 Cargo.toml�6�&(��]\8@�SHA�]f40000 src0QW��ƅ���b[�!�S&N�100644 test�G2Y�gN�b9vj?��Ut� - Tree(Vec>), - // jordan@Jordans-MacBook-Pro-2 objects % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - f5/473259d9674ed66239766a013f96a3550374e3| gzip -dc - // blob 23try and find me in .git - Blob(&'a [u8]), - // Tag, - // OfsDelta, - // RefDelta, -} - -impl PackFileEntry<'_> { - fn write_header(&self, buf: &mut BytesMut) { - let mut size = self.uncompressed_size(); - - // write header - { - let mut val = 0b1000_0000_u8; - - val |= match self { - Self::Commit(_) => 0b001, - Self::Tree(_) => 0b010, - Self::Blob(_) => 0b011, - // Self::Tag => 0b100, - // Self::OfsDelta => 0b110, - // Self::RefDelta => 0b111, - } << 4; - - // pack the 4 LSBs of the size into the header - #[allow(clippy::cast_possible_truncation)] // value is masked - { - val |= (size & 0b1111) as u8; - } - size >>= 4; - - buf.put_u8(val); - } - - // write size bytes - while size != 0 { - // read 7 LSBs from the `size` and push them off for the next iteration - #[allow(clippy::cast_possible_truncation)] // value is masked - let mut val = (size & 0b111_1111) as u8; - size >>= 7; - - if size != 0 { - // MSB set to 1 implies there's more size bytes to come, otherwise - // the data starts after this byte - val |= 1 << 7; - } - - buf.put_u8(val); - } - } - - pub fn encode_to(&self, original_out: &mut BytesMut) -> Result<(), anyhow::Error> { - self.write_header(original_out); // TODO: this needs space reserving for it - - // todo is there a way to stream through the zlibencoder so we don't have to - // have this intermediate bytesmut and vec? - let mut out = BytesMut::new(); - - let size = self.uncompressed_size(); - original_out.reserve(size); - // the data ends up getting compressed but we'll need at least this many bytes - out.reserve(size); - - match self { - Self::Commit(commit) => { - commit.encode_to(&mut out)?; - } - Self::Tree(items) => { - for item in items { - item.encode_to(&mut out)?; - } - } - Self::Blob(data) => { - out.extend_from_slice(data); - } - } - - debug_assert_eq!(out.len(), size); - - let mut e = ZlibEncoder::new(Vec::new(), Compression::default()); - e.write_all(&out)?; - let compressed_data = e.finish()?; - - original_out.extend_from_slice(&compressed_data); - - Ok(()) - } - - #[must_use] - pub fn uncompressed_size(&self) -> usize { - match self { - Self::Commit(commit) => commit.size(), - Self::Tree(items) => items.iter().map(TreeItem::size).sum(), - Self::Blob(data) => data.len(), - } - } - - // wen const generics for RustCrypto? :-( - pub fn hash( - &self, - ) -> Result::OutputSize>, anyhow::Error> { - let size = self.uncompressed_size(); - - let file_prefix = match self { - Self::Commit(_) => "commit", - Self::Tree(_) => "tree", - Self::Blob(_) => "blob", - }; - - let size_len = itoa::Buffer::new().format(size).len(); - - let mut out = - BytesMut::with_capacity(file_prefix.len() + " ".len() + size_len + "\n".len() + size); - - write!(out, "{} {}\0", file_prefix, size)?; - match self { - Self::Commit(commit) => { - commit.encode_to(&mut out)?; - } - Self::Tree(items) => { - for item in items { - item.encode_to(&mut out)?; - } - } - Self::Blob(blob) => { - out.extend_from_slice(blob); - } - } - - Ok(sha1::Sha1::digest(&out)) - } -} diff --git a/chartered-git/src/git/packfile/high_level.rs b/chartered-git/src/git/packfile/high_level.rs new file mode 100644 index 0000000..b2af3e2 100644 --- /dev/null +++ a/chartered-git/src/git/packfile/high_level.rs @@ -1,0 +1,122 @@ +use indexmap::IndexMap; + +use super::low_level::{ + Commit, CommitUserInfo, HashOutput, PackFileEntry, TreeItem as LowLevelTreeItem, TreeItemKind, +}; + +#[derive(Default, Debug)] +pub struct Directory(IndexMap>); + +impl Directory { + fn into_packfile_entries<'a>( + &'a self, + pack_file: &mut IndexMap>, + ) -> HashOutput { + let mut tree = Vec::with_capacity(self.0.len()); + + for (name, item) in &self.0 { + tree.push(match item.as_ref() { + TreeItem::Blob(hash) => LowLevelTreeItem { + kind: TreeItemKind::File, + name: &name, + hash: *hash, + }, + TreeItem::Directory(dir) => LowLevelTreeItem { + kind: TreeItemKind::Directory, + name: &name, + hash: dir.into_packfile_entries(pack_file), + }, + }) + } + + let tree = PackFileEntry::Tree(tree); + let hash = tree.hash().unwrap(); + pack_file.insert(hash, tree); + + hash + } +} + +#[derive(Debug)] +pub enum TreeItem { + Blob(HashOutput), + Directory(Directory), +} + +#[derive(Default, Debug)] +pub struct GitRepository<'a> { + file_entries: IndexMap>, + tree: Directory, +} + +impl<'a> GitRepository<'a> { + pub fn insert(&mut self, path: Vec, file: String, content: &'a [u8]) { + let mut directory = &mut self.tree; + + for part in path { + let tree_item = directory + .0 + .entry(part) + .or_insert_with(|| Box::new(TreeItem::Directory(Directory::default()))); + + if let TreeItem::Directory(d) = tree_item.as_mut() { + directory = d; + } else { + panic!("one of the path items was a blob"); + } + } + + let entry = PackFileEntry::Blob(content); + + // todo: handle overwriting error + let file_hash = entry.hash().unwrap(); + directory + .0 + .insert(file, Box::new(TreeItem::Blob(file_hash))); + self.file_entries.insert(file_hash, entry); + } + + pub fn commit( + &'a mut self, + name: &'static str, + email: &'static str, + message: &'static str, + ) -> (HashOutput, Vec>) { + let tree_hash = self.tree.into_packfile_entries(&mut self.file_entries); + + let commit_user = CommitUserInfo { + name, + email, + time: chrono::Utc::now(), + }; + + let commit = PackFileEntry::Commit(Commit { + tree: tree_hash, + author: commit_user, + committer: commit_user, + message, + }); + + let commit_hash = commit.hash().unwrap(); + self.file_entries.insert(commit_hash, commit); + + // TODO: make PackFileEntry copy and remove this clone + (commit_hash, self.file_entries.values().cloned().collect()) + } +} + +#[cfg(test)] +mod test { + #[test] + fn test() { + let mut x = super::GitRepository::default(); + // x.insert(vec![], "a".to_string(), "nerd".as_ref()); + x.insert( + vec!["a".to_string(), "b".to_string()], + "c".to_string(), + "nerd".as_ref(), + ); + x.insert(vec![], "b".to_string(), "nerd".as_ref()); + panic!("{:#?}", x); + } +} diff --git a/chartered-git/src/git/packfile/low_level.rs b/chartered-git/src/git/packfile/low_level.rs new file mode 100644 index 0000000..2440925 100644 --- /dev/null +++ a/chartered-git/src/git/packfile/low_level.rs @@ -1,0 +1,326 @@ +use bytes::{BufMut, BytesMut}; +use flate2::{write::ZlibEncoder, Compression}; +use sha1::{ + digest::{generic_array::GenericArray, FixedOutputDirty}, + Digest, Sha1, +}; +use std::{convert::TryInto, fmt::Write, io::Write as IoWrite}; + +pub type HashOutput = GenericArray::OutputSize>; // [u8; 20], but sha-1 returns a GenericArray + +// The packfile itself is a very simple format. There is a header, a +// series of packed objects (each with it's own header and body) and +// then a checksum trailer. The first four bytes is the string 'PACK', +// which is sort of used to make sure you're getting the start of the +// packfile correctly. This is followed by a 4-byte packfile version +// number and then a 4-byte number of entries in that file. +pub struct PackFile<'a> { + entries: Vec>, +} + +impl<'a> PackFile<'a> { + #[must_use] + pub fn new(entries: Vec>) -> Self { + Self { entries } + } + + #[must_use] + pub const fn header_size() -> usize { + "PACK".len() + std::mem::size_of::() + std::mem::size_of::() + } + + #[must_use] + pub const fn footer_size() -> usize { + 20 + } + + pub fn encode_to(&self, original_buf: &mut BytesMut) -> Result<(), anyhow::Error> { + let mut buf = original_buf.split_off(original_buf.len()); + buf.reserve(Self::header_size() + Self::footer_size()); + + // header + buf.extend_from_slice(b"PACK"); // magic header + buf.put_u32(2); // version + buf.put_u32(self.entries.len().try_into()?); // number of entries in the packfile + + // body + for entry in &self.entries { + entry.encode_to(&mut buf)?; + } + + // footer + buf.extend_from_slice(&sha1::Sha1::digest(&buf[..])); + + original_buf.unsplit(buf); + + Ok(()) + } +} + +#[derive(Debug, Clone, Copy)] +pub struct Commit<'a> { + pub tree: HashOutput, + // pub parent: [u8; 20], + pub author: CommitUserInfo<'a>, + pub committer: CommitUserInfo<'a>, + // pub gpgsig: &str, + pub message: &'a str, +} + +impl Commit<'_> { + fn encode_to(&self, out: &mut BytesMut) -> Result<(), anyhow::Error> { + let mut tree_hex = [0_u8; 20 * 2]; + hex::encode_to_slice(self.tree, &mut tree_hex)?; + + out.write_str("tree ")?; + out.extend_from_slice(&tree_hex); + out.write_char('\n')?; + + writeln!(out, "author {}", self.author.encode())?; + writeln!(out, "committer {}", self.committer.encode())?; + write!(out, "\n{}", self.message)?; + + Ok(()) + } + + #[must_use] + pub fn size(&self) -> usize { + let mut len = 0; + len += "tree ".len() + (self.tree.len() * 2) + "\n".len(); + len += "author ".len() + self.author.size() + "\n".len(); + len += "committer ".len() + self.committer.size() + "\n".len(); + len += "\n".len() + self.message.len(); + len + } +} + +#[derive(Copy, Clone, Debug)] +pub struct CommitUserInfo<'a> { + pub name: &'a str, + pub email: &'a str, + pub time: chrono::DateTime, +} + +impl CommitUserInfo<'_> { + fn encode(&self) -> String { + // TODO: remove `format!`, `format_args!`? + format!( + "{} <{}> {} +0000", + self.name, + self.email, + self.time.timestamp() + ) + } + + #[must_use] + pub fn size(&self) -> usize { + let timestamp_len = itoa::Buffer::new().format(self.time.timestamp()).len(); + + self.name.len() + + "< ".len() + + self.email.len() + + "> ".len() + + timestamp_len + + " +0000".len() + } +} + +#[derive(Debug, Copy, Clone)] +pub enum TreeItemKind { + File, + Directory, +} + +impl TreeItemKind { + #[must_use] + pub const fn mode(&self) -> &'static str { + match self { + Self::File => "100644", + Self::Directory => "40000", + } + } +} + +#[derive(Debug, Copy, Clone)] +pub struct TreeItem<'a> { + pub kind: TreeItemKind, + pub name: &'a str, + pub hash: HashOutput, +} + +// `[mode] [name]\0[hash]` +impl TreeItem<'_> { + fn encode_to(&self, out: &mut BytesMut) -> Result<(), anyhow::Error> { + out.write_str(self.kind.mode())?; + write!(out, " {}\0", self.name)?; + out.extend_from_slice(&self.hash); + Ok(()) + } + + #[must_use] + pub fn size(&self) -> usize { + self.kind.mode().len() + " ".len() + self.name.len() + "\0".len() + self.hash.len() + } +} + +#[derive(Debug, Clone)] // could be copy but Vec> +pub enum PackFileEntry<'a> { + // jordan@Jordans-MacBook-Pro-2 0d % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - f5/473259d9674ed66239766a013f96a3550374e3 | gzip -dc + // commit 1068tree 0d586b48bc42e8591773d3d8a7223551c39d453c + // parent c2a862612a14346ae95234f26efae1ee69b5b7a9 + // author Jordan Doyle 1630244577 +0100 + // committer Jordan Doyle 1630244577 +0100 + // gpgsig -----BEGIN PGP SIGNATURE----- + // + // iQIzBAABCAAdFiEEMn1zof7yzaURQBGDHqa65vZtxJoFAmErjuEACgkQHqa65vZt + // xJqhvhAAieKXnGRjT926qzozcvarC8D3TlA+Z1wVXueTAWqfusNIP0zCun/crOb2 + // tOULO+/DXVBmwu5eInAf+t/wvlnIsrzJonhVr1ZT0f0vDX6fs2vflWg4UCVEuTsZ + // tg+aTjcibwnmViIM9XVOzhU8Au2OIqMQLyQOMWSt8NhY0W2WhBCdQvhktvK1V8W6 + // omPs04SrR39xWBDQaxsXYxq/1ZKUYXDwudvEfv14EvrxG1vWumpUVJd7Ib5w4gXX + // fYa95DxYL720ZaiWPIYEG8FMBzSOpo6lUzY9g2/o/wKwSQZJNvpaMGCuouy8Fb+E + // UaqC0XPxqpKG9duXPgCldUr+P7++48CF5zc358RBGz5OCNeTREsIQQo5PUO1k+wO + // FnGOQTT8vvNOrxBgb3QgKu67RVwWDc6JnQCNpUrhUJrXMDWnYLBqo4Y+CdKGSQ4G + // hW8V/hVTOlJZNi8bbU4v53cxh4nXiMM6NKUblUKs65ar3/2dkojwunz7r7GVZ6mG + // QUpr9+ybG61XDqd1ad1A/B/i3WdWixTmJS3K/4uXjFjFX1f3RAk7O0gHc9I8HYOE + // Vd8UsHzLOWAUHeaqbsd6xx3GCXF4D5D++kh9OY9Ov7CXlqbYbHd6Atg+PQ7VnqNf + // bDqWN0Q2qcKX3k4ggtucmkkA6gP+K3+F5ANQj3AsGMQeddowC0Y= + // =fXoH + // -----END PGP SIGNATURE----- + // + // test + Commit(Commit<'a>), + // jordan@Jordans-MacBook-Pro-2 0d % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - 0d/586b48bc42e8591773d3d8a7223551c39d453c | gzip -dc + // tree 20940000 .cargo���CYy��Ve�������100644 .gitignore�K��_ow�]����4�n�ݺ100644 Cargo.lock�7�3-�?/�� + // kt��c0C�100644 Cargo.toml�6�&(��]\8@�SHA�]f40000 src0QW��ƅ���b[�!�S&N�100644 test�G2Y�gN�b9vj?��Ut� + Tree(Vec>), + // jordan@Jordans-MacBook-Pro-2 objects % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - f5/473259d9674ed66239766a013f96a3550374e3| gzip -dc + // blob 23try and find me in .git + Blob(&'a [u8]), + // Tag, + // OfsDelta, + // RefDelta, +} + +impl PackFileEntry<'_> { + fn write_header(&self, buf: &mut BytesMut) { + let mut size = self.uncompressed_size(); + + // write header + { + let mut val = 0b1000_0000_u8; + + val |= match self { + Self::Commit(_) => 0b001, + Self::Tree(_) => 0b010, + Self::Blob(_) => 0b011, + // Self::Tag => 0b100, + // Self::OfsDelta => 0b110, + // Self::RefDelta => 0b111, + } << 4; + + // pack the 4 LSBs of the size into the header + #[allow(clippy::cast_possible_truncation)] // value is masked + { + val |= (size & 0b1111) as u8; + } + size >>= 4; + + buf.put_u8(val); + } + + // write size bytes + while size != 0 { + // read 7 LSBs from the `size` and push them off for the next iteration + #[allow(clippy::cast_possible_truncation)] // value is masked + let mut val = (size & 0b111_1111) as u8; + size >>= 7; + + if size != 0 { + // MSB set to 1 implies there's more size bytes to come, otherwise + // the data starts after this byte + val |= 1 << 7; + } + + buf.put_u8(val); + } + } + + pub fn encode_to(&self, original_out: &mut BytesMut) -> Result<(), anyhow::Error> { + self.write_header(original_out); // TODO: this needs space reserving for it + + // todo is there a way to stream through the zlibencoder so we don't have to + // have this intermediate bytesmut and vec? + let mut out = BytesMut::new(); + + let size = self.uncompressed_size(); + original_out.reserve(size); + // the data ends up getting compressed but we'll need at least this many bytes + out.reserve(size); + + match self { + Self::Commit(commit) => { + commit.encode_to(&mut out)?; + } + Self::Tree(items) => { + for item in items { + item.encode_to(&mut out)?; + } + } + Self::Blob(data) => { + out.extend_from_slice(data); + } + } + + debug_assert_eq!(out.len(), size); + + let mut e = ZlibEncoder::new(Vec::new(), Compression::default()); + e.write_all(&out)?; + let compressed_data = e.finish()?; + + original_out.extend_from_slice(&compressed_data); + + Ok(()) + } + + #[must_use] + pub fn uncompressed_size(&self) -> usize { + match self { + Self::Commit(commit) => commit.size(), + Self::Tree(items) => items.iter().map(TreeItem::size).sum(), + Self::Blob(data) => data.len(), + } + } + + // wen const generics for RustCrypto? :-( + pub fn hash(&self) -> Result { + let size = self.uncompressed_size(); + + let file_prefix = match self { + Self::Commit(_) => "commit", + Self::Tree(_) => "tree", + Self::Blob(_) => "blob", + }; + + let size_len = itoa::Buffer::new().format(size).len(); + + let mut out = + BytesMut::with_capacity(file_prefix.len() + " ".len() + size_len + "\n".len() + size); + + write!(out, "{} {}\0", file_prefix, size)?; + match self { + Self::Commit(commit) => { + commit.encode_to(&mut out)?; + } + Self::Tree(items) => { + for item in items { + item.encode_to(&mut out)?; + } + } + Self::Blob(blob) => { + out.extend_from_slice(blob); + } + } + + Ok(sha1::Sha1::digest(&out)) + } +} diff --git a/chartered-git/src/git/packfile/mod.rs b/chartered-git/src/git/packfile/mod.rs new file mode 100644 index 0000000..a70e0a8 100644 --- /dev/null +++ a/chartered-git/src/git/packfile/mod.rs @@ -1,0 +1,2 @@ +pub mod high_level; +pub mod low_level; -- rgit 0.1.3