From 8a38228920b7edb4e161db96c68b4f84781caa26 Mon Sep 17 00:00:00 2001 From: Jordan Doyle Date: Fri, 08 Oct 2021 02:45:46 +0100 Subject: [PATCH] abstract out tree building --- Cargo.lock | 7 +++++++ chartered-git/Cargo.toml | 1 + chartered-web/.gitignore | 1 + chartered-git/src/generators.rs | 1 - chartered-git/src/main.rs | 92 +++++++++----------------------------------------------------------------------- chartered-git/src/tree.rs | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ chartered-types/src/cargo.rs | 8 ++++++-- chartered-git/src/git/packfile/high_level.rs | 40 +++++++++++++++++----------------------- chartered-web/src/endpoints/cargo_api/publish.rs | 13 ++++++++++--- 9 files changed, 152 insertions(+), 116 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 15f876d..4abddc8 100644 --- a/Cargo.lock +++ a/Cargo.lock @@ -47,6 +47,12 @@ checksum = "61604a8f862e1d5c3229fdd78f8b02c68dcf73a4c4b05fd636d12240aaa242c1" [[package]] +name = "arrayvec" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4dc07131ffa69b8072d35f5007352af944213cde02545e2103680baed38fcd" + +[[package]] name = "async-trait" version = "0.1.51" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -291,6 +297,7 @@ version = "0.1.0" dependencies = [ "anyhow", + "arrayvec", "async-trait", "bytes", "chartered-db", diff --git a/chartered-git/Cargo.toml b/chartered-git/Cargo.toml index 2258169..83143b7 100644 --- a/chartered-git/Cargo.toml +++ a/chartered-git/Cargo.toml @@ -10,6 +10,7 @@ chartered-types = { path = "../chartered-types" } anyhow = "1" +arrayvec = "0.7" async-trait = "0" bytes = "1" chrono = "0.4" diff --git a/chartered-web/.gitignore b/chartered-web/.gitignore new file mode 100644 index 0000000..5b6c096 100644 --- /dev/null +++ a/chartered-web/.gitignore @@ -1,0 +1,1 @@ +config.toml diff --git a/chartered-git/src/generators.rs b/chartered-git/src/generators.rs index cc987ba..32bc87c 100644 --- a/chartered-git/src/generators.rs +++ a/chartered-git/src/generators.rs @@ -1,5 +1,4 @@ use serde::Serialize; -use url::Url; #[derive(Serialize, Debug, Clone)] pub struct CargoConfig { diff --git a/chartered-git/src/main.rs b/chartered-git/src/main.rs index e982f05..3dc69f3 100644 --- a/chartered-git/src/main.rs +++ a/chartered-git/src/main.rs @@ -1,25 +1,23 @@ #![deny(clippy::pedantic)] mod generators; #[allow(clippy::missing_errors_doc)] pub mod git; +mod tree; use crate::{ generators::CargoConfig, git::{ codec::{Encoder, GitCodec}, - packfile::{ - high_level::GitRepository, - low_level::{Commit, CommitUserInfo, PackFile, PackFileEntry, TreeItem, TreeItemKind}, - }, + packfile::{high_level::GitRepository, low_level::PackFile}, PktLine, }, + tree::Tree, }; +use arrayvec::ArrayVec; use bytes::BytesMut; -use chrono::TimeZone; use futures::future::Future; use log::warn; -use std::collections::BTreeMap; use std::{fmt::Write, pin::Pin, sync::Arc}; use thrussh::{ server::{self, Auth, Session}, @@ -293,12 +291,12 @@ org_name, ); let config = serde_json::to_vec(&config)?; - packfile.insert(vec![], "config.json".to_string(), &config); + packfile.insert(ArrayVec::<_, 0>::new(), "config.json", &config); // todo: the whole tree needs caching and then we can filter in code rather than at // the database - let tree = fetch_tree(self.db.clone(), authed.user.id, org_name.to_string()).await; - build_tree(&mut packfile, &tree)?; + let tree = Tree::build(self.db.clone(), authed.user.id, org_name.to_string()).await; + tree.write_to_packfile(&mut packfile); let (commit_hash, packfile_entries) = packfile.commit("computer", "john@computer.no", "Update crates"); @@ -348,80 +346,4 @@ Ok((self, session)) }) } -} - -#[derive(serde::Serialize)] -pub struct CrateFileEntry<'a> { - #[serde(flatten)] - inner: &'a chartered_types::cargo::CrateVersion<'a>, - cksum: &'a str, - yanked: bool, -} - -pub type TwoCharTree = BTreeMap<[u8; 2], T>; - -async fn fetch_tree( - db: chartered_db::ConnectionPool, - user_id: i32, - org_name: String, -) -> TwoCharTree>> { - use chartered_db::crates::Crate; - - let mut tree: TwoCharTree>> = BTreeMap::new(); - - // todo: handle files with 1/2/3 characters - for (crate_def, versions) in Crate::list_with_versions(db, user_id, org_name) - .await - .unwrap() - { - let mut name_chars = crate_def.name.as_bytes().iter(); - let first_dir = [*name_chars.next().unwrap(), *name_chars.next().unwrap()]; - let second_dir = [*name_chars.next().unwrap(), *name_chars.next().unwrap()]; - - let first_dir = tree.entry(first_dir).or_default(); - let second_dir = first_dir.entry(second_dir).or_default(); - - let mut file = String::new(); - for version in versions { - let cksum = version.checksum.clone(); - let yanked = version.yanked; - let version = version.into_cargo_format(&crate_def); - - let entry = CrateFileEntry { - inner: &version, - cksum: &cksum, - yanked, - }; - - file.push_str(&serde_json::to_string(&entry).unwrap()); - file.push('\n'); - } - - second_dir.insert(crate_def.name, file); - } - - tree -} - -fn build_tree<'a>( - packfile: &mut GitRepository<'a>, - tree: &'a TwoCharTree>>, -) -> Result<(), anyhow::Error> { - for (first_level_dir, second_level_dirs) in tree.iter() { - let first_level_dir = std::str::from_utf8(first_level_dir)?; - - for (second_level_dir, crates) in second_level_dirs.iter() { - let second_level_dir = std::str::from_utf8(second_level_dir)?; - - for (crate_name, versions_def) in crates.iter() { - packfile.insert( - vec![first_level_dir.to_string(), second_level_dir.to_string()], - crate_name.to_string(), - versions_def.as_ref(), - ); - } - } - } - - Ok(()) } diff --git a/chartered-git/src/tree.rs b/chartered-git/src/tree.rs new file mode 100644 index 0000000..b6e4975 100644 --- /dev/null +++ a/chartered-git/src/tree.rs @@ -1,0 +1,105 @@ +//! Generates the Git folder/file tree that's returned back to the user +//! containing the config & crate manifests. + +use crate::git::packfile::high_level::GitRepository; +use arrayvec::ArrayVec; +use chartered_db::crates::Crate; +use std::collections::BTreeMap; + +#[derive(serde::Serialize)] +pub struct CrateFileEntry<'a> { + #[serde(flatten)] + inner: &'a chartered_types::cargo::CrateVersion<'a>, + cksum: &'a str, + yanked: bool, +} + +pub struct Tree { + crates: BTreeMap, +} + +impl Tree { + pub async fn build(db: chartered_db::ConnectionPool, user_id: i32, org_name: String) -> Self { + let mut crates = BTreeMap::new(); + + for (crate_def, versions) in Crate::list_with_versions(db, user_id, org_name) + .await + .unwrap() + { + let mut file = String::new(); + + for version in versions { + let cksum = version.checksum.clone(); + let yanked = version.yanked; + let version = version.into_cargo_format(&crate_def); + + let entry = CrateFileEntry { + inner: &version, + cksum: &cksum, + yanked, + }; + + file.push_str(&serde_json::to_string(&entry).unwrap()); + file.push('\n'); + } + + crates.insert(crate_def.name, file); + } + + Self { crates } + } + + pub fn write_to_packfile<'a>(&'a self, repo: &mut GitRepository<'a>) { + for (name, content) in &self.crates { + let crate_folder = get_crate_folder(&name); + repo.insert(crate_folder, &name, content.as_bytes()); + } + } +} + +fn get_crate_folder(crate_name: &str) -> ArrayVec<&str, 2> { + let mut folders = ArrayVec::new(); + + match crate_name.len() { + 0 => {} + 1 => folders.push("1"), + 2 => folders.push("2"), + 3 => folders.push("3"), + _ => { + folders.push(&crate_name[..2]); + folders.push(&crate_name[2..4]); + } + } + + folders +} + +#[cfg(test)] +mod test { + #[test] + fn get_crate_folder() { + let folder = super::get_crate_folder(""); + let mut folder = folder.iter(); + assert_eq!(folder.next(), None); + + let folder = super::get_crate_folder("a"); + let mut folder = folder.iter(); + assert_eq!(folder.next(), Some(&"1")); + assert_eq!(folder.next(), None); + + let folder = super::get_crate_folder("ab"); + let mut folder = folder.iter(); + assert_eq!(folder.next(), Some(&"2")); + assert_eq!(folder.next(), None); + + let folder = super::get_crate_folder("abc"); + let mut folder = folder.iter(); + assert_eq!(folder.next(), Some(&"3")); + assert_eq!(folder.next(), None); + + let folder = super::get_crate_folder("abcd"); + let mut folder = folder.iter(); + assert_eq!(folder.next(), Some(&"ab")); + assert_eq!(folder.next(), Some(&"cd")); + } +} diff --git a/chartered-types/src/cargo.rs b/chartered-types/src/cargo.rs index 0c907b5..23c9d61 100644 --- a/chartered-types/src/cargo.rs +++ a/chartered-types/src/cargo.rs @@ -45,7 +45,7 @@ pub optional: bool, pub default_features: bool, pub target: Option>, // a string such as "cfg(windows)" - pub kind: Cow<'a, str>, // dev, build or normal + pub kind: Cow<'a, str>, // dev, build or normal #[serde(skip_serializing_if = "Option::is_none")] pub registry: Option>, #[serde(skip_serializing_if = "Option::is_none")] @@ -57,7 +57,11 @@ CrateDependency { name: Cow::Owned(self.name.into_owned()), req: Cow::Owned(self.req.into_owned()), - features: self.features.into_iter().map(|v| Cow::Owned(v.into_owned())).collect(), + features: self + .features + .into_iter() + .map(|v| Cow::Owned(v.into_owned())) + .collect(), optional: self.optional, default_features: self.default_features, target: self.target.map(|v| Cow::Owned(v.into_owned())), diff --git a/chartered-git/src/git/packfile/high_level.rs b/chartered-git/src/git/packfile/high_level.rs index b2af3e2..2e4deba 100644 --- a/chartered-git/src/git/packfile/high_level.rs +++ a/chartered-git/src/git/packfile/high_level.rs @@ -1,3 +1,4 @@ +use arrayvec::ArrayVec; use indexmap::IndexMap; use super::low_level::{ @@ -5,11 +6,11 @@ }; #[derive(Default, Debug)] -pub struct Directory(IndexMap>); - -impl Directory { - fn into_packfile_entries<'a>( - &'a self, +pub struct Directory<'a>(IndexMap<&'a str, Box>>); + +impl<'a> Directory<'a> { + fn into_packfile_entries( + &self, pack_file: &mut IndexMap>, ) -> HashOutput { let mut tree = Vec::with_capacity(self.0.len()); @@ -38,19 +39,24 @@ } #[derive(Debug)] -pub enum TreeItem { +pub enum TreeItem<'a> { Blob(HashOutput), - Directory(Directory), + Directory(Directory<'a>), } #[derive(Default, Debug)] pub struct GitRepository<'a> { file_entries: IndexMap>, - tree: Directory, + tree: Directory<'a>, } impl<'a> GitRepository<'a> { - pub fn insert(&mut self, path: Vec, file: String, content: &'a [u8]) { + pub fn insert( + &mut self, + path: ArrayVec<&'a str, N>, + file: &'a str, + content: &'a [u8], + ) { let mut directory = &mut self.tree; for part in path { @@ -102,21 +108,5 @@ // TODO: make PackFileEntry copy and remove this clone (commit_hash, self.file_entries.values().cloned().collect()) - } -} - -#[cfg(test)] -mod test { - #[test] - fn test() { - let mut x = super::GitRepository::default(); - // x.insert(vec![], "a".to_string(), "nerd".as_ref()); - x.insert( - vec!["a".to_string(), "b".to_string()], - "c".to_string(), - "nerd".as_ref(), - ); - x.insert(vec![], "b".to_string(), "nerd".as_ref()); - panic!("{:#?}", x); } } diff --git a/chartered-web/src/endpoints/cargo_api/publish.rs b/chartered-web/src/endpoints/cargo_api/publish.rs index 9de7c80..83b4ebd 100644 --- a/chartered-web/src/endpoints/cargo_api/publish.rs +++ a/chartered-web/src/endpoints/cargo_api/publish.rs @@ -1,8 +1,8 @@ use axum::extract; use bytes::Bytes; use chartered_db::{crates::Crate, users::User, ConnectionPool}; use chartered_fs::FileSystem; -use chartered_types::cargo::{CrateFeatures, CrateVersion, CrateDependency}; +use chartered_types::cargo::{CrateDependency, CrateFeatures, CrateVersion}; use serde::{Deserialize, Serialize}; use sha2::{Digest, Sha256}; use std::{borrow::Cow, convert::TryInto, sync::Arc}; @@ -171,7 +171,10 @@ impl From> for CrateDependency<'static> { fn from(us: MetadataCrateDependency<'_>) -> CrateDependency<'static> { let (name, package) = if let Some(explicit_name_in_toml) = us.explicit_name_in_toml { - (explicit_name_in_toml.into_owned(), Some(us.name.into_owned())) + ( + explicit_name_in_toml.into_owned(), + Some(us.name.into_owned()), + ) } else { (us.name.into_owned(), None) }; @@ -179,7 +182,11 @@ Self { name: Cow::Owned(name), req: Cow::Owned(us.version_req.into_owned()), - features: us.features.into_iter().map(|v| Cow::Owned(v.into_owned())).collect(), + features: us + .features + .into_iter() + .map(|v| Cow::Owned(v.into_owned())) + .collect(), optional: us.optional, default_features: us.default_features, target: us.target.map(|v| Cow::Owned(v.into_owned())), -- rgit 0.1.3