From e978b2dc5592168cb22f1adda5f1035874726c77 Mon Sep 17 00:00:00 2001 From: Jordan Doyle Date: Sun, 05 Sep 2021 19:40:37 +0100 Subject: [PATCH] First iteration of crate metadata from database --- Cargo.lock | 19 +++++++++++++++++++ chartered-db/Cargo.toml | 1 + chartered-git/Cargo.toml | 4 ++++ chartered-db/src/lib.rs | 46 ++++++++++++++++++++++++++++++++++++++-------- chartered-git/src/main.rs | 187 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------- chartered-git/src/git/packfile.rs | 4 ++++ 6 files changed, 203 insertions(+), 58 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5fed164..9779f0a 100644 --- a/Cargo.lock +++ a/Cargo.lock @@ -189,6 +189,7 @@ "chartered-fs", "diesel", "dotenv", + "itertools", "tokio", ] @@ -208,6 +209,7 @@ "anyhow", "async-trait", "bytes", + "chartered-db", "chrono", "const-sha1", "crc", @@ -217,6 +219,8 @@ "futures", "hex", "itoa", + "serde", + "serde_json", "sha-1", "thrussh", "thrussh-keys", @@ -394,6 +398,12 @@ version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77c90badedccf4105eca100756a0b1289e191f6fcbdadd3cee1d2f614f97da8f" + +[[package]] +name = "either" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" [[package]] name = "env_logger" @@ -668,6 +678,15 @@ checksum = "bee0328b1209d157ef001c94dd85b4f8f64139adb0eac2659f4b08382b2f474d" dependencies = [ "cfg-if", +] + +[[package]] +name = "itertools" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69ddb889f9d0d08a67338271fa9b62996bc788c7796a5c18cf057420aaed5eaf" +dependencies = [ + "either", ] [[package]] diff --git a/chartered-db/Cargo.toml b/chartered-db/Cargo.toml index 4b4f1f2..19c613d 100644 --- a/chartered-db/Cargo.toml +++ a/chartered-db/Cargo.toml @@ -9,5 +9,6 @@ chartered-fs = { path = "../chartered-fs" } diesel = { version = "1", features = ["sqlite", "r2d2"] } +itertools = "0.10" tokio = { version = "1" } dotenv = "0.15" diff --git a/chartered-git/Cargo.toml b/chartered-git/Cargo.toml index 688bb7b..97cdfa5 100644 --- a/chartered-git/Cargo.toml +++ a/chartered-git/Cargo.toml @@ -6,6 +6,8 @@ # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +chartered-db = { path = "../chartered-db" } + anyhow = "1" async-trait = "0" bytes = "1" @@ -18,6 +20,8 @@ futures = "0.3" hex = "0.4" itoa = "0.4" +serde = { version = "1", features = ["derive"] } +serde_json = "1" sha-1 = "0.9" thrussh = "0.33" thrussh-keys = "0.21" diff --git a/chartered-db/src/lib.rs b/chartered-db/src/lib.rs index 4e28847..c95e1a2 100644 --- a/chartered-db/src/lib.rs +++ a/chartered-db/src/lib.rs @@ -1,11 +1,17 @@ pub mod schema; #[macro_use] extern crate diesel; -use diesel::{Associations, Identifiable, Queryable, insert_into, insert_or_ignore_into, prelude::*, r2d2::{ConnectionManager, Pool}}; +use diesel::{ + insert_into, insert_or_ignore_into, + prelude::*, + r2d2::{ConnectionManager, Pool}, + Associations, Identifiable, Queryable, +}; +use itertools::Itertools; use schema::{crate_versions, crates}; -use std::sync::Arc; +use std::{collections::HashMap, sync::Arc}; pub type ConnectionPool = Arc>>; @@ -13,20 +19,20 @@ Arc::new(Pool::new(ConnectionManager::new("chartered.db")).unwrap()) } -#[derive(Identifiable, Queryable, PartialEq, Debug)] +#[derive(Identifiable, Queryable, PartialEq, Eq, Hash, Debug)] pub struct Crate { - id: i32, - name: String, + pub id: i32, + pub name: String, } #[derive(Identifiable, Queryable, Associations, PartialEq, Debug)] #[belongs_to(Crate)] pub struct CrateVersion { - id: i32, - crate_id: i32, - version: String, - filesystem_object: String, - yanked: bool, + pub id: i32, + pub crate_id: i32, + pub version: String, + pub filesystem_object: String, + pub yanked: bool, } pub async fn get_crate_versions(conn: ConnectionPool, crate_name: String) -> Vec { @@ -39,11 +45,25 @@ .filter(name.eq(crate_name)) .first::(&conn) .expect("no crate"); - let selected_crate_versions = CrateVersion::belonging_to(&selected_crate) + + CrateVersion::belonging_to(&selected_crate) .load::(&conn) - .expect("no crate versions"); + .expect("no crate versions") + }) + .await + .unwrap() +} + +pub async fn get_crates(conn: ConnectionPool) -> HashMap> { + tokio::task::spawn_blocking(move || { + let conn = conn.get().unwrap(); + + let crate_versions = crates::table + .inner_join(crate_versions::table) + .load(&conn) + .unwrap(); - selected_crate_versions + crate_versions.into_iter().into_grouping_map().collect() }) .await .unwrap() diff --git a/chartered-git/src/main.rs b/chartered-git/src/main.rs index 48bf033..71cafd5 100644 --- a/chartered-git/src/main.rs +++ a/chartered-git/src/main.rs @@ -9,7 +9,9 @@ }; use bytes::BytesMut; +use chrono::TimeZone; use futures::future::Future; +use std::collections::BTreeMap; use std::{fmt::Write, pin::Pin, sync::Arc}; use thrussh::{ server::{self, Auth, Session}, @@ -26,27 +28,39 @@ let mut config = thrussh::server::Config::default(); config.keys.push(key::KeyPair::generate_ed25519().unwrap()); let config = Arc::new(config); - thrussh::server::run(config, "127.0.0.1:2233", Server) + + let server = Server { + db: chartered_db::init(), + }; + + thrussh::server::run(config, "127.0.0.1:2233", server) .await .unwrap(); } #[derive(Clone)] -struct Server; +struct Server { + db: chartered_db::ConnectionPool, +} impl server::Server for Server { type Handler = Handler; fn new(&mut self, _: Option) -> Self::Handler { - Handler::default() + Handler { + codec: GitCodec::default(), + input_bytes: BytesMut::default(), + output_bytes: BytesMut::default(), + db: self.db.clone(), + } } } -#[derive(Default)] struct Handler { codec: GitCodec, input_bytes: BytesMut, output_bytes: BytesMut, + db: chartered_db::ConnectionPool, } impl Handler { @@ -169,51 +183,43 @@ // echo -ne "0012command=fetch\n0001000ethin-pack\n0010include-tag\n000eofs-delta\n0032want d24d8020163b5fee57c9babfd0c595b8c90ba253\n0009done\n" - let test_crate_file = PackFileEntry::Blob(br#"{"name":"charteredtest","vers":"1.0.0","deps":[],"cksum":"7b821735f0211fd00032a9892d1bf2323c9d05d9c59b9303eb382f5ec1898bfc","features":{},"yanked":false,"links":null}"#); + let mut pack_file_entries = Vec::new(); + let mut root_tree = Vec::new(); + let config_file = PackFileEntry::Blob( - br#"{ - "dl": "http://127.0.0.1:8888/a/abc/api/v1/crates", - "api": "http://127.0.0.1:8888/a/abc" - }"#, + br#"{"dl":"http://127.0.0.1:8888/a/abc/api/v1/crates","api":"http://127.0.0.1:8888/a/abc"}"#, ); - let ch_ar_tree = PackFileEntry::Tree(vec![TreeItem { + root_tree.push(TreeItem { kind: TreeItemKind::File, - name: "charteredtest", - hash: test_crate_file.hash()?, - }]); - - let ch_tree = PackFileEntry::Tree(vec![TreeItem { - kind: TreeItemKind::Directory, - name: "ar", - hash: ch_ar_tree.hash()?, - }]); - - let root_tree = PackFileEntry::Tree(vec![ - TreeItem { - kind: TreeItemKind::Directory, - name: "ch", - hash: ch_tree.hash()?, - }, - TreeItem { - kind: TreeItemKind::File, - name: "config.json", - hash: config_file.hash()?, - }, - ]); + name: "config.json", + hash: config_file.hash()?, + }); + pack_file_entries.push(config_file); + // todo: this needs caching and filtering + let tree = fetch_tree(self.db.clone()).await; + build_tree(&mut root_tree, &mut pack_file_entries, &tree)?; + + let root_tree = PackFileEntry::Tree(root_tree); + let root_tree_hash = root_tree.hash()?; + pack_file_entries.push(root_tree); + let commit_user = CommitUserInfo { name: "Jordan Doyle", email: "jordan@doyle.la", - time: chrono::Utc::now(), + time: chrono::Utc.ymd(2021, 9, 8).and_hms(17, 46, 1), }; - let commit = PackFileEntry::Commit(Commit { - tree: root_tree.hash()?, + tree: root_tree_hash, author: commit_user, committer: commit_user, - message: "cool commit", + message: "Most recent crates", }); + let commit_hash = commit.hash()?; + pack_file_entries.push(commit); + + eprintln!("commit hash: {}", hex::encode(&commit_hash)); // echo -ne "0014command=ls-refs\n0014agent=git/2.321\n00010009peel\n000csymrefs\n000bunborn\n0014ref-prefix HEAD\n0019ref-prefix refs/HEAD\n001eref-prefix refs/tags/HEAD\n001fref-prefix refs/heads/HEAD\n0021ref-prefix refs/remotes/HEAD\n0026ref-prefix refs/remotes/HEAD/HEAD\n001aref-prefix refs/tags/\n0000" // GIT_PROTOCOL=version=2 ssh -o SendEnv=GIT_PROTOCOL git@github.com git-upload-pack '/w4/chartered.git' @@ -222,7 +228,7 @@ // sends a 000dpackfile back // https://shafiul.github.io/gitbook/7_the_packfile.html if ls_refs { - let commit_hash = hex::encode(&commit.hash()?); + let commit_hash = hex::encode(&commit_hash); self.write(PktLine::Data( format!("{} HEAD symref-target:refs/heads/master\n", commit_hash).as_bytes(), ))?; @@ -245,14 +251,7 @@ self.write(PktLine::SidebandMsg(b"Hello from chartered!\n"))?; self.flush(&mut session, channel); - let packfile = git::packfile::PackFile::new(vec![ - commit, - test_crate_file, - ch_tree, - ch_ar_tree, - config_file, - root_tree, - ]); + let packfile = git::packfile::PackFile::new(pack_file_entries); self.write(PktLine::SidebandData(packfile))?; self.write(PktLine::Flush)?; self.flush(&mut session, channel); @@ -264,5 +263,103 @@ Ok((self, session)) }) + } +} + +#[derive(serde::Serialize)] +pub struct CrateFileEntry<'a> { + name: &'a str, + vers: &'a str, + deps: &'a [&'a str], + cksum: &'a str, + features: BTreeMap>, + yanked: bool, + links: Option<()>, +} + +async fn fetch_tree( + db: chartered_db::ConnectionPool, +) -> BTreeMap<[u8; 2], BTreeMap<[u8; 2], BTreeMap>> { + let mut tree: BTreeMap<[u8; 2], BTreeMap<[u8; 2], BTreeMap>> = BTreeMap::new(); + + // todo: handle files with 1/2/3 characters + for (crate_def, versions) in chartered_db::get_crates(db).await { + let mut name_chars = crate_def.name.as_bytes().iter(); + let first_dir = [*name_chars.next().unwrap(), *name_chars.next().unwrap()]; + let second_dir = [*name_chars.next().unwrap(), *name_chars.next().unwrap()]; + + let first_dir = tree.entry(first_dir).or_default(); + let second_dir = first_dir.entry(second_dir).or_default(); + + let mut file = String::new(); + for version in versions { + let entry = CrateFileEntry { + name: &crate_def.name, + vers: &version.version, + deps: &[], + cksum: "cool-checksum-dude", + features: BTreeMap::new(), + yanked: version.yanked, + links: None, + }; + + file.push_str(&serde_json::to_string(&entry).unwrap()); + file.push('\n'); + } + + second_dir.insert(crate_def.name, file); + } + + tree +} + +fn build_tree<'a>( + root_tree: &mut Vec>, + pack_file_entries: &mut Vec>, + tree: &'a BTreeMap<[u8; 2], BTreeMap<[u8; 2], BTreeMap>>, +) -> Result<(), anyhow::Error> { + root_tree.reserve(tree.len()); + pack_file_entries.reserve(tree.iter().map(|(_, v)| 1 + v.len()).sum::() + tree.len()); + + for (first_level_dir, second_level_dirs) in tree.iter() { + let mut first_level_tree = Vec::with_capacity(second_level_dirs.len()); + + for (second_level_dir, crates) in second_level_dirs.iter() { + let mut second_level_tree = Vec::with_capacity(crates.len()); + + for (crate_name, versions_def) in crates.iter() { + let file = PackFileEntry::Blob(versions_def.as_ref()); + let file_hash = file.hash()?; + pack_file_entries.push(file); + + second_level_tree.push(TreeItem { + kind: TreeItemKind::File, + name: crate_name, + hash: file_hash, + }); + } + + let second_level_tree = PackFileEntry::Tree(second_level_tree); + let second_level_tree_hash = second_level_tree.hash()?; + pack_file_entries.push(second_level_tree); + + first_level_tree.push(TreeItem { + kind: TreeItemKind::Directory, + name: std::str::from_utf8(second_level_dir)?, + hash: second_level_tree_hash, + }); + } + + let first_level_tree = PackFileEntry::Tree(first_level_tree); + let first_level_tree_hash = first_level_tree.hash()?; + pack_file_entries.push(first_level_tree); + + root_tree.push(TreeItem { + kind: TreeItemKind::Directory, + name: std::str::from_utf8(first_level_dir)?, + hash: first_level_tree_hash, + }); } + + Ok(()) } diff --git a/chartered-git/src/git/packfile.rs b/chartered-git/src/git/packfile.rs index 99fccd7..464a9d0 100644 --- a/chartered-git/src/git/packfile.rs +++ a/chartered-git/src/git/packfile.rs @@ -55,6 +55,7 @@ } } +#[derive(Debug)] pub struct Commit<'a> { pub tree: GenericArray::OutputSize>, // [u8; 20], but sha-1 returns a GenericArray // pub parent: [u8; 20], @@ -122,6 +123,7 @@ } } +#[derive(Debug)] pub enum TreeItemKind { File, Directory, @@ -137,6 +139,7 @@ } } +#[derive(Debug)] pub struct TreeItem<'a> { pub kind: TreeItemKind, pub name: &'a str, @@ -158,6 +161,7 @@ } } +#[derive(Debug)] pub enum PackFileEntry<'a> { // jordan@Jordans-MacBook-Pro-2 0d % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - f5/473259d9674ed66239766a013f96a3550374e3 | gzip -dc // commit 1068tree 0d586b48bc42e8591773d3d8a7223551c39d453c -- rgit 0.1.3