🏡 index : ~doyle/chartered.git

author Jordan Doyle <jordan@doyle.la> 2021-09-05 19:40:37.0 +01:00:00
committer Jordan Doyle <jordan@doyle.la> 2021-09-05 19:43:31.0 +01:00:00
commit
e978b2dc5592168cb22f1adda5f1035874726c77 [patch]
tree
8f1f6b3831b55f5bf0fd3c9edb03cf8b2c92eabd
parent
603d7a9c72ee401ba80b53c3de7651a57ad73039
download
e978b2dc5592168cb22f1adda5f1035874726c77.tar.gz

First iteration of crate metadata from database



Diff

 Cargo.lock                        |  19 +++++++++++++++++++
 chartered-db/Cargo.toml           |   1 +
 chartered-git/Cargo.toml          |   4 ++++
 chartered-db/src/lib.rs           |  46 ++++++++++++++++++++++++++++++++++++++--------
 chartered-git/src/main.rs         | 187 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------
 chartered-git/src/git/packfile.rs |   4 ++++
 6 files changed, 203 insertions(+), 58 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 5fed164..9779f0a 100644
--- a/Cargo.lock
+++ a/Cargo.lock
@@ -189,6 +189,7 @@
 "chartered-fs",
 "diesel",
 "dotenv",
 "itertools",
 "tokio",
]

@@ -208,6 +209,7 @@
 "anyhow",
 "async-trait",
 "bytes",
 "chartered-db",
 "chrono",
 "const-sha1",
 "crc",
@@ -217,6 +219,8 @@
 "futures",
 "hex",
 "itoa",
 "serde",
 "serde_json",
 "sha-1",
 "thrussh",
 "thrussh-keys",
@@ -394,6 +398,12 @@
version = "0.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77c90badedccf4105eca100756a0b1289e191f6fcbdadd3cee1d2f614f97da8f"

[[package]]
name = "either"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"

[[package]]
name = "env_logger"
@@ -668,6 +678,15 @@
checksum = "bee0328b1209d157ef001c94dd85b4f8f64139adb0eac2659f4b08382b2f474d"
dependencies = [
 "cfg-if",
]

[[package]]
name = "itertools"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69ddb889f9d0d08a67338271fa9b62996bc788c7796a5c18cf057420aaed5eaf"
dependencies = [
 "either",
]

[[package]]
diff --git a/chartered-db/Cargo.toml b/chartered-db/Cargo.toml
index 4b4f1f2..19c613d 100644
--- a/chartered-db/Cargo.toml
+++ a/chartered-db/Cargo.toml
@@ -9,5 +9,6 @@
chartered-fs = { path = "../chartered-fs" }

diesel = { version = "1", features = ["sqlite", "r2d2"] }
itertools = "0.10"
tokio = { version = "1" }
dotenv = "0.15"
diff --git a/chartered-git/Cargo.toml b/chartered-git/Cargo.toml
index 688bb7b..97cdfa5 100644
--- a/chartered-git/Cargo.toml
+++ a/chartered-git/Cargo.toml
@@ -6,6 +6,8 @@
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
chartered-db = { path = "../chartered-db" }

anyhow = "1"
async-trait = "0"
bytes = "1"
@@ -18,6 +20,8 @@
futures = "0.3"
hex = "0.4"
itoa = "0.4"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
sha-1 = "0.9"
thrussh = "0.33"
thrussh-keys = "0.21"
diff --git a/chartered-db/src/lib.rs b/chartered-db/src/lib.rs
index 4e28847..c95e1a2 100644
--- a/chartered-db/src/lib.rs
+++ a/chartered-db/src/lib.rs
@@ -1,11 +1,17 @@
pub mod schema;

#[macro_use]
extern crate diesel;

use diesel::{Associations, Identifiable, Queryable, insert_into, insert_or_ignore_into, prelude::*, r2d2::{ConnectionManager, Pool}};
use diesel::{
    insert_into, insert_or_ignore_into,
    prelude::*,
    r2d2::{ConnectionManager, Pool},
    Associations, Identifiable, Queryable,
};
use itertools::Itertools;
use schema::{crate_versions, crates};
use std::sync::Arc;
use std::{collections::HashMap, sync::Arc};

pub type ConnectionPool = Arc<Pool<ConnectionManager<diesel::SqliteConnection>>>;

@@ -13,20 +19,20 @@
    Arc::new(Pool::new(ConnectionManager::new("chartered.db")).unwrap())
}

#[derive(Identifiable, Queryable, PartialEq, Debug)]
#[derive(Identifiable, Queryable, PartialEq, Eq, Hash, Debug)]
pub struct Crate {
    id: i32,
    name: String,
    pub id: i32,
    pub name: String,
}

#[derive(Identifiable, Queryable, Associations, PartialEq, Debug)]
#[belongs_to(Crate)]
pub struct CrateVersion {
    id: i32,
    crate_id: i32,
    version: String,
    filesystem_object: String,
    yanked: bool,
    pub id: i32,
    pub crate_id: i32,
    pub version: String,
    pub filesystem_object: String,
    pub yanked: bool,
}

pub async fn get_crate_versions(conn: ConnectionPool, crate_name: String) -> Vec<CrateVersion> {
@@ -39,11 +45,25 @@
            .filter(name.eq(crate_name))
            .first::<Crate>(&conn)
            .expect("no crate");
        let selected_crate_versions = CrateVersion::belonging_to(&selected_crate)

        CrateVersion::belonging_to(&selected_crate)
            .load::<CrateVersion>(&conn)
            .expect("no crate versions");
            .expect("no crate versions")
    })
    .await
    .unwrap()
}

pub async fn get_crates(conn: ConnectionPool) -> HashMap<Crate, Vec<CrateVersion>> {
    tokio::task::spawn_blocking(move || {
        let conn = conn.get().unwrap();

        let crate_versions = crates::table
            .inner_join(crate_versions::table)
            .load(&conn)
            .unwrap();

        selected_crate_versions
        crate_versions.into_iter().into_grouping_map().collect()
    })
    .await
    .unwrap()
diff --git a/chartered-git/src/main.rs b/chartered-git/src/main.rs
index 48bf033..71cafd5 100644
--- a/chartered-git/src/main.rs
+++ a/chartered-git/src/main.rs
@@ -9,7 +9,9 @@
};

use bytes::BytesMut;
use chrono::TimeZone;
use futures::future::Future;
use std::collections::BTreeMap;
use std::{fmt::Write, pin::Pin, sync::Arc};
use thrussh::{
    server::{self, Auth, Session},
@@ -26,27 +28,39 @@
    let mut config = thrussh::server::Config::default();
    config.keys.push(key::KeyPair::generate_ed25519().unwrap());
    let config = Arc::new(config);
    thrussh::server::run(config, "127.0.0.1:2233", Server)

    let server = Server {
        db: chartered_db::init(),
    };

    thrussh::server::run(config, "127.0.0.1:2233", server)
        .await
        .unwrap();
}

#[derive(Clone)]
struct Server;
struct Server {
    db: chartered_db::ConnectionPool,
}

impl server::Server for Server {
    type Handler = Handler;

    fn new(&mut self, _: Option<std::net::SocketAddr>) -> Self::Handler {
        Handler::default()
        Handler {
            codec: GitCodec::default(),
            input_bytes: BytesMut::default(),
            output_bytes: BytesMut::default(),
            db: self.db.clone(),
        }
    }
}

#[derive(Default)]
struct Handler {
    codec: GitCodec,
    input_bytes: BytesMut,
    output_bytes: BytesMut,
    db: chartered_db::ConnectionPool,
}

impl Handler {
@@ -169,51 +183,43 @@

            // echo -ne "0012command=fetch\n0001000ethin-pack\n0010include-tag\n000eofs-delta\n0032want d24d8020163b5fee57c9babfd0c595b8c90ba253\n0009done\n"

            let test_crate_file = PackFileEntry::Blob(br#"{"name":"charteredtest","vers":"1.0.0","deps":[],"cksum":"7b821735f0211fd00032a9892d1bf2323c9d05d9c59b9303eb382f5ec1898bfc","features":{},"yanked":false,"links":null}"#);
            let mut pack_file_entries = Vec::new();
            let mut root_tree = Vec::new();

            let config_file = PackFileEntry::Blob(
                br#"{
                "dl": "http://127.0.0.1:8888/a/abc/api/v1/crates",
                "api": "http://127.0.0.1:8888/a/abc"
            }"#,
                br#"{"dl":"http://127.0.0.1:8888/a/abc/api/v1/crates","api":"http://127.0.0.1:8888/a/abc"}"#,
            );

            let ch_ar_tree = PackFileEntry::Tree(vec![TreeItem {
            root_tree.push(TreeItem {
                kind: TreeItemKind::File,
                name: "charteredtest",
                hash: test_crate_file.hash()?,
            }]);

            let ch_tree = PackFileEntry::Tree(vec![TreeItem {
                kind: TreeItemKind::Directory,
                name: "ar",
                hash: ch_ar_tree.hash()?,
            }]);

            let root_tree = PackFileEntry::Tree(vec![
                TreeItem {
                    kind: TreeItemKind::Directory,
                    name: "ch",
                    hash: ch_tree.hash()?,
                },
                TreeItem {
                    kind: TreeItemKind::File,
                    name: "config.json",
                    hash: config_file.hash()?,
                },
            ]);
                name: "config.json",
                hash: config_file.hash()?,
            });
            pack_file_entries.push(config_file);

            // todo: this needs caching and filtering
            let tree = fetch_tree(self.db.clone()).await;
            build_tree(&mut root_tree, &mut pack_file_entries, &tree)?;

            let root_tree = PackFileEntry::Tree(root_tree);
            let root_tree_hash = root_tree.hash()?;
            pack_file_entries.push(root_tree);

            let commit_user = CommitUserInfo {
                name: "Jordan Doyle",
                email: "jordan@doyle.la",
                time: chrono::Utc::now(),
                time: chrono::Utc.ymd(2021, 9, 8).and_hms(17, 46, 1),
            };

            let commit = PackFileEntry::Commit(Commit {
                tree: root_tree.hash()?,
                tree: root_tree_hash,
                author: commit_user,
                committer: commit_user,
                message: "cool commit",
                message: "Most recent crates",
            });
            let commit_hash = commit.hash()?;
            pack_file_entries.push(commit);

            eprintln!("commit hash: {}", hex::encode(&commit_hash));

            // echo -ne "0014command=ls-refs\n0014agent=git/2.321\n00010009peel\n000csymrefs\n000bunborn\n0014ref-prefix HEAD\n0019ref-prefix refs/HEAD\n001eref-prefix refs/tags/HEAD\n001fref-prefix refs/heads/HEAD\n0021ref-prefix refs/remotes/HEAD\n0026ref-prefix refs/remotes/HEAD/HEAD\n001aref-prefix refs/tags/\n0000"
            // GIT_PROTOCOL=version=2 ssh -o SendEnv=GIT_PROTOCOL git@github.com git-upload-pack '/w4/chartered.git'
@@ -222,7 +228,7 @@
            // sends a 000dpackfile back
            // https://shafiul.github.io/gitbook/7_the_packfile.html
            if ls_refs {
                let commit_hash = hex::encode(&commit.hash()?);
                let commit_hash = hex::encode(&commit_hash);
                self.write(PktLine::Data(
                    format!("{} HEAD symref-target:refs/heads/master\n", commit_hash).as_bytes(),
                ))?;
@@ -245,14 +251,7 @@
                self.write(PktLine::SidebandMsg(b"Hello from chartered!\n"))?;
                self.flush(&mut session, channel);

                let packfile = git::packfile::PackFile::new(vec![
                    commit,
                    test_crate_file,
                    ch_tree,
                    ch_ar_tree,
                    config_file,
                    root_tree,
                ]);
                let packfile = git::packfile::PackFile::new(pack_file_entries);
                self.write(PktLine::SidebandData(packfile))?;
                self.write(PktLine::Flush)?;
                self.flush(&mut session, channel);
@@ -264,5 +263,103 @@

            Ok((self, session))
        })
    }
}

/// One version record of a crate, serialised to a single line of JSON by `fetch_tree`.
///
/// The field names become the JSON keys, and serde's derive emits them in declaration order,
/// so renaming or reordering fields changes the bytes (and therefore the git blob hash) of
/// every generated file.
///
/// NOTE(review): the key set looks like Cargo's registry index entry format — confirm against
/// the Cargo documentation before adding or removing fields.
#[derive(serde::Serialize)]
pub struct CrateFileEntry<'a> {
    /// Name of the crate this record belongs to.
    name: &'a str,
    /// Version string of this release.
    vers: &'a str,
    /// Dependencies of this release; `fetch_tree` currently always passes an empty slice.
    deps: &'a [&'a str],
    /// Checksum of the crate archive; `fetch_tree` currently passes a placeholder string.
    cksum: &'a str,
    /// Feature name to enabled-items map; `fetch_tree` currently always passes an empty map.
    features: BTreeMap<String, Vec<String>>,
    /// Whether this release has been yanked.
    yanked: bool,
    /// Always `None` at present; `Option<()>` serialises to JSON `null`.
    links: Option<()>,
}

/// Loads every crate from the database and lays the index files out in a two-level directory
/// structure keyed on the crate name.
///
/// The returned map is nested as
/// `first two bytes of the name -> next two bytes of the name -> crate name -> file contents`,
/// where the file contents hold one JSON-serialised [`CrateFileEntry`] per version, each
/// terminated by `\n`, in the order the versions were returned by the database.
///
/// Crates whose name is shorter than four bytes cannot be placed in this layout yet; they are
/// reported on stderr and left out of the tree instead of aborting the whole fetch.
///
/// # Panics
///
/// Panics if serialising an entry to JSON fails. `chartered_db::get_crates` appears to unwrap
/// its own database errors, so a failed query presumably panics as well.
async fn fetch_tree(
    db: chartered_db::ConnectionPool,
) -> BTreeMap<[u8; 2], BTreeMap<[u8; 2], BTreeMap<String, String>>> {
    let mut tree: BTreeMap<[u8; 2], BTreeMap<[u8; 2], BTreeMap<String, String>>> = BTreeMap::new();

    for (crate_def, versions) in chartered_db::get_crates(db).await {
        // todo: handle files with 1/2/3 characters
        let (first_dir, second_dir) = match crate_def.name.as_bytes() {
            [a, b, c, d, ..] => ([*a, *b], [*c, *d]),
            _ => {
                eprintln!(
                    "skipping crate {:?}: names shorter than 4 bytes are not supported yet",
                    crate_def.name
                );
                continue;
            }
        };

        // Build the whole file before touching the tree so the borrows of `crate_def.name`
        // held by the entries have ended by the time the name is moved into the map below.
        let mut file = String::new();
        for version in &versions {
            let entry = CrateFileEntry {
                name: &crate_def.name,
                vers: &version.version,
                deps: &[],
                cksum: "cool-checksum-dude",
                features: BTreeMap::new(),
                yanked: version.yanked,
                links: None,
            };

            file.push_str(&serde_json::to_string(&entry).unwrap());
            file.push('\n');
        }

        tree.entry(first_dir)
            .or_default()
            .entry(second_dir)
            .or_default()
            .insert(crate_def.name, file);
    }

    tree
}

/// Turns the nested crate map produced by `fetch_tree` into git objects.
///
/// For every first-level directory a tree object is created containing its second-level
/// directories, and for every second-level directory a tree object is created containing one
/// blob per crate file.
///
/// * `root_tree` - receives one `Directory` item per first-level directory.
/// * `pack_file_entries` - receives every blob and tree object created here. Objects are pushed
///   in the order: crate blobs, then their second-level tree, then the enclosing first-level
///   tree.
/// * `tree` - the directory layout; the created objects borrow their names and contents from
///   it, hence the shared `'a` lifetime.
///
/// # Errors
///
/// Returns an error if hashing an object fails or if a directory key is not valid UTF-8.
/// Entries pushed before the failure are left in `root_tree` / `pack_file_entries`.
fn build_tree<'a>(
    root_tree: &mut Vec<TreeItem<'a>>,
    pack_file_entries: &mut Vec<PackFileEntry<'a>>,
    tree: &'a BTreeMap<[u8; 2], BTreeMap<[u8; 2], BTreeMap<String, String>>>,
) -> Result<(), anyhow::Error> {
    // Exact number of objects pushed below: one tree per first-level directory, one tree per
    // second-level directory and one blob per crate file.
    let new_entries: usize = tree
        .values()
        .map(|second_level_dirs| {
            1 + second_level_dirs
                .values()
                .map(|crates| 1 + crates.len())
                .sum::<usize>()
        })
        .sum();

    root_tree.reserve(tree.len());
    pack_file_entries.reserve(new_entries);

    for (first_level_dir, second_level_dirs) in tree {
        let mut first_level_items = Vec::with_capacity(second_level_dirs.len());

        for (second_level_dir, crates) in second_level_dirs {
            let mut second_level_items = Vec::with_capacity(crates.len());

            for (crate_name, versions_def) in crates {
                let blob = PackFileEntry::Blob(versions_def.as_bytes());
                let blob_hash = blob.hash()?;
                pack_file_entries.push(blob);

                second_level_items.push(TreeItem {
                    kind: TreeItemKind::File,
                    name: crate_name,
                    hash: blob_hash,
                });
            }

            // The hash has to be taken before the tree object is moved into the pack list.
            let second_level_tree = PackFileEntry::Tree(second_level_items);
            let second_level_hash = second_level_tree.hash()?;
            pack_file_entries.push(second_level_tree);

            first_level_items.push(TreeItem {
                kind: TreeItemKind::Directory,
                name: std::str::from_utf8(second_level_dir)?,
                hash: second_level_hash,
            });
        }

        let first_level_tree = PackFileEntry::Tree(first_level_items);
        let first_level_hash = first_level_tree.hash()?;
        pack_file_entries.push(first_level_tree);

        root_tree.push(TreeItem {
            kind: TreeItemKind::Directory,
            name: std::str::from_utf8(first_level_dir)?,
            hash: first_level_hash,
        });
    }

    Ok(())
}
diff --git a/chartered-git/src/git/packfile.rs b/chartered-git/src/git/packfile.rs
index 99fccd7..464a9d0 100644
--- a/chartered-git/src/git/packfile.rs
+++ a/chartered-git/src/git/packfile.rs
@@ -55,6 +55,7 @@
    }
}

#[derive(Debug)]
pub struct Commit<'a> {
    pub tree: GenericArray<u8, <Sha1 as FixedOutputDirty>::OutputSize>, // [u8; 20], but sha-1 returns a GenericArray
    // pub parent: [u8; 20],
@@ -122,6 +123,7 @@
    }
}

#[derive(Debug)]
pub enum TreeItemKind {
    File,
    Directory,
@@ -137,6 +139,7 @@
    }
}

#[derive(Debug)]
pub struct TreeItem<'a> {
    pub kind: TreeItemKind,
    pub name: &'a str,
@@ -158,6 +161,7 @@
    }
}

#[derive(Debug)]
pub enum PackFileEntry<'a> {
    // jordan@Jordans-MacBook-Pro-2 0d % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - f5/473259d9674ed66239766a013f96a3550374e3 | gzip -dc
    // commit 1068tree 0d586b48bc42e8591773d3d8a7223551c39d453c