🏡 index : ~doyle/rgit.git

author Jordan Doyle <jordan@doyle.la> 2022-07-16 14:36:36.0 +01:00:00
committer Jordan Doyle <jordan@doyle.la> 2022-07-16 14:36:36.0 +01:00:00
commit
9ad3b95a4ebcf11de1f46b3061bd6d818c835f5d [patch]
tree
d9949bef9382f5dffaffe5a14500dfcb1472515c
parent
9db6315eb4ed3bea716eda6f1e7b91cd519c71dc
download
9ad3b95a4ebcf11de1f46b3061bd6d818c835f5d.tar.gz

Start reading repository metadata from sled



Diff

 Cargo.lock                        | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 Cargo.toml                        |   5 +++--
 src/git.rs                        |  98 ++------------------------------------------------------------------------------
 src/main.rs                       |  15 ++++++++++++++-
 src/database/indexer.rs           | 101 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/database/mod.rs               |   2 ++
 src/methods/index.rs              |  22 +++++++++++++++++-----
 src/database/schema/commit.rs     |  20 ++++++++++++++++++++
 src/database/schema/mod.rs        |   3 +++
 src/database/schema/prefixes.rs   |  35 +++++++++++++++++++++++++++++++++++
 src/database/schema/repository.rs |  82 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 11 files changed, 378 insertions(+), 118 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 03badbd..22eda4e 100644
--- a/Cargo.lock
+++ a/Cargo.lock
@@ -42,12 +42,6 @@
checksum = "bb07d2053ccdbe10e2af2995a2f116c1330396493dc1269f6a91d0ae82e19704"

[[package]]
name = "arc-swap"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c5d78ce20460b82d3fa150275ed9d55e21064fc7951177baacf86a145c4a4b1f"

[[package]]
name = "askama"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -280,6 +274,12 @@
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c53dfa917ec274df8ed3c572698f381a24eef2efba9492d797301b72b6db408a"

[[package]]
name = "byteorder"
version = "1.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"

[[package]]
name = "bytes"
@@ -473,7 +473,21 @@
 "crossbeam-utils 0.7.2",
 "lazy_static",
 "maybe-uninit",
 "memoffset",
 "memoffset 0.5.6",
 "scopeguard",
]

[[package]]
name = "crossbeam-epoch"
version = "0.9.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07db9d94cbd326813772c968ccd25999e5f8ae22f4f8d1b11effa37ef6ce281d"
dependencies = [
 "autocfg 1.1.0",
 "cfg-if 1.0.0",
 "crossbeam-utils 0.8.10",
 "memoffset 0.6.5",
 "once_cell",
 "scopeguard",
]

@@ -619,6 +633,16 @@
]

[[package]]
name = "fs2"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213"
dependencies = [
 "libc",
 "winapi",
]

[[package]]
name = "fuchsia-cprng"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -726,6 +750,15 @@
 "pin-project-lite",
 "pin-utils",
 "slab",
]

[[package]]
name = "fxhash"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
dependencies = [
 "byteorder",
]

[[package]]
@@ -1063,6 +1096,15 @@
version = "0.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "043175f069eda7b85febe4a74abbaeff828d9f8b448515d3151a14a3542811aa"
dependencies = [
 "autocfg 1.1.0",
]

[[package]]
name = "memoffset"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce"
dependencies = [
 "autocfg 1.1.0",
]
@@ -1119,12 +1161,12 @@
 "async-io",
 "async-lock",
 "crossbeam-channel",
 "crossbeam-epoch",
 "crossbeam-epoch 0.8.2",
 "crossbeam-utils 0.8.10",
 "futures-util",
 "num_cpus",
 "once_cell",
 "parking_lot",
 "parking_lot 0.12.1",
 "quanta",
 "scheduled-thread-pool",
 "skeptic",
@@ -1274,6 +1316,17 @@
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "427c3892f9e783d91cc128285287e70a59e206ca452770ece88a76f7a3eddd72"

[[package]]
name = "parking_lot"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99"
dependencies = [
 "instant",
 "lock_api",
 "parking_lot_core 0.8.5",
]

[[package]]
name = "parking_lot"
@@ -1282,7 +1335,21 @@
checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
dependencies = [
 "lock_api",
 "parking_lot_core",
 "parking_lot_core 0.9.3",
]

[[package]]
name = "parking_lot_core"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d76e8e1493bcac0d2766c42737f34458f1c8c50c0d23bcb24ea953affb273216"
dependencies = [
 "cfg-if 1.0.0",
 "instant",
 "libc",
 "redox_syscall",
 "smallvec",
 "winapi",
]

[[package]]
@@ -1640,10 +1707,10 @@
version = "0.1.0"
dependencies = [
 "anyhow",
 "arc-swap",
 "askama",
 "axum",
 "bat",
 "bincode",
 "clap",
 "futures",
 "git2",
@@ -1651,10 +1718,11 @@
 "humantime",
 "md5",
 "moka",
 "parking_lot",
 "parking_lot 0.12.1",
 "path-clean",
 "rsass",
 "serde",
 "sled",
 "syntect",
 "time 0.3.11",
 "timeago",
@@ -1712,7 +1780,7 @@
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "977a7519bff143a44f842fd07e80ad1329295bd71686457f18e496736f4bf9bf"
dependencies = [
 "parking_lot",
 "parking_lot 0.12.1",
]

[[package]]
@@ -1829,6 +1897,22 @@
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb703cfe953bccee95685111adeedb76fabe4e97549a58d16f03ea7b9367bb32"

[[package]]
name = "sled"
version = "0.34.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f96b4737c2ce5987354855aed3797279def4ebf734436c6aa4552cf8e169935"
dependencies = [
 "crc32fast",
 "crossbeam-epoch 0.9.9",
 "crossbeam-utils 0.8.10",
 "fs2",
 "fxhash",
 "libc",
 "log",
 "parking_lot 0.11.2",
]

[[package]]
name = "smallvec"
@@ -1991,6 +2075,7 @@
 "itoa",
 "libc",
 "num_threads",
 "serde",
]

[[package]]
@@ -2030,7 +2115,7 @@
 "mio",
 "num_cpus",
 "once_cell",
 "parking_lot",
 "parking_lot 0.12.1",
 "pin-project-lite",
 "signal-hook-registry",
 "socket2",
diff --git a/Cargo.toml b/Cargo.toml
index 90a1901..2409754 100644
--- a/Cargo.toml
+++ a/Cargo.toml
@@ -7,9 +7,9 @@

[dependencies]
askama = "0.11"
arc-swap = "1.5"
axum = "0.5"
bat = { version = "0.21", default-features = false, features = ["build-assets"] }
bincode = "1.3"
clap = { version = "3.2", features = ["cargo"] }
futures = "0.3"
git2 = "0.14"
@@ -21,7 +21,8 @@
parking_lot = "0.12"
serde = { version = "1.0", features = ["derive"] }
syntect = "5"
time = "0.3"
sled = "0.34"
time = { version = "0.3", features = ["serde"] }
timeago = "0.3"
tokio = { version = "1.19", features = ["full"] }
tower = "0.4"
diff --git a/src/git.rs b/src/git.rs
index 38aa78b..11e3811 100644
--- a/src/git.rs
+++ a/src/git.rs
@@ -1,16 +1,7 @@
use std::{
    borrow::Cow,
    collections::BTreeMap,
    fmt::Write,
    path::{Path, PathBuf},
    sync::Arc,
    time::Duration,
};
use std::{borrow::Cow, fmt::Write, path::PathBuf, sync::Arc, time::Duration};

use arc_swap::ArcSwapOption;
use git2::{
    BranchType, DiffFormat, DiffLineType, DiffOptions, DiffStatsFormat, ObjectType, Oid,
    Repository, Signature,
    BranchType, DiffFormat, DiffLineType, DiffOptions, DiffStatsFormat, ObjectType, Oid, Signature,
};
use moka::future::Cache;
use parking_lot::Mutex;
@@ -20,13 +11,10 @@
use time::OffsetDateTime;
use tracing::instrument;

pub type RepositoryMetadataList = BTreeMap<Option<String>, Vec<RepositoryMetadata>>;

/// Shared state used when reading repositories: a handful of caches plus the syntax set
/// passed to `Git::new`.
pub struct Git {
    /// Cache of `Commit` values keyed by git object id.
    commits: Cache<Oid, Arc<Commit>>,
    /// Cache keyed by `PathBuf`; each entry holds an optional README string.
    readme_cache: Cache<PathBuf, Option<Arc<str>>>,
    /// Cache of `Refs` keyed by `PathBuf`.
    refs: Cache<PathBuf, Arc<Refs>>,
    /// Repository listing; starts out empty (`ArcSwapOption::default()`) and is filled in by
    /// `fetch_repository_metadata` the first time it is called.
    repository_metadata: ArcSwapOption<RepositoryMetadataList>,
    /// Syntax set supplied by the caller of `Git::new`.
    // NOTE(review): presumably used for syntax highlighting — confirm against the users of
    // this field, which are outside this view.
    syntax_set: SyntaxSet,
}

@@ -46,7 +34,6 @@
                .time_to_live(Duration::from_secs(10))
                .max_capacity(100)
                .build(),
            repository_metadata: ArcSwapOption::default(),
            syntax_set,
        }
    }
@@ -66,29 +53,7 @@
            git: self,
            cache_key: repo_path,
            repo: Mutex::new(repo),
        })
    }

    /// Returns the repository listing, scanning the disk only when nothing has been stored yet.
    ///
    /// If `repository_metadata` already holds a value it is returned as-is. Otherwise the
    /// directory `../test-git` is canonicalised and walked on a blocking thread, and the
    /// result is stored for later calls before being returned.
    ///
    /// # Panics
    ///
    /// Panics if `../test-git` cannot be canonicalised or if the blocking task fails
    /// (including when the scan itself panics).
    #[instrument(skip(self))]
    pub async fn fetch_repository_metadata(&self) -> Arc<RepositoryMetadataList> {
        // Fast path: a previous call already stored a listing.
        if let Some(metadata) = self.repository_metadata.load().as_ref() {
            return Arc::clone(metadata);
        }

        let start = Path::new("../test-git").canonicalize().unwrap();

        // The walk uses blocking filesystem calls, so it runs off the async executor.
        let repos = tokio::task::spawn_blocking(move || {
            let mut repos: RepositoryMetadataList = RepositoryMetadataList::new();
            fetch_repository_metadata_impl(&start, &start, &mut repos);
            repos
        })
        .await
        .unwrap();

        // NOTE(review): nothing between the `load` above and this `store` stops two concurrent
        // callers from both running the scan; the later store simply replaces the earlier one.
        let repos = Arc::new(repos);
        self.repository_metadata.store(Some(repos.clone()));

        repos
    }
}

@@ -445,14 +410,6 @@
/// A tag, described by its name and an optional tagger.
pub struct Tag {
    /// Name of the tag.
    pub name: String,
    /// Optional tagger identity.
    pub tagger: Option<CommitUser>,
}

/// Summary of one repository, as collected by `fetch_repository_metadata_impl`.
#[derive(Debug)]
pub struct RepositoryMetadata {
    /// Last component of the repository's directory path.
    pub name: String,
    /// Contents of the repository's `description` file, or `None` if it could not be read.
    pub description: Option<Cow<'static, str>>,
    /// Value of `gitweb.owner` from the repository configuration, if set.
    pub owner: Option<String>,
    /// Modification time of the repository directory.
    pub last_modified: OffsetDateTime,
}

#[derive(Debug)]
@@ -648,55 +605,4 @@
    .unwrap();

    diff_output
}

/// Recursively walks `current`, adding a `RepositoryMetadata` entry to `repos` for every
/// sub-directory that opens as a bare git repository.
///
/// Entries are grouped under the path of their parent directory relative to `start`
/// (`None` when the repository sits directly inside `start`). Sub-directories that do not
/// open as a bare repository are descended into instead.
///
/// # Panics
///
/// Panics if a directory cannot be listed, if `current` is not below `start`, if the
/// repository directory's modification time cannot be read, or if the repository
/// configuration cannot be opened.
#[instrument(skip(repos))]
fn fetch_repository_metadata_impl(
    start: &Path,
    current: &Path,
    repos: &mut RepositoryMetadataList,
) {
    // Only directories are of interest; plain files are filtered out.
    let dirs = std::fs::read_dir(current)
        .unwrap()
        .map(|v| v.unwrap().path())
        .filter(|path| path.is_dir());

    for dir in dirs {
        let repository = match Repository::open_bare(&dir) {
            Ok(v) => v,
            Err(_e) => {
                // Not a bare repository: treat it as a grouping directory and look inside.
                fetch_repository_metadata_impl(start, &dir, repos);
                continue;
            }
        };

        // Group key: the parent directory relative to the scan root; an empty string (the
        // repository lives directly in the root) becomes `None`.
        let repo_path = Some(
            current
                .strip_prefix(start)
                .unwrap()
                .to_string_lossy()
                .into_owned(),
        )
        .filter(|v| !v.is_empty());
        let repos = repos.entry(repo_path).or_default();

        // A missing or unreadable `description` file simply yields `None`.
        let description = std::fs::read_to_string(dir.join("description"))
            .map(Cow::Owned)
            .ok();
        let last_modified = std::fs::metadata(&dir).unwrap().modified().unwrap();
        // An unset `gitweb.owner` yields `None`.
        let owner = repository.config().unwrap().get_string("gitweb.owner").ok();

        repos.push(RepositoryMetadata {
            name: dir
                .components()
                .last()
                .unwrap()
                .as_os_str()
                .to_string_lossy()
                .into_owned(),
            description,
            owner,
            last_modified: OffsetDateTime::from(last_modified),
        });
    }
}
diff --git a/src/main.rs b/src/main.rs
index 2e7d5f5..3fc07d3 100644
--- a/src/main.rs
+++ a/src/main.rs
@@ -14,6 +14,7 @@

use crate::{git::Git, layers::logger::LoggingMiddleware};

mod database;
mod git;
mod layers;
mod methods;
@@ -27,6 +28,17 @@
    let subscriber = subscriber.pretty();
    subscriber.init();

    let db = sled::open("/tmp/some-sled.db").unwrap();

    std::thread::spawn({
        let db = db.clone();

        move || {
            crate::database::indexer::run_indexer(&db);
            eprintln!("finished indexer");
        }
    });

    let bat_assets = HighlightingAssets::from_binary();
    let syntax_set = bat_assets.get_syntax_set().unwrap().clone();
    let theme = bat_assets.get_theme("GitHub");
@@ -49,7 +61,8 @@
        .route("/highlight.css", get(static_css(css)))
        .fallback(methods::repo::service.into_service())
        .layer(layer_fn(LoggingMiddleware))
        .layer(Extension(Arc::new(Git::new(syntax_set))));
        .layer(Extension(Arc::new(Git::new(syntax_set))))
        .layer(Extension(db));

    axum::Server::bind(&"127.0.0.1:3333".parse().unwrap())
        .serve(app.into_make_service_with_connect_info::<std::net::SocketAddr>())
diff --git a/src/database/indexer.rs b/src/database/indexer.rs
new file mode 100644
index 0000000..56bf671 100644
--- /dev/null
+++ a/src/database/indexer.rs
@@ -1,0 +1,101 @@
use std::path::{Path, PathBuf};
use time::OffsetDateTime;

use crate::database::schema::repository::{Repository, RepositoryId};

/// Runs one indexing pass against `db`.
///
/// The repository metadata below the (currently hard-coded) scan directory is refreshed
/// first. Every repository stored in the database is then opened with `git2` and its
/// references are enumerated; references without a usable name are skipped. Nothing is done
/// with the reference names yet.
///
/// # Panics
///
/// Panics if a stored repository cannot be opened or its references cannot be listed.
pub fn run_indexer(db: &sled::Db) {
    let scan_path = Path::new("/Users/jordan/Code/test-git");
    update_repository_metadata(scan_path, db);

    for relative_path in Repository::fetch_all(db).into_keys() {
        let repository_dir = scan_path.join(relative_path);
        let git_repository = git2::Repository::open(repository_dir).unwrap();

        for reference in git_repository.references().unwrap() {
            // Skip references that failed to load or whose name is unavailable.
            let _reference = match reference.as_ref().ok().and_then(|v| v.name()) {
                Some(name) => name,
                None => continue,
            };

            // Planned next step (not enabled yet):
            //
            // let mut revwalk = git_repository.revwalk().unwrap();
            // revwalk.set_sorting(Sort::REVERSE).unwrap();
            // revwalk.push_ref(reference).unwrap();
            //
            // for rev in revwalk {
            //     let rev = rev.unwrap();
            //     let commit = git_repository.find_commit(rev).unwrap();
            // }
        }
    }
}

/// Discovers every repository beneath `scan_path` and writes (or overwrites) its metadata
/// record in `db`, keyed by the repository's path relative to `scan_path`.
///
/// * A repository that already has a record keeps its existing id; otherwise a new id is
///   generated.
/// * `name` is the final component of the relative path.
/// * `description` is the lossily decoded content of the repository's `description` file;
///   a missing, unreadable or empty file yields `None`.
/// * `last_modified` is the modification time of the repository directory. Stamping every
///   record with the current time would make each indexing pass report every repository as
///   freshly modified, so the current time is only used as a fallback when the mtime cannot
///   be read.
///
/// # Panics
///
/// Panics if a discovered path has no final component, or if the database read/write fails.
fn update_repository_metadata(scan_path: &Path, db: &sled::Db) {
    let mut discovered = Vec::new();
    discover_repositories(scan_path, &mut discovered);

    for repository in discovered {
        let relative = get_relative_path(scan_path, &repository);

        // Re-use the id of an existing record so re-indexing does not change identities.
        let id = Repository::open(db, relative)
            .map(|v| v.id)
            .unwrap_or_else(|| RepositoryId::new(db));
        let name = relative.file_name().unwrap().to_string_lossy().into_owned();
        let description = std::fs::read(repository.join("description"))
            .ok()
            .map(|bytes| String::from_utf8_lossy(&bytes).into_owned())
            .filter(|v| !v.is_empty());
        let last_modified = std::fs::metadata(&repository)
            .and_then(|metadata| metadata.modified())
            .map(OffsetDateTime::from)
            .unwrap_or_else(|_| OffsetDateTime::now_utc());

        Repository {
            id,
            name,
            description,
            owner: None, // TODO read this from config
            last_modified,
        }
        .insert(db, relative);
    }
}

// util

/// Returns `full_path` expressed relative to `relative_to`.
///
/// # Panics
///
/// Panics if `relative_to` is not a prefix of `full_path`.
fn get_relative_path<'a>(relative_to: &Path, full_path: &'a Path) -> &'a Path {
    let stripped = full_path.strip_prefix(relative_to);
    stripped.unwrap()
}

/// Recursively collects the directories below `current` that look like bare git repositories.
///
/// A directory counts as a repository when it contains a `packed-refs` file; it is pushed
/// onto `discovered_repos` and not descended into. Any other directory is searched
/// recursively. Non-directory entries are ignored.
///
/// A directory (or directory entry) that cannot be read is reported on stderr and skipped,
/// so one unreadable path does not abort the whole scan.
fn discover_repositories(current: &Path, discovered_repos: &mut Vec<PathBuf>) {
    let entries = match std::fs::read_dir(current) {
        Ok(entries) => entries,
        Err(error) => {
            eprintln!("failed to read directory {}: {}", current.display(), error);
            return;
        }
    };

    for entry in entries {
        let dir = match entry {
            Ok(entry) => entry.path(),
            Err(error) => {
                eprintln!(
                    "failed to read an entry of {}: {}",
                    current.display(),
                    error
                );
                continue;
            }
        };

        if !dir.is_dir() {
            continue;
        }

        if dir.join("packed-refs").is_file() {
            // we've hit what looks like a bare git repo, let's take it
            discovered_repos.push(dir);
        } else {
            // probably not a bare git repo, let's recurse deeper
            discover_repositories(&dir, discovered_repos);
        }
    }
}

#[cfg(test)]
mod test {
    use crate::database::schema::repository::Repository;
    use time::Instant;

    /// Manual smoke test: runs a full indexing pass and prints how long it took together
    /// with the record stored for `1p.git`.
    ///
    /// It depends on the repositories under the indexer's hard-coded scan path, so it is
    /// ignored by default; run it with `cargo test -- --ignored --nocapture` to see the
    /// output. The timing and record are printed rather than raised through `panic!` so the
    /// test no longer fails unconditionally.
    #[test]
    #[ignore = "requires the repositories under the indexer's hard-coded scan path"]
    fn test_discovery() {
        let db = sled::open(std::env::temp_dir().join("sled-test.db")).unwrap();

        let start = Instant::now();
        super::run_indexer(&db);
        let repo = Repository::open(&db, "1p.git");

        eprintln!("{} - {:#?}", start.elapsed(), repo);
    }
}
diff --git a/src/database/mod.rs b/src/database/mod.rs
new file mode 100644
index 0000000..aa8039f 100644
--- /dev/null
+++ a/src/database/mod.rs
@@ -1,0 +1,2 @@
pub mod indexer;
pub mod schema;
diff --git a/src/methods/index.rs b/src/methods/index.rs
index 9497930..a10ed46 100644
--- a/src/methods/index.rs
+++ a/src/methods/index.rs
@@ -1,19 +1,31 @@
use std::collections::BTreeMap;

use askama::Template;
use axum::response::Response;
use axum::Extension;
use std::sync::Arc;

use super::filters;
use crate::{git::RepositoryMetadataList, into_response, Git};
use crate::database::schema::repository::Repository;
use crate::into_response;

#[derive(Template)]
#[template(path = "index.html")]
pub struct View {
    pub repositories: Arc<RepositoryMetadataList>,
    pub repositories: BTreeMap<Option<String>, Vec<Repository>>,
}

pub async fn handle(Extension(db): Extension<sled::Db>) -> Response {
    let mut repositories: BTreeMap<Option<String>, Vec<Repository>> = BTreeMap::new();

    for (k, v) in Repository::fetch_all(&db) {
        // TODO: fixme
        let mut split: Vec<_> = k.split('/').collect();
        split.pop();
        let key = Some(split.join("/")).filter(|v| !v.is_empty());

pub async fn handle(Extension(git): Extension<Arc<Git>>) -> Response {
    let repositories = git.fetch_repository_metadata().await;
        let k = repositories.entry(key).or_default();
        k.push(v);
    }

    into_response(&View { repositories })
}
diff --git a/src/database/schema/commit.rs b/src/database/schema/commit.rs
new file mode 100644
index 0000000..eca799a 100644
--- /dev/null
+++ a/src/database/schema/commit.rs
@@ -1,0 +1,20 @@
use serde::{Deserialize, Serialize};

/// Commit record that can be serialised and deserialised through serde.
///
/// NOTE(review): nothing visible here constructs or reads these fields yet, so the exact
/// content of each string is unconfirmed.
#[derive(Serialize, Deserialize, Debug)]
pub struct Commit {
    // presumably a textual age/timestamp for the commit — TODO confirm
    age: String,
    // presumably the commit message — TODO confirm
    message: String,
    // presumably the author's name or identity string — TODO confirm
    author: String,
}

// No methods yet; the empty impl is a placeholder.
impl Commit {}

/// Thin wrapper around a `sled::Tree`.
pub struct CommitVault {
    // Nothing reads the tree yet; the leading underscore presumably silences the unused-field
    // warning until it does.
    _tree: sled::Tree,
}

impl CommitVault {
    /// Wraps an already opened tree. Visible only to the parent module (`pub(super)`).
    pub(super) fn new(tree: sled::Tree) -> Self {
        Self { _tree: tree }
    }
}
diff --git a/src/database/schema/mod.rs b/src/database/schema/mod.rs
new file mode 100644
index 0000000..ae2930f 100644
--- /dev/null
+++ a/src/database/schema/mod.rs
@@ -1,0 +1,3 @@
pub mod commit;
pub mod prefixes;
pub mod repository;
diff --git a/src/database/schema/prefixes.rs b/src/database/schema/prefixes.rs
new file mode 100644
index 0000000..e088bba 100644
--- /dev/null
+++ a/src/database/schema/prefixes.rs
@@ -1,0 +1,35 @@
use crate::database::schema::repository::RepositoryId;
use std::path::Path;

/// Single-byte tags written at the front of generated identifiers so that different kinds of
/// records do not collide.
#[repr(u8)]
pub enum TreePrefix {
    /// Prefix used by `TreePrefix::repository_id`.
    Repository = 0,
    /// Prefix used by `TreePrefix::commit_id`.
    Commit = 100,
    /// Not used by any identifier builder in this file yet.
    _Tag = 101,
}

impl TreePrefix {
    pub fn repository_id<T: AsRef<Path>>(path: T) -> Vec<u8> {
        let path = path.as_ref().to_string_lossy();
        let path_bytes = path.as_bytes();

        let mut prefixed = Vec::with_capacity(path_bytes.len() + std::mem::size_of::<TreePrefix>());
        prefixed.push(Self::Repository as u8);
        prefixed.extend_from_slice(path_bytes);

        prefixed
    }

    pub fn commit_id<T: AsRef<[u8]>>(repository: RepositoryId, commit: T) -> Vec<u8> {
        let commit = commit.as_ref();

        let mut prefixed = Vec::with_capacity(
            commit.len() + std::mem::size_of::<RepositoryId>() + std::mem::size_of::<TreePrefix>(),
        );
        prefixed.push(TreePrefix::Commit as u8);
        prefixed.extend_from_slice(&repository.to_ne_bytes());
        prefixed.extend_from_slice(&commit);

        prefixed
    }
}
diff --git a/src/database/schema/repository.rs b/src/database/schema/repository.rs
new file mode 100644
index 0000000..da272e6 100644
--- /dev/null
+++ a/src/database/schema/repository.rs
@@ -1,0 +1,82 @@
use crate::database::schema::commit::CommitVault;
use crate::database::schema::prefixes::TreePrefix;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::ops::Deref;
use std::path::Path;
use time::OffsetDateTime;

/// Metadata record for a single repository, stored in `sled` in `bincode` form.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash)]
pub struct Repository {
    /// The ID of the repository, as stored in `sled`.
    pub id: RepositoryId,
    /// The "clean name" of the repository (i.e. `hello-world.git`).
    pub name: String,
    /// The description of the repository, taken from the `description` file in the bare
    /// repository root.
    pub description: Option<String>,
    /// The owner of the repository (`gitweb.owner` in the repository configuration).
    pub owner: Option<String>,
    /// Timestamp of the repository's last update, as recorded by the indexer when it writes
    /// this record.
    pub last_modified: OffsetDateTime,
}

impl Repository {
    /// Loads every repository record, keyed by the path it was stored under.
    ///
    /// Entries the database fails to yield are skipped. The leading prefix byte is removed
    /// from each key and the remainder is decoded lossily as UTF-8.
    ///
    /// # Panics
    ///
    /// Panics if a stored value cannot be deserialised.
    pub fn fetch_all(database: &sled::Db) -> HashMap<String, Repository> {
        let mut repositories = HashMap::new();

        let records = database
            .scan_prefix([TreePrefix::Repository as u8])
            .filter_map(Result::ok);

        for (raw_key, raw_value) in records {
            let path = String::from_utf8_lossy(&raw_key[1..]).to_string();
            let repository: Repository = bincode::deserialize(&raw_value).unwrap();

            repositories.insert(path, repository);
        }

        repositories
    }

    /// Stores this record under `path`, replacing any record already stored there.
    ///
    /// # Panics
    ///
    /// Panics if serialisation or the database write fails.
    pub fn insert<P: AsRef<Path>>(&self, database: &sled::Db, path: P) {
        let key = TreePrefix::repository_id(path);
        let value = bincode::serialize(self).unwrap();

        database.insert(key, value).unwrap();
    }

    /// Looks up the record stored under `path`, returning `None` when there is none.
    ///
    /// # Panics
    ///
    /// Panics if the database read fails or the stored value cannot be deserialised.
    pub fn open<P: AsRef<Path>>(database: &sled::Db, path: P) -> Option<Repository> {
        let key = TreePrefix::repository_id(path);

        match database.get(key).unwrap() {
            Some(stored) => Some(bincode::deserialize(&stored).unwrap()),
            None => None,
        }
    }

    /// Opens the tree belonging to `commit` (a hex-encoded id) within this repository.
    ///
    /// # Panics
    ///
    /// Panics if `commit` is not valid hex or the tree cannot be opened.
    // Not called anywhere yet, hence the lint exemption.
    #[allow(dead_code)]
    pub fn commit_vault(&self, database: &sled::Db, commit: &str) -> CommitVault {
        let commit_bytes = hex::decode(commit).unwrap();
        let tree_name = TreePrefix::commit_id(self.id, commit_bytes);

        CommitVault::new(database.open_tree(tree_name).unwrap())
    }
}

/// Numeric identifier of a repository record. The wrapped `u64` is only directly accessible
/// to the parent module; other code reads it through `Deref`.
#[derive(Serialize, Deserialize, Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct RepositoryId(pub(super) u64);

impl RepositoryId {
    /// Allocates a fresh identifier from `db.generate_id()`.
    ///
    /// # Panics
    ///
    /// Panics if the database fails to generate an id.
    pub fn new(db: &sled::Db) -> Self {
        let raw = db.generate_id().unwrap();
        Self(raw)
    }
}

impl Deref for RepositoryId {
    type Target = u64;

    /// Exposes the wrapped `u64`.
    fn deref(&self) -> &u64 {
        let Self(raw) = self;
        raw
    }
}