Start reading repository metadata from sled
Diff
Cargo.lock | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
Cargo.toml | 5 +++--
src/git.rs | 98 ++------------------------------------------------------------------------------
src/main.rs | 15 ++++++++++++++-
src/database/indexer.rs | 101 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
src/database/mod.rs | 2 ++
src/methods/index.rs | 22 +++++++++++++++++-----
src/database/schema/commit.rs | 20 ++++++++++++++++++++
src/database/schema/mod.rs | 3 +++
src/database/schema/prefixes.rs | 35 +++++++++++++++++++++++++++++++++++
src/database/schema/repository.rs | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
11 files changed, 378 insertions(+), 118 deletions(-)
@@ -42,12 +42,6 @@
checksum = "bb07d2053ccdbe10e2af2995a2f116c1330396493dc1269f6a91d0ae82e19704"
[[package]]
name = "arc-swap"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c5d78ce20460b82d3fa150275ed9d55e21064fc7951177baacf86a145c4a4b1f"
[[package]]
name = "askama"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -280,6 +274,12 @@
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c53dfa917ec274df8ed3c572698f381a24eef2efba9492d797301b72b6db408a"
[[package]]
name = "byteorder"
version = "1.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
[[package]]
name = "bytes"
@@ -473,7 +473,21 @@
"crossbeam-utils 0.7.2",
"lazy_static",
"maybe-uninit",
"memoffset",
"memoffset 0.5.6",
"scopeguard",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07db9d94cbd326813772c968ccd25999e5f8ae22f4f8d1b11effa37ef6ce281d"
dependencies = [
"autocfg 1.1.0",
"cfg-if 1.0.0",
"crossbeam-utils 0.8.10",
"memoffset 0.6.5",
"once_cell",
"scopeguard",
]
@@ -619,6 +633,16 @@
]
[[package]]
name = "fs2"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213"
dependencies = [
"libc",
"winapi",
]
[[package]]
name = "fuchsia-cprng"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -726,6 +750,15 @@
"pin-project-lite",
"pin-utils",
"slab",
]
[[package]]
name = "fxhash"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
dependencies = [
"byteorder",
]
[[package]]
@@ -1063,6 +1096,15 @@
version = "0.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "043175f069eda7b85febe4a74abbaeff828d9f8b448515d3151a14a3542811aa"
dependencies = [
"autocfg 1.1.0",
]
[[package]]
name = "memoffset"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce"
dependencies = [
"autocfg 1.1.0",
]
@@ -1119,12 +1161,12 @@
"async-io",
"async-lock",
"crossbeam-channel",
"crossbeam-epoch",
"crossbeam-epoch 0.8.2",
"crossbeam-utils 0.8.10",
"futures-util",
"num_cpus",
"once_cell",
"parking_lot",
"parking_lot 0.12.1",
"quanta",
"scheduled-thread-pool",
"skeptic",
@@ -1274,6 +1316,17 @@
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "427c3892f9e783d91cc128285287e70a59e206ca452770ece88a76f7a3eddd72"
[[package]]
name = "parking_lot"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99"
dependencies = [
"instant",
"lock_api",
"parking_lot_core 0.8.5",
]
[[package]]
name = "parking_lot"
@@ -1282,7 +1335,21 @@
checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
dependencies = [
"lock_api",
"parking_lot_core",
"parking_lot_core 0.9.3",
]
[[package]]
name = "parking_lot_core"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d76e8e1493bcac0d2766c42737f34458f1c8c50c0d23bcb24ea953affb273216"
dependencies = [
"cfg-if 1.0.0",
"instant",
"libc",
"redox_syscall",
"smallvec",
"winapi",
]
[[package]]
@@ -1640,10 +1707,10 @@
version = "0.1.0"
dependencies = [
"anyhow",
"arc-swap",
"askama",
"axum",
"bat",
"bincode",
"clap",
"futures",
"git2",
@@ -1651,10 +1718,11 @@
"humantime",
"md5",
"moka",
"parking_lot",
"parking_lot 0.12.1",
"path-clean",
"rsass",
"serde",
"sled",
"syntect",
"time 0.3.11",
"timeago",
@@ -1712,7 +1780,7 @@
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "977a7519bff143a44f842fd07e80ad1329295bd71686457f18e496736f4bf9bf"
dependencies = [
"parking_lot",
"parking_lot 0.12.1",
]
[[package]]
@@ -1829,6 +1897,22 @@
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb703cfe953bccee95685111adeedb76fabe4e97549a58d16f03ea7b9367bb32"
[[package]]
name = "sled"
version = "0.34.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f96b4737c2ce5987354855aed3797279def4ebf734436c6aa4552cf8e169935"
dependencies = [
"crc32fast",
"crossbeam-epoch 0.9.9",
"crossbeam-utils 0.8.10",
"fs2",
"fxhash",
"libc",
"log",
"parking_lot 0.11.2",
]
[[package]]
name = "smallvec"
@@ -1991,6 +2075,7 @@
"itoa",
"libc",
"num_threads",
"serde",
]
[[package]]
@@ -2030,7 +2115,7 @@
"mio",
"num_cpus",
"once_cell",
"parking_lot",
"parking_lot 0.12.1",
"pin-project-lite",
"signal-hook-registry",
"socket2",
@@ -7,9 +7,9 @@
[dependencies]
askama = "0.11"
arc-swap = "1.5"
axum = "0.5"
bat = { version = "0.21", default-features = false, features = ["build-assets"] }
bincode = "1.3"
clap = { version = "3.2", features = ["cargo"] }
futures = "0.3"
git2 = "0.14"
@@ -21,7 +21,8 @@
parking_lot = "0.12"
serde = { version = "1.0", features = ["derive"] }
syntect = "5"
time = "0.3"
sled = "0.34"
time = { version = "0.3", features = ["serde"] }
timeago = "0.3"
tokio = { version = "1.19", features = ["full"] }
tower = "0.4"
@@ -1,16 +1,7 @@
use std::{
borrow::Cow,
collections::BTreeMap,
fmt::Write,
path::{Path, PathBuf},
sync::Arc,
time::Duration,
};
use std::{borrow::Cow, fmt::Write, path::PathBuf, sync::Arc, time::Duration};
use arc_swap::ArcSwapOption;
use git2::{
BranchType, DiffFormat, DiffLineType, DiffOptions, DiffStatsFormat, ObjectType, Oid,
Repository, Signature,
BranchType, DiffFormat, DiffLineType, DiffOptions, DiffStatsFormat, ObjectType, Oid, Signature,
};
use moka::future::Cache;
use parking_lot::Mutex;
@@ -20,13 +11,10 @@
use time::OffsetDateTime;
use tracing::instrument;
pub type RepositoryMetadataList = BTreeMap<Option<String>, Vec<RepositoryMetadata>>;
pub struct Git {
commits: Cache<Oid, Arc<Commit>>,
readme_cache: Cache<PathBuf, Option<Arc<str>>>,
refs: Cache<PathBuf, Arc<Refs>>,
repository_metadata: ArcSwapOption<RepositoryMetadataList>,
syntax_set: SyntaxSet,
}
@@ -46,7 +34,6 @@
.time_to_live(Duration::from_secs(10))
.max_capacity(100)
.build(),
repository_metadata: ArcSwapOption::default(),
syntax_set,
}
}
@@ -66,29 +53,7 @@
git: self,
cache_key: repo_path,
repo: Mutex::new(repo),
})
}
/// Returns the metadata of the repositories found beneath `../test-git`.
///
/// The list is computed on the first call and stored in
/// `self.repository_metadata`; later calls return the stored list without
/// touching the filesystem again.
///
/// # Panics
///
/// Panics if `../test-git` cannot be canonicalized or if the blocking scan
/// task fails to complete.
#[instrument(skip(self))]
pub async fn fetch_repository_metadata(&self) -> Arc<RepositoryMetadataList> {
// Fast path: hand out the list stored by an earlier call.
if let Some(metadata) = self.repository_metadata.load().as_ref() {
return Arc::clone(metadata);
}
// NOTE(review): the scan root is a hard-coded relative path, so it is
// resolved against the process's working directory.
let start = Path::new("../test-git").canonicalize().unwrap();
// The directory walk uses blocking filesystem calls, so it is handed to
// `spawn_blocking` rather than run inline in this async function.
let repos = tokio::task::spawn_blocking(move || {
let mut repos: RepositoryMetadataList = RepositoryMetadataList::new();
fetch_repository_metadata_impl(&start, &start, &mut repos);
repos
})
.await
.unwrap();
let repos = Arc::new(repos);
// Remember the result so subsequent calls take the fast path above.
self.repository_metadata.store(Some(repos.clone()));
repos
}
}
@@ -445,14 +410,6 @@
pub struct Tag {
pub name: String,
pub tagger: Option<CommitUser>,
}
/// Metadata describing a single repository discovered on disk.
#[derive(Debug)]
pub struct RepositoryMetadata {
/// Final path component of the repository directory.
pub name: String,
/// Contents of the repository's `description` file, if it could be read.
pub description: Option<Cow<'static, str>>,
/// Value of the `gitweb.owner` git configuration key, if set.
pub owner: Option<String>,
/// Modification time of the repository directory.
pub last_modified: OffsetDateTime,
}
#[derive(Debug)]
@@ -648,55 +605,4 @@
.unwrap();
diff_output
}
/// Recursively collects metadata for every bare repository beneath `current`.
///
/// A directory that opens as a bare repository is recorded in `repos` under
/// the path of its parent directory (`current`) relative to `start`, or under
/// `None` when that relative path is empty. A directory that does not open as
/// a bare repository is descended into instead.
///
/// # Panics
///
/// Panics if a directory cannot be listed, if `current` is not below `start`,
/// or if a repository's filesystem metadata or git configuration cannot be
/// read.
#[instrument(skip(repos))]
fn fetch_repository_metadata_impl(
    start: &Path,
    current: &Path,
    repos: &mut RepositoryMetadataList,
) {
    for entry in std::fs::read_dir(current).unwrap() {
        let dir = entry.unwrap().path();
        if !dir.is_dir() {
            continue;
        }

        let repository = if let Ok(opened) = Repository::open_bare(&dir) {
            opened
        } else {
            // Not a repository itself: look for repositories further down.
            fetch_repository_metadata_impl(start, &dir, repos);
            continue;
        };

        // Repositories are grouped by the directory that contains them.
        let relative_parent = current
            .strip_prefix(start)
            .unwrap()
            .to_string_lossy()
            .into_owned();
        let group_key = if relative_parent.is_empty() {
            None
        } else {
            Some(relative_parent)
        };
        let group = repos.entry(group_key).or_default();

        let description = match std::fs::read_to_string(dir.join("description")) {
            Ok(text) => Some(Cow::Owned(text)),
            Err(_) => None,
        };
        let modified_at = std::fs::metadata(&dir).unwrap().modified().unwrap();
        let owner = repository.config().unwrap().get_string("gitweb.owner").ok();
        let name = dir
            .components()
            .last()
            .unwrap()
            .as_os_str()
            .to_string_lossy()
            .into_owned();

        group.push(RepositoryMetadata {
            name,
            description,
            owner,
            last_modified: OffsetDateTime::from(modified_at),
        });
    }
}
@@ -14,6 +14,7 @@
use crate::{git::Git, layers::logger::LoggingMiddleware};
mod database;
mod git;
mod layers;
mod methods;
@@ -27,6 +28,17 @@
let subscriber = subscriber.pretty();
subscriber.init();
let db = sled::open("/tmp/some-sled.db").unwrap();
std::thread::spawn({
let db = db.clone();
move || {
crate::database::indexer::run_indexer(&db);
eprintln!("finished indexer");
}
});
let bat_assets = HighlightingAssets::from_binary();
let syntax_set = bat_assets.get_syntax_set().unwrap().clone();
let theme = bat_assets.get_theme("GitHub");
@@ -49,7 +61,8 @@
.route("/highlight.css", get(static_css(css)))
.fallback(methods::repo::service.into_service())
.layer(layer_fn(LoggingMiddleware))
.layer(Extension(Arc::new(Git::new(syntax_set))));
.layer(Extension(Arc::new(Git::new(syntax_set))))
.layer(Extension(db));
axum::Server::bind(&"127.0.0.1:3333".parse().unwrap())
.serve(app.into_make_service_with_connect_info::<std::net::SocketAddr>())
@@ -1,0 +1,101 @@
use std::path::{Path, PathBuf};
use time::OffsetDateTime;
use crate::database::schema::repository::{Repository, RepositoryId};
/// Refreshes the repository metadata stored in `db`, then walks the references
/// of every repository recorded in the database.
///
/// Repositories that are recorded in the database but can no longer be opened
/// (for example because they were removed from disk after an earlier run) are
/// reported on stderr and skipped, so one stale entry does not abort the whole
/// indexing pass.
pub fn run_indexer(db: &sled::Db) {
    // NOTE(review): the scan root is hard-coded to a developer-local
    // directory; it should presumably come from configuration.
    let scan_path = Path::new("/Users/jordan/Code/test-git");

    update_repository_metadata(scan_path, db);

    for (relative_path, _repository) in Repository::fetch_all(db) {
        let full_path = scan_path.join(&relative_path);

        let git_repository = match git2::Repository::open(&full_path) {
            Ok(repository) => repository,
            Err(error) => {
                eprintln!(
                    "skipping {}: failed to open repository: {}",
                    full_path.display(),
                    error
                );
                continue;
            }
        };

        let references = match git_repository.references() {
            Ok(references) => references,
            Err(error) => {
                eprintln!(
                    "skipping {}: failed to list references: {}",
                    full_path.display(),
                    error
                );
                continue;
            }
        };

        for reference in references {
            // References that failed to load, or that have no usable name,
            // are skipped. Nothing is done with the name yet.
            let _reference = match reference.as_ref().ok().and_then(|v| v.name()) {
                Some(name) => name,
                None => continue,
            };
        }
    }
}
fn update_repository_metadata(scan_path: &Path, db: &sled::Db) {
let mut discovered = Vec::new();
discover_repositories(scan_path, &mut discovered);
for repository in discovered {
let relative = get_relative_path(scan_path, &repository);
let id = Repository::open(db, relative)
.map(|v| v.id)
.unwrap_or_else(|| RepositoryId::new(db));
let name = relative.file_name().unwrap().to_string_lossy().to_string();
let description = Some(
String::from_utf8_lossy(
&std::fs::read(repository.join("description")).unwrap_or_default(),
)
.to_string(),
)
.filter(|v| !v.is_empty());
Repository {
id,
name,
description,
owner: None,
last_modified: OffsetDateTime::now_utc(),
}
.insert(db, relative);
}
}
/// Returns `full_path` with the leading `relative_to` components removed.
///
/// # Panics
///
/// Panics if `full_path` does not start with `relative_to`.
fn get_relative_path<'a>(relative_to: &Path, full_path: &'a Path) -> &'a Path {
    let stripped = full_path.strip_prefix(relative_to);
    stripped.unwrap()
}
/// Recursively searches `current` for repositories and appends the path of
/// each one found to `discovered_repos`.
///
/// A directory counts as a repository when it contains a `packed-refs` file;
/// any other directory is descended into. Directories and entries that cannot
/// be read are skipped, so one unreadable or concurrently removed directory
/// does not abort the whole scan.
///
/// NOTE(review): a repository whose refs have never been packed has no
/// `packed-refs` file and would be descended into rather than reported —
/// confirm this heuristic is intended.
fn discover_repositories(current: &Path, discovered_repos: &mut Vec<PathBuf>) {
    let entries = match std::fs::read_dir(current) {
        Ok(entries) => entries,
        Err(error) => {
            eprintln!("skipping {}: {}", current.display(), error);
            return;
        }
    };

    let dirs = entries
        .filter_map(Result::ok)
        .map(|entry| entry.path())
        .filter(|path| path.is_dir());

    for dir in dirs {
        if dir.join("packed-refs").is_file() {
            discovered_repos.push(dir);
        } else {
            discover_repositories(&dir, discovered_repos);
        }
    }
}
#[cfg(test)]
mod test {
    use crate::database::schema::repository::Repository;
    use time::Instant;

    /// Manual smoke test: runs the indexer against the hard-coded scan path
    /// and prints how long it took and what was stored for `1p.git`.
    ///
    /// It depends on a developer-local directory, so it is ignored by default.
    /// Run it with `cargo test -- --ignored --nocapture` to see the output.
    #[test]
    #[ignore = "depends on the hard-coded, developer-local scan path used by run_indexer"]
    fn test_discovery() {
        let db = sled::open(std::env::temp_dir().join("sled-test.db")).unwrap();

        let start = Instant::now();
        super::run_indexer(&db);
        let repo = Repository::open(&db, "1p.git");

        // Print instead of panicking so the test does not fail unconditionally.
        println!("{} - {:#?}", start.elapsed(), repo);
    }
}
@@ -1,0 +1,2 @@
pub mod indexer;
pub mod schema;
@@ -1,19 +1,31 @@
use std::collections::BTreeMap;
use askama::Template;
use axum::response::Response;
use axum::Extension;
use std::sync::Arc;
use super::filters;
use crate::{git::RepositoryMetadataList, into_response, Git};
use crate::database::schema::repository::Repository;
use crate::into_response;
#[derive(Template)]
#[template(path = "index.html")]
pub struct View {
pub repositories: Arc<RepositoryMetadataList>,
pub repositories: BTreeMap<Option<String>, Vec<Repository>>,
}
pub async fn handle(Extension(db): Extension<sled::Db>) -> Response {
let mut repositories: BTreeMap<Option<String>, Vec<Repository>> = BTreeMap::new();
for (k, v) in Repository::fetch_all(&db) {
let mut split: Vec<_> = k.split('/').collect();
split.pop();
let key = Some(split.join("/")).filter(|v| !v.is_empty());
pub async fn handle(Extension(git): Extension<Arc<Git>>) -> Response {
let repositories = git.fetch_repository_metadata().await;
let k = repositories.entry(key).or_default();
k.push(v);
}
into_response(&View { repositories })
}
@@ -1,0 +1,20 @@
use serde::{Deserialize, Serialize};
/// Serializable record describing a single commit.
///
/// NOTE(review): nothing visible here constructs or reads this type yet, so
/// the field descriptions below are inferred from the field names — confirm.
#[derive(Serialize, Deserialize, Debug)]
pub struct Commit {
// Presumably a displayable age of the commit — TODO confirm the format.
age: String,
// Presumably the commit message.
message: String,
// Presumably the commit author.
author: String,
}
// No methods are defined yet.
impl Commit {}
/// Handle around the sled tree associated with a commit.
///
/// The tree is only held for now; no operations are exposed on it yet.
pub struct CommitVault {
    _tree: sled::Tree,
}

impl CommitVault {
    /// Wraps an already opened sled tree.
    pub(super) fn new(tree: sled::Tree) -> Self {
        CommitVault { _tree: tree }
    }
}
@@ -1,0 +1,3 @@
pub mod commit;
pub mod prefixes;
pub mod repository;
@@ -1,0 +1,35 @@
use crate::database::schema::repository::RepositoryId;
use std::path::Path;
/// Single-byte tags that namespace the different kinds of keys and tree names
/// stored in the sled database.
#[repr(u8)]
pub enum TreePrefix {
/// First byte of every repository key built by `repository_id`.
Repository = 0,
/// First byte of every commit tree name built by `commit_id`.
Commit = 100,
/// Not referenced by the code visible here; presumably reserved for tags.
_Tag = 101,
}
impl TreePrefix {
    /// Builds the database key for the repository stored under `path`.
    ///
    /// The key is the `Repository` prefix byte followed by the bytes of the
    /// path after a lossy conversion to UTF-8.
    pub fn repository_id<T: AsRef<Path>>(path: T) -> Vec<u8> {
        let path = path.as_ref().to_string_lossy();
        let path_bytes = path.as_bytes();

        let mut prefixed = Vec::with_capacity(path_bytes.len() + std::mem::size_of::<TreePrefix>());
        prefixed.push(Self::Repository as u8);
        prefixed.extend_from_slice(path_bytes);

        prefixed
    }

    /// Builds the tree name for `commit` within `repository`.
    ///
    /// The name is the `Commit` prefix byte, then the repository id, then the
    /// raw commit bytes.
    pub fn commit_id<T: AsRef<[u8]>>(repository: RepositoryId, commit: T) -> Vec<u8> {
        let commit = commit.as_ref();

        let mut prefixed = Vec::with_capacity(
            commit.len() + std::mem::size_of::<RepositoryId>() + std::mem::size_of::<TreePrefix>(),
        );
        prefixed.push(TreePrefix::Commit as u8);
        // Big-endian keeps the persisted name independent of the host's byte
        // order and makes byte-wise ordering agree with numeric id ordering.
        prefixed.extend_from_slice(&repository.to_be_bytes());
        prefixed.extend_from_slice(commit);

        prefixed
    }
}
@@ -1,0 +1,82 @@
use crate::database::schema::commit::CommitVault;
use crate::database::schema::prefixes::TreePrefix;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::ops::Deref;
use std::path::Path;
use time::OffsetDateTime;
/// Metadata describing one git repository, as stored in the sled database.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash)]
pub struct Repository {
/// Identifier of the repository; also used when building commit tree names.
pub id: RepositoryId,
/// Display name of the repository.
pub name: String,
/// Description of the repository, if it has one.
pub description: Option<String>,
/// Owner of the repository, if known.
pub owner: Option<String>,
/// Timestamp recorded as the repository's last modification.
pub last_modified: OffsetDateTime,
}
impl Repository {
    /// Loads every repository stored in `database`, keyed by the path it was
    /// inserted under.
    ///
    /// Entries the database fails to read are skipped.
    ///
    /// # Panics
    ///
    /// Panics if a stored value cannot be deserialized.
    pub fn fetch_all(database: &sled::Db) -> HashMap<String, Repository> {
        let mut repositories = HashMap::new();

        for entry in database.scan_prefix([TreePrefix::Repository as u8]) {
            if let Ok((raw_key, raw_value)) = entry {
                // Drop the leading prefix byte to recover the path.
                let path = String::from_utf8_lossy(&raw_key[1..]).to_string();
                let repository = bincode::deserialize(&raw_value).unwrap();
                repositories.insert(path, repository);
            }
        }

        repositories
    }

    /// Stores this repository in `database` under the key derived from `path`,
    /// replacing any entry already stored there.
    ///
    /// # Panics
    ///
    /// Panics if serialization or the database write fails.
    pub fn insert<P: AsRef<Path>>(&self, database: &sled::Db, path: P) {
        let key = TreePrefix::repository_id(path);
        let encoded = bincode::serialize(self).unwrap();
        database.insert(key, encoded).unwrap();
    }

    /// Looks up the repository stored under `path`, returning `None` when
    /// there is no such entry.
    ///
    /// # Panics
    ///
    /// Panics if the database read fails or the stored value cannot be
    /// deserialized.
    pub fn open<P: AsRef<Path>>(database: &sled::Db, path: P) -> Option<Repository> {
        let key = TreePrefix::repository_id(path);
        let stored = database.get(key).unwrap()?;
        Some(bincode::deserialize(&stored).unwrap())
    }

    /// Opens the sled tree belonging to `commit` (a hex-encoded id) within
    /// this repository.
    ///
    /// # Panics
    ///
    /// Panics if `commit` is not valid hex or the tree cannot be opened.
    #[allow(dead_code)]
    pub fn commit_vault(&self, database: &sled::Db, commit: &str) -> CommitVault {
        let commit_bytes = hex::decode(commit).unwrap();
        let tree_name = TreePrefix::commit_id(self.id, commit_bytes);
        CommitVault::new(database.open_tree(tree_name).unwrap())
    }
}
/// Numeric identifier of a repository, wrapping a `u64`.
#[derive(Serialize, Deserialize, Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct RepositoryId(pub(super) u64);
impl RepositoryId {
pub fn new(db: &sled::Db) -> Self {
Self(db.generate_id().unwrap())
}
}
impl Deref for RepositoryId {
type Target = u64;
fn deref(&self) -> &Self::Target {
&self.0
}
}