From 2753b229df245063be41bd7107a2c83832fbe862 Mon Sep 17 00:00:00 2001 From: Jordan Doyle Date: Sun, 17 Jul 2022 23:28:39 +0100 Subject: [PATCH] Zero-copy serialize/deserialization for repositories to/from sled --- src/database/indexer.rs | 15 +++++---------- src/methods/index.rs | 11 ++++++----- src/methods/repo.rs | 2 +- src/database/schema/commit.rs | 40 +++++++++++++++++++++++++++++++++++++--- src/database/schema/mod.rs | 5 +++++ src/database/schema/repository.rs | 50 +++++++++++++++++++++++++++++++++++++++++++++----- 6 files changed, 83 insertions(+), 40 deletions(-) diff --git a/src/database/indexer.rs b/src/database/indexer.rs index 7ab67d1..f4c76dd 100644 --- a/src/database/indexer.rs +++ a/src/database/indexer.rs @@ -22,16 +22,11 @@ let relative = get_relative_path(scan_path, &repository); let id = Repository::open(db, relative) - .map(|v| v.id) + .map(|v| v.get().id) .unwrap_or_else(|| RepositoryId::new(db)); - let name = relative.file_name().unwrap().to_string_lossy().to_string(); - let description = Some( - String::from_utf8_lossy( - &std::fs::read(repository.join("description")).unwrap_or_default(), - ) - .to_string(), - ) - .filter(|v| !v.is_empty()); + let name = relative.file_name().unwrap().to_string_lossy(); + let description = std::fs::read(repository.join("description")).unwrap_or_default(); + let description = Some(String::from_utf8_lossy(&description)).filter(|v| !v.is_empty()); Repository { id, @@ -62,7 +57,7 @@ info!("Updating indexes for {} on {}", reference, relative_path); - let commit_tree = db_repository.commit_tree(db, reference); + let commit_tree = db_repository.get().commit_tree(db, reference); // TODO: only scan revs from the last time we looked let mut revwalk = git_repository.revwalk().unwrap(); diff --git a/src/methods/index.rs b/src/methods/index.rs index a10ed46..dcf7c67 100644 --- a/src/methods/index.rs +++ a/src/methods/index.rs @@ -10,21 +10,22 @@ #[derive(Template)] #[template(path = "index.html")] -pub struct View { - pub repositories: BTreeMap, Vec>, +pub struct View<'a> { + pub repositories: BTreeMap, Vec<&'a Repository<'a>>>, } pub async fn handle(Extension(db): Extension) -> Response { - let mut repositories: BTreeMap, Vec> = BTreeMap::new(); + let mut repositories: BTreeMap, Vec<&Repository<'_>>> = BTreeMap::new(); - for (k, v) in Repository::fetch_all(&db) { + let fetched = Repository::fetch_all(&db); + for (k, v) in fetched.iter() { // TODO: fixme let mut split: Vec<_> = k.split('/').collect(); split.pop(); let key = Some(split.join("/")).filter(|v| !v.is_empty()); let k = repositories.entry(key).or_default(); - k.push(v); + k.push(v.get()); } into_response(&View { repositories }) diff --git a/src/methods/repo.rs b/src/methods/repo.rs index 12a1a8f..6964b03 100644 --- a/src/methods/repo.rs +++ a/src/methods/repo.rs @@ -172,7 +172,7 @@ let reference = format!("refs/heads/{}", query.branch.as_deref().unwrap_or("master")); let repository = crate::database::schema::repository::Repository::open(&db, &*repo).unwrap(); - let commit_tree = repository.commit_tree(&db, &reference); + let commit_tree = repository.get().commit_tree(&db, &reference); let mut commits = commit_tree.fetch_latest(101, offset).await; let next_offset = if commits.len() == 101 { diff --git a/src/database/schema/commit.rs b/src/database/schema/commit.rs index 4ac6d28..47627d9 100644 --- a/src/database/schema/commit.rs +++ a/src/database/schema/commit.rs @@ -1,7 +1,8 @@ -use std::borrow::Cow; +use crate::database::schema::Yoked; use git2::{Oid, Signature}; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use sled::IVec; +use std::borrow::Cow; use std::ops::Deref; use time::OffsetDateTime; use yoke::{Yoke, Yokeable}; @@ -18,13 +19,20 @@ } impl<'a> Commit<'a> { - pub fn new(commit: &'a git2::Commit<'_>, author: &'a git2::Signature<'_>, committer: &'a git2::Signature<'_>) -> Self { + pub fn new( + commit: &'a git2::Commit<'_>, + author: &'a git2::Signature<'_>, + committer: &'a git2::Signature<'_>, + ) -> Self { Self { summary: commit .summary_bytes() + .map(String::from_utf8_lossy) + .unwrap_or(Cow::Borrowed("")), + message: commit + .body_bytes() .map(String::from_utf8_lossy) .unwrap_or(Cow::Borrowed("")), - message: commit.body_bytes().map(String::from_utf8_lossy).unwrap_or(Cow::Borrowed("")), committer: committer.into(), author: author.into(), hash: CommitHash::Oid(commit.id()), @@ -32,7 +40,9 @@ } pub fn insert(&self, batch: &CommitTree, id: usize) { - batch.insert(&id.to_be_bytes(), bincode::serialize(self).unwrap()).unwrap(); + batch + .insert(&id.to_be_bytes(), bincode::serialize(self).unwrap()) + .unwrap(); } } @@ -54,7 +64,10 @@ } impl Serialize for CommitHash<'_> { - fn serialize(&self, serializer: S) -> Result where S: Serializer { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { match self { CommitHash::Oid(v) => v.as_bytes().serialize(serializer), CommitHash::Bytes(v) => v.serialize(serializer), @@ -63,7 +76,10 @@ } impl<'a, 'de: 'a> Deserialize<'de> for CommitHash<'a> { - fn deserialize(deserializer: D) -> Result where D: Deserializer<'de> { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { let bytes = <&'a [u8]>::deserialize(deserializer)?; Ok(Self::Bytes(bytes)) } @@ -97,14 +113,14 @@ } } -pub type CommitYoke = Yoke, Box>; +pub type YokedCommit = Yoked>; impl CommitTree { pub(super) fn new(tree: sled::Tree) -> Self { Self(tree) } - pub async fn fetch_latest(&self, amount: usize, offset: usize) -> Vec { + pub async fn fetch_latest(&self, amount: usize, offset: usize) -> Vec { let latest_key = if let Some((latest_key, _)) = self.last().unwrap() { let mut latest_key_bytes = [0; std::mem::size_of::()]; latest_key_bytes.copy_from_slice(&latest_key); @@ -119,8 +135,7 @@ let iter = self.range(start.to_be_bytes()..end.to_be_bytes()); tokio::task::spawn_blocking(move || { - iter - .rev() + iter.rev() .map(|res| { let (_, value) = res?; @@ -129,7 +144,10 @@ // to box the value as well to get a stablederef... let value = Box::new(value); - Ok(Yoke::try_attach_to_cart(value, |data: &IVec| bincode::deserialize(&data)).unwrap()) + Ok( + Yoke::try_attach_to_cart(value, |data: &IVec| bincode::deserialize(&data)) + .unwrap(), + ) }) .collect::, sled::Error>>() .unwrap() diff --git a/src/database/schema/mod.rs b/src/database/schema/mod.rs index ae2930f..16364b9 100644 --- a/src/database/schema/mod.rs +++ a/src/database/schema/mod.rs @@ -1,3 +1,8 @@ +use sled::IVec; +use yoke::Yoke; + pub mod commit; pub mod prefixes; pub mod repository; + +pub type Yoked = Yoke>; diff --git a/src/database/schema/repository.rs b/src/database/schema/repository.rs index 7a063b1..1889418 100644 --- a/src/database/schema/repository.rs +++ a/src/database/schema/repository.rs @@ -1,36 +1,53 @@ use crate::database::schema::commit::CommitTree; use crate::database::schema::prefixes::TreePrefix; +use crate::database::schema::Yoked; use serde::{Deserialize, Serialize}; -use std::collections::HashMap; +use sled::IVec; +use std::borrow::Cow; +use std::collections::BTreeMap; use std::ops::Deref; use std::path::Path; use time::OffsetDateTime; +use yoke::{Yoke, Yokeable}; -#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash)] -pub struct Repository { +#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Yokeable)] +pub struct Repository<'a> { /// The ID of the repository, as stored in `sled` pub id: RepositoryId, /// The "clean name" of the repository (ie. `hello-world.git`) - pub name: String, + #[serde(borrow)] + pub name: Cow<'a, str>, /// The description of the repository, as it is stored in the `description` file in the /// bare repo root - pub description: Option, + #[serde(borrow)] + pub description: Option>, /// The owner of the repository (`gitweb.owner` in the repository configuration) - pub owner: Option, + #[serde(borrow)] + pub owner: Option>, /// The last time this repository was updated, currently read from the directory mtime pub last_modified: OffsetDateTime, } -impl Repository { - pub fn fetch_all(database: &sled::Db) -> HashMap { +pub type YokedRepository = Yoked>; + +impl Repository<'_> { + pub fn fetch_all(database: &sled::Db) -> BTreeMap { database .scan_prefix([TreePrefix::Repository as u8]) .filter_map(Result::ok) - .map(|(k, v)| { + .map(|(key, value)| { // strip the prefix we've just scanned for - let key = String::from_utf8_lossy(&k[1..]).to_string(); - let value = bincode::deserialize(&v).unwrap(); + let key = String::from_utf8_lossy(&key[1..]).to_string(); + + // internally value is an Arc so it should already be stablederef but because + // of reasons unbeknownst to me, sled has its own Arc implementation so we need + // to box the value as well to get a stablederef... + let value = Box::new(value); + let value = + Yoke::try_attach_to_cart(value, |data: &IVec| bincode::deserialize(&data)) + .unwrap(); + (key, value) }) .collect() @@ -45,11 +62,18 @@ .unwrap(); } - pub fn open>(database: &sled::Db, path: P) -> Option { + pub fn open>(database: &sled::Db, path: P) -> Option { database .get(TreePrefix::repository_id(path)) .unwrap() - .map(|v| bincode::deserialize(&v)) + .map(|value| { + // internally value is an Arc so it should already be stablederef but because + // of reasons unbeknownst to me, sled has its own Arc implementation so we need + // to box the value as well to get a stablederef... + let value = Box::new(value); + + Yoke::try_attach_to_cart(value, |data: &IVec| bincode::deserialize(&data)) + }) .transpose() .unwrap() } -- rgit 0.1.3