From 897feb1096d019ed3bcb97261c5a47845e60317d Mon Sep 17 00:00:00 2001 From: Jordan Doyle Date: Sun, 17 Jul 2022 23:11:16 +0100 Subject: [PATCH] Zero-copy serialize/deserialization for commits to/from sled --- Cargo.lock | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 1 + src/database/indexer.rs | 12 +++++------- src/methods/repo.rs | 8 +++++--- src/database/schema/commit.rs | 145 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------- 5 files changed, 179 insertions(+), 57 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b150182..24237ed 100644 --- a/Cargo.lock +++ a/Cargo.lock @@ -1734,6 +1734,7 @@ "tracing-subscriber", "unix_mode", "uuid", + "yoke", ] [[package]] @@ -1932,6 +1933,12 @@ ] [[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] name = "std_prelude" version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -1959,6 +1966,18 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20518fe4a4c9acf048008599e464deb21beeae3d3578418951a189c235a7a9a8" + +[[package]] +name = "synstructure" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "unicode-xid", +] [[package]] name = "syntect" @@ -2299,6 +2318,12 @@ checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" [[package]] +name = "unicode-xid" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "957e51f3646910546462e67d5f7599b9e4fb8acdd304b087a6494730f9eebf04" + +[[package]] name = "unix_mode" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -2542,6 +2567,51 @@ checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" dependencies = [ "linked-hash-map", +] + +[[package]] +name = "yoke" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5f3fb616365adc65cf20f6993b4c053a48a7807375648ab8a3127d1088befd5" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58c2c5bb7c929b85c1b9ec69091b0d835f0878b4fd9eb67973b25936e06c4374" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerofrom" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aed578cc7fa1c85290bdaca18fa5ac8a9365ddd9ed54af4380a6c5e13d9fc484" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8785f47d6062c1932866147f91297286a9f350b3070e9d9f0b6078e37d623c1a" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 26719fc..b15e34a 100644 --- a/Cargo.toml +++ a/Cargo.toml @@ -32,6 +32,7 @@ tracing-subscriber = "0.3" unix_mode = "0.1" uuid = { version = "1.1", features = ["v4"] } +yoke = { version = "0.6", features = ["derive"] } [build-dependencies] anyhow = "1.0" diff --git a/src/database/indexer.rs b/src/database/indexer.rs index 275f33e..7ab67d1 100644 --- a/src/database/indexer.rs +++ a/src/database/indexer.rs @@ -1,5 +1,4 @@ use git2::Sort; -use sled::Batch; use std::path::{Path, PathBuf}; use time::OffsetDateTime; use tracing::info; @@ -70,22 +69,21 @@ revwalk.set_sorting(Sort::REVERSE).unwrap(); revwalk.push_ref(reference).unwrap(); - let mut update_batch = Batch::default(); - let mut i = 0; for rev in revwalk { let commit = git_repository.find_commit(rev.unwrap()).unwrap(); - Commit::from(commit).insert(&mut update_batch, i); + let author = commit.author(); + let committer = commit.committer(); + + Commit::new(&commit, &author, &committer).insert(&commit_tree, i); i += 1; } // a complete and utter hack to remove potentially dropped commits from our tree, // we'll need to add `clear()` to sled's tx api to remove this for to_remove in (i + 1)..(i + 100) { - update_batch.remove(&to_remove.to_be_bytes()); + commit_tree.remove(&to_remove.to_be_bytes()).unwrap(); } - - commit_tree.apply_batch(update_batch).unwrap(); } } } diff --git a/src/methods/repo.rs b/src/methods/repo.rs index bd0b00c..12a1a8f 100644 --- a/src/methods/repo.rs +++ a/src/methods/repo.rs @@ -156,9 +156,9 @@ #[derive(Template)] #[template(path = "repo/log.html")] -pub struct LogView { +pub struct LogView<'a> { repo: Repository, - commits: Vec, + commits: Vec<&'a crate::database::schema::commit::Commit<'a>>, next_offset: Option, branch: Option, } @@ -173,7 +173,7 @@ let reference = format!("refs/heads/{}", query.branch.as_deref().unwrap_or("master")); let repository = crate::database::schema::repository::Repository::open(&db, &*repo).unwrap(); let commit_tree = repository.commit_tree(&db, &reference); - let mut commits = commit_tree.fetch_latest(101, offset); + let mut commits = commit_tree.fetch_latest(101, offset).await; let next_offset = if commits.len() == 101 { commits.pop(); @@ -181,6 +181,8 @@ } else { None }; + + let commits = commits.iter().map(|v| v.get()).collect(); into_response(&LogView { repo, diff --git a/src/database/schema/commit.rs b/src/database/schema/commit.rs index 23279db..4ac6d28 100644 --- a/src/database/schema/commit.rs +++ a/src/database/schema/commit.rs @@ -1,51 +1,86 @@ -use git2::Signature; -use serde::{Deserialize, Serialize}; -use sled::Batch; +use std::borrow::Cow; +use git2::{Oid, Signature}; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use sled::IVec; use std::ops::Deref; use time::OffsetDateTime; +use yoke::{Yoke, Yokeable}; -#[derive(Serialize, Deserialize, Debug)] -pub struct Commit { - pub summary: String, - pub message: String, - pub author: Author, - pub committer: Author, - pub hash: Vec, -} - -impl From> for Commit { - fn from(commit: git2::Commit<'_>) -> Self { - Commit { +#[derive(Serialize, Deserialize, Debug, Yokeable)] +pub struct Commit<'a> { + #[serde(borrow)] + pub summary: Cow<'a, str>, + #[serde(borrow)] + pub message: Cow<'a, str>, + pub author: Author<'a>, + pub committer: Author<'a>, + pub hash: CommitHash<'a>, +} + +impl<'a> Commit<'a> { + pub fn new(commit: &'a git2::Commit<'_>, author: &'a git2::Signature<'_>, committer: &'a git2::Signature<'_>) -> Self { + Self { summary: commit - .summary() - .map(ToString::to_string) - .unwrap_or_default(), - message: commit.body().map(ToString::to_string).unwrap_or_default(), - committer: commit.committer().into(), - author: commit.author().into(), - hash: commit.id().as_bytes().to_vec(), + .summary_bytes() + .map(String::from_utf8_lossy) + .unwrap_or(Cow::Borrowed("")), + message: commit.body_bytes().map(String::from_utf8_lossy).unwrap_or(Cow::Borrowed("")), + committer: committer.into(), + author: author.into(), + hash: CommitHash::Oid(commit.id()), + } + } + + pub fn insert(&self, batch: &CommitTree, id: usize) { + batch.insert(&id.to_be_bytes(), bincode::serialize(self).unwrap()).unwrap(); + } +} + +#[derive(Debug)] +pub enum CommitHash<'a> { + Oid(Oid), + Bytes(&'a [u8]), +} + +impl<'a> Deref for CommitHash<'a> { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + match self { + CommitHash::Oid(v) => v.as_bytes(), + CommitHash::Bytes(v) => v, + } + } +} + +impl Serialize for CommitHash<'_> { + fn serialize(&self, serializer: S) -> Result where S: Serializer { + match self { + CommitHash::Oid(v) => v.as_bytes().serialize(serializer), + CommitHash::Bytes(v) => v.serialize(serializer), } } } -impl Commit { - pub fn insert(&self, batch: &mut Batch, id: usize) { - batch.insert(&id.to_be_bytes(), bincode::serialize(self).unwrap()); +impl<'a, 'de: 'a> Deserialize<'de> for CommitHash<'a> { + fn deserialize(deserializer: D) -> Result where D: Deserializer<'de> { + let bytes = <&'a [u8]>::deserialize(deserializer)?; + Ok(Self::Bytes(bytes)) } } #[derive(Serialize, Deserialize, Debug)] -pub struct Author { - pub name: String, - pub email: String, +pub struct Author<'a> { + pub name: Cow<'a, str>, + pub email: Cow<'a, str>, pub time: OffsetDateTime, } -impl From> for Author { - fn from(author: Signature<'_>) -> Self { +impl<'a> From<&'a git2::Signature<'_>> for Author<'a> { + fn from(author: &'a Signature<'_>) -> Self { Self { - name: author.name().map(ToString::to_string).unwrap_or_default(), - email: author.email().map(ToString::to_string).unwrap_or_default(), + name: String::from_utf8_lossy(author.name_bytes()), + email: String::from_utf8_lossy(author.email_bytes()), // TODO: this needs to deal with offset time: OffsetDateTime::from_unix_timestamp(author.when().seconds()).unwrap(), } @@ -61,29 +96,45 @@ &self.0 } } + +pub type CommitYoke = Yoke, Box>; impl CommitTree { pub(super) fn new(tree: sled::Tree) -> Self { Self(tree) } - pub fn fetch_latest(&self, amount: usize, offset: usize) -> Vec { - let (latest_key, _) = self.last().unwrap().unwrap(); - let mut latest_key_bytes = [0; std::mem::size_of::()]; - latest_key_bytes.copy_from_slice(&latest_key); + pub async fn fetch_latest(&self, amount: usize, offset: usize) -> Vec { + let latest_key = if let Some((latest_key, _)) = self.last().unwrap() { + let mut latest_key_bytes = [0; std::mem::size_of::()]; + latest_key_bytes.copy_from_slice(&latest_key); + usize::from_be_bytes(latest_key_bytes) + } else { + return vec![]; + }; - let end = usize::from_be_bytes(latest_key_bytes).saturating_sub(offset); + let end = latest_key.saturating_sub(offset); let start = end.saturating_sub(amount); + + let iter = self.range(start.to_be_bytes()..end.to_be_bytes()); + + tokio::task::spawn_blocking(move || { + iter + .rev() + .map(|res| { + let (_, value) = res?; + + // internally value is an Arc so it should already be stablederef but because + // of reasons unbeknownst to me, sled has its own Arc implementation so we need + // to box the value as well to get a stablederef... + let value = Box::new(value); - self.range(start.to_be_bytes()..end.to_be_bytes()) - .rev() - .map(|res| { - let (_, value) = res?; - let details = bincode::deserialize(&value).unwrap(); - - Ok(details) - }) - .collect::, sled::Error>>() - .unwrap() + Ok(Yoke::try_attach_to_cart(value, |data: &IVec| bincode::deserialize(&data)).unwrap()) + }) + .collect::, sled::Error>>() + .unwrap() + }) + .await + .unwrap() } } -- rgit 0.1.3