Zero-copy serialization/deserialization for commits to/from sled
Diff
Cargo.lock | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Cargo.toml | 1 +
src/database/indexer.rs | 12 +++++-------
src/methods/repo.rs | 8 +++++---
src/database/schema/commit.rs | 145 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------
5 files changed, 179 insertions(+), 57 deletions(-)
@@ -1734,6 +1734,7 @@
"tracing-subscriber",
"unix_mode",
"uuid",
"yoke",
]
[[package]]
@@ -1932,6 +1933,12 @@
]
[[package]]
name = "stable_deref_trait"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "std_prelude"
version = "0.2.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1959,6 +1966,18 @@
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20518fe4a4c9acf048008599e464deb21beeae3d3578418951a189c235a7a9a8"
[[package]]
name = "synstructure"
version = "0.12.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f"
dependencies = [
"proc-macro2",
"quote",
"syn",
"unicode-xid",
]
[[package]]
name = "syntect"
@@ -2299,6 +2318,12 @@
checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973"
[[package]]
name = "unicode-xid"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "957e51f3646910546462e67d5f7599b9e4fb8acdd304b087a6494730f9eebf04"
[[package]]
name = "unix_mode"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -2542,6 +2567,51 @@
checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85"
dependencies = [
"linked-hash-map",
]
[[package]]
name = "yoke"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f5f3fb616365adc65cf20f6993b4c053a48a7807375648ab8a3127d1088befd5"
dependencies = [
"serde",
"stable_deref_trait",
"yoke-derive",
"zerofrom",
]
[[package]]
name = "yoke-derive"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58c2c5bb7c929b85c1b9ec69091b0d835f0878b4fd9eb67973b25936e06c4374"
dependencies = [
"proc-macro2",
"quote",
"syn",
"synstructure",
]
[[package]]
name = "zerofrom"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aed578cc7fa1c85290bdaca18fa5ac8a9365ddd9ed54af4380a6c5e13d9fc484"
dependencies = [
"zerofrom-derive",
]
[[package]]
name = "zerofrom-derive"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8785f47d6062c1932866147f91297286a9f350b3070e9d9f0b6078e37d623c1a"
dependencies = [
"proc-macro2",
"quote",
"syn",
"synstructure",
]
[[package]]
@@ -32,6 +32,7 @@
tracing-subscriber = "0.3"
unix_mode = "0.1"
uuid = { version = "1.1", features = ["v4"] }
yoke = { version = "0.6", features = ["derive"] }
[build-dependencies]
anyhow = "1.0"
@@ -1,5 +1,4 @@
use git2::Sort;
use sled::Batch;
use std::path::{Path, PathBuf};
use time::OffsetDateTime;
use tracing::info;
@@ -70,22 +69,21 @@
revwalk.set_sorting(Sort::REVERSE).unwrap();
revwalk.push_ref(reference).unwrap();
let mut update_batch = Batch::default();
let mut i = 0;
for rev in revwalk {
let commit = git_repository.find_commit(rev.unwrap()).unwrap();
Commit::from(commit).insert(&mut update_batch, i);
let author = commit.author();
let committer = commit.committer();
Commit::new(&commit, &author, &committer).insert(&commit_tree, i);
i += 1;
}
for to_remove in (i + 1)..(i + 100) {
update_batch.remove(&to_remove.to_be_bytes());
commit_tree.remove(&to_remove.to_be_bytes()).unwrap();
}
commit_tree.apply_batch(update_batch).unwrap();
}
}
}
@@ -156,9 +156,9 @@
#[derive(Template)]
#[template(path = "repo/log.html")]
pub struct LogView {
pub struct LogView<'a> {
repo: Repository,
commits: Vec<crate::database::schema::commit::Commit>,
commits: Vec<&'a crate::database::schema::commit::Commit<'a>>,
next_offset: Option<usize>,
branch: Option<String>,
}
@@ -173,7 +173,7 @@
let reference = format!("refs/heads/{}", query.branch.as_deref().unwrap_or("master"));
let repository = crate::database::schema::repository::Repository::open(&db, &*repo).unwrap();
let commit_tree = repository.commit_tree(&db, &reference);
let mut commits = commit_tree.fetch_latest(101, offset);
let mut commits = commit_tree.fetch_latest(101, offset).await;
let next_offset = if commits.len() == 101 {
commits.pop();
@@ -181,6 +181,8 @@
} else {
None
};
let commits = commits.iter().map(|v| v.get()).collect();
into_response(&LogView {
repo,
@@ -1,51 +1,86 @@
use git2::Signature;
use serde::{Deserialize, Serialize};
use sled::Batch;
use std::borrow::Cow;
use git2::{Oid, Signature};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use sled::IVec;
use std::ops::Deref;
use time::OffsetDateTime;
use yoke::{Yoke, Yokeable};
#[derive(Serialize, Deserialize, Debug)]
pub struct Commit {
pub summary: String,
pub message: String,
pub author: Author,
pub committer: Author,
pub hash: Vec<u8>,
}
impl From<git2::Commit<'_>> for Commit {
fn from(commit: git2::Commit<'_>) -> Self {
Commit {
/// A single commit record in the form that is serialized into the commit tree.
///
/// The text fields are `Cow`s so that a value can either borrow from the data it
/// was built from or own its strings. `summary` and `message` carry
/// `#[serde(borrow)]`, which allows deserialization to borrow them from the
/// input buffer instead of allocating. The `Yokeable` derive lets a
/// deserialized `Commit` be stored inside a `Yoke` next to the buffer it
/// borrows from.
#[derive(Serialize, Deserialize, Debug, Yokeable)]
pub struct Commit<'a> {
/// Commit summary; `Commit::new` fills this from `summary_bytes()` decoded
/// lossily as UTF-8, or an empty string when there is none.
#[serde(borrow)]
pub summary: Cow<'a, str>,
/// Commit body; `Commit::new` fills this from `body_bytes()` decoded lossily
/// as UTF-8, or an empty string when there is none.
#[serde(borrow)]
pub message: Cow<'a, str>,
/// Signature of the commit's author.
pub author: Author<'a>,
/// Signature of the commit's committer.
pub committer: Author<'a>,
/// Raw commit id bytes, either owned as an `Oid` or borrowed.
pub hash: CommitHash<'a>,
}
impl<'a> Commit<'a> {
pub fn new(commit: &'a git2::Commit<'_>, author: &'a git2::Signature<'_>, committer: &'a git2::Signature<'_>) -> Self {
Self {
summary: commit
.summary()
.map(ToString::to_string)
.unwrap_or_default(),
message: commit.body().map(ToString::to_string).unwrap_or_default(),
committer: commit.committer().into(),
author: commit.author().into(),
hash: commit.id().as_bytes().to_vec(),
.summary_bytes()
.map(String::from_utf8_lossy)
.unwrap_or(Cow::Borrowed("")),
message: commit.body_bytes().map(String::from_utf8_lossy).unwrap_or(Cow::Borrowed("")),
committer: committer.into(),
author: author.into(),
hash: CommitHash::Oid(commit.id()),
}
}
pub fn insert(&self, batch: &CommitTree, id: usize) {
batch.insert(&id.to_be_bytes(), bincode::serialize(self).unwrap()).unwrap();
}
}
/// A commit id that is either held as a `git2` object id or as a byte slice
/// borrowed from another buffer.
///
/// Both variants dereference to the raw hash bytes (`[u8]`).
#[derive(Debug)]
pub enum CommitHash<'a> {
/// Object id held by value; this is what `Commit::new` builds from `commit.id()`.
Oid(Oid),
/// Hash bytes borrowed for `'a`; this is what the `Deserialize` impl produces.
Bytes(&'a [u8]),
}
impl<'a> Deref for CommitHash<'a> {
type Target = [u8];
fn deref(&self) -> &Self::Target {
match self {
CommitHash::Oid(v) => v.as_bytes(),
CommitHash::Bytes(v) => v,
}
}
}
impl Serialize for CommitHash<'_> {
    /// Serializes the hash as a single byte string.
    ///
    /// Both variants are written with `Serializer::serialize_bytes`. This is the
    /// counterpart of the `<&[u8]>::deserialize` call used to read a `CommitHash`
    /// back, which expects a borrowed byte string rather than a sequence of
    /// individual `u8` elements. Going through the generic `[u8]` `Serialize`
    /// impl emits a sequence instead, which only round-trips in formats that
    /// encode the two identically.
    ///
    /// bincode writes a length prefix followed by the raw bytes in both cases, so
    /// records already stored through `Commit::insert` keep the same encoding.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let bytes: &[u8] = match self {
            CommitHash::Oid(oid) => oid.as_bytes(),
            CommitHash::Bytes(bytes) => *bytes,
        };
        serializer.serialize_bytes(bytes)
    }
}
impl Commit {
pub fn insert(&self, batch: &mut Batch, id: usize) {
batch.insert(&id.to_be_bytes(), bincode::serialize(self).unwrap());
impl<'a, 'de: 'a> Deserialize<'de> for CommitHash<'a> {
    /// Reads a byte slice borrowed from the deserializer's input and wraps it in
    /// `CommitHash::Bytes`; the `'de: 'a` bound ties the result to that input.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        <&'a [u8]>::deserialize(deserializer).map(Self::Bytes)
    }
}
#[derive(Serialize, Deserialize, Debug)]
pub struct Author {
pub name: String,
pub email: String,
pub struct Author<'a> {
pub name: Cow<'a, str>,
pub email: Cow<'a, str>,
pub time: OffsetDateTime,
}
impl From<git2::Signature<'_>> for Author {
fn from(author: Signature<'_>) -> Self {
impl<'a> From<&'a git2::Signature<'_>> for Author<'a> {
fn from(author: &'a Signature<'_>) -> Self {
Self {
name: author.name().map(ToString::to_string).unwrap_or_default(),
email: author.email().map(ToString::to_string).unwrap_or_default(),
name: String::from_utf8_lossy(author.name_bytes()),
email: String::from_utf8_lossy(author.email_bytes()),
time: OffsetDateTime::from_unix_timestamp(author.when().seconds()).unwrap(),
}
@@ -61,29 +96,45 @@
&self.0
}
}
/// A deserialized `Commit` bundled with the boxed sled `IVec` it was decoded
/// from, so the borrowed fields stay valid for as long as the yoke is alive.
pub type CommitYoke = Yoke<Commit<'static>, Box<IVec>>;
impl CommitTree {
pub(super) fn new(tree: sled::Tree) -> Self {
Self(tree)
}
pub fn fetch_latest(&self, amount: usize, offset: usize) -> Vec<Commit> {
let (latest_key, _) = self.last().unwrap().unwrap();
let mut latest_key_bytes = [0; std::mem::size_of::<usize>()];
latest_key_bytes.copy_from_slice(&latest_key);
pub async fn fetch_latest(&self, amount: usize, offset: usize) -> Vec<CommitYoke> {
let latest_key = if let Some((latest_key, _)) = self.last().unwrap() {
let mut latest_key_bytes = [0; std::mem::size_of::<usize>()];
latest_key_bytes.copy_from_slice(&latest_key);
usize::from_be_bytes(latest_key_bytes)
} else {
return vec![];
};
let end = usize::from_be_bytes(latest_key_bytes).saturating_sub(offset);
let end = latest_key.saturating_sub(offset);
let start = end.saturating_sub(amount);
let iter = self.range(start.to_be_bytes()..end.to_be_bytes());
tokio::task::spawn_blocking(move || {
iter
.rev()
.map(|res| {
let (_, value) = res?;
let value = Box::new(value);
self.range(start.to_be_bytes()..end.to_be_bytes())
.rev()
.map(|res| {
let (_, value) = res?;
let details = bincode::deserialize(&value).unwrap();
Ok(details)
})
.collect::<Result<Vec<_>, sled::Error>>()
.unwrap()
Ok(Yoke::try_attach_to_cart(value, |data: &IVec| bincode::deserialize(&data)).unwrap())
})
.collect::<Result<Vec<_>, sled::Error>>()
.unwrap()
})
.await
.unwrap()
}
}