🏡 index : ~doyle/rgit.git

author Jordan Doyle <jordan@doyle.la> 2022-07-17 23:11:16.0 +01:00:00
committer Jordan Doyle <jordan@doyle.la> 2022-07-17 23:11:16.0 +01:00:00
commit
897feb1096d019ed3bcb97261c5a47845e60317d [patch]
tree
1ee63b59a5340d1590a12bb6695890bd1b73d506
parent
4dc5fc9d8039035e9af01e4c04b9c865f0b3bf6e
download
897feb1096d019ed3bcb97261c5a47845e60317d.tar.gz

Zero-copy serialize/deserialization for commits to/from sled



Diff

 Cargo.lock                    |  70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 Cargo.toml                    |   1 +
 src/database/indexer.rs       |  12 +++++-------
 src/methods/repo.rs           |   8 +++++---
 src/database/schema/commit.rs | 145 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------
 5 files changed, 179 insertions(+), 57 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index b150182..24237ed 100644
--- a/Cargo.lock
+++ a/Cargo.lock
@@ -1734,6 +1734,7 @@
 "tracing-subscriber",
 "unix_mode",
 "uuid",
 "yoke",
]

[[package]]
@@ -1932,6 +1933,12 @@
]

[[package]]
name = "stable_deref_trait"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"

[[package]]
name = "std_prelude"
version = "0.2.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1959,6 +1966,18 @@
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20518fe4a4c9acf048008599e464deb21beeae3d3578418951a189c235a7a9a8"

[[package]]
name = "synstructure"
version = "0.12.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f"
dependencies = [
 "proc-macro2",
 "quote",
 "syn",
 "unicode-xid",
]

[[package]]
name = "syntect"
@@ -2299,6 +2318,12 @@
checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973"

[[package]]
name = "unicode-xid"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "957e51f3646910546462e67d5f7599b9e4fb8acdd304b087a6494730f9eebf04"

[[package]]
name = "unix_mode"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -2542,6 +2567,51 @@
checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85"
dependencies = [
 "linked-hash-map",
]

[[package]]
name = "yoke"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f5f3fb616365adc65cf20f6993b4c053a48a7807375648ab8a3127d1088befd5"
dependencies = [
 "serde",
 "stable_deref_trait",
 "yoke-derive",
 "zerofrom",
]

[[package]]
name = "yoke-derive"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58c2c5bb7c929b85c1b9ec69091b0d835f0878b4fd9eb67973b25936e06c4374"
dependencies = [
 "proc-macro2",
 "quote",
 "syn",
 "synstructure",
]

[[package]]
name = "zerofrom"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aed578cc7fa1c85290bdaca18fa5ac8a9365ddd9ed54af4380a6c5e13d9fc484"
dependencies = [
 "zerofrom-derive",
]

[[package]]
name = "zerofrom-derive"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8785f47d6062c1932866147f91297286a9f350b3070e9d9f0b6078e37d623c1a"
dependencies = [
 "proc-macro2",
 "quote",
 "syn",
 "synstructure",
]

[[package]]
diff --git a/Cargo.toml b/Cargo.toml
index 26719fc..b15e34a 100644
--- a/Cargo.toml
+++ a/Cargo.toml
@@ -32,6 +32,7 @@
tracing-subscriber = "0.3"
unix_mode = "0.1"
uuid = { version = "1.1", features = ["v4"] }
yoke = { version = "0.6", features = ["derive"] }

[build-dependencies]
anyhow = "1.0"
diff --git a/src/database/indexer.rs b/src/database/indexer.rs
index 275f33e..7ab67d1 100644
--- a/src/database/indexer.rs
+++ a/src/database/indexer.rs
@@ -1,5 +1,4 @@
use git2::Sort;
use sled::Batch;
use std::path::{Path, PathBuf};
use time::OffsetDateTime;
use tracing::info;
@@ -70,22 +69,21 @@
            revwalk.set_sorting(Sort::REVERSE).unwrap();
            revwalk.push_ref(reference).unwrap();

            let mut update_batch = Batch::default();

            let mut i = 0;
            for rev in revwalk {
                let commit = git_repository.find_commit(rev.unwrap()).unwrap();
                Commit::from(commit).insert(&mut update_batch, i);
                let author = commit.author();
                let committer = commit.committer();

                Commit::new(&commit, &author, &committer).insert(&commit_tree, i);
                i += 1;
            }

            // a complete and utter hack to remove potentially dropped commits from our tree,
            // we'll need to add `clear()` to sled's tx api to remove this
            for to_remove in (i + 1)..(i + 100) {
                update_batch.remove(&to_remove.to_be_bytes());
                commit_tree.remove(&to_remove.to_be_bytes()).unwrap();
            }

            commit_tree.apply_batch(update_batch).unwrap();
        }
    }
}
diff --git a/src/methods/repo.rs b/src/methods/repo.rs
index bd0b00c..12a1a8f 100644
--- a/src/methods/repo.rs
+++ a/src/methods/repo.rs
@@ -156,9 +156,9 @@

#[derive(Template)]
#[template(path = "repo/log.html")]
pub struct LogView {
pub struct LogView<'a> {
    repo: Repository,
    commits: Vec<crate::database::schema::commit::Commit>,
    commits: Vec<&'a crate::database::schema::commit::Commit<'a>>,
    next_offset: Option<usize>,
    branch: Option<String>,
}
@@ -173,7 +173,7 @@
    let reference = format!("refs/heads/{}", query.branch.as_deref().unwrap_or("master"));
    let repository = crate::database::schema::repository::Repository::open(&db, &*repo).unwrap();
    let commit_tree = repository.commit_tree(&db, &reference);
    let mut commits = commit_tree.fetch_latest(101, offset);
    let mut commits = commit_tree.fetch_latest(101, offset).await;

    let next_offset = if commits.len() == 101 {
        commits.pop();
@@ -181,6 +181,8 @@
    } else {
        None
    };

    let commits = commits.iter().map(|v| v.get()).collect();

    into_response(&LogView {
        repo,
diff --git a/src/database/schema/commit.rs b/src/database/schema/commit.rs
index 23279db..4ac6d28 100644
--- a/src/database/schema/commit.rs
+++ a/src/database/schema/commit.rs
@@ -1,51 +1,86 @@
use git2::Signature;
use serde::{Deserialize, Serialize};
use sled::Batch;
use std::borrow::Cow;
use git2::{Oid, Signature};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use sled::IVec;
use std::ops::Deref;
use time::OffsetDateTime;
use yoke::{Yoke, Yokeable};

#[derive(Serialize, Deserialize, Debug)]
pub struct Commit {
    pub summary: String,
    pub message: String,
    pub author: Author,
    pub committer: Author,
    pub hash: Vec<u8>,
}

impl From<git2::Commit<'_>> for Commit {
    fn from(commit: git2::Commit<'_>) -> Self {
        Commit {
/// A commit record as it is serialized into, and deserialized out of, the commit tree.
///
/// The lifetime `'a` ties the record to whatever buffer its string and hash data is
/// borrowed from.
#[derive(Serialize, Deserialize, Debug, Yokeable)]
pub struct Commit<'a> {
    /// Summary text of the commit; `#[serde(borrow)]` asks serde to borrow it from the
    /// deserializer input rather than always allocating.
    #[serde(borrow)]
    pub summary: Cow<'a, str>,
    /// Body text of the commit, borrowed on deserialization in the same way as `summary`.
    #[serde(borrow)]
    pub message: Cow<'a, str>,
    /// Author signature of the commit.
    pub author: Author<'a>,
    /// Committer signature of the commit.
    pub committer: Author<'a>,
    /// Hash identifying the commit.
    pub hash: CommitHash<'a>,
}

impl<'a> Commit<'a> {
    pub fn new(commit: &'a git2::Commit<'_>, author: &'a git2::Signature<'_>, committer: &'a git2::Signature<'_>) -> Self {
        Self {
            summary: commit
                .summary()
                .map(ToString::to_string)
                .unwrap_or_default(),
            message: commit.body().map(ToString::to_string).unwrap_or_default(),
            committer: commit.committer().into(),
            author: commit.author().into(),
            hash: commit.id().as_bytes().to_vec(),
                .summary_bytes()
                .map(String::from_utf8_lossy)
                .unwrap_or(Cow::Borrowed("")),
            message: commit.body_bytes().map(String::from_utf8_lossy).unwrap_or(Cow::Borrowed("")),
            committer: committer.into(),
            author: author.into(),
            hash: CommitHash::Oid(commit.id()),
        }
    }

    pub fn insert(&self, batch: &CommitTree, id: usize) {
        batch.insert(&id.to_be_bytes(), bincode::serialize(self).unwrap()).unwrap();
    }
}

/// A commit hash held either as a `git2` object id or as a byte slice borrowed for `'a`.
///
/// Both variants dereference to the hash's raw bytes.
#[derive(Debug)]
pub enum CommitHash<'a> {
    /// An object id value from `git2`, stored by value.
    Oid(Oid),
    /// Raw hash bytes borrowed from another buffer.
    Bytes(&'a [u8]),
}

impl<'a> Deref for CommitHash<'a> {
    type Target = [u8];

    fn deref(&self) -> &Self::Target {
        match self {
            CommitHash::Oid(v) => v.as_bytes(),
            CommitHash::Bytes(v) => v,
        }
    }
}

impl Serialize for CommitHash<'_> {
    /// Serializes the hash as a byte slice, whichever variant it is stored in.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // Resolve both variants to the same slice type first so there is a single
        // serialization call site.
        let raw: &[u8] = match *self {
            Self::Oid(ref oid) => oid.as_bytes(),
            Self::Bytes(bytes) => bytes,
        };

        raw.serialize(serializer)
    }
}

impl Commit {
    pub fn insert(&self, batch: &mut Batch, id: usize) {
        batch.insert(&id.to_be_bytes(), bincode::serialize(self).unwrap());
impl<'a, 'de: 'a> Deserialize<'de> for CommitHash<'a> {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where D: Deserializer<'de> {
        let bytes = <&'a [u8]>::deserialize(deserializer)?;
        Ok(Self::Bytes(bytes))
    }
}

#[derive(Serialize, Deserialize, Debug)]
pub struct Author {
    pub name: String,
    pub email: String,
pub struct Author<'a> {
    pub name: Cow<'a, str>,
    pub email: Cow<'a, str>,
    pub time: OffsetDateTime,
}

impl From<git2::Signature<'_>> for Author {
    fn from(author: Signature<'_>) -> Self {
impl<'a> From<&'a git2::Signature<'_>> for Author<'a> {
    fn from(author: &'a Signature<'_>) -> Self {
        Self {
            name: author.name().map(ToString::to_string).unwrap_or_default(),
            email: author.email().map(ToString::to_string).unwrap_or_default(),
            name: String::from_utf8_lossy(author.name_bytes()),
            email: String::from_utf8_lossy(author.email_bytes()),
            // TODO: this needs to deal with offset
            time: OffsetDateTime::from_unix_timestamp(author.when().seconds()).unwrap(),
        }
@@ -61,29 +96,45 @@
        &self.0
    }
}

/// A [`Commit`] yoked to the boxed sled [`IVec`] that serves as its cart, so the two are
/// moved around together as a single value.
pub type CommitYoke = Yoke<Commit<'static>, Box<IVec>>;

impl CommitTree {
    pub(super) fn new(tree: sled::Tree) -> Self {
        Self(tree)
    }

    pub fn fetch_latest(&self, amount: usize, offset: usize) -> Vec<Commit> {
        let (latest_key, _) = self.last().unwrap().unwrap();
        let mut latest_key_bytes = [0; std::mem::size_of::<usize>()];
        latest_key_bytes.copy_from_slice(&latest_key);
    pub async fn fetch_latest(&self, amount: usize, offset: usize) -> Vec<CommitYoke> {
        let latest_key = if let Some((latest_key, _)) = self.last().unwrap() {
            let mut latest_key_bytes = [0; std::mem::size_of::<usize>()];
            latest_key_bytes.copy_from_slice(&latest_key);
            usize::from_be_bytes(latest_key_bytes)
        } else {
            return vec![];
        };

        let end = usize::from_be_bytes(latest_key_bytes).saturating_sub(offset);
        let end = latest_key.saturating_sub(offset);
        let start = end.saturating_sub(amount);

        let iter = self.range(start.to_be_bytes()..end.to_be_bytes());

        tokio::task::spawn_blocking(move || {
            iter
                .rev()
                .map(|res| {
                    let (_, value) = res?;

                    // internally value is an Arc so it should already be stablederef but because
                    // of reasons unbeknownst to me, sled has its own Arc implementation so we need
                    // to box the value as well to get a stablederef...
                    let value = Box::new(value);

        self.range(start.to_be_bytes()..end.to_be_bytes())
            .rev()
            .map(|res| {
                let (_, value) = res?;
                let details = bincode::deserialize(&value).unwrap();

                Ok(details)
            })
            .collect::<Result<Vec<_>, sled::Error>>()
            .unwrap()
                    Ok(Yoke::try_attach_to_cart(value, |data: &IVec| bincode::deserialize(&data)).unwrap())
                })
                .collect::<Result<Vec<_>, sled::Error>>()
                .unwrap()
        })
        .await
        .unwrap()
    }
}