🏡 index : ~doyle/rgit.git

author Jordan Doyle <jordan@doyle.la> 2022-07-17 23:28:39.0 +01:00:00
committer Jordan Doyle <jordan@doyle.la> 2022-07-17 23:28:39.0 +01:00:00
commit
2753b229df245063be41bd7107a2c83832fbe862 [patch]
tree
7b8421c569f88589a41cd930326967ed9252a28c
parent
897feb1096d019ed3bcb97261c5a47845e60317d
download
2753b229df245063be41bd7107a2c83832fbe862.tar.gz

Zero-copy serialization/deserialization for repositories to/from sled



Diff

 src/database/indexer.rs           | 15 +++++----------
 src/methods/index.rs              | 11 ++++++-----
 src/methods/repo.rs               |  2 +-
 src/database/schema/commit.rs     | 40 +++++++++++++++++++++++++++++++++++++---
 src/database/schema/mod.rs        |  5 +++++
 src/database/schema/repository.rs | 50 +++++++++++++++++++++++++++++++++++++++++++++-----
 6 files changed, 83 insertions(+), 40 deletions(-)

diff --git a/src/database/indexer.rs b/src/database/indexer.rs
index 7ab67d1..f4c76dd 100644
--- a/src/database/indexer.rs
+++ a/src/database/indexer.rs
@@ -22,16 +22,11 @@
        let relative = get_relative_path(scan_path, &repository);

        let id = Repository::open(db, relative)
            .map(|v| v.id)
            .map(|v| v.get().id)
            .unwrap_or_else(|| RepositoryId::new(db));
        let name = relative.file_name().unwrap().to_string_lossy().to_string();
        let description = Some(
            String::from_utf8_lossy(
                &std::fs::read(repository.join("description")).unwrap_or_default(),
            )
            .to_string(),
        )
        .filter(|v| !v.is_empty());
        let name = relative.file_name().unwrap().to_string_lossy();
        let description = std::fs::read(repository.join("description")).unwrap_or_default();
        let description = Some(String::from_utf8_lossy(&description)).filter(|v| !v.is_empty());

        Repository {
            id,
@@ -62,7 +57,7 @@

            info!("Updating indexes for {} on {}", reference, relative_path);

            let commit_tree = db_repository.commit_tree(db, reference);
            let commit_tree = db_repository.get().commit_tree(db, reference);

            // TODO: only scan revs from the last time we looked
            let mut revwalk = git_repository.revwalk().unwrap();
diff --git a/src/methods/index.rs b/src/methods/index.rs
index a10ed46..dcf7c67 100644
--- a/src/methods/index.rs
+++ a/src/methods/index.rs
@@ -10,21 +10,22 @@

#[derive(Template)]
#[template(path = "index.html")]
pub struct View {
    pub repositories: BTreeMap<Option<String>, Vec<Repository>>,
pub struct View<'a> {
    pub repositories: BTreeMap<Option<String>, Vec<&'a Repository<'a>>>,
}

pub async fn handle(Extension(db): Extension<sled::Db>) -> Response {
    let mut repositories: BTreeMap<Option<String>, Vec<Repository>> = BTreeMap::new();
    let mut repositories: BTreeMap<Option<String>, Vec<&Repository<'_>>> = BTreeMap::new();

    for (k, v) in Repository::fetch_all(&db) {
    let fetched = Repository::fetch_all(&db);
    for (k, v) in fetched.iter() {
        // TODO: fixme
        let mut split: Vec<_> = k.split('/').collect();
        split.pop();
        let key = Some(split.join("/")).filter(|v| !v.is_empty());

        let k = repositories.entry(key).or_default();
        k.push(v);
        k.push(v.get());
    }

    into_response(&View { repositories })
diff --git a/src/methods/repo.rs b/src/methods/repo.rs
index 12a1a8f..6964b03 100644
--- a/src/methods/repo.rs
+++ a/src/methods/repo.rs
@@ -172,7 +172,7 @@

    let reference = format!("refs/heads/{}", query.branch.as_deref().unwrap_or("master"));
    let repository = crate::database::schema::repository::Repository::open(&db, &*repo).unwrap();
    let commit_tree = repository.commit_tree(&db, &reference);
    let commit_tree = repository.get().commit_tree(&db, &reference);
    let mut commits = commit_tree.fetch_latest(101, offset).await;

    let next_offset = if commits.len() == 101 {
diff --git a/src/database/schema/commit.rs b/src/database/schema/commit.rs
index 4ac6d28..47627d9 100644
--- a/src/database/schema/commit.rs
+++ a/src/database/schema/commit.rs
@@ -1,7 +1,8 @@
use std::borrow::Cow;
use crate::database::schema::Yoked;
use git2::{Oid, Signature};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use sled::IVec;
use std::borrow::Cow;
use std::ops::Deref;
use time::OffsetDateTime;
use yoke::{Yoke, Yokeable};
@@ -18,13 +19,20 @@
}

impl<'a> Commit<'a> {
    pub fn new(commit: &'a git2::Commit<'_>, author: &'a git2::Signature<'_>, committer: &'a git2::Signature<'_>) -> Self {
    pub fn new(
        commit: &'a git2::Commit<'_>,
        author: &'a git2::Signature<'_>,
        committer: &'a git2::Signature<'_>,
    ) -> Self {
        Self {
            summary: commit
                .summary_bytes()
                .map(String::from_utf8_lossy)
                .unwrap_or(Cow::Borrowed("")),
            message: commit
                .body_bytes()
                .map(String::from_utf8_lossy)
                .unwrap_or(Cow::Borrowed("")),
            message: commit.body_bytes().map(String::from_utf8_lossy).unwrap_or(Cow::Borrowed("")),
            committer: committer.into(),
            author: author.into(),
            hash: CommitHash::Oid(commit.id()),
@@ -32,7 +40,9 @@
    }

    pub fn insert(&self, batch: &CommitTree, id: usize) {
        batch.insert(&id.to_be_bytes(), bincode::serialize(self).unwrap()).unwrap();
        batch
            .insert(&id.to_be_bytes(), bincode::serialize(self).unwrap())
            .unwrap();
    }
}

@@ -54,7 +64,10 @@
}

impl Serialize for CommitHash<'_> {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where S: Serializer {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        match self {
            CommitHash::Oid(v) => v.as_bytes().serialize(serializer),
            CommitHash::Bytes(v) => v.serialize(serializer),
@@ -63,7 +76,10 @@
}

impl<'a, 'de: 'a> Deserialize<'de> for CommitHash<'a> {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where D: Deserializer<'de> {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        let bytes = <&'a [u8]>::deserialize(deserializer)?;
        Ok(Self::Bytes(bytes))
    }
@@ -97,14 +113,14 @@
    }
}

pub type CommitYoke = Yoke<Commit<'static>, Box<IVec>>;
pub type YokedCommit = Yoked<Commit<'static>>;

impl CommitTree {
    pub(super) fn new(tree: sled::Tree) -> Self {
        Self(tree)
    }

    pub async fn fetch_latest(&self, amount: usize, offset: usize) -> Vec<CommitYoke> {
    pub async fn fetch_latest(&self, amount: usize, offset: usize) -> Vec<YokedCommit> {
        let latest_key = if let Some((latest_key, _)) = self.last().unwrap() {
            let mut latest_key_bytes = [0; std::mem::size_of::<usize>()];
            latest_key_bytes.copy_from_slice(&latest_key);
@@ -119,8 +135,7 @@
        let iter = self.range(start.to_be_bytes()..end.to_be_bytes());

        tokio::task::spawn_blocking(move || {
            iter
                .rev()
            iter.rev()
                .map(|res| {
                    let (_, value) = res?;

@@ -129,7 +144,10 @@
                    // to box the value as well to get a stablederef...
                    let value = Box::new(value);

                    Ok(Yoke::try_attach_to_cart(value, |data: &IVec| bincode::deserialize(&data)).unwrap())
                    Ok(
                        Yoke::try_attach_to_cart(value, |data: &IVec| bincode::deserialize(&data))
                            .unwrap(),
                    )
                })
                .collect::<Result<Vec<_>, sled::Error>>()
                .unwrap()
diff --git a/src/database/schema/mod.rs b/src/database/schema/mod.rs
index ae2930f..16364b9 100644
--- a/src/database/schema/mod.rs
+++ a/src/database/schema/mod.rs
@@ -1,3 +1,8 @@
use sled::IVec;
use yoke::Yoke;

pub mod commit;
pub mod prefixes;
pub mod repository;

pub type Yoked<T> = Yoke<T, Box<IVec>>;
diff --git a/src/database/schema/repository.rs b/src/database/schema/repository.rs
index 7a063b1..1889418 100644
--- a/src/database/schema/repository.rs
+++ a/src/database/schema/repository.rs
@@ -1,36 +1,53 @@
use crate::database::schema::commit::CommitTree;
use crate::database::schema::prefixes::TreePrefix;
use crate::database::schema::Yoked;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use sled::IVec;
use std::borrow::Cow;
use std::collections::BTreeMap;
use std::ops::Deref;
use std::path::Path;
use time::OffsetDateTime;
use yoke::{Yoke, Yokeable};

#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash)]
pub struct Repository {
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Yokeable)]
pub struct Repository<'a> {
    /// The ID of the repository, as stored in `sled`

    pub id: RepositoryId,
    /// The "clean name" of the repository (e.g. `hello-world.git`)

    pub name: String,
    #[serde(borrow)]
    pub name: Cow<'a, str>,
    /// The description of the repository, as it is stored in the `description` file in the

    /// bare repo root

    pub description: Option<String>,
    #[serde(borrow)]
    pub description: Option<Cow<'a, str>>,
    /// The owner of the repository (`gitweb.owner` in the repository configuration)

    pub owner: Option<String>,
    #[serde(borrow)]
    pub owner: Option<Cow<'a, str>>,
    /// The last time this repository was updated, currently read from the directory mtime

    pub last_modified: OffsetDateTime,
}

impl Repository {
    pub fn fetch_all(database: &sled::Db) -> HashMap<String, Repository> {
pub type YokedRepository = Yoked<Repository<'static>>;

impl Repository<'_> {
    pub fn fetch_all(database: &sled::Db) -> BTreeMap<String, YokedRepository> {
        database
            .scan_prefix([TreePrefix::Repository as u8])
            .filter_map(Result::ok)
            .map(|(k, v)| {
            .map(|(key, value)| {
                // strip the prefix we've just scanned for
                let key = String::from_utf8_lossy(&k[1..]).to_string();
                let value = bincode::deserialize(&v).unwrap();
                let key = String::from_utf8_lossy(&key[1..]).to_string();

                // An `IVec` is internally backed by an `Arc`, so in principle it should
                // already be `StableDeref`. However, sled ships its own `Arc` implementation
                // (for reasons unknown to the author), so the value is boxed as well to
                // obtain a `StableDeref` cart for the `Yoke`.
                let value = Box::new(value);

                let value =
                    Yoke::try_attach_to_cart(value, |data: &IVec| bincode::deserialize(&data))
                        .unwrap();

                (key, value)
            })
            .collect()
@@ -45,11 +62,18 @@
            .unwrap();
    }

    pub fn open<P: AsRef<Path>>(database: &sled::Db, path: P) -> Option<Repository> {
    pub fn open<P: AsRef<Path>>(database: &sled::Db, path: P) -> Option<YokedRepository> {
        database
            .get(TreePrefix::repository_id(path))
            .unwrap()
            .map(|v| bincode::deserialize(&v))
            .map(|value| {
                // An `IVec` is internally backed by an `Arc`, so in principle it should
                // already be `StableDeref`. However, sled ships its own `Arc` implementation
                // (for reasons unknown to the author), so the value is boxed as well to
                // obtain a `StableDeref` cart for the `Yoke`.
                let value = Box::new(value);

                Yoke::try_attach_to_cart(value, |data: &IVec| bincode::deserialize(&data))
            })
            .transpose()
            .unwrap()
    }