🏡 index : ~doyle/rgit.git

author Jordan Doyle <jordan@doyle.la> 2022-07-17 21:12:08.0 +01:00:00
committer Jordan Doyle <jordan@doyle.la> 2022-07-17 21:12:08.0 +01:00:00
commit
595fd116b0c71b623d13dafe6d9d5f2cd4526ff3 [patch]
tree
8aca3874965ecfdb154f3850bf804d12b4bb1906
parent
471ec711f568869a0b65c01390b8f39f0d82bc84
download
595fd116b0c71b623d13dafe6d9d5f2cd4526ff3.tar.gz

Update commit index in a transaction



Diff

 src/database/indexer.rs       | 96 ++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
 src/database/schema/commit.rs | 38 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 74 insertions(+), 60 deletions(-)

diff --git a/src/database/indexer.rs b/src/database/indexer.rs
index 5e42a29..926ee69 100644
--- a/src/database/indexer.rs
+++ a/src/database/indexer.rs
@@ -1,9 +1,12 @@
use git2::Sort;
use std::path::{Path, PathBuf};
use time::OffsetDateTime;
use tracing::info;

use crate::database::schema::commit::{Author, Commit};
use crate::database::schema::repository::{Repository, RepositoryId};
use crate::database::schema::{
    commit::Commit,
    repository::{Repository, RepositoryId},
};

pub fn run_indexer(db: &sled::Db) {
    let scan_path = Path::new("/Users/jordan/Code/test-git");
@@ -43,7 +46,7 @@

fn update_repository_reflog(scan_path: &Path, db: &sled::Db) {
    for (relative_path, db_repository) in Repository::fetch_all(&db) {
        let git_repository = git2::Repository::open(scan_path.join(relative_path)).unwrap();
        let git_repository = git2::Repository::open(scan_path.join(&relative_path)).unwrap();

        for reference in git_repository.references().unwrap() {
            let reference = if let Some(reference) = reference.as_ref().ok().and_then(|v| v.name())
@@ -53,61 +56,44 @@
                continue;
            };

            let commit_tree = db_repository.commit_tree(db, reference);
            if !reference.starts_with("refs/heads/") {
                continue;
            }

            // TODO: only scan revs from the last time we looked
            let mut revwalk = git_repository.revwalk().unwrap();
            revwalk.set_sorting(Sort::REVERSE).unwrap();
            revwalk.push_ref(reference).unwrap();

            let mut i = 0;

            for rev in revwalk {
                let rev = rev.unwrap();
                let commit = if let Ok(commit) = git_repository.find_commit(rev) {
                    commit
                } else {
                    continue;
                };

                let author = commit.author();
                let committer = commit.committer();

                // TODO: all these unwrap_or_defaults need to properly handle non-utf8 data
                let author = Author {
                    name: author.name().map(ToString::to_string).unwrap_or_default(),
                    email: author.email().map(ToString::to_string).unwrap_or_default(),
                    // TODO: this needs to deal with offset
                    time: OffsetDateTime::from_unix_timestamp(author.when().seconds()).unwrap(),
                };
                let committer = Author {
                    name: committer
                        .name()
                        .map(ToString::to_string)
                        .unwrap_or_default(),
                    email: committer
                        .email()
                        .map(ToString::to_string)
                        .unwrap_or_default(),
                    // TODO: this needs to deal with offset
                    time: OffsetDateTime::from_unix_timestamp(committer.when().seconds()).unwrap(),
                };

                let db_commit = Commit {
                    summary: commit
                        .summary()
                        .map(ToString::to_string)
                        .unwrap_or_default(),
                    message: commit.body().map(ToString::to_string).unwrap_or_default(),
                    committer,
                    author,
                    hash: commit.id().as_bytes().to_vec(),
                };
            info!("Updating indexes for {} on {}", reference, relative_path);

                i += 1;
            let commit_tree = db_repository.commit_tree(db, reference);

                db_commit.insert(&commit_tree, i);
            }
            commit_tree
                .transaction::<_, _, std::io::Error>(|tx| {
                    // TODO: only scan revs from the last time we looked
                    let mut revwalk = git_repository.revwalk().unwrap();
                    revwalk.set_sorting(Sort::REVERSE).unwrap();
                    revwalk.push_ref(reference).unwrap();

                    let mut i = 0;
                    for rev in revwalk {
                        let rev = rev.unwrap();
                        let commit = if let Ok(commit) = git_repository.find_commit(rev) {
                            commit
                        } else {
                            continue;
                        };

                        i += 1;

                        Commit::from(commit).insert(tx, i);
                    }

                    // a complete and utter hack to remove potentially dropped commits from our tree,
                    // we'll need to add `clear()` to sled's tx api to remove this
                    for to_remove in (i + 1)..(i + 100) {
                        tx.remove(&to_remove.to_be_bytes())?;
                    }

                    Ok(())
                })
                .unwrap();
        }
    }
}
diff --git a/src/database/schema/commit.rs b/src/database/schema/commit.rs
index db513b4..280fd0f 100644
--- a/src/database/schema/commit.rs
+++ a/src/database/schema/commit.rs
@@ -1,4 +1,6 @@
use git2::Signature;
use serde::{Deserialize, Serialize};
use sled::transaction::TransactionalTree;
use std::ops::Deref;
use time::OffsetDateTime;

@@ -11,6 +13,29 @@
    pub hash: Vec<u8>,
}

impl From<git2::Commit<'_>> for Commit {
    /// Builds the indexed representation of a commit from its `git2` counterpart.
    ///
    /// The summary and body are decoded lossily from their raw bytes, so invalid
    /// UTF-8 sequences are replaced with U+FFFD instead of the whole field being
    /// dropped. A commit that has no summary or no body yields an empty string
    /// for that field.
    fn from(commit: git2::Commit<'_>) -> Self {
        /// Lossily decodes optional raw bytes, mapping `None` to an empty string.
        fn lossy(bytes: Option<&[u8]>) -> String {
            bytes
                .map(|raw| String::from_utf8_lossy(raw).into_owned())
                .unwrap_or_default()
        }

        Commit {
            // `summary()`/`body()` return `None` for non-UTF-8 data, which would
            // index such commits with empty text; the `*_bytes` accessors keep it.
            summary: lossy(commit.summary_bytes()),
            message: lossy(commit.body_bytes()),
            committer: commit.committer().into(),
            author: commit.author().into(),
            hash: commit.id().as_bytes().to_vec(),
        }
    }
}

impl Commit {
    /// Serializes this commit with `bincode` and stores it in `database` under
    /// the big-endian bytes of `id`.
    ///
    /// # Panics
    ///
    /// Panics if serialization fails or if the transactional insert returns an
    /// error.
    pub fn insert(&self, database: &TransactionalTree, id: usize) {
        let key = id.to_be_bytes();
        let value = bincode::serialize(self).unwrap();

        database.insert(&key, value).unwrap();
    }
}

#[derive(Serialize, Deserialize, Debug)]
pub struct Author {
    pub name: String,
@@ -18,11 +43,14 @@
    pub time: OffsetDateTime,
}

impl Commit {
    pub fn insert(&self, database: &CommitTree, id: usize) {
        database
            .insert(id.to_be_bytes(), bincode::serialize(self).unwrap())
            .unwrap();
impl From<git2::Signature<'_>> for Author {
    /// Converts a `git2` signature into the indexed author representation.
    ///
    /// The name and email are decoded lossily from their raw bytes, so invalid
    /// UTF-8 sequences are replaced with U+FFFD rather than the whole field
    /// becoming empty. The timestamp is stored as UTC; a value outside the range
    /// supported by `OffsetDateTime` falls back to the Unix epoch.
    fn from(author: Signature<'_>) -> Self {
        Self {
            // `name()`/`email()` return `None` for non-UTF-8 data; decode the raw
            // bytes instead so the readable part of the identity is preserved.
            name: String::from_utf8_lossy(author.name_bytes()).into_owned(),
            email: String::from_utf8_lossy(author.email_bytes()).into_owned(),
            // TODO: this needs to deal with offset
            // The timestamp comes straight from repository data, so a bogus value
            // must not abort the conversion; substitute the epoch instead.
            time: OffsetDateTime::from_unix_timestamp(author.when().seconds())
                .unwrap_or(OffsetDateTime::UNIX_EPOCH),
        }
    }
}