From 595fd116b0c71b623d13dafe6d9d5f2cd4526ff3 Mon Sep 17 00:00:00 2001 From: Jordan Doyle Date: Sun, 17 Jul 2022 21:12:08 +0100 Subject: [PATCH] Update commit index in a transaction --- src/database/indexer.rs | 96 ++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------- src/database/schema/commit.rs | 38 +++++++++++++++++++++++++++++++++++++- 2 files changed, 74 insertions(+), 60 deletions(-) diff --git a/src/database/indexer.rs b/src/database/indexer.rs index 5e42a29..926ee69 100644 --- a/src/database/indexer.rs +++ a/src/database/indexer.rs @@ -1,9 +1,12 @@ use git2::Sort; use std::path::{Path, PathBuf}; use time::OffsetDateTime; +use tracing::info; -use crate::database::schema::commit::{Author, Commit}; -use crate::database::schema::repository::{Repository, RepositoryId}; +use crate::database::schema::{ + commit::Commit, + repository::{Repository, RepositoryId}, +}; pub fn run_indexer(db: &sled::Db) { let scan_path = Path::new("/Users/jordan/Code/test-git"); @@ -43,7 +46,7 @@ fn update_repository_reflog(scan_path: &Path, db: &sled::Db) { for (relative_path, db_repository) in Repository::fetch_all(&db) { - let git_repository = git2::Repository::open(scan_path.join(relative_path)).unwrap(); + let git_repository = git2::Repository::open(scan_path.join(&relative_path)).unwrap(); for reference in git_repository.references().unwrap() { let reference = if let Some(reference) = reference.as_ref().ok().and_then(|v| v.name()) @@ -53,61 +56,44 @@ continue; }; - let commit_tree = db_repository.commit_tree(db, reference); + if !reference.starts_with("refs/heads/") { + continue; + } - // TODO: only scan revs from the last time we looked - let mut revwalk = git_repository.revwalk().unwrap(); - revwalk.set_sorting(Sort::REVERSE).unwrap(); - revwalk.push_ref(reference).unwrap(); - - let mut i = 0; - - for rev in revwalk { - let rev = rev.unwrap(); - let commit = if let Ok(commit) = git_repository.find_commit(rev) { - commit - } else { - continue; - }; - 
- let author = commit.author(); - let committer = commit.committer(); - - // TODO: all these unwrap_or_defaults need to properly handle non-utf8 data - let author = Author { - name: author.name().map(ToString::to_string).unwrap_or_default(), - email: author.email().map(ToString::to_string).unwrap_or_default(), - // TODO: this needs to deal with offset - time: OffsetDateTime::from_unix_timestamp(author.when().seconds()).unwrap(), - }; - let committer = Author { - name: committer - .name() - .map(ToString::to_string) - .unwrap_or_default(), - email: committer - .email() - .map(ToString::to_string) - .unwrap_or_default(), - // TODO: this needs to deal with offset - time: OffsetDateTime::from_unix_timestamp(committer.when().seconds()).unwrap(), - }; - - let db_commit = Commit { - summary: commit - .summary() - .map(ToString::to_string) - .unwrap_or_default(), - message: commit.body().map(ToString::to_string).unwrap_or_default(), - committer, - author, - hash: commit.id().as_bytes().to_vec(), - }; + info!("Updating indexes for {} on {}", reference, relative_path); - i += 1; + let commit_tree = db_repository.commit_tree(db, reference); - db_commit.insert(&commit_tree, i); - } + commit_tree + .transaction::<_, _, std::io::Error>(|tx| { + // TODO: only scan revs from the last time we looked + let mut revwalk = git_repository.revwalk().unwrap(); + revwalk.set_sorting(Sort::REVERSE).unwrap(); + revwalk.push_ref(reference).unwrap(); + + let mut i = 0; + for rev in revwalk { + let rev = rev.unwrap(); + let commit = if let Ok(commit) = git_repository.find_commit(rev) { + commit + } else { + continue; + }; + + i += 1; + + Commit::from(commit).insert(tx, i); + } + + // a complete and utter hack to remove potentially dropped commits from our tree, + // we'll need to add `clear()` to sled's tx api to remove this + for to_remove in (i + 1)..(i + 100) { + tx.remove(&to_remove.to_be_bytes())?; + } + + Ok(()) + }) + .unwrap(); } } } diff --git a/src/database/schema/commit.rs 
b/src/database/schema/commit.rs index db513b4..280fd0f 100644 --- a/src/database/schema/commit.rs +++ a/src/database/schema/commit.rs @@ -1,4 +1,6 @@ +use git2::Signature; use serde::{Deserialize, Serialize}; +use sled::transaction::TransactionalTree; use std::ops::Deref; use time::OffsetDateTime; @@ -11,6 +13,29 @@ pub hash: Vec<u8>, } +impl From<git2::Commit<'_>> for Commit { + fn from(commit: git2::Commit<'_>) -> Self { + Commit { + summary: commit + .summary() + .map(ToString::to_string) + .unwrap_or_default(), + message: commit.body().map(ToString::to_string).unwrap_or_default(), + committer: commit.committer().into(), + author: commit.author().into(), + hash: commit.id().as_bytes().to_vec(), + } + } +} + +impl Commit { + pub fn insert(&self, database: &TransactionalTree, id: usize) { + database + .insert(&id.to_be_bytes(), bincode::serialize(self).unwrap()) + .unwrap(); + } +} + #[derive(Serialize, Deserialize, Debug)] pub struct Author { pub name: String, @@ -18,11 +43,14 @@ pub time: OffsetDateTime, } -impl Commit { - pub fn insert(&self, database: &CommitTree, id: usize) { - database - .insert(id.to_be_bytes(), bincode::serialize(self).unwrap()) - .unwrap(); +impl From<Signature<'_>> for Author { + fn from(author: Signature<'_>) -> Self { + Self { + name: author.name().map(ToString::to_string).unwrap_or_default(), + email: author.email().map(ToString::to_string).unwrap_or_default(), + // TODO: this needs to deal with offset + time: OffsetDateTime::from_unix_timestamp(author.when().seconds()).unwrap(), + } } } -- rgit 0.1.3