🏡 index : ~doyle/rgit.git

author Jordan Doyle <jordan@doyle.la> 2024-01-13 16:19:20.0 +00:00:00
committer Jordan Doyle <jordan@doyle.la> 2024-01-13 16:22:44.0 +00:00:00
commit
0beef9b7b664f085f56e5f80a393ed0aad73432c [patch]
tree
26818dc3ba8d317106119725f3722fa71f6296f5
parent
9d1b51c3e3fdb1edcaffec4207664a063d51d8ec
download
0beef9b7b664f085f56e5f80a393ed0aad73432c.tar.gz

Implement partial reindexes



Diff

 src/database/indexer.rs | 62 ++++++++++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 48 insertions(+), 14 deletions(-)

diff --git a/src/database/indexer.rs b/src/database/indexer.rs
index bf735e7..3cf751d 100644
--- a/src/database/indexer.rs
+++ a/src/database/indexer.rs
@@ -12,11 +12,11 @@
use time::OffsetDateTime;
use tracing::{error, info, info_span, instrument, warn};

use super::schema::tag::TagTree;
use crate::database::schema::{
    commit::Commit,
    prefixes::TreePrefix,
    repository::{Repository, RepositoryId},
    tag::Tag,
    tag::{Tag, TagTree},
};

pub fn run(scan_path: &Path, db: &sled::Db) {
@@ -154,6 +154,7 @@
                db_repository.get(),
                db,
                &git_repository,
                false,
            ) {
                error!(%error, "Failed to update reflog for {relative_path}@{reference_name}");
            }
@@ -169,39 +170,72 @@
    db_repository: &Repository<'_>,
    db: &sled::Db,
    git_repository: &git2::Repository,
    force_reindex: bool,
) -> Result<(), anyhow::Error> {
    info!("Refreshing indexes");

    if force_reindex {
        db.drop_tree(TreePrefix::commit_id(db_repository.id, reference_name))?;
    }

    let commit = reference.peel_to_commit()?;
    let commit_tree = db_repository.commit_tree(db, reference_name)?;

    if let (Some(latest_indexed), Ok(latest_commit)) =
        (commit_tree.fetch_latest_one(), reference.peel_to_commit())
    {
        if latest_commit.id().as_bytes() == &*latest_indexed.get().hash {
    let latest_indexed = if let Some(latest_indexed) = commit_tree.fetch_latest_one() {
        if commit.id().as_bytes() == &*latest_indexed.get().hash {
            info!("No commits since last index");
            return Ok(());
        }
    }

    // TODO: only scan revs from the last time we looked
        Some(latest_indexed)
    } else {
        None
    };

    let mut revwalk = git_repository.revwalk()?;
    revwalk.set_sorting(Sort::REVERSE)?;
    revwalk.push_ref(reference_name)?;

    let tree_len = commit_tree.len();
    let mut seen = false;
    let mut i = 0;
    for rev in revwalk {
        let commit = git_repository.find_commit(rev?)?;
        let rev = rev?;

        if let (false, Some(latest_indexed)) = (seen, &latest_indexed) {
            if rev.as_bytes() == &*latest_indexed.get().hash {
                seen = true;
            }

            continue;
        }

        seen = true;

        if ((i + 1) % 25_000) == 0 {
            info!("{} commits ingested", i + 1);
        }

        let commit = git_repository.find_commit(rev)?;
        let author = commit.author();
        let committer = commit.committer();

        Commit::new(&commit, &author, &committer).insert(&commit_tree, i);
        Commit::new(&commit, &author, &committer).insert(&commit_tree, tree_len + i);
        i += 1;
    }

    // a complete and utter hack to remove potentially dropped commits from our tree,
    // we'll need to add `clear()` to sled's tx api to remove this
    for to_remove in (i + 1)..(i + 100) {
        commit_tree.remove(to_remove.to_be_bytes())?;
    if !seen && !force_reindex {
        warn!("Detected converged history, forcing reindex");

        return branch_index_update(
            reference,
            reference_name,
            relative_path,
            db_repository,
            db,
            git_repository,
            true,
        );
    }

    Ok(())