🏡 index : ~doyle/rgit.git

author Jordan Doyle <jordan@doyle.la> 2025-04-03 16:13:15.0 +07:00:00
committer Jordan Doyle <jordan@doyle.la> 2025-04-03 16:23:51.0 +07:00:00
commit
6291310ba06619e6ca086d329448de92593d127d [patch]
tree
c6a27091b07cc3047c13dd228819b6acbdd6d434
parent
b60d5ddd3fe2bbc31903366ea6af63821d4566d7
download
6291310ba06619e6ca086d329448de92593d127d.tar.gz

Implement repository list file

This allows invocation of the server with a list of repositories
relative to the scan path that should be indexed, ignoring everything
else in the scan path.

Closes #105

Diff

 src/main.rs             | 15 ++++++++++++++-
 src/database/indexer.rs | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------
 2 files changed, 69 insertions(+), 20 deletions(-)

diff --git a/src/main.rs b/src/main.rs
index 51d8fc0..3026c49 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -78,6 +78,10 @@
    bind_address: SocketAddr,
    /// The path in which your bare Git repositories reside (will be scanned recursively)

    scan_path: PathBuf,
    /// Optional path (relative to cwd) to a plain text file containing a list of repositories relative to the `scan_path`

    /// that are whitelisted to be exposed by rgit.

    #[clap(long)]
    repository_list: Option<PathBuf>,
    /// Configures the metadata refresh interval (eg. "never" or "60s")

    #[clap(long, default_value_t = RefreshInterval::Duration(Duration::from_secs(300)))]
    refresh_interval: RefreshInterval,
@@ -134,8 +138,12 @@

    let db = open_db(&args)?;

    let indexer_wakeup_task =
        run_indexer(db.clone(), args.scan_path.clone(), args.refresh_interval);
    let indexer_wakeup_task = run_indexer(
        db.clone(),
        args.scan_path.clone(),
        args.repository_list.clone(),
        args.refresh_interval,
    );

    let css = {
        let theme = basic_toml::from_str::<Theme>(include_str!("../themes/github_light.toml"))
@@ -289,13 +297,14 @@
async fn run_indexer(
    db: Arc<rocksdb::DB>,
    scan_path: PathBuf,
    repository_list: Option<PathBuf>,
    refresh_interval: RefreshInterval,
) -> Result<(), tokio::task::JoinError> {
    let (indexer_wakeup_send, mut indexer_wakeup_recv) = mpsc::channel(10);

    std::thread::spawn(move || loop {
        info!("Running periodic index");
        crate::database::indexer::run(&scan_path, &db);
        crate::database::indexer::run(&scan_path, repository_list.as_deref(), &db);
        info!("Finished periodic index");

        if indexer_wakeup_recv.blocking_recv().is_none() {
diff --git a/src/database/indexer.rs b/src/database/indexer.rs
index f4743ce..72cf47b 100644
--- a/src/database/indexer.rs
+++ b/src/database/indexer.rs
@@ -1,14 +1,15 @@
use std::{
    collections::HashSet,
    ffi::OsStr,
    fmt::Debug,
    io::{BufRead, BufReader},
    path::{Path, PathBuf},
    sync::Arc,
};

use anyhow::Context;
use gix::{bstr::ByteSlice, refs::Category, Reference};
use itertools::Itertools;
use itertools::{Either, Itertools};
use rocksdb::WriteBatch;
use time::{OffsetDateTime, UtcOffset};
use tracing::{error, info, info_span, instrument, warn};
@@ -19,13 +20,13 @@
    tag::{Tag, TagTree},
};

pub fn run(scan_path: &Path, db: &Arc<rocksdb::DB>) {
pub fn run(scan_path: &Path, repository_list: Option<&Path>, db: &Arc<rocksdb::DB>) {
    let span = info_span!("index_update");
    let _entered = span.enter();

    info!("Starting index update");

    update_repository_metadata(scan_path, db);
    update_repository_metadata(scan_path, repository_list, db);
    update_repository_reflog(scan_path, db.clone());
    update_repository_tags(scan_path, db.clone());

@@ -39,9 +40,9 @@
}

#[instrument(skip(db))]
fn update_repository_metadata(scan_path: &Path, db: &rocksdb::DB) {
fn update_repository_metadata(scan_path: &Path, repository_list: Option<&Path>, db: &rocksdb::DB) {
    let mut discovered = Vec::new();
    discover_repositories(scan_path, &mut discovered);
    discover_repositories(scan_path, repository_list, &mut discovered);

    for (repository_path, git_repository) in discovered {
        let Some(relative) = get_relative_path(scan_path, &repository_path) else {
@@ -403,19 +404,51 @@
    full_path.strip_prefix(relative_to).ok()
}

fn discover_repositories(current: &Path, discovered_repos: &mut Vec<(PathBuf, gix::Repository)>) {
    let current = match std::fs::read_dir(current) {
        Ok(v) => v,
        Err(error) => {
            error!(%error, "Failed to enter repository directory {}", current.display());
            return;
fn discover_repositories(
    current: &Path,
    repository_list: Option<&Path>,
    discovered_repos: &mut Vec<(PathBuf, gix::Repository)>,
) {
    let dirs = if let Some(repo_list) = repository_list {
        let mut repo_list = match std::fs::File::open(&repo_list) {
            Ok(v) => BufReader::new(v).lines(),
            Err(error) => {
                error!(%error, "Failed to open repository list file");
                return;
            }
        };

        let mut out = Vec::new();

        while let Some(line) = repo_list.next() {
            let line = match line {
                Ok(v) => v,
                Err(error) => {
                    error!(%error, "Failed to read repository list file");
                    return;
                }
            };

            out.push(current.join(line));
        }
    };

    let dirs = current
        .filter_map(Result::ok)
        .map(|v| v.path())
        .filter(|path| path.is_dir());
        Either::Left(out.into_iter())
    } else {
        let current = match std::fs::read_dir(current) {
            Ok(v) => v,
            Err(error) => {
                error!(%error, "Failed to enter repository directory {}", current.display());
                return;
            }
        };

        Either::Right(
            current
                .filter_map(Result::ok)
                .map(|v| v.path())
                .filter(|path| path.is_dir()),
        )
    };

    for dir in dirs {
        match gix::open_opts(&dir, gix::open::Options::default().open_path_as_is(true)) {
@@ -423,8 +456,15 @@
                repo.object_cache_size(10 * 1024 * 1024);
                discovered_repos.push((dir, repo));
            }
            Err(gix::open::Error::NotARepository { .. }) if repository_list.is_none() => {
                discover_repositories(&dir, None, discovered_repos);
            }

            Err(gix::open::Error::NotARepository { .. }) => {
                discover_repositories(&dir, discovered_repos);
                warn!(
                    "Repository list points to directory which isn't a Git repository: {}",
                    dir.display()
                );
            }
            Err(error) => {
                warn!(%error, "Failed to open repository {} for indexing", dir.display());