From 6291310ba06619e6ca086d329448de92593d127d Mon Sep 17 00:00:00 2001 From: Jordan Doyle Date: Thu, 03 Apr 2025 16:13:15 +0700 Subject: [PATCH] Implement repository list file This allows invocation of the server with a list of repositories relative to the scan path that should be indexed, ignoring everything else in the scan path. Closes #105 --- src/main.rs | 15 ++++++++++++++- src/database/indexer.rs | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------- 2 files changed, 69 insertions(+), 20 deletions(-) diff --git a/src/main.rs b/src/main.rs index 51d8fc0..3026c49 100644 --- a/src/main.rs +++ a/src/main.rs @@ -78,6 +78,10 @@ bind_address: SocketAddr, /// The path in which your bare Git repositories reside (will be scanned recursively) scan_path: PathBuf, + /// Optional path (relative to cwd) to a plain text file containing a list of repositories relative to the `scan_path` + /// that are whitelisted to be exposed by rgit. + #[clap(long)] + repository_list: Option, /// Configures the metadata refresh interval (eg. "never" or "60s") #[clap(long, default_value_t = RefreshInterval::Duration(Duration::from_secs(300)))] refresh_interval: RefreshInterval, @@ -134,8 +138,12 @@ let db = open_db(&args)?; - let indexer_wakeup_task = - run_indexer(db.clone(), args.scan_path.clone(), args.refresh_interval); + let indexer_wakeup_task = run_indexer( + db.clone(), + args.scan_path.clone(), + args.repository_list.clone(), + args.refresh_interval, + ); let css = { let theme = basic_toml::from_str::(include_str!("../themes/github_light.toml")) @@ -289,13 +297,14 @@ async fn run_indexer( db: Arc, scan_path: PathBuf, + repository_list: Option, refresh_interval: RefreshInterval, ) -> Result<(), tokio::task::JoinError> { let (indexer_wakeup_send, mut indexer_wakeup_recv) = mpsc::channel(10); std::thread::spawn(move || loop { info!("Running periodic index"); - crate::database::indexer::run(&scan_path, &db); + crate::database::indexer::run(&scan_path, repository_list.as_deref(), &db); info!("Finished periodic index"); if indexer_wakeup_recv.blocking_recv().is_none() { diff --git a/src/database/indexer.rs b/src/database/indexer.rs index f4743ce..72cf47b 100644 --- a/src/database/indexer.rs +++ a/src/database/indexer.rs @@ -1,14 +1,15 @@ use std::{ collections::HashSet, ffi::OsStr, fmt::Debug, + io::{BufRead, BufReader}, path::{Path, PathBuf}, sync::Arc, }; use anyhow::Context; use gix::{bstr::ByteSlice, refs::Category, Reference}; -use itertools::Itertools; +use itertools::{Either, Itertools}; use rocksdb::WriteBatch; use time::{OffsetDateTime, UtcOffset}; use tracing::{error, info, info_span, instrument, warn}; @@ -19,13 +20,13 @@ tag::{Tag, TagTree}, }; -pub fn run(scan_path: &Path, db: &Arc) { +pub fn run(scan_path: &Path, repository_list: Option<&Path>, db: &Arc) { let span = info_span!("index_update"); let _entered = span.enter(); info!("Starting index update"); - update_repository_metadata(scan_path, db); + update_repository_metadata(scan_path, repository_list, db); update_repository_reflog(scan_path, db.clone()); update_repository_tags(scan_path, db.clone()); @@ -39,9 +40,9 @@ } #[instrument(skip(db))] -fn update_repository_metadata(scan_path: &Path, db: &rocksdb::DB) { +fn update_repository_metadata(scan_path: &Path, repository_list: Option<&Path>, db: &rocksdb::DB) { let mut discovered = Vec::new(); - discover_repositories(scan_path, &mut discovered); + discover_repositories(scan_path, repository_list, &mut discovered); for (repository_path, git_repository) in discovered { let Some(relative) = get_relative_path(scan_path, &repository_path) else { @@ -403,19 +404,51 @@ full_path.strip_prefix(relative_to).ok() } -fn discover_repositories(current: &Path, discovered_repos: &mut Vec<(PathBuf, gix::Repository)>) { - let current = match std::fs::read_dir(current) { - Ok(v) => v, - Err(error) => { - error!(%error, "Failed to enter repository directory {}", current.display()); - return; +fn discover_repositories( + current: &Path, + repository_list: Option<&Path>, + discovered_repos: &mut Vec<(PathBuf, gix::Repository)>, +) { + let dirs = if let Some(repo_list) = repository_list { + let mut repo_list = match std::fs::File::open(&repo_list) { + Ok(v) => BufReader::new(v).lines(), + Err(error) => { + error!(%error, "Failed to open repository list file"); + return; + } + }; + + let mut out = Vec::new(); + + while let Some(line) = repo_list.next() { + let line = match line { + Ok(v) => v, + Err(error) => { + error!(%error, "Failed to read repository list file"); + return; + } + }; + + out.push(current.join(line)); } - }; - let dirs = current - .filter_map(Result::ok) - .map(|v| v.path()) - .filter(|path| path.is_dir()); + Either::Left(out.into_iter()) + } else { + let current = match std::fs::read_dir(current) { + Ok(v) => v, + Err(error) => { + error!(%error, "Failed to enter repository directory {}", current.display()); + return; + } + }; + + Either::Right( + current + .filter_map(Result::ok) + .map(|v| v.path()) + .filter(|path| path.is_dir()), + ) + }; for dir in dirs { match gix::open_opts(&dir, gix::open::Options::default().open_path_as_is(true)) { @@ -423,8 +456,15 @@ repo.object_cache_size(10 * 1024 * 1024); discovered_repos.push((dir, repo)); } + Err(gix::open::Error::NotARepository { .. }) if repository_list.is_none() => { + discover_repositories(&dir, None, discovered_repos); + } + Err(gix::open::Error::NotARepository { .. }) => { - discover_repositories(&dir, discovered_repos); + warn!( + "Repository list points to directory which isn't a Git repository: {}", + dir.display() + ); } Err(error) => { warn!(%error, "Failed to open repository {} for indexing", dir.display()); -- rgit 0.1.4