From 3430b7c5d480f182d26ccb6142d889534521306c Mon Sep 17 00:00:00 2001 From: Jordan Doyle Date: Wed, 13 Nov 2024 01:22:48 +0000 Subject: [PATCH] Avoid mutating vector to determine endpoint This change also fixes a long-standing bug with directories named `tree` in the repository tree causing a "repository not found" error to be returned. --- src/main.rs | 8 +------- src/methods/repo/mod.rs | 82 +++++++++++++++++++++++++++++++++++++++++++++++++------------------------------- 2 files changed, 48 insertions(+), 42 deletions(-) diff --git a/src/main.rs b/src/main.rs index 20d0826..e803443 100644 --- a/src/main.rs +++ a/src/main.rs @@ -241,13 +241,7 @@ let mut commit_family_options = Options::default(); commit_family_options.set_prefix_extractor(SliceTransform::create( "commit_prefix", - |input| { - if let Some(offset) = memchr::memchr(b'\0', input) { - &input[offset + 1..] - } else { - input - } - }, + |input| memchr::memchr(b'\0', input).map_or(input, |idx| &input[..idx]), None, )); diff --git a/src/methods/repo/mod.rs b/src/methods/repo/mod.rs index 43ac306..72ece11 100644 --- a/src/methods/repo/mod.rs +++ a/src/methods/repo/mod.rs @@ -13,7 +13,7 @@ collections::BTreeMap, ops::Deref, path::{Path, PathBuf}, - sync::Arc, + sync::{Arc, LazyLock}, }; use axum::{ @@ -53,14 +53,6 @@ .get::>() .expect("scan_path missing"); - let mut uri_parts: Vec<&str> = request - .uri() - .path() - .trim_start_matches('/') - .trim_end_matches('/') - .split('/') - .collect(); - let mut child_path = None; macro_rules! 
h { @@ -68,15 +60,36 @@ BoxCloneService::new($handler.into_service()) }; } + + let uri = request + .uri() + .path() + .trim_start_matches('/') + .trim_end_matches('/'); + let mut uri_parts = memchr::memchr_iter(b'/', uri.as_bytes()); + + let original_uri = uri; + let (action, mut uri) = if let Some(idx) = uri_parts.next_back() { + (uri.get(idx + 1..), &uri[..idx]) + } else { + (None, uri) + }; - let mut service = match uri_parts.pop() { + let mut service = match action { Some("about") => h!(handle_about), - Some("refs") if uri_parts.last() == Some(&"info") => { - uri_parts.pop(); - h!(handle_smart_git) - } Some("git-upload-pack") => h!(handle_smart_git), - Some("refs") => h!(handle_refs), + Some("refs") => { + if let Some(idx) = uri_parts.next_back() { + if uri.get(idx + 1..) == Some("info") { + uri = &uri[..idx]; + h!(handle_smart_git) + } else { + h!(handle_refs) + } + } else { + h!(handle_refs) + } + } Some("log") => h!(handle_log), Some("tree") => h!(handle_tree), Some("commit") => h!(handle_commit), @@ -84,35 +97,26 @@ Some("patch") => h!(handle_patch), Some("tag") => h!(handle_tag), Some("snapshot") => h!(handle_snapshot), - Some(v) => { - uri_parts.push(v); - - // match tree children - if uri_parts.iter().any(|v| *v == "tree") { - // TODO: this needs fixing up so it doesn't accidentally match repos that have - // `tree` in their path - let mut reconstructed_path = Vec::new(); - - while let Some(part) = uri_parts.pop() { - if part == "tree" { - break; - } - - // TODO: FIXME - reconstructed_path.insert(0, part); - } + Some(_) => { + static TREE_FINDER: LazyLock = + LazyLock::new(|| memchr::memmem::Finder::new(b"/tree/")); - child_path = Some(reconstructed_path.into_iter().collect::().clean()); + uri = original_uri; + // match tree children + if let Some(idx) = TREE_FINDER.find(uri.as_bytes()) { + // 6 is the length of /tree/ + child_path = Some(Path::new(&uri[idx + 6..]).clean()); + uri = &uri[..idx]; h!(handle_tree) } else { h!(handle_summary) } } - None => 
panic!("not found"), + None => h!(handle_summary), }; - let uri = uri_parts.into_iter().collect::().clean(); + let uri = Path::new(uri).clean(); let path = scan_path.join(&uri); let db = request @@ -162,6 +166,14 @@ } pub type Result = std::result::Result; + +pub struct InvalidRequest; + +impl IntoResponse for InvalidRequest { + fn into_response(self) -> Response { + (StatusCode::NOT_FOUND, "Invalid request").into_response() + } +} pub struct RepositoryNotFound; -- rgit 0.1.3