From 6d1bab366bc5bcd89d7f6c126abbda7734b7fc8c Mon Sep 17 00:00:00 2001 From: Jordan Doyle Date: Sun, 31 Dec 2023 05:13:01 +0000 Subject: [PATCH] Add ability to download tar.gz of commit/branch/tag --- Cargo.lock | 121 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 3 +++ src/git.rs | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ templates/repo/commit.html | 4 ++++ templates/repo/tag.html | 4 ++++ src/methods/repo/commit.rs | 23 ++++++++++++++++++++++- src/methods/repo/mod.rs | 3 +++ src/methods/repo/snapshot.rs | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 337 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c10d63d..aca619d 100644 --- a/Cargo.lock +++ a/Cargo.lock @@ -796,6 +796,18 @@ checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" [[package]] +name = "filetime" +version = "0.2.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ee447700ac8aa0b2f2bd7bc4462ad686ba06baa6727ac149a2d6277f0d240fd" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall 0.4.1", + "windows-sys 0.52.0", +] + +[[package]] name = "flate2" version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -1727,6 +1739,15 @@ version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = "redox_syscall" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" dependencies = [ "bitflags 1.3.2", ] @@ -1793,6 +1814,7 @@ "bytes", "clap", "comrak", + "flate2", "futures", "git2", "hex", @@ -1810,9 +1832,11 @@ "sha2", "sled", "syntect", + "tar", "time", "timeago", "tokio", + "tokio-stream", "tokio-util", "tower", "tower-http", @@ -2183,6 +2207,17 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" + +[[package]] +name = "tar" +version = "0.4.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b16afcea1f22891c49a00c751c7b63b2233284064f11a200fc624137c51e2ddb" +dependencies = [ + "filetime", + "libc", + "xattr", +] [[package]] name = "tempfile" @@ -2323,6 +2358,17 @@ "proc-macro2", "quote", "syn", +] + +[[package]] +name = "tokio-stream" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "397c988d37662c7dda6d2208364a706264bf3d6138b11d436cbac0ad38832842" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", ] [[package]] @@ -2700,6 +2746,15 @@ checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" dependencies = [ "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.0", ] [[package]] @@ -2730,6 +2785,21 @@ "windows_x86_64_gnu 0.48.5", "windows_x86_64_gnullvm 0.48.5", "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +dependencies = [ + "windows_aarch64_gnullvm 0.52.0", + "windows_aarch64_msvc 0.52.0", + "windows_i686_gnu 0.52.0", + "windows_i686_msvc 0.52.0", + "windows_x86_64_gnu 0.52.0", + "windows_x86_64_gnullvm 0.52.0", + "windows_x86_64_msvc 0.52.0", ] [[package]] @@ -2743,6 +2813,12 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" [[package]] name = "windows_aarch64_msvc" @@ -2757,6 +2833,12 @@ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" [[package]] +name = "windows_aarch64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" + +[[package]] name = "windows_i686_gnu" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -2767,6 +2849,12 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" [[package]] name = "windows_i686_msvc" @@ -2779,6 +2867,12 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" [[package]] name = "windows_x86_64_gnu" @@ -2791,6 +2885,12 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" [[package]] name = "windows_x86_64_gnullvm" @@ -2803,6 +2903,12 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" [[package]] name = "windows_x86_64_msvc" @@ -2815,6 +2921,21 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" + +[[package]] +name = "xattr" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbc6ab6ec1907d1a901cdbcd2bd4cb9e7d64ce5c9739cbb97d3c391acd8c7fae" +dependencies = [ + "libc", +] [[package]] name = "xdg" diff --git a/Cargo.toml b/Cargo.toml index 37aabed..223d8db 100644 --- a/Cargo.toml +++ a/Cargo.toml @@ -33,10 +33,13 @@ sha2 = "0.10" syntect = "5" sled = { version = "0.34", features = ["compression"] } +tar = "0.4" +flate2 = "1.0" time = { version = "0.3", features = ["serde"] } timeago = { version = "0.4.2", default-features = false } tokio = { version = "1.19", features = ["full"] } tokio-util = { version = "0.7.3", features = ["io"] } +tokio-stream = "0.1" tower = "0.4" tower-service = "0.3" tower-layer = "0.3" diff --git a/src/git.rs b/src/git.rs index b4842ac..3c3a813 100644 --- a/src/git.rs +++ a/src/git.rs @@ -7,12 +7,12 @@ time::Duration, }; -use anyhow::{Context, Result}; -use bytes::{Bytes, BytesMut}; +use anyhow::{anyhow, Context, Result}; +use bytes::{BufMut, Bytes, BytesMut}; use comrak::{ComrakOptions, ComrakPlugins}; use git2::{ DiffFormat, DiffLineType, DiffOptions, DiffStatsFormat, Email, EmailCreateOptions, ObjectType, - Oid, Signature, + Oid, Signature, TreeWalkResult, }; use moka::future::Cache; use parking_lot::Mutex; @@ -22,7 +22,7 @@ util::LinesWithEndings, }; use time::OffsetDateTime; -use tracing::instrument; +use tracing::{error, instrument, warn}; use crate::syntax_highlight::ComrakSyntectAdapter; @@ -272,6 +272,16 @@ .await } + pub async fn default_branch(self: Arc) -> Result> { + tokio::task::spawn_blocking(move || { + let repo = self.repo.lock(); + let head = repo.head().context("Couldn't find HEAD of repository")?; + Ok(head.shorthand().map(ToString::to_string)) + }) + .await + .context("Failed to join Tokio task")? + } + #[instrument(skip(self))] pub async fn latest_commit(self: Arc) -> Result { tokio::task::spawn_blocking(move || { @@ -297,6 +307,87 @@ }) .await .context("Failed to join Tokio task")? + } + + #[instrument(skip_all)] + pub async fn archive( + self: Arc, + res: tokio::sync::mpsc::Sender>, + cont: tokio::sync::oneshot::Sender<()>, + commit: Option<&str>, + ) -> Result<(), anyhow::Error> { + const BUFFER_CAP: usize = 512 * 1024; + + let commit = commit + .map(Oid::from_str) + .transpose() + .context("failed to build oid")?; + + tokio::task::spawn_blocking(move || { + let buffer = BytesMut::with_capacity(BUFFER_CAP + 1024); + + let flate = flate2::write::GzEncoder::new(buffer.writer(), flate2::Compression::fast()); + let mut archive = tar::Builder::new(flate); + + let repo = self.repo.lock(); + + let tree = if let Some(commit) = commit { + repo.find_commit(commit)?.tree()? + } else if let Some(reference) = &self.branch { + repo.resolve_reference_from_short_name(reference)? + .peel_to_tree()? + } else { + repo.head() + .context("Couldn't find HEAD of repository")? + .peel_to_tree()? + }; + + // tell the web server it can send response headers to the requester + if cont.send(()).is_err() { + return Err(anyhow!("requester gone")); + } + + let mut callback = |root: &str, entry: &git2::TreeEntry| -> TreeWalkResult { + if let Ok(blob) = entry.to_object(&repo).unwrap().peel_to_blob() { + let path = + Path::new(root).join(String::from_utf8_lossy(entry.name_bytes()).as_ref()); + + let mut header = tar::Header::new_gnu(); + if let Err(error) = header.set_path(&path) { + warn!(%error, "Attempted to write invalid path to archive"); + return TreeWalkResult::Skip; + } + header.set_size(blob.size() as u64); + #[allow(clippy::cast_sign_loss)] + header.set_mode(entry.filemode() as u32); + header.set_cksum(); + + if let Err(error) = archive.append(&header, blob.content()) { + error!(%error, "Failed to write blob to archive"); + return TreeWalkResult::Abort; + } + } + + if archive.get_ref().get_ref().get_ref().len() >= BUFFER_CAP { + let b = archive.get_mut().get_mut().get_mut().split().freeze(); + if let Err(error) = res.blocking_send(Ok(b)) { + error!(%error, "Failed to send buffer to client"); + return TreeWalkResult::Abort; + } + } + + TreeWalkResult::Ok + }; + + tree.walk(git2::TreeWalkMode::PreOrder, &mut callback)?; + + res.blocking_send(Ok(archive.into_inner()?.finish()?.into_inner().freeze()))?; + + Ok::<_, anyhow::Error>(()) + }) + .await??; + + Ok(()) } #[instrument(skip(self))] diff --git a/templates/repo/commit.html b/templates/repo/commit.html index 39b0d6a..1b666c0 100644 --- a/templates/repo/commit.html +++ a/templates/repo/commit.html @@ -35,6 +35,10 @@
{{ parent }}
{%- endfor %} + + download (tar.gz) +
{{ id.as_deref().unwrap_or(dl_branch.as_ref()) }}
+ diff --git a/templates/repo/tag.html b/templates/repo/tag.html index 6c78733..bf55e8a 100644 --- a/templates/repo/tag.html +++ a/templates/repo/tag.html @@ -31,6 +31,10 @@ {% endif %} + + download (tar.gz) +
{{ tag.name }}
+ diff --git a/src/methods/repo/commit.rs b/src/methods/repo/commit.rs index 046a6c5..b339ff8 100644 --- a/src/methods/repo/commit.rs +++ a/src/methods/repo/commit.rs @@ -17,6 +17,8 @@ pub repo: Repository, pub commit: Arc, pub branch: Option>, + pub dl_branch: Arc, + pub id: Option, } #[derive(Deserialize)] @@ -33,8 +35,23 @@ Query(query): Query, ) -> Result { let open_repo = git.repo(repository_path, query.branch.clone()).await?; - let commit = if let Some(commit) = query.id { - open_repo.commit(&commit).await? + + let dl_branch = if let Some(branch) = query.branch.clone() { + branch + } else { + Arc::from( + open_repo + .clone() + .default_branch() + .await + .ok() + .flatten() + .unwrap_or_else(|| "master".to_string()), + ) + }; + + let commit = if let Some(commit) = query.id.as_deref() { + open_repo.commit(commit).await? } else { Arc::new(open_repo.latest_commit().await?) }; @@ -43,5 +60,7 @@ repo, commit, branch: query.branch, + id: query.id, + dl_branch, })) } diff --git a/src/methods/repo/mod.rs b/src/methods/repo/mod.rs index 393cd46..1b818eb 100644 --- a/src/methods/repo/mod.rs +++ a/src/methods/repo/mod.rs @@ -1,9 +1,10 @@ mod about; mod commit; mod diff; mod log; mod refs; mod smart_git; +mod snapshot; mod summary; mod tag; mod tree; @@ -32,6 +33,7 @@ log::handle as handle_log, refs::handle as handle_refs, smart_git::handle as handle_smart_git, + snapshot::handle as handle_snapshot, summary::handle as handle_summary, tag::handle as handle_tag, tree::handle as handle_tree, @@ -89,6 +91,7 @@ Some("diff") => h!(handle_diff), Some("patch") => h!(handle_patch), Some("tag") => h!(handle_tag), + Some("snapshot") => h!(handle_snapshot), Some(v) => { uri_parts.push(v); diff --git a/src/methods/repo/snapshot.rs b/src/methods/repo/snapshot.rs new file mode 100644 index 0000000..908da22 100644 --- /dev/null +++ a/src/methods/repo/snapshot.rs @@ -1,0 +1,86 @@ +use std::sync::Arc; + +use anyhow::{anyhow, Context}; +use axum::{ + body::{boxed, Body, BoxBody}, + extract::Query, + http::Response, + Extension, +}; +use serde::Deserialize; +use tokio_stream::wrappers::ReceiverStream; +use tracing::{error, info_span, Instrument}; + +use super::{RepositoryPath, Result}; +use crate::git::Git; + +#[derive(Deserialize)] +pub struct UriQuery { + #[serde(rename = "h")] + branch: Option>, + id: Option>, +} + +pub async fn handle( + Extension(RepositoryPath(repository_path)): Extension, + Extension(git): Extension>, + Query(query): Query, +) -> Result> { + let open_repo = git.repo(repository_path, query.branch.clone()).await?; + + // byte stream back to the client + let (send, recv) = tokio::sync::mpsc::channel(1); + + // channel for `archive` to tell us we can send headers etc back to + // the user so it has time to return an error + let (send_cont, recv_cont) = tokio::sync::oneshot::channel(); + + let id = query.id.clone(); + + let res = tokio::spawn( + async move { + if let Err(error) = open_repo + .archive(send.clone(), send_cont, id.as_deref()) + .await + { + error!(%error, "Failed to build archive for client"); + let _res = send.send(Err(anyhow!("archive builder failed"))).await; + return Err(error); + } + + Ok(()) + } + .instrument(info_span!("sender")), + ); + + // don't send any headers until `archive` has told us we're good + // to continue + if recv_cont.await.is_err() { + // sender disappearing means `archive` hit an issue during init, lets + // wait for the error back from the spawned tokio task to return to + // the client + res.await + .context("Tokio task failed")? + .context("Failed to build archive")?; + + // ok, well this isn't ideal. the sender disappeared but we never got + // an error. this shouldn't be possible, i guess lets just return an + // internal error + return Err(anyhow!("Ran into inconsistent error state whilst building archive, please file an issue at https://github.com/w4/rgit/issues").into()); + } + + let file_name = query + .id + .as_deref() + .or(query.branch.as_deref()) + .unwrap_or("main"); + + Ok(Response::builder() + .header("Content-Type", "application/gzip") + .header( + "Content-Disposition", + format!("attachment; filename=\"{file_name}.tar.gz\""), + ) + .body(boxed(Body::wrap_stream(ReceiverStream::new(recv)))) + .context("failed to build response")?) +} -- rgit 0.1.3