From be1de75e63d5737228ee2435132ccf1fadd15959 Mon Sep 17 00:00:00 2001 From: Jordan Doyle Date: Sat, 12 Mar 2022 01:35:43 +0000 Subject: [PATCH] Return valid index to cargo over git, integration with new download gitlab endpoint https://gitlab.com/gitlab-org/gitlab/-/merge_requests/82663 --- Cargo.lock | 116 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 5 +++++ src/git_command_handlers/fetch.rs | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/git_command_handlers/ls_refs.rs | 31 +++++++++++++++++++++++++++++++ src/git_command_handlers/mod.rs | 2 ++ src/main.rs | 268 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------- src/metadata.rs | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/protocol/high_level.rs | 59 ++++++++++++++++++++++++++++++----------------------------- src/protocol/low_level.rs | 50 +++++++++++++++++++++++++------------------------- src/protocol/mod.rs | 2 +- src/protocol/packet_line.rs | 2 +- src/providers/gitlab.rs | 173 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------------------------- src/providers/mod.rs | 19 ++++++++++++++++--- src/util.rs | 32 +++++++++++++++++++------------- 14 files changed, 722 insertions(+), 156 deletions(-) create mode 100644 src/git_command_handlers/fetch.rs create mode 100644 src/git_command_handlers/ls_refs.rs create mode 100644 src/git_command_handlers/mod.rs create mode 100644 src/metadata.rs diff --git a/Cargo.lock b/Cargo.lock index 20df39b..fec55db 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -31,6 +31,15 @@ dependencies = [ ] [[package]] +name = "ansi_term" +version = 
"0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" +dependencies = [ + "winapi", +] + +[[package]] name = "anyhow" version = "1.0.53" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -160,6 +169,37 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8" [[package]] +name = "camino" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f3132262930b0522068049f5870a856ab8affc80c70d08b6ecb785771a6fc23" +dependencies = [ + "serde", +] + +[[package]] +name = "cargo-platform" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbdb825da8a5df079a43676dbe042702f1707b1109f713a01420fbb4cc71fa27" +dependencies = [ + "serde", +] + +[[package]] +name = "cargo_metadata" +version = "0.14.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4acbb09d9ee8e23699b9634375c72795d095bf268439da88562cf9b501f181fa" +dependencies = [ + "camino", + "cargo-platform", + "semver", + "serde", + "serde_json", +] + +[[package]] name = "cc" version = "1.0.73" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -478,16 +518,19 @@ dependencies = [ "async-trait", "base64", "bytes", + "cargo_metadata", "flate2", "futures", "hex", "indexmap", "indoc", "itoa", + "parking_lot", "parse_link_header", "percent-encoding", "reqwest", "serde", + "serde_json", "sha1", "shlex", "thrussh", @@ -495,6 +538,8 @@ dependencies = [ "time", "tokio", "tokio-util 0.7.0", + "tracing", + "tracing-subscriber", ] [[package]] @@ -1169,6 +1214,15 @@ dependencies = [ ] [[package]] +name = "semver" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4a3381e03edd24287172047536f20cabde766e2cd3e65e6b00fb3af51c4f38d" +dependencies = [ + "serde", +] 
+ +[[package]] name = "serde" version = "1.0.136" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -1236,6 +1290,15 @@ dependencies = [ ] [[package]] +name = "sharded-slab" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "900fba806f70c630b0a382d0d825e17a0f19fcd059a2ade1ff237bcddf446b31" +dependencies = [ + "lazy_static", +] + +[[package]] name = "shlex" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -1324,6 +1387,15 @@ dependencies = [ ] [[package]] +name = "thread_local" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180" +dependencies = [ + "once_cell", +] + +[[package]] name = "thrussh" version = "0.33.5" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -1509,16 +1581,54 @@ checksum = "f6c650a8ef0cd2dd93736f033d21cbd1224c5a967aa0c258d00fcf7dafef9b9f" dependencies = [ "cfg-if", "pin-project-lite", + "tracing-attributes", "tracing-core", ] [[package]] +name = "tracing-attributes" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e65ce065b4b5c53e73bb28912318cb8c9e9ad3921f1d669eb0e68b4c8143a2b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] name = "tracing-core" version = "0.1.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "03cfcb51380632a72d3111cb8d3447a8d908e577d31beeac006f836383d29a23" dependencies = [ "lazy_static", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6923477a48e41c1951f1999ef8bb5a3023eb723ceadafe78ffb65dc366761e3" +dependencies = [ + "lazy_static", + "log", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "9e0ab7bdc962035a87fba73f3acca9b8a8d0034c2e6f60b84aeaaddddc155dce" +dependencies = [ + "ansi_term", + "sharded-slab", + "smallvec", + "thread_local", + "tracing-core", + "tracing-log", ] [[package]] @@ -1573,6 +1683,12 @@ dependencies = [ ] [[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + +[[package]] name = "vcpkg" version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" diff --git a/Cargo.toml b/Cargo.toml index c06303a..8352f2d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ async-trait = "0.1" arrayvec = "0.7" base64 = "0.13" bytes = "1.1" +cargo_metadata = "0.14" flate2 = "1.0" futures = "0.3" hex = "0.4" @@ -18,11 +19,15 @@ itoa = "1.0" indexmap = "1.8" indoc = "1.0" parse_link_header = "0.3" +parking_lot = "0.12" percent-encoding = "2.1" reqwest = { version = "0.11", features = ["json"] } serde = { version = "1.0", features = ["derive"] } +serde_json = "1" sha1 = "0.10" shlex = "1.1" +tracing = "0.1" +tracing-subscriber = "0.3" thrussh = "0.33" thrussh-keys = "0.21" time = "0.3" diff --git a/src/git_command_handlers/fetch.rs b/src/git_command_handlers/fetch.rs new file mode 100644 index 0000000..0034e9f --- /dev/null +++ b/src/git_command_handlers/fetch.rs @@ -0,0 +1,51 @@ +use bytes::Bytes; +use thrussh::{server::Session, ChannelId}; + +use crate::{ + protocol::{ + low_level::{PackFile, PackFileEntry}, + packet_line::PktLine, + }, + Handler, PackageProvider, UserProvider, +}; + +pub fn handle( + handle: &mut Handler, + session: &mut Session, + channel: ChannelId, + metadata: Vec, + packfile_entries: Vec, +) -> Result<(), anyhow::Error> { + // the client sending us `done` in the metadata means they know there's no negotiation + // required for which commits we need to send, they just want us to send whatever we + // have. 
+ let done = metadata.iter().any(|v| v.as_ref() == b"done"); + + // the client thinks we can negotiate some commits with them, but we don't want to so + // we'll just say we've got nothing in common and continue on as we were. + if !done { + handle.write(PktLine::Data(b"acknowledgments\n"))?; + handle.write(PktLine::Data(b"ready\n"))?; + handle.write(PktLine::Delimiter)?; + } + + // magic header + handle.write(PktLine::Data(b"packfile\n"))?; + + // send a welcome message + // handle.write(PktLine::SidebandMsg(b"Hello from gitlab-cargo-shim!\n"))?; + // handle.flush(session, channel); + + // send the complete packfile + let packfile = PackFile::new(packfile_entries); + handle.write(PktLine::SidebandData(packfile))?; + handle.write(PktLine::Flush)?; + handle.flush(session, channel); + + // tell the client we exited successfully and close the channel + session.exit_status_request(channel, 0); + session.eof(channel); + session.close(channel); + + Ok(()) +} diff --git a/src/git_command_handlers/ls_refs.rs b/src/git_command_handlers/ls_refs.rs new file mode 100644 index 0000000..19da2de --- /dev/null +++ b/src/git_command_handlers/ls_refs.rs @@ -0,0 +1,31 @@ +//! [ls-refs][lsr] is sent from the client when they want to see what refs we have +//! on the server, we're generating our commits on the fly though so we'll just tell +//! them we have a master branch with whatever the generated commit hash is. +//! +//! 
[lsr]: https://git-scm.com/docs/protocol-v2/2.19.0#_ls_refs + +use bytes::Bytes; +use thrussh::{server::Session, ChannelId}; + +use crate::{ + protocol::{low_level::HashOutput, packet_line::PktLine}, + Handler, PackageProvider, UserProvider, +}; + +pub fn handle( + handle: &mut Handler, + session: &mut Session, + channel: ChannelId, + _metadata: Vec, + commit_hash: &HashOutput, +) -> Result<(), anyhow::Error> { + let commit_hash = hex::encode(&commit_hash); + + handle.write(PktLine::Data( + format!("{} HEAD symref-target:refs/heads/master\n", commit_hash).as_bytes(), + ))?; + handle.write(PktLine::Flush)?; + handle.flush(session, channel); + + Ok(()) +} diff --git a/src/git_command_handlers/mod.rs b/src/git_command_handlers/mod.rs new file mode 100644 index 0000000..8406cdb --- /dev/null +++ b/src/git_command_handlers/mod.rs @@ -0,0 +1,2 @@ +pub mod fetch; +pub mod ls_refs; diff --git a/src/main.rs b/src/main.rs index 4b03ddb..ade64fc 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,16 +1,32 @@ +pub mod git_command_handlers; +pub mod metadata; pub mod protocol; pub mod providers; pub mod util; -use crate::{providers::{gitlab::Gitlab, PackageProvider, Release, User, UserProvider}, protocol::{codec::Encoder, packet_line::PktLine}}; +use crate::metadata::CargoIndexCrateMetadata; +use crate::protocol::low_level::{HashOutput, PackFileEntry}; +use crate::util::get_crate_folder; +use crate::{ + protocol::{ + codec::{Encoder, GitCodec}, + high_level::GitRepository, + packet_line::PktLine, + }, + providers::{gitlab::Gitlab, PackageProvider, Release, User, UserProvider}, +}; +use anyhow::anyhow; +use bytes::{BufMut, Bytes, BytesMut}; use futures::Future; -use std::{net::SocketAddr, pin::Pin, sync::Arc, fmt::Write}; -use bytes::BytesMut; -use thrussh::{server::{Auth, Session}, ChannelId, CryptoVec}; +use parking_lot::RwLock; +use std::{borrow::Cow, collections::HashMap, fmt::Write, net::SocketAddr, pin::Pin, sync::Arc}; +use thrussh::{ + server::{Auth, Session}, + ChannelId, 
CryptoVec, +}; use thrussh_keys::key::PublicKey; -use tokio::task::JoinHandle; -use tokio_util::codec::Encoder as CodecEncoder; -use crate::protocol::high_level::GitRepository; +use tokio_util::{codec::Decoder, codec::Encoder as CodecEncoder}; +use tracing::error; const AGENT: &str = concat!( "agent=", @@ -22,6 +38,8 @@ const AGENT: &str = concat!( #[tokio::main] async fn main() -> anyhow::Result<()> { + tracing_subscriber::fmt::init(); + let ed25519_key = thrussh_keys::key::KeyPair::generate_ed25519().unwrap(); let thrussh_config = Arc::new(thrussh::server::Config { @@ -32,12 +50,23 @@ async fn main() -> anyhow::Result<()> { let gitlab = Arc::new(Gitlab::new()?); - thrussh::server::run(thrussh_config, "127.0.0.1:2222", Server { gitlab }).await?; + thrussh::server::run( + thrussh_config, + "127.0.0.1:2210", + Server { + gitlab, + metadata_cache: Arc::new(Default::default()), + }, + ) + .await?; Ok(()) } +type MetadataCache = Arc, Arc>>>; + struct Server { gitlab: Arc, + metadata_cache: MetadataCache, } impl thrussh::server::Server @@ -47,25 +76,31 @@ impl thrussh::server: fn new(&mut self, _peer_addr: Option) -> Self::Handler { Handler { + codec: GitCodec::default(), gitlab: self.gitlab.clone(), user: None, group: None, - fetcher_future: None, + // fetcher_future: None, input_bytes: BytesMut::new(), output_bytes: BytesMut::new(), - is_git_protocol_v2: false + is_git_protocol_v2: false, + metadata_cache: self.metadata_cache.clone(), + packfile_cache: None, } } } -struct Handler { +pub struct Handler { + codec: GitCodec, gitlab: Arc, user: Option, group: Option, - fetcher_future: Option>>>, + // fetcher_future: Option>>>, input_bytes: BytesMut, output_bytes: BytesMut, is_git_protocol_v2: bool, + metadata_cache: MetadataCache, + packfile_cache: Option<(HashOutput, Vec)>, } impl Handler { @@ -88,9 +123,126 @@ impl Handler { ); } - async fn fetch_releases(&self, group: &str) -> anyhow::Result> { + async fn fetch_releases_by_crate( + &self, + group: &str, + ) -> 
anyhow::Result>> { + let user = self.user()?; + + let mut res = HashMap::new(); + + for (path, release) in self + .gitlab + .clone() + .fetch_releases_for_group(group, user) + .await? + { + res.entry((path, release.name.clone())) + .or_insert_with(Vec::new) + .push(release); + } + + Ok(res) + } + + async fn fetch_metadata( + &self, + path: &U::CratePath, + checksum: &str, + crate_name: &str, + crate_version: &str, + ) -> anyhow::Result> { + let key = MetadataCacheKey { + checksum: checksum.into(), + crate_name: crate_name.into(), + crate_version: crate_version.into(), + }; + + { + let reader = self.metadata_cache.read(); + if let Some(cache) = reader.get(&key) { + return Ok(cache.clone()); + } + } + + let metadata = self + .gitlab + .clone() + .fetch_metadata_for_release(path, crate_version) + .await?; + + // transform the `cargo metadata` output to the cargo index + // format + let cksum = checksum.to_string(); + let metadata = metadata::transform(metadata, crate_name, cksum) + .map(Arc::new) + .ok_or_else(|| anyhow!("the supplied metadata.json did contain the released crate"))?; + + { + let mut writer = self.metadata_cache.write(); + writer.insert(key.into_owned(), metadata.clone()); + } + + Ok(metadata) + } + + async fn build_packfile(&mut self) -> anyhow::Result<(HashOutput, Vec)> { + if let Some(packfile_cache) = &self.packfile_cache { + // TODO + return Ok(packfile_cache.clone()); + } + + let mut packfile = GitRepository::default(); + let user = self.user()?; - self.gitlab.clone().fetch_releases_for_group(group, user.clone()).await + let group = self.group()?; + + let token = self.gitlab.fetch_token_for_user(user).await?; + + let config_json = Bytes::from(format!( + "{{\"dl\": \"{}\"}}", + self.gitlab.cargo_dl_uri(group, &token) + )); + + // write config.json to the root of the repo + packfile.insert(vec![], "config.json".to_string(), config_json)?; + + // fetch the releases for every project within the given group + let releases_by_crate = 
self.fetch_releases_by_crate(group).await?; + + let mut buffer = BytesMut::new(); + + for ((crate_path, crate_name), releases) in &releases_by_crate { + for release in releases { + let checksum = &release.checksum; + let version = &release.version; + + // parses the `cargo metadata` stored in the release, which + // should be stored under `metadata.json`. + let meta = self + .fetch_metadata(&crate_path, &checksum, &crate_name, &version) + .await?; + + buffer.extend_from_slice(&serde_json::to_vec(&*meta).unwrap()); + buffer.put_u8(b'\n'); + } + + packfile.insert( + get_crate_folder(&crate_name), + crate_name.to_string(), + buffer.split().freeze(), + )?; + } + + let packfile = packfile.commit( + "test".to_string(), + "test@test.com".to_string(), + "test".to_string(), + )?; + + self.packfile_cache = Some(packfile.clone()); + + Ok(packfile) } } @@ -131,41 +283,66 @@ impl<'a, U: UserProvider + PackageProvider + Send + Sync + 'static> thrussh::ser if user.is_none() { user = self .gitlab - .find_user_by_ssh_key(&util::format_fingerprint(&fingerprint)?) + .find_user_by_ssh_key(&util::format_fingerprint(&fingerprint)) .await?; } - self.user = Some(user.ok_or(anyhow::anyhow!("failed to find user"))?); - - self.finished_auth(Auth::Accept).await + if let Some(user) = user { + self.user = Some(user); + self.finished_auth(Auth::Accept).await + } else { + self.finished_auth(Auth::Reject).await + } }) } fn data(mut self, channel: ChannelId, data: &[u8], mut session: Session) -> Self::FutureUnit { self.input_bytes.extend_from_slice(data); - Box::pin( - async move { - while let Some(frame) = self.codec.decode(&mut self.input_bytes)? 
{ - // if the client flushed without giving us a command, we're expected to close - // the connection or else the client will just hang - if frame.command.is_empty() { - session.exit_status_request(channel, 0); - session.eof(channel); - session.close(channel); - return Ok((self, session)); - } - - let user = self.user()?; - let group = self.group()?; - - // start building the packfile we're going to send to the user - let mut packfile = GitRepository::default(); + Box::pin(async move { + // start building the packfile we're going to send to the user + let (commit_hash, packfile_entries) = self.build_packfile().await?; + + while let Some(frame) = self.codec.decode(&mut self.input_bytes)? { + // if the client flushed without giving us a command, we're expected to close + // the connection or else the client will just hang + if frame.command.is_empty() { + session.exit_status_request(channel, 0); + session.eof(channel); + session.close(channel); + return Ok((self, session)); } - Ok((self, session)) + match frame.command.as_ref() { + b"command=ls-refs" => { + git_command_handlers::ls_refs::handle( + &mut self, + &mut session, + channel, + frame.metadata, + &commit_hash, + )?; + } + b"command=fetch" => { + git_command_handlers::fetch::handle( + &mut self, + &mut session, + channel, + frame.metadata, + packfile_entries.clone(), + )?; + } + v => { + error!( + "Client sent unknown command, ignoring command {}", + std::str::from_utf8(v).unwrap_or("invalid utf8") + ); + } + } } - ) + + Ok((self, session)) + }) } fn env_request( @@ -265,3 +442,20 @@ impl<'a, U: UserProvider + PackageProvider + Send + Sync + 'static> thrussh::ser }) } } + +#[derive(Hash, Debug, PartialEq, Eq)] +struct MetadataCacheKey<'a> { + checksum: Cow<'a, str>, + crate_name: Cow<'a, str>, + crate_version: Cow<'a, str>, +} + +impl MetadataCacheKey<'_> { + pub fn into_owned(self) -> MetadataCacheKey<'static> { + MetadataCacheKey { + checksum: self.checksum.into_owned().into(), + crate_name: 
self.crate_name.into_owned().into(), + crate_version: self.crate_version.into_owned().into(), + } + } +} diff --git a/src/metadata.rs b/src/metadata.rs new file mode 100644 index 0000000..2a75669 --- /dev/null +++ b/src/metadata.rs @@ -0,0 +1,68 @@ +use cargo_metadata::{Package}; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; + +/// Transforms metadata from `cargo metadata` to the standard one-line JSON used in cargo registries. +/// +/// https://github.com/rust-lang/cargo/blob/3bc0e6d83f7f5da0161ce445f8864b0b639776a9/src/cargo/ops/registry.rs#L183 +pub fn transform( + metadata: cargo_metadata::Metadata, + crate_name: &str, + cksum: String, +) -> Option { + let package: Package = metadata + .packages + .into_iter() + .find(|v| v.name == crate_name)?; + + Some(CargoIndexCrateMetadata { + name: package.name, + vers: package.version.to_string(), + deps: package + .dependencies + .into_iter() + .map(|v| CargoIndexCrateMetadataDependency { + name: v.name, + req: v.req.to_string(), + features: v.features, + optional: v.optional, + default_features: v.uses_default_features, + target: v.target.map(|v| v.to_string()), + kind: v.kind.to_string(), + registry: Some( + v.registry + .unwrap_or("https://github.com/rust-lang/crates.io-index.git".to_string()), + ), + package: v.rename, + }) + .collect(), + cksum, + features: package.features, + yanked: false, + links: package.links, + }) +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct CargoIndexCrateMetadata { + name: String, + vers: String, + deps: Vec, + cksum: String, + features: HashMap>, + yanked: bool, + links: Option, +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct CargoIndexCrateMetadataDependency { + name: String, + req: String, + features: Vec, + optional: bool, + default_features: bool, + target: Option, + kind: String, + registry: Option, + package: Option, +} diff --git a/src/protocol/high_level.rs b/src/protocol/high_level.rs index b560036..3f699df 100644 --- 
a/src/protocol/high_level.rs +++ b/src/protocol/high_level.rs @@ -6,7 +6,7 @@ //! for our purposes because `cargo` will `git pull --force` from our Git //! server, allowing us to ignore any history the client may have. -use arrayvec::ArrayVec; +use bytes::Bytes; use indexmap::IndexMap; use super::low_level::{ @@ -18,25 +18,25 @@ use super::low_level::{ /// Builds a whole packfile containing files, directories and commits - essentially /// building out a full Git repository in memory. #[derive(Default, Debug)] -pub struct GitRepository<'a> { +pub struct GitRepository { /// A map containing all the blobs and their corresponding hashes so they're /// not inserted more than once for any files in the whole tree with the same /// content. - packfile_entries: IndexMap>, + packfile_entries: IndexMap, /// An in-progress `Tree` currently being built out, the tree refers to items /// in `file_entries` by hash. - tree: Tree<'a>, + tree: Tree, } -impl<'a> GitRepository<'a> { +impl GitRepository { /// Inserts a file into the repository, writing a file to the path /// `path/to/my-file` would require a `path` of `["path", "to"]` /// and a `file` of `"my-file"`. - pub fn insert( + pub fn insert( &mut self, - path: ArrayVec<&'a str, N>, - file: &'a str, - content: &'a [u8], + path: Vec, + file: String, + content: Bytes, ) -> Result<(), anyhow::Error> { // we'll initialise the directory to the root of the tree, this means // if a path isn't specified we'll just write it to the root directory @@ -79,13 +79,15 @@ impl<'a> GitRepository<'a> { /// all the files currently in the `tree`, returning all the packfile entries /// and also the commit hash so it can be referred to by `ls-ref`s. 
pub fn commit( - &'a mut self, - name: &'static str, - email: &'static str, - message: &'static str, - ) -> Result<(HashOutput, Vec>), anyhow::Error> { + mut self, + name: String, + email: String, + message: String, + ) -> Result<(HashOutput, Vec), anyhow::Error> { // gets the hash of the entire tree from the root - let tree_hash = self.tree.to_packfile_entries(&mut self.packfile_entries)?; + let tree_hash = self + .tree + .into_packfile_entries(&mut self.packfile_entries)?; // build the commit using the given inputs let commit_user = CommitUserInfo { @@ -96,7 +98,7 @@ impl<'a> GitRepository<'a> { let commit = PackFileEntry::Commit(Commit { tree: tree_hash, - author: commit_user, + author: commit_user.clone(), committer: commit_user, message, }); @@ -105,34 +107,33 @@ impl<'a> GitRepository<'a> { let commit_hash = commit.hash()?; self.packfile_entries.insert(commit_hash, commit); - // TODO: make PackFileEntry copy and remove this clone Ok(( commit_hash, - self.packfile_entries.values().cloned().collect(), + self.packfile_entries.into_iter().map(|(_, v)| v).collect(), )) } } /// An in-progress tree builder, containing file hashes along with their names or nested trees #[derive(Default, Debug)] -struct Tree<'a>(IndexMap<&'a str, Box>>); +struct Tree(IndexMap>); -impl<'a> Tree<'a> { +impl Tree { /// Recursively writes the the whole tree out to the given `pack_file`, /// the tree contains pointers to (hashes of) files contained within a /// directory, and pointers to other directories. 
- fn to_packfile_entries( - &self, - pack_file: &mut IndexMap>, + fn into_packfile_entries( + self, + pack_file: &mut IndexMap, ) -> Result { let mut tree = Vec::with_capacity(self.0.len()); - for (name, item) in &self.0 { - tree.push(match item.as_ref() { + for (name, item) in self.0 { + tree.push(match *item { TreeItem::Blob(hash) => LowLevelTreeItem { kind: TreeItemKind::File, name, - hash: *hash, + hash, }, TreeItem::Tree(tree) => LowLevelTreeItem { kind: TreeItemKind::Directory, @@ -140,7 +141,7 @@ impl<'a> Tree<'a> { // we're essentially working through our tree from the bottom up, // so we can grab the hash of each directory along the way and // reference it from the parent directory - hash: tree.to_packfile_entries(pack_file)?, + hash: tree.into_packfile_entries(pack_file)?, }, }); } @@ -157,9 +158,9 @@ impl<'a> Tree<'a> { /// An item within a `Tree`, this could be a file blob or another directory. #[derive(Debug)] -enum TreeItem<'a> { +enum TreeItem { /// Refers to a file by hash Blob(HashOutput), /// Refers to a nested directory - Tree(Tree<'a>), + Tree(Tree), } diff --git a/src/protocol/low_level.rs b/src/protocol/low_level.rs index 512e98c..8e9eb38 100644 --- a/src/protocol/low_level.rs +++ b/src/protocol/low_level.rs @@ -1,4 +1,4 @@ -use bytes::{BufMut, BytesMut}; +use bytes::{BufMut, Bytes, BytesMut}; use flate2::{write::ZlibEncoder, Compression}; use sha1::Digest; use std::{convert::TryInto, fmt::Write, io::Write as IoWrite}; @@ -11,13 +11,13 @@ pub type HashOutput = [u8; 20]; // which is sort of used to make sure you're getting the start of the // packfile correctly. This is followed by a 4-byte packfile version // number and then a 4-byte number of entries in that file. 
-pub struct PackFile<'a> { - entries: Vec>, +pub struct PackFile { + entries: Vec, } -impl<'a> PackFile<'a> { +impl PackFile { #[must_use] - pub fn new(entries: Vec>) -> Self { + pub fn new(entries: Vec) -> Self { Self { entries } } @@ -54,17 +54,17 @@ impl<'a> PackFile<'a> { } } -#[derive(Debug, Clone, Copy)] -pub struct Commit<'a> { +#[derive(Debug, Clone)] +pub struct Commit { pub tree: HashOutput, // pub parent: [u8; 20], - pub author: CommitUserInfo<'a>, - pub committer: CommitUserInfo<'a>, + pub author: CommitUserInfo, + pub committer: CommitUserInfo, // pub gpgsig: &str, - pub message: &'a str, + pub message: String, } -impl Commit<'_> { +impl Commit { fn encode_to(&self, out: &mut BytesMut) -> Result<(), anyhow::Error> { let mut tree_hex = [0_u8; 20 * 2]; hex::encode_to_slice(self.tree, &mut tree_hex)?; @@ -91,14 +91,14 @@ impl Commit<'_> { } } -#[derive(Copy, Clone, Debug)] -pub struct CommitUserInfo<'a> { - pub name: &'a str, - pub email: &'a str, +#[derive(Clone, Debug)] +pub struct CommitUserInfo { + pub name: String, + pub email: String, pub time: time::OffsetDateTime, } -impl CommitUserInfo<'_> { +impl CommitUserInfo { fn encode(&self) -> String { // TODO: remove `format!`, `format_args!`? 
format!( @@ -138,15 +138,15 @@ impl TreeItemKind { } } -#[derive(Debug, Copy, Clone)] -pub struct TreeItem<'a> { +#[derive(Debug, Clone)] +pub struct TreeItem { pub kind: TreeItemKind, - pub name: &'a str, + pub name: String, pub hash: HashOutput, } // `[mode] [name]\0[hash]` -impl TreeItem<'_> { +impl TreeItem { fn encode_to(&self, out: &mut BytesMut) -> Result<(), anyhow::Error> { out.write_str(self.kind.mode())?; write!(out, " {}\0", self.name)?; @@ -161,7 +161,7 @@ impl TreeItem<'_> { } #[derive(Debug, Clone)] // could be copy but Vec> -pub enum PackFileEntry<'a> { +pub enum PackFileEntry { // jordan@Jordans-MacBook-Pro-2 0d % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - f5/473259d9674ed66239766a013f96a3550374e3 | gzip -dc // commit 1068tree 0d586b48bc42e8591773d3d8a7223551c39d453c // parent c2a862612a14346ae95234f26efae1ee69b5b7a9 @@ -185,20 +185,20 @@ pub enum PackFileEntry<'a> { // -----END PGP SIGNATURE----- // // test - Commit(Commit<'a>), + Commit(Commit), // jordan@Jordans-MacBook-Pro-2 0d % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - 0d/586b48bc42e8591773d3d8a7223551c39d453c | gzip -dc // tree 20940000 .cargo���CYy��Ve�������100644 .gitignore�K��_ow�]����4�n�ݺ100644 Cargo.lock�7�3-�?/�� // kt��c0C�100644 Cargo.toml�6�&(��]\8@�SHA�]f40000 src0QW��ƅ���b[�!�S&N�100644 test�G2Y�gN�b9vj?��Ut� - Tree(Vec>), + Tree(Vec), // jordan@Jordans-MacBook-Pro-2 objects % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - f5/473259d9674ed66239766a013f96a3550374e3| gzip -dc // blob 23try and find me in .git - Blob(&'a [u8]), + Blob(Bytes), // Tag, // OfsDelta, // RefDelta, } -impl PackFileEntry<'_> { +impl PackFileEntry { fn write_header(&self, buf: &mut BytesMut) { let mut size = self.uncompressed_size(); diff --git a/src/protocol/mod.rs b/src/protocol/mod.rs index ba28159..5e11618 100644 --- a/src/protocol/mod.rs +++ b/src/protocol/mod.rs @@ -1,4 +1,4 @@ +pub mod codec; pub mod high_level; pub mod low_level; -pub mod codec; pub mod packet_line; diff --git 
a/src/protocol/packet_line.rs b/src/protocol/packet_line.rs index e469001..81f98ee 100644 --- a/src/protocol/packet_line.rs +++ b/src/protocol/packet_line.rs @@ -8,7 +8,7 @@ pub enum PktLine<'a> { Data(&'a [u8]), /// Similar to a data packet, but used during packfile sending to indicate this /// packet is a block of data by appending a byte containing the u8 `1`. - SidebandData(PackFile<'a>), + SidebandData(PackFile), /// Similar to a data packet, but used during packfile sending to indicate this /// packet is a status message by appending a byte containing the u8 `2`. SidebandMsg(&'a [u8]), diff --git a/src/providers/gitlab.rs b/src/providers/gitlab.rs index 49c6508..3d39ab8 100644 --- a/src/providers/gitlab.rs +++ b/src/providers/gitlab.rs @@ -3,9 +3,13 @@ use async_trait::async_trait; use futures::{stream::FuturesUnordered, StreamExt, TryStreamExt}; use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC}; use reqwest::header; -use serde::Deserialize; +use serde::{Deserialize, Serialize}; use std::sync::Arc; +const GITLAB_API_ENDPOINT: &str = "http://127.0.0.1:3000"; +// const PAT: &str = "glpat-saSjc4srMhxAA-qDp8F8"; +const PAT: &str = "X994NFZjTy1ZYbsCwTLK"; + pub struct Gitlab { client: reqwest::Client, base_url: String, @@ -14,36 +18,18 @@ pub struct Gitlab { impl Gitlab { pub fn new() -> anyhow::Result { let mut headers = header::HeaderMap::new(); - headers.insert( - "PRIVATE-TOKEN", - header::HeaderValue::from_static("token"), - ); + headers.insert("PRIVATE-TOKEN", header::HeaderValue::from_static(PAT)); Ok(Self { client: reqwest::ClientBuilder::new() .default_headers(headers) .build()?, - base_url: "https://127.0.0.1/api/v4".to_string(), + base_url: format!("{}/api/v4", GITLAB_API_ENDPOINT), }) } - - pub async fn get_impersonation_token_for(&self, user: &User) -> anyhow::Result { - let impersonation_token: GitlabImpersonationTokenResponse = self - .client - .get(format!( - "{}/users/{}/impersonation_tokens", - self.base_url, user.id - )) - 
.body(format!("name={};scopes=api", env!("CARGO_PKG_NAME"))) - .send() - .await? - .json() - .await?; - - Ok(impersonation_token.token) - } } +// TODO: errors are not yet handled, they're returned as {"error": "abc"} #[async_trait] impl super::UserProvider for Gitlab { async fn find_user_by_username_password_combo( @@ -51,7 +37,10 @@ impl super::UserProvider for Gitlab { username_password: &str, ) -> anyhow::Result> { let mut splitter = username_password.splitn(2, ':'); - let (username, password) = (splitter.next().unwrap(), splitter.next().unwrap()); + let (username, password) = match (splitter.next(), splitter.next()) { + (Some(username), Some(password)) => (username, password), + _ => return Ok(None), + }; if username == "gitlab-ci-token" { let res: GitlabJobResponse = self @@ -77,7 +66,8 @@ impl super::UserProvider for Gitlab { .client .get(format!( "{}/keys?fingerprint={}", - self.base_url, fingerprint + self.base_url, + utf8_percent_encode(fingerprint, NON_ALPHANUMERIC) )) .send() .await? @@ -88,19 +78,38 @@ impl super::UserProvider for Gitlab { username: u.username, })) } + + async fn fetch_token_for_user(&self, user: &User) -> anyhow::Result { + let impersonation_token: GitlabImpersonationTokenResponse = self + .client + .post(format!( + "{}/users/{}/impersonation_tokens", + self.base_url, user.id + )) + .json(&GitlabImpersonationTokenRequest { + name: env!("CARGO_PKG_NAME"), + scopes: vec!["api"], + }) + .send() + .await? 
+ .json() + .await?; + + Ok(impersonation_token.token) + } } #[async_trait] impl super::PackageProvider for Gitlab { + type CratePath = Arc; + async fn fetch_releases_for_group( self: Arc, group: &str, - do_as: User, - ) -> anyhow::Result> { - let impersonation_token = Arc::new(self.get_impersonation_token_for(&do_as).await?); - + do_as: &User, + ) -> anyhow::Result> { let mut next_uri = Some(format!( - "{}/groups/{}/packages?per_page=100&pagination=keyset&order_by=id&sort=asc&sudo={}", + "{}/groups/{}/packages?per_page=100&pagination=keyset&sort=asc&sudo={}", self.base_url, utf8_percent_encode(group, NON_ALPHANUMERIC), do_as.id @@ -123,7 +132,6 @@ impl super::PackageProvider for Gitlab { for release in res { let this = self.clone(); - let impersonation_token = impersonation_token.clone(); futures.push(tokio::spawn(async move { let (project, package) = { @@ -134,7 +142,13 @@ impl super::PackageProvider for Gitlab { } }; - let package_files: GitlabPackageFilesResponse = this + let package_path = Arc::new(GitlabCratePath { + project: utf8_percent_encode(project, NON_ALPHANUMERIC).to_string(), + package_name: utf8_percent_encode(&release.name, NON_ALPHANUMERIC) + .to_string(), + }); + + let package_files: Vec = this .client .get(format!( "{}/projects/{}/packages/{}/package_files", @@ -147,30 +161,95 @@ impl super::PackageProvider for Gitlab { .json() .await?; - Ok::<_, anyhow::Error>(Some(Release { - uri: format!( - "{}/projects/{}/packages/generic/{}/{}/{}?private_token={}", - this.base_url, - utf8_percent_encode(project, NON_ALPHANUMERIC), - utf8_percent_encode(&release.name, NON_ALPHANUMERIC), - utf8_percent_encode(&release.version, NON_ALPHANUMERIC), - package_files.file_name, - impersonation_token, - ), - name: release.name, - version: release.version, - checksum: package_files.file_sha256, - })) + Ok::<_, anyhow::Error>(Some( + package_files + .into_iter() + .filter_map(|package_file| { + if package_file.file_name.ends_with(".crate") { + if 
package_file.file_name + == format!("{}-{}.crate", release.name, release.version) + { + Some(( + package_path.clone(), + Release { + name: release.name.clone(), + version: release.version.clone(), + checksum: package_file.file_sha256, + }, + )) + } else { + tracing::info!( + "{}/{}/{}/{} should be called {}-{}.crate", + project, + release.name, + release.version, + package_file.file_name, + release.name, + release.version + ); + None + } + } else { + None + } + }) + .collect(), + )) })) } } - futures + let x: Vec> = futures .err_into() .filter_map(|v| async move { v.and_then(|v| v).transpose() }) .try_collect() - .await + .await?; + + Ok(x.into_iter().flatten().collect()) } + + async fn fetch_metadata_for_release( + self: Arc, + path: &Self::CratePath, + version: &str, + ) -> anyhow::Result { + let uri = format!( + "{}{}?private_token={}", + self.base_url, + path.metadata_uri(version), + PAT, + ); + + Ok(self.client.get(uri).send().await?.json().await?) + } + + fn cargo_dl_uri(&self, group: &str, token: &str) -> String { + format!( + "{}/groups/{group}/packages/generic/{{sha256-checksum}}/{{crate}}-{{version}}.crate?private_token={token}", + self.base_url + ) + } +} + +#[derive(Clone, Debug, Hash, PartialEq, Eq)] +pub struct GitlabCratePath { + project: String, + package_name: String, +} + +impl GitlabCratePath { + pub fn metadata_uri(&self, version: &str) -> String { + format!( + "/projects/{}/packages/generic/{}/{version}/metadata.json", + self.project, self.package_name + ) + } +} + +#[derive(Serialize)] +pub struct GitlabImpersonationTokenRequest { + name: &'static str, + scopes: Vec<&'static str>, } #[derive(Deserialize)] diff --git a/src/providers/mod.rs b/src/providers/mod.rs index 05bb127..ffa2bed 100644 --- a/src/providers/mod.rs +++ b/src/providers/mod.rs @@ -11,15 +11,29 @@ pub trait UserProvider { ) -> anyhow::Result>; async fn find_user_by_ssh_key(&self, fingerprint: &str) -> anyhow::Result>; + + async fn fetch_token_for_user(&self, user: &User) -> 
anyhow::Result; } #[async_trait] pub trait PackageProvider { + /// Provider-specific metadata passed between `PackageProvider` methods to + /// figure out the path of a package. + type CratePath: std::fmt::Debug + Send + std::hash::Hash + Clone + Eq + PartialEq + Send + Sync; + async fn fetch_releases_for_group( self: Arc, group: &str, - do_as: User, - ) -> anyhow::Result>; + do_as: &User, + ) -> anyhow::Result>; + + async fn fetch_metadata_for_release( + self: Arc, + path: &Self::CratePath, + version: &str, + ) -> anyhow::Result; + + fn cargo_dl_uri(&self, group: &str, token: &str) -> String; } #[derive(Debug, Clone)] @@ -33,5 +47,4 @@ pub struct Release { pub name: String, pub version: String, pub checksum: String, - pub uri: String, } diff --git a/src/util.rs b/src/util.rs index b01f71c..f4324a8 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,18 +1,24 @@ -/// Retrieves the key fingerprint, encoded in hex and separated in two character chunks -/// with colons. -pub fn format_fingerprint(fingerprint: &str) -> Result { - let raw_hex = hex::encode( - base64::decode(&fingerprint).map_err(|_| thrussh_keys::Error::CouldNotReadKey)?, - ); - let mut hex = String::with_capacity(raw_hex.len() + (raw_hex.len() / 2 - 1)); +pub fn format_fingerprint(fingerprint: &str) -> String { + format!("SHA256:{}", fingerprint) +} - for (i, c) in raw_hex.chars().enumerate() { - if i != 0 && i % 2 == 0 { - hex.push(':'); - } +/// Crates with a total of 1, 2 or 3 characters in the name are written out to directories named +/// 1, 2 or 3 respectively as per the cargo spec. Anything else we'll build out a normal tree for +/// using the first four characters of the crate name, 2 for the first directory and the other 2 +/// for the second.
+pub fn get_crate_folder(crate_name: &str) -> Vec { + let mut folders = Vec::new(); - hex.push(c); + match crate_name.len() { + 0 => {} + 1 => folders.push("1".to_string()), + 2 => folders.push("2".to_string()), + 3 => folders.push("3".to_string()), + _ => { + folders.push(crate_name[..2].to_string()); + folders.push(crate_name[2..4].to_string()); + } } - Ok(hex) + folders } -- libgit2 1.7.2