From 46ca918befbc28693db6a2b1ff474648742fca44 Mon Sep 17 00:00:00 2001 From: Jordan Doyle Date: Sun, 13 Mar 2022 02:11:55 +0000 Subject: [PATCH] Persist generated server private keys across restarts --- .gitignore | 1 + config.toml | 4 ++++ src/config.rs | 3 +++ src/main.rs | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------- src/metadata.rs | 5 +++++ src/protocol/high_level.rs | 8 ++++---- src/protocol/low_level.rs | 6 +++--- 7 files changed, 110 insertions(+), 20 deletions(-) diff --git a/.gitignore b/.gitignore index d81f12e..a9e780a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /target /.idea +config-private.toml diff --git a/config.toml b/config.toml index dbe3f41..147f487 100644 --- a/config.toml +++ b/config.toml @@ -1,3 +1,7 @@ +# directory in which the generated private keys for the server +# should be stored +state-directory = "/var/lib/gitlab-cargo-shim" + [gitlab] # the base url of the gitlab instance uri = "http://127.0.0.1:3000" diff --git a/src/config.rs b/src/config.rs index 812d18e..35effc1 100644 --- a/src/config.rs +++ b/src/config.rs @@ -2,6 +2,7 @@ use clap::Parser; use serde::{de::DeserializeOwned, Deserialize}; +use std::path::PathBuf; #[derive(Parser)] #[clap(version = clap::crate_version!(), author = clap::crate_authors!())] @@ -11,7 +12,9 @@ pub struct Args { } #[derive(Deserialize)] +#[serde(rename_all = "kebab-case")] pub struct Config { + pub state_directory: PathBuf, pub gitlab: GitlabConfig, } diff --git a/src/main.rs b/src/main.rs index 7438bce..1a7485b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -10,7 +10,7 @@ pub mod util; use crate::{ config::Args, - metadata::CargoIndexCrateMetadata, + metadata::{CargoConfig, CargoIndexCrateMetadata}, protocol::{ codec::{Encoder, GitCodec}, high_level::GitRepository, @@ -32,7 +32,7 @@ use thrussh::{ }; use thrussh_keys::key::PublicKey; use tokio_util::{codec::Decoder, codec::Encoder as CodecEncoder}; -use tracing::error; +use 
tracing::{error, info}; const AGENT: &str = concat!( "agent=", @@ -48,11 +48,43 @@ async fn main() -> anyhow::Result<()> { let args: Args = Args::parse(); - let ed25519_key = thrussh_keys::key::KeyPair::generate_ed25519().unwrap(); + if !args.config.state_directory.exists() { + std::fs::create_dir_all(&args.config.state_directory)?; + } + + let server_private_key = args.config.state_directory.join("ssh-private-key.pem"); + + let key = if server_private_key.exists() { + let key_bytes = std::fs::read(&server_private_key)?; + if key_bytes.len() != 64 { + anyhow::bail!( + "invalid private key. length = {}, expected = 64", + key_bytes.len() + ); + } + + let mut key = [0_u8; 64]; + key.copy_from_slice(&key_bytes); + + thrussh_keys::key::KeyPair::Ed25519(thrussh_keys::key::ed25519::SecretKey { key }) + } else { + info!( + "Generating new server private key to {}", + server_private_key.display() + ); + + let key = thrussh_keys::key::KeyPair::generate_ed25519() + .ok_or_else(|| anyhow!("failed to generate server private key"))?; + let thrussh_keys::key::KeyPair::Ed25519(key) = key; + + std::fs::write(server_private_key, &key.key)?; + + thrussh_keys::key::KeyPair::Ed25519(key) + }; let thrussh_config = Arc::new(thrussh::server::Config { methods: thrussh::MethodSet::PUBLICKEY, - keys: vec![ed25519_key], + keys: vec![key], ..thrussh::server::Config::default() }); @@ -122,10 +154,13 @@ impl Handler { self.group.as_ref().ok_or(anyhow::anyhow!("no group set")) } + /// Writes a Git packet line response to the buffer, this should only + /// be used once the client opens a `shell_request`. fn write(&mut self, packet: PktLine<'_>) -> Result<(), anyhow::Error> { Encoder.encode(packet, &mut self.output_bytes) } + /// Flushes the buffer out to the client fn flush(&mut self, session: &mut Session, channel: ChannelId) { session.data( channel, @@ -133,6 +168,8 @@ impl Handler { ); } + /// Fetches all the releases from the provider for the given group + /// and groups them by crate. 
async fn fetch_releases_by_crate( &self, ) -> anyhow::Result>> { @@ -153,6 +190,10 @@ impl Handler { Ok(res) } + /// Fetches metadata from the provider for a given crate. This is + /// globally cache-able since it's immutable: to get to this call + /// the user must've already fetched the crate path from the provider + /// and hence verified they have permission to read it. async fn fetch_metadata( &self, path: &U::CratePath, @@ -166,6 +207,8 @@ impl Handler { crate_version: crate_version.into(), }; + // check if the crate metadata already exists in our cache, if it does + // we'll just return that { let reader = self.metadata_cache.read(); if let Some(cache) = reader.get(&key) { @@ -173,6 +216,7 @@ impl Handler { } } + // fetch metadata from the provider let metadata = Arc::clone(&self.gitlab) .fetch_metadata_for_release(path, crate_version) .await?; @@ -184,6 +228,8 @@ impl Handler { .map(Arc::new) .ok_or_else(|| anyhow!("the supplied metadata.json did contain the released crate"))?; + // cache the transformed value so the next user to pull it + // doesn't have to wait for _yet another_ gitlab call { let mut writer = self.metadata_cache.write(); writer.insert(key.into_owned(), Arc::clone(&metadata)); @@ -192,22 +238,37 @@ impl Handler { Ok(metadata) } + // Builds the packfile for the current connection, and caches it in case + // this function is called again (i.e. the client calling `ls-refs` before + // `fetch` will result in two calls). The output isn't deterministic because + // the datetime is included in the commit, causing the hash to change. By + // caching we ensure that: + // + // 1. the client receives the expected refs when calling `fetch`, + // 2. we don't do the relatively expensive processing that comes with + // generating the packfile more than once per connection. 
async fn build_packfile(&mut self) -> anyhow::Result)>> { + // return the cached value if we've generated the packfile for + // this connection already if let Some(packfile_cache) = &self.packfile_cache { return Ok(packfile_cache.clone()); } + // create the high-level packfile generator let mut packfile = GitRepository::default(); let user = self.user()?; let group = self.group()?; + // fetch the impersonation token for the user, which we'll embed + // in the `dl` string. let token = self.gitlab.fetch_token_for_user(user).await?; - let config_json = Bytes::from(format!( - "{{\"dl\": \"{}\"}}", - self.gitlab.cargo_dl_uri(group, &token) - )); + // generate the config for the user, containing the download + // url template from gitlab and the impersonation token embedded + let config_json = Bytes::from(serde_json::to_vec(&CargoConfig { + dl: self.gitlab.cargo_dl_uri(group, &token), + })?); // write config.json to the root of the repo packfile.insert(vec![], "config.json".to_string(), config_json)?; @@ -215,6 +276,8 @@ impl Handler { // fetch the releases for every project within the given group let releases_by_crate = self.fetch_releases_by_crate().await?; + // a reusable buffer for writing the metadata json blobs out to + // for each package let mut buffer = BytesMut::new(); for ((crate_path, crate_name), releases) in &releases_by_crate { @@ -228,10 +291,13 @@ impl Handler { .fetch_metadata(crate_path, checksum, crate_name, version) .await?; - buffer.extend_from_slice(&serde_json::to_vec(&*meta).unwrap()); + // each crate's file in the index is a metadata blob for + // each version separated by a newline + buffer.extend_from_slice(&serde_json::to_vec(&*meta)?); buffer.put_u8(b'\n'); } + // insert the crate version metadata into the packfile packfile.insert( get_crate_folder(crate_name), crate_name.to_string(), @@ -239,12 +305,16 @@ impl Handler { )?; } + // build a commit for all of our inserted files and build it + // into its lower-level `Vec` counterpart. 
let packfile = Arc::new(packfile.commit( - "test".to_string(), - "test@test.com".to_string(), - "test".to_string(), + env!("CARGO_PKG_NAME"), + "noreply@chart.rs", + "Latest crates from GitLab", )?); + // cache the built packfile for the next time this + // function is called from this connection self.packfile_cache = Some(Arc::clone(&packfile)); Ok(packfile) @@ -280,11 +350,18 @@ impl<'a, U: UserProvider + PackageProvider + Send + Sync + 'static> thrussh::ser let user = user.to_string(); Box::pin(capture_errors(async move { + // username:password combo is used by CI to authenticate to us, + // it does not allow users to authenticate directly. it's + // technically the SSH username that contains both the username + // and password as we don't want an interactive prompt or + // anything like that let mut user = self .gitlab .find_user_by_username_password_combo(&user) .await?; + // if there was no username:password combo given we'll lookup + // the user by the SSH key they're connecting to us with if user.is_none() { user = self .gitlab @@ -305,7 +382,7 @@ impl<'a, U: UserProvider + PackageProvider + Send + Sync + 'static> thrussh::ser self.input_bytes.extend_from_slice(data); Box::pin(capture_errors(async move { - // start building the packfile we're going to send to the user + // build the packfile we're going to send to the user let (commit_hash, packfile_entries) = &*self.build_packfile().await?; while let Some(frame) = self.codec.decode(&mut self.input_bytes)? 
{ diff --git a/src/metadata.rs b/src/metadata.rs index a359e56..906c503 100644 --- a/src/metadata.rs +++ b/src/metadata.rs @@ -45,6 +45,11 @@ pub fn transform( }) } +#[derive(Serialize)] +pub struct CargoConfig { + pub dl: String, +} + #[derive(Serialize, Deserialize, Debug)] pub struct CargoIndexCrateMetadata { name: String, diff --git a/src/protocol/high_level.rs b/src/protocol/high_level.rs index 68be1ed..917b065 100644 --- a/src/protocol/high_level.rs +++ b/src/protocol/high_level.rs @@ -55,7 +55,7 @@ impl GitRepository { directory = d; } else { // TODO: how should we handle this? one of items we tried to - // recurse into was a directory. + // recurse into was a file. anyhow::bail!("attempted to use a file as a directory"); } } @@ -80,9 +80,9 @@ impl GitRepository { /// and also the commit hash so it can be referred to by `ls-ref`s. pub fn commit( mut self, - name: String, - email: String, - message: String, + name: &'static str, + email: &'static str, + message: &'static str, ) -> Result<(HashOutput, Vec), anyhow::Error> { // gets the hash of the entire tree from the root let tree_hash = self diff --git a/src/protocol/low_level.rs b/src/protocol/low_level.rs index c38b5e9..1365b27 100644 --- a/src/protocol/low_level.rs +++ b/src/protocol/low_level.rs @@ -61,7 +61,7 @@ pub struct Commit { pub author: CommitUserInfo, pub committer: CommitUserInfo, // pub gpgsig: &str, - pub message: String, + pub message: &'static str, } impl Commit { @@ -93,8 +93,8 @@ impl Commit { #[derive(Clone, Debug)] pub struct CommitUserInfo { - pub name: String, - pub email: String, + pub name: &'static str, + pub email: &'static str, pub time: time::OffsetDateTime, } -- libgit2 1.7.2