🏡 index : ~doyle/gitlab-cargo-shim.git

author Jordan Doyle <jordan@doyle.la> 2022-03-12 1:35:43.0 +00:00:00
committer Jordan Doyle <jordan@doyle.la> 2022-03-12 6:13:37.0 +00:00:00
commit
be1de75e63d5737228ee2435132ccf1fadd15959 [patch]
tree
68398ad7c4809ceba7f9504ce139f9e70550809d
parent
3727713df362561b753ee785b7c61d07c60f8330
download
be1de75e63d5737228ee2435132ccf1fadd15959.tar.gz

Return valid index to cargo over git, integration with new download gitlab endpoint

https://gitlab.com/gitlab-org/gitlab/-/merge_requests/82663

Diff

 Cargo.lock                          | 116 ++++++++++++++++-
 Cargo.toml                          |   5 +-
 src/git_command_handlers/fetch.rs   |  51 +++++++-
 src/git_command_handlers/ls_refs.rs |  31 ++++-
 src/git_command_handlers/mod.rs     |   2 +-
 src/main.rs                         | 268 +++++++++++++++++++++++++++++++------
 src/metadata.rs                     |  68 +++++++++-
 src/protocol/high_level.rs          |  59 ++++----
 src/protocol/low_level.rs           |  50 +++----
 src/protocol/mod.rs                 |   2 +-
 src/protocol/packet_line.rs         |   2 +-
 src/providers/gitlab.rs             | 173 +++++++++++++++++-------
 src/providers/mod.rs                |  19 ++-
 src/util.rs                         |  32 ++--
 14 files changed, 722 insertions(+), 156 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 20df39b..fec55db 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -31,6 +31,15 @@ dependencies = [
]

[[package]]
name = "ansi_term"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2"
dependencies = [
 "winapi",
]

[[package]]
name = "anyhow"
version = "1.0.53"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -160,6 +169,37 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8"

[[package]]
name = "camino"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f3132262930b0522068049f5870a856ab8affc80c70d08b6ecb785771a6fc23"
dependencies = [
 "serde",
]

[[package]]
name = "cargo-platform"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cbdb825da8a5df079a43676dbe042702f1707b1109f713a01420fbb4cc71fa27"
dependencies = [
 "serde",
]

[[package]]
name = "cargo_metadata"
version = "0.14.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4acbb09d9ee8e23699b9634375c72795d095bf268439da88562cf9b501f181fa"
dependencies = [
 "camino",
 "cargo-platform",
 "semver",
 "serde",
 "serde_json",
]

[[package]]
name = "cc"
version = "1.0.73"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -478,16 +518,19 @@ dependencies = [
 "async-trait",
 "base64",
 "bytes",
 "cargo_metadata",
 "flate2",
 "futures",
 "hex",
 "indexmap",
 "indoc",
 "itoa",
 "parking_lot",
 "parse_link_header",
 "percent-encoding",
 "reqwest",
 "serde",
 "serde_json",
 "sha1",
 "shlex",
 "thrussh",
@@ -495,6 +538,8 @@ dependencies = [
 "time",
 "tokio",
 "tokio-util 0.7.0",
 "tracing",
 "tracing-subscriber",
]

[[package]]
@@ -1169,6 +1214,15 @@ dependencies = [
]

[[package]]
name = "semver"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4a3381e03edd24287172047536f20cabde766e2cd3e65e6b00fb3af51c4f38d"
dependencies = [
 "serde",
]

[[package]]
name = "serde"
version = "1.0.136"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1236,6 +1290,15 @@ dependencies = [
]

[[package]]
name = "sharded-slab"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "900fba806f70c630b0a382d0d825e17a0f19fcd059a2ade1ff237bcddf446b31"
dependencies = [
 "lazy_static",
]

[[package]]
name = "shlex"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1324,6 +1387,15 @@ dependencies = [
]

[[package]]
name = "thread_local"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180"
dependencies = [
 "once_cell",
]

[[package]]
name = "thrussh"
version = "0.33.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1509,16 +1581,54 @@ checksum = "f6c650a8ef0cd2dd93736f033d21cbd1224c5a967aa0c258d00fcf7dafef9b9f"
dependencies = [
 "cfg-if",
 "pin-project-lite",
 "tracing-attributes",
 "tracing-core",
]

[[package]]
name = "tracing-attributes"
version = "0.1.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2e65ce065b4b5c53e73bb28912318cb8c9e9ad3921f1d669eb0e68b4c8143a2b"
dependencies = [
 "proc-macro2",
 "quote",
 "syn",
]

[[package]]
name = "tracing-core"
version = "0.1.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03cfcb51380632a72d3111cb8d3447a8d908e577d31beeac006f836383d29a23"
dependencies = [
 "lazy_static",
 "valuable",
]

[[package]]
name = "tracing-log"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6923477a48e41c1951f1999ef8bb5a3023eb723ceadafe78ffb65dc366761e3"
dependencies = [
 "lazy_static",
 "log",
 "tracing-core",
]

[[package]]
name = "tracing-subscriber"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e0ab7bdc962035a87fba73f3acca9b8a8d0034c2e6f60b84aeaaddddc155dce"
dependencies = [
 "ansi_term",
 "sharded-slab",
 "smallvec",
 "thread_local",
 "tracing-core",
 "tracing-log",
]

[[package]]
@@ -1573,6 +1683,12 @@ dependencies = [
]

[[package]]
name = "valuable"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"

[[package]]
name = "vcpkg"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/Cargo.toml b/Cargo.toml
index c06303a..8352f2d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -11,6 +11,7 @@ async-trait = "0.1"
arrayvec = "0.7"
base64 = "0.13"
bytes = "1.1"
cargo_metadata = "0.14"
flate2 = "1.0"
futures = "0.3"
hex = "0.4"
@@ -18,11 +19,15 @@ itoa = "1.0"
indexmap = "1.8"
indoc = "1.0"
parse_link_header = "0.3"
parking_lot = "0.12"
percent-encoding = "2.1"
reqwest = { version = "0.11", features = ["json"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1"
sha1 = "0.10"
shlex = "1.1"
tracing = "0.1"
tracing-subscriber = "0.3"
thrussh = "0.33"
thrussh-keys = "0.21"
time = "0.3"
diff --git a/src/git_command_handlers/fetch.rs b/src/git_command_handlers/fetch.rs
new file mode 100644
index 0000000..0034e9f
--- /dev/null
+++ b/src/git_command_handlers/fetch.rs
@@ -0,0 +1,51 @@
use bytes::Bytes;
use thrussh::{server::Session, ChannelId};

use crate::{
    protocol::{
        low_level::{PackFile, PackFileEntry},
        packet_line::PktLine,
    },
    Handler, PackageProvider, UserProvider,
};

/// Responds to a `fetch` command by sending the complete generated packfile
/// to the client and then closing the channel.
///
/// `metadata` contains the argument lines the client sent along with the
/// command, and `packfile_entries` are the objects that make up the packfile
/// that is written back over the sideband data channel.
///
/// # Errors
///
/// Returns an error if any packet fails to be written to the handler's
/// output buffer.
pub fn handle<U: UserProvider + PackageProvider + Send + Sync + 'static>(
    handle: &mut Handler<U>,
    session: &mut Session,
    channel: ChannelId,
    metadata: Vec<Bytes>,
    packfile_entries: Vec<PackFileEntry>,
) -> Result<(), anyhow::Error> {
    // a `done` line in the metadata means the client already knows that no
    // negotiation is needed and simply wants everything we have.
    let negotiation_finished = metadata.iter().any(|line| line.as_ref() == b"done");

    // without `done` the client expects to negotiate common commits; we never
    // negotiate, so we acknowledge nothing in common, declare ourselves ready
    // and carry on to the packfile section.
    if !negotiation_finished {
        for packet in [
            PktLine::Data(b"acknowledgments\n"),
            PktLine::Data(b"ready\n"),
            PktLine::Delimiter,
        ] {
            handle.write(packet)?;
        }
    }

    // section header announcing that packfile data follows
    // (a sideband welcome message could be sent at this point; it is
    // currently disabled)
    handle.write(PktLine::Data(b"packfile\n"))?;

    // the whole packfile goes out as sideband data, terminated by a flush
    handle.write(PktLine::SidebandData(PackFile::new(packfile_entries)))?;
    handle.write(PktLine::Flush)?;
    handle.flush(session, channel);

    // report a successful exit to the client and tear the channel down
    session.exit_status_request(channel, 0);
    session.eof(channel);
    session.close(channel);

    Ok(())
}
diff --git a/src/git_command_handlers/ls_refs.rs b/src/git_command_handlers/ls_refs.rs
new file mode 100644
index 0000000..19da2de
--- /dev/null
+++ b/src/git_command_handlers/ls_refs.rs
@@ -0,0 +1,31 @@
//! [ls-refs][lsr] is sent by the client when it wants to see which refs we have
//! on the server. We generate our commits on the fly, so we just tell the client
//! that we have a master branch with whatever the generated commit hash is.
//!
//! [lsr]: https://git-scm.com/docs/protocol-v2/2.19.0#_ls_refs

use bytes::Bytes;
use thrussh::{server::Session, ChannelId};

use crate::{
    protocol::{low_level::HashOutput, packet_line::PktLine},
    Handler, PackageProvider, UserProvider,
};

/// Responds to an `ls-refs` command by advertising a single `HEAD` ref.
///
/// The advertised ref carries the hex-encoded `commit_hash` and is reported
/// as a symref to `refs/heads/master`. The metadata lines sent by the client
/// are accepted but not inspected.
///
/// # Errors
///
/// Returns an error if a packet fails to be written to the handler's output
/// buffer.
pub fn handle<U: UserProvider + PackageProvider + Send + Sync + 'static>(
    handle: &mut Handler<U>,
    session: &mut Session,
    channel: ChannelId,
    _metadata: Vec<Bytes>,
    commit_hash: &HashOutput,
) -> Result<(), anyhow::Error> {
    let hex_hash = hex::encode(commit_hash);
    let advertisement = format!("{} HEAD symref-target:refs/heads/master\n", hex_hash);

    // one data line for the ref, then a flush to end the listing
    handle.write(PktLine::Data(advertisement.as_bytes()))?;
    handle.write(PktLine::Flush)?;
    handle.flush(session, channel);

    Ok(())
}
diff --git a/src/git_command_handlers/mod.rs b/src/git_command_handlers/mod.rs
new file mode 100644
index 0000000..8406cdb
--- /dev/null
+++ b/src/git_command_handlers/mod.rs
@@ -0,0 +1,2 @@
pub mod fetch;
pub mod ls_refs;
diff --git a/src/main.rs b/src/main.rs
index 4b03ddb..ade64fc 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,16 +1,32 @@
pub mod git_command_handlers;
pub mod metadata;
pub mod protocol;
pub mod providers;
pub mod util;

use crate::{providers::{gitlab::Gitlab, PackageProvider, Release, User, UserProvider}, protocol::{codec::Encoder, packet_line::PktLine}};
use crate::metadata::CargoIndexCrateMetadata;
use crate::protocol::low_level::{HashOutput, PackFileEntry};
use crate::util::get_crate_folder;
use crate::{
    protocol::{
        codec::{Encoder, GitCodec},
        high_level::GitRepository,
        packet_line::PktLine,
    },
    providers::{gitlab::Gitlab, PackageProvider, Release, User, UserProvider},
};
use anyhow::anyhow;
use bytes::{BufMut, Bytes, BytesMut};
use futures::Future;
use std::{net::SocketAddr, pin::Pin, sync::Arc, fmt::Write};
use bytes::BytesMut;
use thrussh::{server::{Auth, Session}, ChannelId, CryptoVec};
use parking_lot::RwLock;
use std::{borrow::Cow, collections::HashMap, fmt::Write, net::SocketAddr, pin::Pin, sync::Arc};
use thrussh::{
    server::{Auth, Session},
    ChannelId, CryptoVec,
};
use thrussh_keys::key::PublicKey;
use tokio::task::JoinHandle;
use tokio_util::codec::Encoder as CodecEncoder;
use crate::protocol::high_level::GitRepository;
use tokio_util::{codec::Decoder, codec::Encoder as CodecEncoder};
use tracing::error;

const AGENT: &str = concat!(
    "agent=",
@@ -22,6 +38,8 @@ const AGENT: &str = concat!(

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    tracing_subscriber::fmt::init();

    let ed25519_key = thrussh_keys::key::KeyPair::generate_ed25519().unwrap();

    let thrussh_config = Arc::new(thrussh::server::Config {
@@ -32,12 +50,23 @@ async fn main() -> anyhow::Result<()> {

    let gitlab = Arc::new(Gitlab::new()?);

    thrussh::server::run(thrussh_config, "127.0.0.1:2222", Server { gitlab }).await?;
    thrussh::server::run(
        thrussh_config,
        "127.0.0.1:2210",
        Server {
            gitlab,
            metadata_cache: Arc::new(Default::default()),
        },
    )
    .await?;
    Ok(())
}

/// Shared, lock-protected map from a [`MetadataCacheKey`] (checksum, crate name
/// and crate version) to the already-transformed index metadata for that release.
type MetadataCache = Arc<RwLock<HashMap<MetadataCacheKey<'static>, Arc<CargoIndexCrateMetadata>>>>;

/// State shared by the SSH server; each new connection's `Handler` receives a
/// clone of both fields.
struct Server<U: UserProvider + PackageProvider + Send + Sync + 'static> {
    /// The user/package provider backing this server.
    gitlab: Arc<U>,
    /// Cache of transformed crate metadata, shared between handlers.
    metadata_cache: MetadataCache,
}

impl<U: UserProvider + PackageProvider + Send + Sync + 'static> thrussh::server::Server
@@ -47,25 +76,31 @@ impl<U: UserProvider + PackageProvider + Send + Sync + 'static> thrussh::server:

    fn new(&mut self, _peer_addr: Option<SocketAddr>) -> Self::Handler {
        Handler {
            codec: GitCodec::default(),
            gitlab: self.gitlab.clone(),
            user: None,
            group: None,
            fetcher_future: None,
            // fetcher_future: None,
            input_bytes: BytesMut::new(),
            output_bytes: BytesMut::new(),
            is_git_protocol_v2: false
            is_git_protocol_v2: false,
            metadata_cache: self.metadata_cache.clone(),
            packfile_cache: None,
        }
    }
}

struct Handler<U: UserProvider + PackageProvider + Send + Sync + 'static> {
pub struct Handler<U: UserProvider + PackageProvider + Send + Sync + 'static> {
    codec: GitCodec,
    gitlab: Arc<U>,
    user: Option<User>,
    group: Option<String>,
    fetcher_future: Option<JoinHandle<anyhow::Result<Vec<Release>>>>,
    // fetcher_future: Option<JoinHandle<anyhow::Result<Vec<Release>>>>,
    input_bytes: BytesMut,
    output_bytes: BytesMut,
    is_git_protocol_v2: bool,
    metadata_cache: MetadataCache,
    packfile_cache: Option<(HashOutput, Vec<PackFileEntry>)>,
}

impl<U: UserProvider + PackageProvider + Send + Sync + 'static> Handler<U> {
@@ -88,9 +123,126 @@ impl<U: UserProvider + PackageProvider + Send + Sync + 'static> Handler<U> {
        );
    }

    async fn fetch_releases(&self, group: &str) -> anyhow::Result<Vec<Release>> {
    /// Fetches all releases in `group` on behalf of the authenticated user and
    /// buckets them per crate.
    ///
    /// The returned map is keyed on the provider's crate path together with
    /// the release name; each value holds the releases that share that key, in
    /// the order the provider returned them.
    ///
    /// # Errors
    ///
    /// Fails if no user is available on this handler or if the provider
    /// request fails.
    async fn fetch_releases_by_crate(
        &self,
        group: &str,
    ) -> anyhow::Result<HashMap<(U::CratePath, String), Vec<Release>>> {
        let user = self.user()?;

        let releases = self
            .gitlab
            .clone()
            .fetch_releases_for_group(group, user)
            .await?;

        let mut grouped: HashMap<(U::CratePath, String), Vec<Release>> = HashMap::new();

        for (path, release) in releases {
            let key = (path, release.name.clone());
            grouped.entry(key).or_default().push(release);
        }

        Ok(grouped)
    }

    /// Returns the cargo index metadata for a single crate release, consulting
    /// the shared metadata cache before asking the provider.
    ///
    /// * `path` - provider-specific location of the crate, passed through to
    ///   `fetch_metadata_for_release`.
    /// * `checksum` - checksum of the release; part of the cache key and
    ///   embedded in the resulting index entry.
    /// * `crate_name` - name of the crate to pick out of the fetched
    ///   `cargo metadata` output.
    /// * `crate_version` - version of the release to fetch metadata for.
    ///
    /// On a cache miss the fetched metadata is transformed into the index
    /// format, stored in the cache and returned.
    ///
    /// # Errors
    ///
    /// Fails if the provider request fails, or if the fetched metadata has no
    /// package named `crate_name`.
    async fn fetch_metadata(
        &self,
        path: &U::CratePath,
        checksum: &str,
        crate_name: &str,
        crate_version: &str,
    ) -> anyhow::Result<Arc<CargoIndexCrateMetadata>> {
        // the key only borrows the inputs here; it is converted to an owned
        // key further down, and only if we actually need to insert it
        let key = MetadataCacheKey {
            checksum: checksum.into(),
            crate_name: crate_name.into(),
            crate_version: crate_version.into(),
        };

        // scoped so the read guard is released before we await below
        {
            let reader = self.metadata_cache.read();
            if let Some(cache) = reader.get(&key) {
                return Ok(cache.clone());
            }
        }

        let metadata = self
            .gitlab
            .clone()
            .fetch_metadata_for_release(path, crate_version)
            .await?;

        // transform the `cargo metadata` output to the cargo index
        // format
        let cksum = checksum.to_string();
        let metadata = metadata::transform(metadata, crate_name, cksum)
            .map(Arc::new)
            .ok_or_else(|| {
                anyhow!("the supplied metadata.json did not contain the released crate")
            })?;

        // scoped so the write guard is held only for the insert itself
        {
            let mut writer = self.metadata_cache.write();
            writer.insert(key.into_owned(), metadata.clone());
        }

        Ok(metadata)
    }

    async fn build_packfile(&mut self) -> anyhow::Result<(HashOutput, Vec<PackFileEntry>)> {
        if let Some(packfile_cache) = &self.packfile_cache {
            // TODO
            return Ok(packfile_cache.clone());
        }

        let mut packfile = GitRepository::default();

        let user = self.user()?;
        self.gitlab.clone().fetch_releases_for_group(group, user.clone()).await
        let group = self.group()?;

        let token = self.gitlab.fetch_token_for_user(user).await?;

        let config_json = Bytes::from(format!(
            "{{\"dl\": \"{}\"}}",
            self.gitlab.cargo_dl_uri(group, &token)
        ));

        // write config.json to the root of the repo
        packfile.insert(vec![], "config.json".to_string(), config_json)?;

        // fetch the releases for every project within the given group
        let releases_by_crate = self.fetch_releases_by_crate(group).await?;

        let mut buffer = BytesMut::new();

        for ((crate_path, crate_name), releases) in &releases_by_crate {
            for release in releases {
                let checksum = &release.checksum;
                let version = &release.version;

                // parses the `cargo metadata` stored in the release, which
                // should be stored under `metadata.json`.
                let meta = self
                    .fetch_metadata(&crate_path, &checksum, &crate_name, &version)
                    .await?;

                buffer.extend_from_slice(&serde_json::to_vec(&*meta).unwrap());
                buffer.put_u8(b'\n');
            }

            packfile.insert(
                get_crate_folder(&crate_name),
                crate_name.to_string(),
                buffer.split().freeze(),
            )?;
        }

        let packfile = packfile.commit(
            "test".to_string(),
            "test@test.com".to_string(),
            "test".to_string(),
        )?;

        self.packfile_cache = Some(packfile.clone());

        Ok(packfile)
    }
}

@@ -131,41 +283,66 @@ impl<'a, U: UserProvider + PackageProvider + Send + Sync + 'static> thrussh::ser
            if user.is_none() {
                user = self
                    .gitlab
                    .find_user_by_ssh_key(&util::format_fingerprint(&fingerprint)?)
                    .find_user_by_ssh_key(&util::format_fingerprint(&fingerprint))
                    .await?;
            }

            self.user = Some(user.ok_or(anyhow::anyhow!("failed to find user"))?);

            self.finished_auth(Auth::Accept).await
            if let Some(user) = user {
                self.user = Some(user);
                self.finished_auth(Auth::Accept).await
            } else {
                self.finished_auth(Auth::Reject).await
            }
        })
    }

    fn data(mut self, channel: ChannelId, data: &[u8], mut session: Session) -> Self::FutureUnit {
        self.input_bytes.extend_from_slice(data);

        Box::pin(
            async move {
                while let Some(frame) = self.codec.decode(&mut self.input_bytes)? {
                    // if the client flushed without giving us a command, we're expected to close
                    // the connection or else the client will just hang
                    if frame.command.is_empty() {
                        session.exit_status_request(channel, 0);
                        session.eof(channel);
                        session.close(channel);
                        return Ok((self, session));
                    }

                    let user = self.user()?;
                    let group = self.group()?;

                    // start building the packfile we're going to send to the user
                    let mut packfile = GitRepository::default();
        Box::pin(async move {
            // start building the packfile we're going to send to the user
            let (commit_hash, packfile_entries) = self.build_packfile().await?;

            while let Some(frame) = self.codec.decode(&mut self.input_bytes)? {
                // if the client flushed without giving us a command, we're expected to close
                // the connection or else the client will just hang
                if frame.command.is_empty() {
                    session.exit_status_request(channel, 0);
                    session.eof(channel);
                    session.close(channel);
                    return Ok((self, session));
                }

                Ok((self, session))
                match frame.command.as_ref() {
                    b"command=ls-refs" => {
                        git_command_handlers::ls_refs::handle(
                            &mut self,
                            &mut session,
                            channel,
                            frame.metadata,
                            &commit_hash,
                        )?;
                    }
                    b"command=fetch" => {
                        git_command_handlers::fetch::handle(
                            &mut self,
                            &mut session,
                            channel,
                            frame.metadata,
                            packfile_entries.clone(),
                        )?;
                    }
                    v => {
                        error!(
                            "Client sent unknown command, ignoring command {}",
                            std::str::from_utf8(v).unwrap_or("invalid utf8")
                        );
                    }
                }
            }
        )

            Ok((self, session))
        })
    }

    fn env_request(
@@ -265,3 +442,20 @@ impl<'a, U: UserProvider + PackageProvider + Send + Sync + 'static> thrussh::ser
        })
    }
}

/// Key identifying one crate release in the metadata cache.
///
/// The fields are `Cow`s so that a key can be built from borrowed strings for
/// a lookup and only turned into an owned key when it has to be stored.
#[derive(Hash, Debug, PartialEq, Eq)]
struct MetadataCacheKey<'a> {
    checksum: Cow<'a, str>,
    crate_name: Cow<'a, str>,
    crate_version: Cow<'a, str>,
}

impl MetadataCacheKey<'_> {
    /// Consumes the key and returns an equivalent one that owns all of its
    /// strings, so it no longer borrows from the caller.
    pub fn into_owned(self) -> MetadataCacheKey<'static> {
        let Self {
            checksum,
            crate_name,
            crate_version,
        } = self;

        MetadataCacheKey {
            checksum: Cow::Owned(checksum.into_owned()),
            crate_name: Cow::Owned(crate_name.into_owned()),
            crate_version: Cow::Owned(crate_version.into_owned()),
        }
    }
}
diff --git a/src/metadata.rs b/src/metadata.rs
new file mode 100644
index 0000000..2a75669
--- /dev/null
+++ b/src/metadata.rs
@@ -0,0 +1,68 @@
use cargo_metadata::{Package};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

/// Transforms metadata from `cargo metadata` to the standard one-line JSON used in cargo registries.
///
/// The first package in `metadata` whose name equals `crate_name` is converted;
/// `cksum` is stored verbatim as the checksum of the resulting index entry.
/// Returns `None` if `metadata` contains no package with that name.
///
/// For renamed dependencies the index entry's `name` is the rename and
/// `package` carries the real package name; for all other dependencies `name`
/// is the package name and `package` is `None`. Dependencies without an
/// explicit registry are pinned to the crates.io index.
///
/// https://github.com/rust-lang/cargo/blob/3bc0e6d83f7f5da0161ce445f8864b0b639776a9/src/cargo/ops/registry.rs#L183
pub fn transform(
    metadata: cargo_metadata::Metadata,
    crate_name: &str,
    cksum: String,
) -> Option<CargoIndexCrateMetadata> {
    let package: Package = metadata
        .packages
        .into_iter()
        .find(|v| v.name == crate_name)?;

    Some(CargoIndexCrateMetadata {
        name: package.name,
        vers: package.version.to_string(),
        deps: package
            .dependencies
            .into_iter()
            .map(|v| {
                // `cargo metadata` reports the real package name in `name` and
                // the alias in `rename`, whereas the index wants the alias in
                // `name` and the real package name in `package`.
                let (name, original_package) = match v.rename {
                    Some(rename) => (rename, Some(v.name)),
                    None => (v.name, None),
                };

                CargoIndexCrateMetadataDependency {
                    name,
                    req: v.req.to_string(),
                    features: v.features,
                    optional: v.optional,
                    default_features: v.uses_default_features,
                    target: v.target.map(|v| v.to_string()),
                    kind: v.kind.to_string(),
                    // only allocate the default URL when no registry was given
                    registry: Some(v.registry.unwrap_or_else(|| {
                        "https://github.com/rust-lang/crates.io-index.git".to_string()
                    })),
                    package: original_package,
                }
            })
            .collect(),
        cksum,
        features: package.features,
        yanked: false,
        links: package.links,
    })
}

/// A single crate release in the form produced by [`transform`], ready to be
/// serialized as one entry of a cargo registry index.
#[derive(Serialize, Deserialize, Debug)]
pub struct CargoIndexCrateMetadata {
    /// Name of the package.
    name: String,
    /// Version of the package, rendered as a string.
    vers: String,
    /// The package's dependencies.
    deps: Vec<CargoIndexCrateMetadataDependency>,
    /// Checksum of the release, as supplied to [`transform`].
    cksum: String,
    /// Feature map of the package: feature name to the list of entries it enables.
    features: HashMap<String, Vec<String>>,
    /// Whether the release is yanked; [`transform`] always sets this to `false`.
    yanked: bool,
    /// The package's `links` value, if any.
    links: Option<String>,
}

/// A single dependency of a [`CargoIndexCrateMetadata`] entry.
#[derive(Serialize, Deserialize, Debug)]
pub struct CargoIndexCrateMetadataDependency {
    /// Name of the dependency.
    name: String,
    /// Version requirement, rendered as a string.
    req: String,
    /// Features enabled for this dependency.
    features: Vec<String>,
    /// Whether the dependency is optional.
    optional: bool,
    /// Whether the dependency's default features are used.
    default_features: bool,
    /// Target the dependency is specific to, rendered as a string, if any.
    target: Option<String>,
    /// Kind of the dependency, rendered as a string.
    kind: String,
    /// Index URL of the registry the dependency comes from; [`transform`]
    /// fills in the crates.io index when the dependency names no registry.
    registry: Option<String>,
    /// `None` unless the dependency is renamed.
    package: Option<String>,
}
diff --git a/src/protocol/high_level.rs b/src/protocol/high_level.rs
index b560036..3f699df 100644
--- a/src/protocol/high_level.rs
+++ b/src/protocol/high_level.rs
@@ -6,7 +6,7 @@
//! for our purposes because `cargo` will `git pull --force` from our Git
//! server, allowing us to ignore any history the client may have.

use arrayvec::ArrayVec;
use bytes::Bytes;
use indexmap::IndexMap;

use super::low_level::{
@@ -18,25 +18,25 @@ use super::low_level::{
/// Builds a whole packfile containing files, directories and commits - essentially
/// building out a full Git repository in memory.
#[derive(Default, Debug)]
pub struct GitRepository<'a> {
pub struct GitRepository {
    /// A map containing all the blobs and their corresponding hashes so they're
    /// not inserted more than once for any files in the whole tree with the same
    /// content.
    packfile_entries: IndexMap<HashOutput, PackFileEntry<'a>>,
    packfile_entries: IndexMap<HashOutput, PackFileEntry>,
    /// An in-progress `Tree` currently being built out, the tree refers to items
    /// in `file_entries` by hash.
    tree: Tree<'a>,
    tree: Tree,
}

impl<'a> GitRepository<'a> {
impl GitRepository {
    /// Inserts a file into the repository, writing a file to the path
    /// `path/to/my-file` would require a `path` of `["path", "to"]`
    /// and a `file` of `"my-file"`.
    pub fn insert<const N: usize>(
    pub fn insert(
        &mut self,
        path: ArrayVec<&'a str, N>,
        file: &'a str,
        content: &'a [u8],
        path: Vec<String>,
        file: String,
        content: Bytes,
    ) -> Result<(), anyhow::Error> {
        // we'll initialise the directory to the root of the tree, this means
        // if a path isn't specified we'll just write it to the root directory
@@ -79,13 +79,15 @@ impl<'a> GitRepository<'a> {
    /// all the files currently in the `tree`, returning all the packfile entries
    /// and also the commit hash so it can be referred to by `ls-ref`s.
    pub fn commit(
        &'a mut self,
        name: &'static str,
        email: &'static str,
        message: &'static str,
    ) -> Result<(HashOutput, Vec<PackFileEntry<'a>>), anyhow::Error> {
        mut self,
        name: String,
        email: String,
        message: String,
    ) -> Result<(HashOutput, Vec<PackFileEntry>), anyhow::Error> {
        // gets the hash of the entire tree from the root
        let tree_hash = self.tree.to_packfile_entries(&mut self.packfile_entries)?;
        let tree_hash = self
            .tree
            .into_packfile_entries(&mut self.packfile_entries)?;

        // build the commit using the given inputs
        let commit_user = CommitUserInfo {
@@ -96,7 +98,7 @@ impl<'a> GitRepository<'a> {

        let commit = PackFileEntry::Commit(Commit {
            tree: tree_hash,
            author: commit_user,
            author: commit_user.clone(),
            committer: commit_user,
            message,
        });
@@ -105,34 +107,33 @@ impl<'a> GitRepository<'a> {
        let commit_hash = commit.hash()?;
        self.packfile_entries.insert(commit_hash, commit);

        // TODO: make PackFileEntry copy and remove this clone
        Ok((
            commit_hash,
            self.packfile_entries.values().cloned().collect(),
            self.packfile_entries.into_iter().map(|(_, v)| v).collect(),
        ))
    }
}

/// An in-progress tree builder, containing file hashes along with their names or nested trees
#[derive(Default, Debug)]
struct Tree<'a>(IndexMap<&'a str, Box<TreeItem<'a>>>);
struct Tree(IndexMap<String, Box<TreeItem>>);

impl<'a> Tree<'a> {
impl Tree {
    /// Recursively writes the whole tree out to the given `pack_file`,
    /// the tree contains pointers to (hashes of) files contained within a
    /// directory, and pointers to other directories.
    fn to_packfile_entries(
        &self,
        pack_file: &mut IndexMap<HashOutput, PackFileEntry<'a>>,
    fn into_packfile_entries(
        self,
        pack_file: &mut IndexMap<HashOutput, PackFileEntry>,
    ) -> Result<HashOutput, anyhow::Error> {
        let mut tree = Vec::with_capacity(self.0.len());

        for (name, item) in &self.0 {
            tree.push(match item.as_ref() {
        for (name, item) in self.0 {
            tree.push(match *item {
                TreeItem::Blob(hash) => LowLevelTreeItem {
                    kind: TreeItemKind::File,
                    name,
                    hash: *hash,
                    hash,
                },
                TreeItem::Tree(tree) => LowLevelTreeItem {
                    kind: TreeItemKind::Directory,
@@ -140,7 +141,7 @@ impl<'a> Tree<'a> {
                    // we're essentially working through our tree from the bottom up,
                    // so we can grab the hash of each directory along the way and
                    // reference it from the parent directory
                    hash: tree.to_packfile_entries(pack_file)?,
                    hash: tree.into_packfile_entries(pack_file)?,
                },
            });
        }
@@ -157,9 +158,9 @@ impl<'a> Tree<'a> {

/// An item within a `Tree`, this could be a file blob or another directory.
#[derive(Debug)]
enum TreeItem<'a> {
enum TreeItem {
    /// Refers to a file by hash
    Blob(HashOutput),
    /// Refers to a nested directory
    Tree(Tree<'a>),
    Tree(Tree),
}
diff --git a/src/protocol/low_level.rs b/src/protocol/low_level.rs
index 512e98c..8e9eb38 100644
--- a/src/protocol/low_level.rs
+++ b/src/protocol/low_level.rs
@@ -1,4 +1,4 @@
use bytes::{BufMut, BytesMut};
use bytes::{BufMut, Bytes, BytesMut};
use flate2::{write::ZlibEncoder, Compression};
use sha1::Digest;
use std::{convert::TryInto, fmt::Write, io::Write as IoWrite};
@@ -11,13 +11,13 @@ pub type HashOutput = [u8; 20];
// which is sort of used to make sure you're getting the start of the
// packfile correctly. This is followed by a 4-byte packfile version
// number and then a 4-byte number of entries in that file.
pub struct PackFile<'a> {
    entries: Vec<PackFileEntry<'a>>,
pub struct PackFile {
    entries: Vec<PackFileEntry>,
}

impl<'a> PackFile<'a> {
impl PackFile {
    #[must_use]
    pub fn new(entries: Vec<PackFileEntry<'a>>) -> Self {
    pub fn new(entries: Vec<PackFileEntry>) -> Self {
        Self { entries }
    }

@@ -54,17 +54,17 @@ impl<'a> PackFile<'a> {
    }
}

#[derive(Debug, Clone, Copy)]
pub struct Commit<'a> {
#[derive(Debug, Clone)]
pub struct Commit {
    pub tree: HashOutput,
    // pub parent: [u8; 20],
    pub author: CommitUserInfo<'a>,
    pub committer: CommitUserInfo<'a>,
    pub author: CommitUserInfo,
    pub committer: CommitUserInfo,
    // pub gpgsig: &str,
    pub message: &'a str,
    pub message: String,
}

impl Commit<'_> {
impl Commit {
    fn encode_to(&self, out: &mut BytesMut) -> Result<(), anyhow::Error> {
        let mut tree_hex = [0_u8; 20 * 2];
        hex::encode_to_slice(self.tree, &mut tree_hex)?;
@@ -91,14 +91,14 @@ impl Commit<'_> {
    }
}

#[derive(Copy, Clone, Debug)]
pub struct CommitUserInfo<'a> {
    pub name: &'a str,
    pub email: &'a str,
#[derive(Clone, Debug)]
pub struct CommitUserInfo {
    pub name: String,
    pub email: String,
    pub time: time::OffsetDateTime,
}

impl CommitUserInfo<'_> {
impl CommitUserInfo {
    fn encode(&self) -> String {
        // TODO: remove `format!`, `format_args!`?
        format!(
@@ -138,15 +138,15 @@ impl TreeItemKind {
    }
}

#[derive(Debug, Copy, Clone)]
pub struct TreeItem<'a> {
#[derive(Debug, Clone)]
pub struct TreeItem {
    pub kind: TreeItemKind,
    pub name: &'a str,
    pub name: String,
    pub hash: HashOutput,
}

// `[mode] [name]\0[hash]`
impl TreeItem<'_> {
impl TreeItem {
    fn encode_to(&self, out: &mut BytesMut) -> Result<(), anyhow::Error> {
        out.write_str(self.kind.mode())?;
        write!(out, " {}\0", self.name)?;
@@ -161,7 +161,7 @@ impl TreeItem<'_> {
}

#[derive(Debug, Clone)] // could be copy but Vec<TreeItem<'a>>
pub enum PackFileEntry<'a> {
pub enum PackFileEntry {
    // jordan@Jordans-MacBook-Pro-2 0d % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - f5/473259d9674ed66239766a013f96a3550374e3 | gzip -dc
    // commit 1068tree 0d586b48bc42e8591773d3d8a7223551c39d453c
    // parent c2a862612a14346ae95234f26efae1ee69b5b7a9
@@ -185,20 +185,20 @@ pub enum PackFileEntry<'a> {
    // -----END PGP SIGNATURE-----
    //
    // test
    Commit(Commit<'a>),
    Commit(Commit),
    // jordan@Jordans-MacBook-Pro-2 0d % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - 0d/586b48bc42e8591773d3d8a7223551c39d453c | gzip -dc
    // tree 20940000 .cargo���CYy��Ve�������100644 .gitignore�K��_ow�]����4�n�ݺ100644 Cargo.lock�7�3-�?/��
    // kt��c0C�100644 Cargo.toml�6�&(��]\8@�SHA�]f40000 src0QW��ƅ���b[�!�S&N�100644 test�G2Y�gN�b9vj?��Ut�
    Tree(Vec<TreeItem<'a>>),
    Tree(Vec<TreeItem>),
    // jordan@Jordans-MacBook-Pro-2 objects % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - f5/473259d9674ed66239766a013f96a3550374e3| gzip -dc
    // blob 23try and find me in .git
    Blob(&'a [u8]),
    Blob(Bytes),
    // Tag,
    // OfsDelta,
    // RefDelta,
}

impl PackFileEntry<'_> {
impl PackFileEntry {
    fn write_header(&self, buf: &mut BytesMut) {
        let mut size = self.uncompressed_size();

diff --git a/src/protocol/mod.rs b/src/protocol/mod.rs
index ba28159..5e11618 100644
--- a/src/protocol/mod.rs
+++ b/src/protocol/mod.rs
@@ -1,4 +1,4 @@
pub mod codec;
pub mod high_level;
pub mod low_level;
pub mod codec;
pub mod packet_line;
diff --git a/src/protocol/packet_line.rs b/src/protocol/packet_line.rs
index e469001..81f98ee 100644
--- a/src/protocol/packet_line.rs
+++ b/src/protocol/packet_line.rs
@@ -8,7 +8,7 @@ pub enum PktLine<'a> {
    Data(&'a [u8]),
    /// Similar to a data packet, but used during packfile sending to indicate this
    /// packet is a block of data by appending a byte containing the u8 `1`.
    SidebandData(PackFile<'a>),
    SidebandData(PackFile),
    /// Similar to a data packet, but used during packfile sending to indicate this
    /// packet is a status message by appending a byte containing the u8 `2`.
    SidebandMsg(&'a [u8]),
diff --git a/src/providers/gitlab.rs b/src/providers/gitlab.rs
index 49c6508..3d39ab8 100644
--- a/src/providers/gitlab.rs
+++ b/src/providers/gitlab.rs
@@ -3,9 +3,13 @@ use async_trait::async_trait;
use futures::{stream::FuturesUnordered, StreamExt, TryStreamExt};
use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC};
use reqwest::header;
use serde::Deserialize;
use serde::{Deserialize, Serialize};
use std::sync::Arc;

// NOTE(review): the endpoint and token below are hard-coded. `PAT` is sent as the
// `PRIVATE-TOKEN` default header on every request made by `Gitlab::new`'s client and is also
// appended to metadata URIs as a `private_token` query parameter. These look like local
// development values; they should be loaded from configuration, and any token that has been
// committed should be treated as compromised and rotated.
const GITLAB_API_ENDPOINT: &str = "http://127.0.0.1:3000";
// const PAT: &str = "glpat-saSjc4srMhxAA-qDp8F8";
const PAT: &str = "X994NFZjTy1ZYbsCwTLK";

pub struct Gitlab {
    client: reqwest::Client,
    base_url: String,
@@ -14,36 +18,18 @@ pub struct Gitlab {
impl Gitlab {
    pub fn new() -> anyhow::Result<Self> {
        let mut headers = header::HeaderMap::new();
        headers.insert(
            "PRIVATE-TOKEN",
            header::HeaderValue::from_static("token"),
        );
        headers.insert("PRIVATE-TOKEN", header::HeaderValue::from_static(PAT));

        Ok(Self {
            client: reqwest::ClientBuilder::new()
                .default_headers(headers)
                .build()?,
            base_url: "https://127.0.0.1/api/v4".to_string(),
            base_url: format!("{}/api/v4", GITLAB_API_ENDPOINT),
        })
    }

    pub async fn get_impersonation_token_for(&self, user: &User) -> anyhow::Result<String> {
        let impersonation_token: GitlabImpersonationTokenResponse = self
            .client
            .get(format!(
                "{}/users/{}/impersonation_tokens",
                self.base_url, user.id
            ))
            .body(format!("name={};scopes=api", env!("CARGO_PKG_NAME")))
            .send()
            .await?
            .json()
            .await?;

        Ok(impersonation_token.token)
    }
}

// TODO: errors are not yet handled, they're returned as {"error": "abc"}
#[async_trait]
impl super::UserProvider for Gitlab {
    async fn find_user_by_username_password_combo(
@@ -51,7 +37,10 @@ impl super::UserProvider for Gitlab {
        username_password: &str,
    ) -> anyhow::Result<Option<User>> {
        let mut splitter = username_password.splitn(2, ':');
        let (username, password) = (splitter.next().unwrap(), splitter.next().unwrap());
        let (username, password) = match (splitter.next(), splitter.next()) {
            (Some(username), Some(password)) => (username, password),
            _ => return Ok(None),
        };

        if username == "gitlab-ci-token" {
            let res: GitlabJobResponse = self
@@ -77,7 +66,8 @@ impl super::UserProvider for Gitlab {
            .client
            .get(format!(
                "{}/keys?fingerprint={}",
                self.base_url, fingerprint
                self.base_url,
                utf8_percent_encode(fingerprint, NON_ALPHANUMERIC)
            ))
            .send()
            .await?
@@ -88,19 +78,38 @@ impl super::UserProvider for Gitlab {
            username: u.username,
        }))
    }

    /// Creates a new impersonation token for `user` and returns its value.
    ///
    /// Issues a `POST` to `{base_url}/users/{id}/impersonation_tokens` with a JSON body that
    /// names the token after this crate and requests the `api` scope.
    ///
    /// # Errors
    ///
    /// Returns an error if the request cannot be sent, if GitLab answers with a 4xx/5xx
    /// status, or if the response body cannot be deserialized.
    async fn fetch_token_for_user(&self, user: &User) -> anyhow::Result<String> {
        let impersonation_token: GitlabImpersonationTokenResponse = self
            .client
            .post(format!(
                "{}/users/{}/impersonation_tokens",
                self.base_url, user.id
            ))
            .json(&GitlabImpersonationTokenRequest {
                name: env!("CARGO_PKG_NAME"),
                scopes: vec!["api"],
            })
            .send()
            .await?
            // Report HTTP failures as such instead of letting GitLab's `{"error": ...}`
            // payload surface as an unrelated JSON decoding error.
            .error_for_status()?
            .json()
            .await?;

        Ok(impersonation_token.token)
    }
}

#[async_trait]
impl super::PackageProvider for Gitlab {
    type CratePath = Arc<GitlabCratePath>;

    async fn fetch_releases_for_group(
        self: Arc<Self>,
        group: &str,
        do_as: User,
    ) -> anyhow::Result<Vec<Release>> {
        let impersonation_token = Arc::new(self.get_impersonation_token_for(&do_as).await?);

        do_as: &User,
    ) -> anyhow::Result<Vec<(Self::CratePath, Release)>> {
        let mut next_uri = Some(format!(
            "{}/groups/{}/packages?per_page=100&pagination=keyset&order_by=id&sort=asc&sudo={}",
            "{}/groups/{}/packages?per_page=100&pagination=keyset&sort=asc&sudo={}",
            self.base_url,
            utf8_percent_encode(group, NON_ALPHANUMERIC),
            do_as.id
@@ -123,7 +132,6 @@ impl super::PackageProvider for Gitlab {

            for release in res {
                let this = self.clone();
                let impersonation_token = impersonation_token.clone();

                futures.push(tokio::spawn(async move {
                    let (project, package) = {
@@ -134,7 +142,13 @@ impl super::PackageProvider for Gitlab {
                        }
                    };

                    let package_files: GitlabPackageFilesResponse = this
                    let package_path = Arc::new(GitlabCratePath {
                        project: utf8_percent_encode(project, NON_ALPHANUMERIC).to_string(),
                        package_name: utf8_percent_encode(&release.name, NON_ALPHANUMERIC)
                            .to_string(),
                    });

                    let package_files: Vec<GitlabPackageFilesResponse> = this
                        .client
                        .get(format!(
                            "{}/projects/{}/packages/{}/package_files",
@@ -147,30 +161,95 @@ impl super::PackageProvider for Gitlab {
                        .json()
                        .await?;

                    Ok::<_, anyhow::Error>(Some(Release {
                        uri: format!(
                            "{}/projects/{}/packages/generic/{}/{}/{}?private_token={}",
                            this.base_url,
                            utf8_percent_encode(project, NON_ALPHANUMERIC),
                            utf8_percent_encode(&release.name, NON_ALPHANUMERIC),
                            utf8_percent_encode(&release.version, NON_ALPHANUMERIC),
                            package_files.file_name,
                            impersonation_token,
                        ),
                        name: release.name,
                        version: release.version,
                        checksum: package_files.file_sha256,
                    }))
                    Ok::<_, anyhow::Error>(Some(
                        package_files
                            .into_iter()
                            .filter_map(|package_file| {
                                if package_file.file_name.ends_with(".crate") {
                                    if package_file.file_name
                                        == format!("{}-{}.crate", release.name, release.version)
                                    {
                                        Some((
                                            package_path.clone(),
                                            Release {
                                                name: release.name.clone(),
                                                version: release.version.clone(),
                                                checksum: package_file.file_sha256,
                                            },
                                        ))
                                    } else {
                                        tracing::info!(
                                            "{}/{}/{}/{} should be called {}-{}.crate",
                                            project,
                                            release.name,
                                            release.version,
                                            package_file.file_name,
                                            release.name,
                                            release.version
                                        );
                                        None
                                    }
                                } else {
                                    None
                                }
                            })
                            .collect(),
                    ))
                }))
            }
        }

        futures
        let x: Vec<Vec<_>> = futures
            .err_into()
            .filter_map(|v| async move { v.and_then(|v| v).transpose() })
            .try_collect()
            .await
            .await?;

        Ok(x.into_iter().flatten().collect())
    }

    /// Downloads and parses the `metadata.json` stored alongside a crate release.
    ///
    /// The request URI is `base_url` followed by `path.metadata_uri(version)`, with the
    /// service-wide `PAT` appended as the `private_token` query parameter.
    ///
    /// # Errors
    ///
    /// Returns an error if the request cannot be sent, if GitLab answers with a 4xx/5xx
    /// status (for example when no metadata file exists for the release), or if the body is
    /// not valid `cargo_metadata::Metadata` JSON.
    async fn fetch_metadata_for_release(
        self: Arc<Self>,
        path: &Self::CratePath,
        version: &str,
    ) -> anyhow::Result<cargo_metadata::Metadata> {
        let uri = format!(
            "{}{}?private_token={}",
            self.base_url,
            path.metadata_uri(version),
            PAT,
        );

        // Check the status before decoding so an HTTP failure is reported as such rather
        // than as a JSON error caused by GitLab's `{"error": ...}` payload.
        let response = self.client.get(uri).send().await?.error_for_status()?;

        Ok(response.json().await?)
    }

    fn cargo_dl_uri(&self, group: &str, token: &str) -> String {
        format!(
            "{}/groups/{group}/packages/generic/{{sha256-checksum}}/{{crate}}-{{version}}.crate?private_token={token}",
            self.base_url
        )
    }
}

/// Identifies where a crate lives in GitLab's package registry.
///
/// Both fields are inserted verbatim into request paths by
/// [`GitlabCratePath::metadata_uri`], so they are expected to already be percent-encoded.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub struct GitlabCratePath {
    project: String,
    package_name: String,
}

impl GitlabCratePath {
    /// Returns the API path, relative to the API base URL, of the `metadata.json` file
    /// uploaded for the given `version` of this crate.
    ///
    /// `version` is inserted as-is, without any encoding.
    pub fn metadata_uri(&self, version: &str) -> String {
        let Self {
            project,
            package_name,
        } = self;

        format!("/projects/{project}/packages/generic/{package_name}/{version}/metadata.json")
    }
}

/// JSON request body sent when creating an impersonation token for a user.
#[derive(Serialize)]
pub struct GitlabImpersonationTokenRequest {
    /// Name given to the newly created token.
    name: &'static str,
    /// Scopes requested for the newly created token.
    scopes: Vec<&'static str>,
}

#[derive(Deserialize)]
diff --git a/src/providers/mod.rs b/src/providers/mod.rs
index 05bb127..ffa2bed 100644
--- a/src/providers/mod.rs
+++ b/src/providers/mod.rs
@@ -11,15 +11,29 @@ pub trait UserProvider {
    ) -> anyhow::Result<Option<User>>;

    async fn find_user_by_ssh_key(&self, fingerprint: &str) -> anyhow::Result<Option<User>>;

    async fn fetch_token_for_user(&self, user: &User) -> anyhow::Result<String>;
}

#[async_trait]
pub trait PackageProvider {
    /// Provider-specific metadata passed between `PackageProvider` methods to
    /// figure out the path of a package.
    type CratePath: std::fmt::Debug + Send + std::hash::Hash + Clone + Eq + PartialEq + Send + Sync;

    async fn fetch_releases_for_group(
        self: Arc<Self>,
        group: &str,
        do_as: User,
    ) -> anyhow::Result<Vec<Release>>;
        do_as: &User,
    ) -> anyhow::Result<Vec<(Self::CratePath, Release)>>;

    async fn fetch_metadata_for_release(
        self: Arc<Self>,
        path: &Self::CratePath,
        version: &str,
    ) -> anyhow::Result<cargo_metadata::Metadata>;

    fn cargo_dl_uri(&self, group: &str, token: &str) -> String;
}

#[derive(Debug, Clone)]
@@ -33,5 +47,4 @@ pub struct Release {
    pub name: String,
    pub version: String,
    pub checksum: String,
    pub uri: String,
}
diff --git a/src/util.rs b/src/util.rs
index b01f71c..f4324a8 100644
--- a/src/util.rs
+++ b/src/util.rs
@@ -1,18 +1,24 @@
/// Retrieves the key fingerprint, encoded in hex and separated in two character chunks
/// with colons.
pub fn format_fingerprint(fingerprint: &str) -> Result<String, thrussh_keys::Error> {
    let raw_hex = hex::encode(
        base64::decode(&fingerprint).map_err(|_| thrussh_keys::Error::CouldNotReadKey)?,
    );
    let mut hex = String::with_capacity(raw_hex.len() + (raw_hex.len() / 2 - 1));
/// Returns `fingerprint` prefixed with the `SHA256:` label.
///
/// The input is copied through untouched; no decoding or re-encoding takes place.
// NOTE(review): presumably this is the form the provider's key lookup expects; confirm
// against the caller.
pub fn format_fingerprint(fingerprint: &str) -> String {
    const PREFIX: &str = "SHA256:";

    let mut formatted = String::with_capacity(PREFIX.len() + fingerprint.len());
    formatted.push_str(PREFIX);
    formatted.push_str(fingerprint);
    formatted
}

    for (i, c) in raw_hex.chars().enumerate() {
        if i != 0 && i % 2 == 0 {
            hex.push(':');
        }
/// Returns the directory components under which a crate's index file is stored, following
/// the layout of the cargo registry index:
///
/// * crates with a 1 character name are written to the directory `1`,
/// * crates with a 2 character name are written to the directory `2`,
/// * crates with a 3 character name are written to `3/{first-character}`,
/// * anything else gets a normal tree built from the first four characters of the crate
///   name: the first 2 for the first directory and the next 2 for the second.
///
/// An empty name yields no components.
///
/// # Panics
///
/// Lengths and slices are byte-based, so this panics if a slice boundary falls inside a
/// multi-byte character. Crate names accepted by cargo are ASCII, so valid names never
/// trigger this.
pub fn get_crate_folder(crate_name: &str) -> Vec<String> {
    let mut folders = Vec::with_capacity(2);

    match crate_name.len() {
        0 => {}
        1 => folders.push("1".to_string()),
        2 => folders.push("2".to_string()),
        3 => {
            // Three character names are additionally bucketed by their first character.
            folders.push("3".to_string());
            folders.push(crate_name[..1].to_string());
        }
        _ => {
            folders.push(crate_name[..2].to_string());
            folders.push(crate_name[2..4].to_string());
        }
    }

    folders
}