Add file checksum fetch caching
Diff
CHANGELOG.md | 1 +
Cargo.lock | 27 +++++++++++++++++++++++++++
Cargo.toml | 10 +++++-----
config.toml | 9 +++++++++
src/config.rs | 42 ++++++++++++++++++++++++++++++++++++++++++
src/providers/gitlab.rs | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------
src/providers/mod.rs | 2 +-
src/providers/gitlab/checksums.rs | 40 ++++++++++++++++++++++++++++++++++++++++
8 files changed, 197 insertions(+), 51 deletions(-)
@@ -6,6 +6,7 @@
- Add info logs for release & metadata fetch latency.
- When fetching all releases, handle 429 responses by backing off.
- Improve fetch error logging.
- Add file checksum fetch caching controlled by `cache-releases-older-than` config.
# v0.1.4
@@ -681,6 +681,7 @@
"clap",
"futures",
"hex",
"humantime-serde",
"indexmap",
"indoc",
"itoa",
@@ -693,6 +694,7 @@
"serde",
"serde_json",
"shlex",
"smol_str",
"thrussh",
"thrussh-keys",
"thrussh-libsodium",
@@ -797,6 +799,22 @@
checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
[[package]]
name = "humantime"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
[[package]]
name = "humantime-serde"
version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57a3db5ea5923d99402c94e9feb261dc5ee9b4efa158b0315f788cf549cc200c"
dependencies = [
"humantime",
"serde",
]
[[package]]
name = "hyper"
version = "0.14.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1588,6 +1606,15 @@
version = "1.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7"
[[package]]
name = "smol_str"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6845563ada680337a52d43bb0b29f396f2d911616f6573012645b9e3d048a49"
dependencies = [
"serde",
]
[[package]]
name = "socket2"
@@ -1,10 +1,8 @@
[package]
name = "gitlab-cargo-shim"
version = "0.1.4"
edition = "2021"
authors = [
"Jordan Doyle <jordan@doyl.ee>"
]
authors = ["Jordan Doyle <jordan@doyl.ee>"]
[dependencies]
anyhow = "1"
@@ -17,6 +15,7 @@
clap = { version = "4", features = ["derive", "cargo", "wrap_help"] }
futures = "0.3"
hex = "0.4"
humantime-serde = "1.1.1"
indexmap = "2"
indoc = "2.0"
itoa = "1.0"
@@ -26,13 +25,14 @@
percent-encoding = "2.3"
reqwest = { version = "0.11", default-features = false, features = ["json", "rustls-tls"] }
semver = "1.0"
serde = { version = "1.0", features = ["derive"] }
serde = { version = "1.0", features = ["derive", "rc"] }
serde_json = "1"
shlex = "1.1"
smol_str = { version = "0.2.1", features = ["serde"] }
thrussh = "0.34"
thrussh-keys = "0.22"
thrussh-libsodium = "=0.2.1"
time = { version = "0.3", features = ["serde"] }
time = { version = "0.3", features = ["serde", "parsing"] }
tokio = { version = "1.17", features = ["full"] }
tokio-util = { version = "0.7", features = ["codec"] }
toml = "0.5"
@@ -23,3 +23,12 @@
@@ -1,10 +1,9 @@
#![allow(clippy::module_name_repetitions)]
use crate::providers::gitlab::handle_error;
use clap::Parser;
use serde::{de::DeserializeOwned, Deserialize};
use std::{io, net::SocketAddr, path::PathBuf, str::FromStr};
use time::Duration;
use std::{io, net::SocketAddr, path::PathBuf, str::FromStr, time::Duration};
use url::Url;
#[derive(Parser)]
@@ -36,19 +35,23 @@
pub uri: Url,
pub admin_token: Option<String>,
#[serde(default = "GitlabConfig::default_token_expiry")]
pub token_expiry: Duration,
pub token_expiry: time::Duration,
#[serde(default)]
pub ssl_cert: Option<String>,
#[serde(default)]
pub metadata_format: MetadataFormat,
#[serde(default, with = "humantime_serde")]
pub cache_releases_older_than: Option<Duration>,
}
impl GitlabConfig {
/// Fallback used for `token_expiry` (via `#[serde(default = ...)]`) when the
/// configuration does not set it: 30 days.
#[must_use]
const fn default_token_expiry() -> Duration {
Duration::days(30)
const fn default_token_expiry() -> time::Duration {
time::Duration::days(30)
}
}
@@ -94,4 +97,33 @@
/// Reads the file at `path` and deserializes its bytes as TOML into `T`.
///
/// # Errors
///
/// Returns the underlying I/O error when the file cannot be read. A TOML
/// deserialization failure is wrapped in an [`std::io::Error`] of kind
/// `InvalidData`.
pub fn from_toml_path<T: DeserializeOwned>(path: &str) -> Result<T, std::io::Error> {
    let raw = std::fs::read(path)?;
    match toml::from_slice(&raw) {
        Ok(parsed) => Ok(parsed),
        Err(err) => Err(std::io::Error::new(std::io::ErrorKind::InvalidData, err)),
    }
}
/// Deserializes a small config and checks every `[gitlab]` field, including
/// the serde defaults and the humantime-formatted `cache-releases-older-than`.
#[test]
fn deser_config() {
    const TWO_DAYS: Duration = Duration::from_secs(2 * 24 * 60 * 60);

    let raw = r#"
listen-address = "[::]:2222"
state-directory = "/var/lib/gitlab-cargo-shim"
[gitlab]
uri = "http://127.0.0.1:3000"
metadata-format = "json.zst"
cache-releases-older-than = "2 days""#;

    let parsed: Config = toml::from_str(raw).unwrap();

    // Top-level settings.
    assert_eq!(
        parsed.state_directory.to_string_lossy(),
        "/var/lib/gitlab-cargo-shim"
    );
    assert_eq!(parsed.listen_address.to_string(), "[::]:2222");

    // `[gitlab]` table: explicitly set values plus the defaults for omitted keys.
    let gitlab = &parsed.gitlab;
    assert_eq!(gitlab.uri.as_str(), "http://127.0.0.1:3000/");
    assert_eq!(gitlab.admin_token, None);
    assert_eq!(gitlab.token_expiry, GitlabConfig::default_token_expiry());
    assert_eq!(gitlab.ssl_cert, None);
    assert_eq!(gitlab.metadata_format, MetadataFormat::JsonZst);
    assert_eq!(gitlab.cache_releases_older_than, Some(TWO_DAYS));
}
@@ -1,5 +1,6 @@
#![allow(clippy::module_name_repetitions, clippy::blocks_in_conditions)]
mod checksums;
use crate::{
config::{GitlabConfig, MetadataFormat},
@@ -8,12 +9,14 @@
use anyhow::Context;
use async_trait::async_trait;
use backoff::backoff::Backoff;
use checksums::ChecksumCache;
use futures::{stream::FuturesUnordered, StreamExt, TryStreamExt};
use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC};
use reqwest::{header, Certificate};
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use time::{Duration, OffsetDateTime};
use smol_str::{format_smolstr, SmolStr};
use std::{sync::Arc, time::Duration};
use time::OffsetDateTime;
use tokio::sync::Semaphore;
use tracing::{debug, info_span, instrument, Instrument};
use url::Url;
@@ -24,9 +27,11 @@
/// State for the GitLab provider: the HTTP client and base URL used to build
/// API requests, plus settings copied from `GitlabConfig`.
pub struct Gitlab {
client: reqwest::Client,
base_url: Url,
token_expiry: Duration,
token_expiry: time::Duration,
metadata_format: MetadataFormat,
admin_token: Option<String>,
/// Checksums that were already fetched; `fetch_checksum` consults this
/// before issuing a request.
checksums: ChecksumCache,
/// Copied from `GitlabConfig::cache_releases_older_than`. When `None`,
/// `fetch_checksum` never stores anything in `checksums`.
cache_checksums_older_than: Option<Duration>,
}
impl Gitlab {
@@ -45,8 +50,49 @@
token_expiry: config.token_expiry,
metadata_format: config.metadata_format,
admin_token: config.admin_token.clone(),
checksums: <_>::default(),
cache_checksums_older_than: config.cache_releases_older_than,
})
}
async fn fetch_checksum(
&self,
key: checksums::Key,
do_as: &User,
) -> anyhow::Result<Option<Arc<str>>> {
if let Some(chksum) = self.checksums.get(&key) {
return Ok(Some(chksum));
}
let package_files: Vec<GitlabPackageFilesResponse> = handle_error(
self.client
.get(key.fetch_url())
.user_or_admin_token(do_as, &self.admin_token)
.send_retry_429()
.await?,
)
.await?
.json()
.await?;
let Some(file) = package_files
.into_iter()
.find(|package_file| package_file.file_name == key.file_name)
else {
return Ok(None);
};
if let Some(cache_older_than) = self.cache_checksums_older_than {
let cache_max_created = OffsetDateTime::now_utc() - cache_older_than;
if file.created_at < cache_max_created {
self.checksums.set(key, Arc::clone(&file.file_sha256));
}
}
Ok(Some(file.file_sha256))
}
}
#[async_trait]
@@ -219,7 +265,7 @@
let mut splitter = release.links.web_path.splitn(2, "/-/packages/");
match (splitter.next(), splitter.next()) {
(Some(project), Some(package)) => (&project[1..], package),
_ => return Ok(None),
_ => return anyhow::Ok(None),
}
};
@@ -228,41 +274,30 @@
package_name: utf8_percent_encode(&release.name, NON_ALPHANUMERIC)
.to_string(),
});
let key = checksums::Key {
base_url: this.base_url.as_str().into(),
project: project.into(),
package: package.into(),
file_name: format_smolstr!(
"{}-{}.crate",
release.name,
release.version
),
};
let package_files: Vec<GitlabPackageFilesResponse> = handle_error(
this.client
.get(format!(
"{}/projects/{}/packages/{}/package_files",
this.base_url,
utf8_percent_encode(project, NON_ALPHANUMERIC),
utf8_percent_encode(package, NON_ALPHANUMERIC),
))
.user_or_admin_token(&do_as, &this.admin_token)
.send_retry_429()
.await?,
)
.await?
.json()
.await?;
let expected_file_name =
format!("{}-{}.crate", release.name, release.version);
Ok::<_, anyhow::Error>(
package_files
.into_iter()
.find(|package_file| package_file.file_name == expected_file_name)
.map(move |package_file| {
(
Arc::clone(&package_path),
Release {
name: Arc::from(release.name),
version: release.version,
checksum: package_file.file_sha256,
},
)
}),
)
let checksum = this.fetch_checksum(key, &do_as).await?;
Ok(checksum.map(|checksum| {
(
Arc::clone(&package_path),
Release {
name: Arc::from(release.name),
version: release.version,
checksum,
},
)
}))
}
.instrument(info_span!("fetch_package_files")),
);
@@ -357,8 +392,10 @@
/// One element of the JSON array that `fetch_checksum` decodes from the
/// package-files response.
#[derive(Deserialize)]
pub struct GitlabPackageFilesResponse {
pub file_name: String,
pub file_sha256: String,
/// Name of the file; compared against the expected `<name>-<version>.crate`.
pub file_name: SmolStr,
/// Creation timestamp, deserialized from an RFC 3339 string. `fetch_checksum`
/// compares it with the caching cutoff.
#[serde(with = "time::serde::rfc3339")]
pub created_at: time::OffsetDateTime,
/// Value of the `file_sha256` field; used as the release checksum.
pub file_sha256: Arc<str>,
}
#[derive(Deserialize)]
@@ -50,5 +50,5 @@
/// A crate release: its name, version string and checksum.
pub struct Release {
pub name: ReleaseName,
pub version: String,
pub checksum: String,
/// Held as `Arc<str>`, the same type the checksum cache stores and returns.
pub checksum: Arc<str>,
}
@@ -1,0 +1,40 @@
use parking_lot::RwLock;
use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC};
use smol_str::SmolStr;
use std::{collections::HashMap, sync::Arc};
/// In-memory map from a package-file [`Key`] to its checksum.
///
/// The map is wrapped in an `RwLock`, so both accessors take `&self`.
#[derive(Debug, Default)]
pub struct ChecksumCache {
    checksums: RwLock<HashMap<Key, Arc<str>>>,
}

impl ChecksumCache {
    /// Returns a clone of the `Arc` stored under `key`, or `None` when there
    /// is no entry for it.
    pub fn get(&self, key: &Key) -> Option<Arc<str>> {
        let entries = self.checksums.read();
        entries.get(key).map(Arc::clone)
    }

    /// Stores `checksum` under `key`, replacing any value already present.
    pub fn set(&self, key: Key, checksum: Arc<str>) {
        let mut entries = self.checksums.write();
        entries.insert(key, checksum);
    }
}
/// Cache key identifying one package file: the API base URL, the project, the
/// package, and the file name.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Key {
    pub base_url: SmolStr,
    pub project: SmolStr,
    pub package: SmolStr,
    pub file_name: SmolStr,
}

impl Key {
    /// Builds the URL of the `package_files` listing for this key's project
    /// and package.
    ///
    /// `project` and `package` are percent-encoded with `NON_ALPHANUMERIC`;
    /// `base_url` is inserted as-is and `file_name` is not part of the URL.
    pub fn fetch_url(&self) -> String {
        let Self {
            base_url,
            project,
            package,
            ..
        } = self;
        let project = utf8_percent_encode(project.as_str(), NON_ALPHANUMERIC);
        let package = utf8_percent_encode(package.as_str(), NON_ALPHANUMERIC);
        format!(
            "{}/projects/{}/packages/{}/package_files",
            base_url, project, package,
        )
    }
}