Merge pull request #76 from alexheretic/cache-older-checksums
Cache older checksums
Diff
CHANGELOG.md | 1 +
Cargo.lock | 142 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------
Cargo.toml | 10 +++++-----
config.toml | 9 +++++++++
src/config.rs | 42 +++++++++++++++++++++++++++++++++++++++++-
src/providers/gitlab.rs | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
src/providers/mod.rs | 2 +-
src/providers/gitlab/checksums.rs | 40 ++++++++++++++++++++++++++++++++++++++++
8 files changed, 250 insertions(+), 113 deletions(-)
@@ -6,6 +6,7 @@
- Add info logs for release & metadata fetch latency.
- When fetching all releases handle 429 by backing off.
- Improve fetch error logging.
- Add file checksum fetch caching controlled by `cache-releases-older-than` config.
# v0.1.4
@@ -32,9 +32,9 @@
[[package]]
name = "ahash"
version = "0.8.8"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42cd52102d3df161c77a887b608d7a4897d7cc112886a9537b738a887a03aaff"
checksum = "d713b3834d76b85304d4d525563c1276e2e30dc97cc67bfb4585a4a29fc2c89f"
dependencies = [
"cfg-if",
"getrandom",
@@ -54,9 +54,9 @@
[[package]]
name = "anstream"
version = "0.6.11"
version = "0.6.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e2e1ebcb11de5c03c67de28a7df593d32191b44939c482e97702baaaa6ab6a5"
checksum = "96b09b5178381e0874812a9b157f7fe84982617e48f71f4e3235482775e5b540"
dependencies = [
"anstyle",
"anstyle-parse",
@@ -102,9 +102,9 @@
[[package]]
name = "anyhow"
version = "1.0.79"
version = "1.0.80"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "080e9890a082662b09c1ad45f567faeeb47f22b5fb23895fbe1e651e718e25ca"
checksum = "5ad32ce52e4161730f7098c077cd2ed6229b5804ccf99e5366be1ab72a98b4e1"
[[package]]
name = "arrayvec"
@@ -242,9 +242,9 @@
[[package]]
name = "bumpalo"
version = "3.14.0"
version = "3.15.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec"
checksum = "8ea184aa71bb362a1157c896979544cc23974e08fd265f29ea96b59f0b4a555b"
[[package]]
name = "byteorder"
@@ -292,11 +292,10 @@
[[package]]
name = "cc"
version = "1.0.83"
version = "1.0.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0"
checksum = "7f9fa1897e4325be0d68d48df6aa1a71ac2ed4d27723887e7754192705350730"
dependencies = [
"jobserver",
"libc",
]
@@ -681,6 +680,7 @@
"clap",
"futures",
"hex",
"humantime-serde",
"indexmap",
"indoc",
"itoa",
@@ -693,6 +693,7 @@
"serde",
"serde_json",
"shlex",
"smol_str",
"thrussh",
"thrussh-keys",
"thrussh-libsodium",
@@ -797,6 +798,22 @@
checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
[[package]]
name = "humantime"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
[[package]]
name = "humantime-serde"
version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57a3db5ea5923d99402c94e9feb261dc5ee9b4efa158b0315f788cf549cc200c"
dependencies = [
"humantime",
"serde",
]
[[package]]
name = "hyper"
version = "0.14.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -891,15 +908,6 @@
checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c"
[[package]]
name = "jobserver"
version = "0.1.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab46a6e9526ddef3ae7f787c06f0f2600639ba80ea3eade3d8e670a2230f51d6"
dependencies = [
"libc",
]
[[package]]
name = "js-sys"
version = "0.3.68"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1370,16 +1378,17 @@
[[package]]
name = "ring"
version = "0.17.7"
version = "0.17.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "688c63d65483050968b2a8937f7995f443e27041a0f7700aa59b0822aedebb74"
checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d"
dependencies = [
"cc",
"cfg-if",
"getrandom",
"libc",
"spin",
"untrusted",
"windows-sys 0.48.0",
"windows-sys 0.52.0",
]
[[package]]
@@ -1434,9 +1443,9 @@
[[package]]
name = "ryu"
version = "1.0.16"
version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c"
checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1"
[[package]]
name = "same-file"
@@ -1465,9 +1474,9 @@
[[package]]
name = "semver"
version = "1.0.21"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b97ed7a9823b74f99c7742f5336af7be5ecd3eeafcb1507d1fa93347b1d589b0"
checksum = "92d43fe69e652f3df9bdc2b85b2854a0825b86e4fb76bc44d945137d053639ca"
dependencies = [
"serde",
]
@@ -1494,9 +1503,9 @@
[[package]]
name = "serde_json"
version = "1.0.113"
version = "1.0.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69801b70b1c3dac963ecb03a364ba0ceda9cf60c71cfe475e99864759c8b8a79"
checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0"
dependencies = [
"itoa",
"ryu",
@@ -1590,13 +1599,22 @@
checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7"
[[package]]
name = "smol_str"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6845563ada680337a52d43bb0b29f396f2d911616f6573012645b9e3d048a49"
dependencies = [
"serde",
]
[[package]]
name = "socket2"
version = "0.5.5"
version = "0.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b5fac59a5cb5dd637972e5fca70daf0523c9067fcdc4842f053dae04a18f8e9"
checksum = "05ffd9c0a93b7543e062e759284fcf5f5e3b098501104bfbdde4d404db792871"
dependencies = [
"libc",
"windows-sys 0.48.0",
"windows-sys 0.52.0",
]
[[package]]
@@ -1619,9 +1637,9 @@
[[package]]
name = "syn"
version = "2.0.48"
version = "2.0.50"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f"
checksum = "74f1bdc9872430ce9b75da68329d1c1746faf50ffac5f19e02b71e37ff881ffb"
dependencies = [
"proc-macro2",
"quote",
@@ -1687,9 +1705,9 @@
[[package]]
name = "thread_local"
version = "1.1.7"
version = "1.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152"
checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c"
dependencies = [
"cfg-if",
"once_cell",
@@ -1974,9 +1992,9 @@
[[package]]
name = "unicode-normalization"
version = "0.1.22"
version = "0.1.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921"
checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5"
dependencies = [
"tinyvec",
]
@@ -2203,7 +2221,7 @@
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
"windows-targets 0.52.0",
"windows-targets 0.52.3",
]
[[package]]
@@ -2223,17 +2241,17 @@
[[package]]
name = "windows-targets"
version = "0.52.0"
version = "0.52.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd"
checksum = "d380ba1dc7187569a8a9e91ed34b8ccfc33123bbacb8c0aed2d1ad7f3ef2dc5f"
dependencies = [
"windows_aarch64_gnullvm 0.52.0",
"windows_aarch64_msvc 0.52.0",
"windows_i686_gnu 0.52.0",
"windows_i686_msvc 0.52.0",
"windows_x86_64_gnu 0.52.0",
"windows_x86_64_gnullvm 0.52.0",
"windows_x86_64_msvc 0.52.0",
"windows_aarch64_gnullvm 0.52.3",
"windows_aarch64_msvc 0.52.3",
"windows_i686_gnu 0.52.3",
"windows_i686_msvc 0.52.3",
"windows_x86_64_gnu 0.52.3",
"windows_x86_64_gnullvm 0.52.3",
"windows_x86_64_msvc 0.52.3",
]
[[package]]
@@ -2244,9 +2262,9 @@
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.0"
version = "0.52.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea"
checksum = "68e5dcfb9413f53afd9c8f86e56a7b4d86d9a2fa26090ea2dc9e40fba56c6ec6"
[[package]]
name = "windows_aarch64_msvc"
@@ -2256,9 +2274,9 @@
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.0"
version = "0.52.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef"
checksum = "8dab469ebbc45798319e69eebf92308e541ce46760b49b18c6b3fe5e8965b30f"
[[package]]
name = "windows_i686_gnu"
@@ -2268,9 +2286,9 @@
[[package]]
name = "windows_i686_gnu"
version = "0.52.0"
version = "0.52.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313"
checksum = "2a4e9b6a7cac734a8b4138a4e1044eac3404d8326b6c0f939276560687a033fb"
[[package]]
name = "windows_i686_msvc"
@@ -2280,9 +2298,9 @@
[[package]]
name = "windows_i686_msvc"
version = "0.52.0"
version = "0.52.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a"
checksum = "28b0ec9c422ca95ff34a78755cfa6ad4a51371da2a5ace67500cf7ca5f232c58"
[[package]]
name = "windows_x86_64_gnu"
@@ -2292,9 +2310,9 @@
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.0"
version = "0.52.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd"
checksum = "704131571ba93e89d7cd43482277d6632589b18ecf4468f591fbae0a8b101614"
[[package]]
name = "windows_x86_64_gnullvm"
@@ -2304,9 +2322,9 @@
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.0"
version = "0.52.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e"
checksum = "42079295511643151e98d61c38c0acc444e52dd42ab456f7ccfd5152e8ecf21c"
[[package]]
name = "windows_x86_64_msvc"
@@ -2316,9 +2334,9 @@
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.0"
version = "0.52.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04"
checksum = "0770833d60a970638e989b3fa9fd2bb1aaadcf88963d1659fd7d9990196ed2d6"
[[package]]
name = "winreg"
@@ -1,10 +1,8 @@
[package]
name = "gitlab-cargo-shim"
version = "0.1.4"
edition = "2021"
authors = [
"Jordan Doyle <jordan@doyl.ee>"
]
authors = ["Jordan Doyle <jordan@doyl.ee>"]
[dependencies]
anyhow = "1"
@@ -17,6 +15,7 @@
clap = { version = "4", features = ["derive", "cargo", "wrap_help"] }
futures = "0.3"
hex = "0.4"
humantime-serde = "1.1.1"
indexmap = "2"
indoc = "2.0"
itoa = "1.0"
@@ -26,13 +25,14 @@
percent-encoding = "2.3"
reqwest = { version = "0.11", default-features = false, features = ["json", "rustls-tls"] }
semver = "1.0"
serde = { version = "1.0", features = ["derive"] }
serde = { version = "1.0", features = ["derive", "rc"] }
serde_json = "1"
shlex = "1.1"
smol_str = { version = "0.2.1", features = ["serde"] }
thrussh = "0.34"
thrussh-keys = "0.22"
thrussh-libsodium = "=0.2.1"
time = { version = "0.3", features = ["serde"] }
time = { version = "0.3", features = ["serde", "parsing"] }
tokio = { version = "1.17", features = ["full"] }
tokio-util = { version = "0.7", features = ["codec"] }
toml = "0.5"
@@ -23,3 +23,12 @@
@@ -1,10 +1,9 @@
#![allow(clippy::module_name_repetitions)]
use crate::providers::gitlab::handle_error;
use clap::Parser;
use serde::{de::DeserializeOwned, Deserialize};
use std::{io, net::SocketAddr, path::PathBuf, str::FromStr};
use time::Duration;
use std::{io, net::SocketAddr, path::PathBuf, str::FromStr, time::Duration};
use url::Url;
#[derive(Parser)]
@@ -36,19 +35,23 @@
pub uri: Url,
pub admin_token: Option<String>,
#[serde(default = "GitlabConfig::default_token_expiry")]
pub token_expiry: Duration,
pub token_expiry: time::Duration,
#[serde(default)]
pub ssl_cert: Option<String>,
#[serde(default)]
pub metadata_format: MetadataFormat,
#[serde(default, with = "humantime_serde")]
pub cache_releases_older_than: Option<Duration>,
}
impl GitlabConfig {
#[must_use]
const fn default_token_expiry() -> Duration {
Duration::days(30)
const fn default_token_expiry() -> time::Duration {
time::Duration::days(30)
}
}
@@ -94,4 +97,33 @@
/// Reads the file at `path` and deserializes its bytes as TOML into a `T`.
///
/// # Errors
///
/// Propagates the I/O error if the file cannot be read. A TOML parse or
/// deserialization failure is wrapped in an `io::Error` of kind `InvalidData`.
pub fn from_toml_path<T: DeserializeOwned>(path: &str) -> Result<T, std::io::Error> {
    let raw = std::fs::read(path)?;
    match toml::from_slice(&raw) {
        Ok(parsed) => Ok(parsed),
        Err(err) => Err(std::io::Error::new(std::io::ErrorKind::InvalidData, err)),
    }
}
/// Parses a minimal config and checks every `[gitlab]` field: the keys given in the
/// TOML take their parsed values, and the omitted ones come back as `None` / the
/// default token expiry.
#[test]
fn deser_config() {
    let raw = r#"
listen-address = "[::]:2222"
state-directory = "/var/lib/gitlab-cargo-shim"
[gitlab]
uri = "http://127.0.0.1:3000"
metadata-format = "json.zst"
cache-releases-older-than = "2 days""#;

    let parsed: Config = toml::from_str(raw).unwrap();

    let state_dir = parsed.state_directory.to_string_lossy();
    assert_eq!(state_dir, "/var/lib/gitlab-cargo-shim");
    let listen = parsed.listen_address.to_string();
    assert_eq!(listen, "[::]:2222");

    let gitlab = parsed.gitlab;
    assert_eq!(gitlab.uri.as_str(), "http://127.0.0.1:3000/");
    assert_eq!(gitlab.admin_token, None);
    assert_eq!(gitlab.token_expiry, GitlabConfig::default_token_expiry());
    assert_eq!(gitlab.ssl_cert, None);
    assert_eq!(gitlab.metadata_format, MetadataFormat::JsonZst);

    // "2 days" expressed in seconds.
    let two_days = Duration::from_secs(2 * 24 * 60 * 60);
    assert_eq!(gitlab.cache_releases_older_than, Some(two_days));
}
@@ -1,5 +1,6 @@
#![allow(clippy::module_name_repetitions, clippy::blocks_in_conditions)]
mod checksums;
use crate::{
config::{GitlabConfig, MetadataFormat},
@@ -8,12 +9,14 @@
use anyhow::Context;
use async_trait::async_trait;
use backoff::backoff::Backoff;
use checksums::ChecksumCache;
use futures::{stream::FuturesUnordered, StreamExt, TryStreamExt};
use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC};
use reqwest::{header, Certificate};
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use time::{Duration, OffsetDateTime};
use smol_str::{format_smolstr, SmolStr};
use std::{sync::Arc, time::Duration};
use time::OffsetDateTime;
use tokio::sync::Semaphore;
use tracing::{debug, info_span, instrument, Instrument};
use url::Url;
@@ -24,9 +27,11 @@
pub struct Gitlab {
client: reqwest::Client,
base_url: Url,
token_expiry: Duration,
token_expiry: time::Duration,
metadata_format: MetadataFormat,
admin_token: Option<String>,
checksums: ChecksumCache,
cache_checksums_older_than: Option<Duration>,
}
impl Gitlab {
@@ -45,8 +50,49 @@
token_expiry: config.token_expiry,
metadata_format: config.metadata_format,
admin_token: config.admin_token.clone(),
checksums: <_>::default(),
cache_checksums_older_than: config.cache_releases_older_than,
})
}
/// Resolves the SHA-256 checksum of the package file identified by `key`.
///
/// A checksum already present in the cache is returned without any request.
/// Otherwise the package's file list is fetched from `key.fetch_url()` (with
/// credentials attached by `user_or_admin_token`) and searched for the entry
/// whose name equals `key.file_name`.
///
/// The fetched checksum is stored in the cache only when
/// `cache_checksums_older_than` is configured and the file was created longer
/// ago than that threshold.
///
/// Returns `Ok(None)` when the file list contains no entry named `key.file_name`.
///
/// # Errors
///
/// Fails if the request, the `handle_error` status handling, or the JSON
/// decoding of the response fails.
async fn fetch_checksum(
    &self,
    key: checksums::Key,
    do_as: &User,
) -> anyhow::Result<Option<Arc<str>>> {
    if let Some(chksum) = self.checksums.get(&key) {
        return Ok(Some(chksum));
    }

    let package_files: Vec<GitlabPackageFilesResponse> = handle_error(
        self.client
            .get(key.fetch_url())
            .user_or_admin_token(do_as, &self.admin_token)
            .send_retry_429()
            .await?,
    )
    .await?
    .json()
    .await?;

    let Some(file) = package_files
        .into_iter()
        .find(|package_file| package_file.file_name == key.file_name)
    else {
        return Ok(None);
    };

    if let Some(cache_older_than) = self.cache_checksums_older_than {
        // Compare the file's age with the threshold rather than computing
        // `now - threshold`: subtracting a very large configured duration from a
        // timestamp panics on overflow, whereas the difference of two timestamps
        // is always representable. The outcome is otherwise the same, since
        // `created_at < now - threshold` is equivalent to `now - created_at > threshold`.
        let age = OffsetDateTime::now_utc() - file.created_at;
        if age > cache_older_than {
            self.checksums.set(key, Arc::clone(&file.file_sha256));
        }
    }

    Ok(Some(file.file_sha256))
}
}
#[async_trait]
@@ -219,7 +265,7 @@
let mut splitter = release.links.web_path.splitn(2, "/-/packages/");
match (splitter.next(), splitter.next()) {
(Some(project), Some(package)) => (&project[1..], package),
_ => return Ok(None),
_ => return anyhow::Ok(None),
}
};
@@ -228,41 +274,30 @@
package_name: utf8_percent_encode(&release.name, NON_ALPHANUMERIC)
.to_string(),
});
let key = checksums::Key {
base_url: this.base_url.as_str().into(),
project: project.into(),
package: package.into(),
file_name: format_smolstr!(
"{}-{}.crate",
release.name,
release.version
),
};
let package_files: Vec<GitlabPackageFilesResponse> = handle_error(
this.client
.get(format!(
"{}/projects/{}/packages/{}/package_files",
this.base_url,
utf8_percent_encode(project, NON_ALPHANUMERIC),
utf8_percent_encode(package, NON_ALPHANUMERIC),
))
.user_or_admin_token(&do_as, &this.admin_token)
.send_retry_429()
.await?,
)
.await?
.json()
.await?;
let expected_file_name =
format!("{}-{}.crate", release.name, release.version);
Ok::<_, anyhow::Error>(
package_files
.into_iter()
.find(|package_file| package_file.file_name == expected_file_name)
.map(move |package_file| {
(
Arc::clone(&package_path),
Release {
name: Arc::from(release.name),
version: release.version,
checksum: package_file.file_sha256,
},
)
}),
)
let checksum = this.fetch_checksum(key, &do_as).await?;
Ok(checksum.map(|checksum| {
(
Arc::clone(&package_path),
Release {
name: Arc::from(release.name),
version: release.version,
checksum,
},
)
}))
}
.instrument(info_span!("fetch_package_files")),
);
@@ -357,8 +392,10 @@
#[derive(Deserialize)]
pub struct GitlabPackageFilesResponse {
pub file_name: String,
pub file_sha256: String,
pub file_name: SmolStr,
#[serde(with = "time::serde::rfc3339")]
pub created_at: time::OffsetDateTime,
pub file_sha256: Arc<str>,
}
#[derive(Deserialize)]
@@ -50,5 +50,5 @@
pub struct Release {
pub name: ReleaseName,
pub version: String,
pub checksum: String,
pub checksum: Arc<str>,
}
@@ -1,0 +1,40 @@
use parking_lot::RwLock;
use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC};
use smol_str::SmolStr;
use std::{collections::HashMap, sync::Arc};
/// In-memory store of file checksums, keyed by [`Key`].
///
/// The map sits behind an `RwLock`, so both lookups and inserts take `&self`.
#[derive(Debug, Default)]
pub struct ChecksumCache {
// Values are `Arc<str>` so a lookup can hand out a clone without copying the text.
checksums: RwLock<HashMap<Key, Arc<str>>>,
}
impl ChecksumCache {
    /// Returns a clone of the checksum stored under `key`, or `None` if there is no entry.
    pub fn get(&self, key: &Key) -> Option<Arc<str>> {
        let entries = self.checksums.read();
        entries.get(key).map(Arc::clone)
    }

    /// Stores `checksum` under `key`, replacing any entry already present for that key.
    pub fn set(&self, key: Key, checksum: Arc<str>) {
        let mut entries = self.checksums.write();
        entries.insert(key, checksum);
    }
}
/// Identifies a single package file whose checksum may be cached.
///
/// `base_url`, `project` and `package` determine the URL built by `fetch_url`;
/// `file_name` is not part of that URL.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Key {
/// Prefix placed in front of `/projects/...` when building the fetch URL.
pub base_url: SmolStr,
/// Project identifier; percent-encoded when placed in the fetch URL.
pub project: SmolStr,
/// Package identifier; percent-encoded when placed in the fetch URL.
pub package: SmolStr,
/// Name of the file within the package.
pub file_name: SmolStr,
}
impl Key {
    /// Builds the URL of the `package_files` listing for this key's project and package.
    ///
    /// `project` and `package` are percent-encoded with the `NON_ALPHANUMERIC` set;
    /// `base_url` is inserted as-is.
    pub fn fetch_url(&self) -> String {
        let encoded_project = utf8_percent_encode(self.project.as_str(), NON_ALPHANUMERIC);
        let encoded_package = utf8_percent_encode(self.package.as_str(), NON_ALPHANUMERIC);
        format!(
            "{}/projects/{}/packages/{}/package_files",
            self.base_url, encoded_project, encoded_package,
        )
    }
}