From 044a8e07f48c404851475f9f0be2329b323435d0 Mon Sep 17 00:00:00 2001 From: jordan Date: Wed, 13 Nov 2024 17:37:57 +0000 Subject: [PATCH] Merge pull request #91 from alexheretic/cache-package-pages Cache package pages--- CHANGELOG.md | 1 + Cargo.lock | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------- Cargo.toml | 2 +- src/cache.rs | 1 + src/providers/gitlab.rs | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------- 5 files changed, 135 insertions(+), 54 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ee9f6f8..bc8c731 100644 --- a/CHANGELOG.md +++ a/CHANGELOG.md @@ -11,6 +11,7 @@ - Support crate yanking by creating a `yanked` file on the release. - Add `bust-cache` command, invoked via `ssh [registry] -- bust-cache [project] [crate-name] [crate-version]` to remove eligibility cache (ie. after a crate has been yanked) - Update dependencies, require libsodium at build & runtime. +- Add package page caching. Controlled with config `cache-releases-older-than`. # v0.1.4 diff --git a/Cargo.lock b/Cargo.lock index f5e8402..a52321b 100644 --- a/Cargo.lock +++ a/Cargo.lock @@ -352,14 +352,14 @@ "semver", "serde", "serde_json", - "thiserror", + "thiserror 1.0.69", ] [[package]] name = "cc" -version = "1.1.37" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40545c26d092346d8a8dab71ee48e7685a7a9cba76e634790c215b41a4a7b4cf" +checksum = "1aeb932158bd710538c73702db6945cb68a8fb08c519e6e12706b94263b36db8" dependencies = [ "jobserver", "libc", @@ -466,9 +466,9 @@ [[package]] name = "cpufeatures" -version = "0.2.14" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "608697df725056feaccfa42cffdaeeec3fccc4ffc38358ecd19b243e716a78e0" +checksum = "0ca741a962e1b0bff6d724a1a0958b686406e853bb14061f218562e1896f95e6" dependencies = [ "libc", ] @@ -756,8 +756,10 @@ checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ "cfg-if", + "js-sys", "libc", "wasi", + "wasm-bindgen", ] [[package]] @@ -1465,7 +1467,7 @@ "indexmap", "itoa", "sha1", - "thiserror", + "thiserror 1.0.69", "time", "tokio-util", "tracing", @@ -1588,9 +1590,9 @@ [[package]] name = "quinn" -version = "0.11.5" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c7c5fdde3cdae7203427dc4f0a68fe0ed09833edc525a03456b153b79828684" +checksum = "62e96808277ec6f97351a2380e6c25114bc9e67037775464979f3037c92d05ef" dependencies = [ "bytes", "pin-project-lite", @@ -1599,26 +1601,29 @@ "rustc-hash 2.0.0", "rustls", "socket2", - "thiserror", + "thiserror 2.0.3", "tokio", "tracing", ] [[package]] name = "quinn-proto" -version = "0.11.8" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fadfaed2cd7f389d0161bb73eeb07b7b78f8691047a6f3e73caaeae55310a4a6" +checksum = "a2fe5ef3495d7d2e377ff17b1a8ce2ee2ec2a18cde8b6ad6619d65d0701c135d" dependencies = [ "bytes", + "getrandom", "rand", "ring", "rustc-hash 2.0.0", "rustls", + "rustls-pki-types", "slab", - "thiserror", + "thiserror 2.0.3", "tinyvec", "tracing", + "web-time", ] [[package]] @@ -1691,7 +1696,7 @@ dependencies = [ "getrandom", "libredox", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -1866,6 +1871,9 @@ version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b" +dependencies = [ + "web-time", +] [[package]] name = "rustls-webpki" @@ -1910,18 +1918,18 @@ [[package]] name = "serde" -version = "1.0.214" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f55c3193aca71c12ad7890f1785d2b73e1b9f63a0bbc353c08ef26fe03fc56b5" +checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.214" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766" +checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" dependencies = [ "proc-macro2", "quote", @@ -2139,14 +2147,34 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "thiserror-impl", + "thiserror-impl 1.0.69", ] [[package]] +name = "thiserror" +version = "2.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c006c85c7651b3cf2ada4584faa36773bd07bac24acfb39f3c431b36d7e667aa" +dependencies = [ + "thiserror-impl 2.0.3", +] + +[[package]] name = "thiserror-impl" version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f077553d607adc1caf65430528a576c757a71ed73944b66ebb58ef2bbd243568" dependencies = [ "proc-macro2", "quote", @@ -2179,7 +2207,7 @@ "log", "rand", "sha2 0.9.9", - "thiserror", + "thiserror 1.0.69", "thrussh-keys", "thrussh-libsodium", "tokio", @@ -2210,7 +2238,7 @@ "serde", "serde_derive", "sha2 0.9.9", - "thiserror", + "thiserror 1.0.69", "thrussh-libsodium", "tokio", "tokio-stream", @@ -2237,6 +2265,7 @@ checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" dependencies = [ "deranged", + "itoa", "num-conv", "powerfmt", "serde", @@ -2652,6 +2681,16 @@ version = "0.3.72" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f6488b90108c040df0fe62fa815cbdee25124641df01814dd7282749234c6112" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" dependencies = [ "js-sys", "wasm-bindgen", diff --git a/Cargo.toml b/Cargo.toml index 5f86228..dde1af8 100644 --- a/Cargo.toml +++ a/Cargo.toml @@ -33,7 +33,7 @@ smol_str = { version = "0.3", features = ["serde"] } thrussh = "0.35.6" thrussh-keys = "0.22" -time = { version = "0.3", features = ["serde", "parsing"] } +time = { version = "0.3", features = ["serde", "parsing", "formatting"] } tokio = { version = "1.17", features = ["full"] } tokio-util = { version = "0.7", features = ["codec"] } toml = "0.8" diff --git a/src/cache.rs b/src/cache.rs index 5d22f2a..a756b39 100644 --- a/src/cache.rs +++ a/src/cache.rs @@ -38,6 +38,7 @@ pub enum CacheKind { Eligibility = 1, CrateMetadata = 2, + PackagePage = 3, } /// A generic-erased `Cache`. diff --git a/src/providers/gitlab.rs b/src/providers/gitlab.rs index c164636..72ac63d 100644 --- a/src/providers/gitlab.rs +++ a/src/providers/gitlab.rs @@ -1,7 +1,7 @@ // blocks_in_conditions: didn't work with `#[instrument...`` usage #![allow(clippy::module_name_repetitions, clippy::blocks_in_conditions)] use crate::{ - cache::{Cache, ConcreteCache, Yoked}, + cache::{Cache, CacheKind, Cacheable, ConcreteCache, Yoked}, config::{GitlabConfig, MetadataFormat}, providers::{EligibilityCacheKey, Release, User}, }; @@ -18,7 +18,7 @@ use tokio::sync::Semaphore; use tracing::{debug, info_span, instrument, Instrument}; use url::Url; -use yoke::Yoke; +use yoke::{Yoke, Yokeable}; /// Number of `package_files` GETs to do in parallel. const PARALLEL_PACKAGE_FILES_GETS: usize = 32; @@ -30,7 +30,7 @@ metadata_format: MetadataFormat, admin_token: Option, cache: ConcreteCache, - cache_checksums_older_than: Duration, + cache_releases_older_than: Duration, } impl Gitlab { @@ -50,7 +50,7 @@ metadata_format: config.metadata_format, admin_token: config.admin_token.clone(), cache, - cache_checksums_older_than: config.cache_releases_older_than, + cache_releases_older_than: config.cache_releases_older_than, }) } @@ -128,13 +128,13 @@ let release = Some(Release { name: Cow::Owned(release.name.to_string()), - version: Cow::Owned(release.version.clone()), + version: Cow::Owned(release.version.to_string()), checksum: Cow::Owned(package_file.file_sha256), project: Cow::Owned(raw_project.to_string()), yanked, }); - if package_file.created_at + self.cache_checksums_older_than < OffsetDateTime::now_utc() { + if package_file.created_at + self.cache_releases_older_than < OffsetDateTime::now_utc() { self.cache .put(cache_key, &release) .await @@ -286,9 +286,10 @@ ))?; { let mut query = uri.query_pairs_mut(); - query.append_pair("per_page", itoa::Buffer::new().format(100u16)); + query.append_pair("per_page", "100"); query.append_pair("pagination", "keyset"); query.append_pair("sort", "asc"); + query.append_pair("order_by", "created_at"); if do_as.token.is_none() { query.append_pair("sudo", itoa::Buffer::new().format(do_as.id)); } @@ -300,31 +301,53 @@ let futures = FuturesUnordered::new(); while let Some(uri) = next_uri.take() { - let res = handle_error( - self.client - .get(uri) - .user_or_admin_token(do_as, &self.admin_token) - .send_retry_429() - .await?, - ) - .await?; + let items = if let Some(page) = self.cache.get::(uri.as_str()).await? { + let PackagePage { items, next } = page.get(); + next_uri.clone_from(next); + items.clone() + } else { + let res = handle_error( + self.client + .get(uri.clone()) + .user_or_admin_token(do_as, &self.admin_token) + .send_retry_429() + .await?, + ) + .await?; - if let Some(link_header) = res.headers().get(header::LINK) { - let mut link_header = parse_link_header::parse_with_rel(link_header.to_str()?)?; + let mut next = None::; + if let Some(link_header) = res.headers().get(header::LINK) { + let mut link_header = parse_link_header::parse_with_rel(link_header.to_str()?)?; - if let Some(next) = link_header.remove("next") { - next_uri = Some(next.raw_uri.parse()?); + if let Some(next_link) = link_header.remove("next") { + next = Some(next_link.raw_uri.parse()?); + } } - } - let res: Vec<_> = res - .json::>() - .await? - .into_iter() - .filter(|release| release.package_type == "generic") - .collect(); - - for release in res { + let items: Vec<_> = res + .json::>() + .await? + .into_iter() + .filter(|release| release.package_type == "generic") + .collect(); + + let page = PackagePage { items, next }; + + // cache page if all items are older than config `cache_releases_older_than` + // & it is not the last page + if page.next.is_some() + && page.items.iter().all(|item| { + item.created_at + self.cache_releases_older_than < OffsetDateTime::now_utc() + }) + { + self.cache.put(uri.as_str(), &page).await?; + } + + next_uri = page.next; + page.items + }; + + for release in items { let this = Arc::clone(&self); let do_as = Arc::clone(do_as); let fetch_concurrency = &fetch_concurrency; @@ -501,17 +524,19 @@ pub file_sha256: String, } -#[derive(Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize)] pub struct GitlabPackageResponse { pub id: u64, - pub name: String, - pub version: String, - pub package_type: String, + pub name: SmolStr, + pub version: SmolStr, + pub package_type: SmolStr, + #[serde(with = "time::serde::rfc3339")] + pub created_at: time::OffsetDateTime, #[serde(rename = "_links")] pub links: GitlabPackageLinksResponse, } -#[derive(Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize)] pub struct GitlabPackageLinksResponse { web_path: String, } @@ -576,5 +601,20 @@ } return Ok(r); } + } +} + +#[derive(Debug, Serialize, Deserialize, Yokeable)] +pub struct PackagePage { + pub items: Vec, + pub next: Option, +} + +impl Cacheable for PackagePage { + type Key<'b> = &'b str; + const KIND: CacheKind = CacheKind::PackagePage; + + fn format_key(out: &mut Vec, k: Self::Key<'_>) { + out.extend_from_slice(k.as_bytes()); } } -- rgit 0.1.3