Move to extracted packfile crate
Diff
Cargo.lock | 33 ++++++++++++++++++++++++++-------
Cargo.toml | 5 ++---
src/main.rs | 19 +++++++++----------
src/git_command_handlers/fetch.rs | 12 +++++-------
src/git_command_handlers/ls_refs.rs | 6 ++----
src/protocol/codec.rs | 138 --------------------------------------------------------------------------------
src/protocol/high_level.rs | 178 --------------------------------------------------------------------------------
src/protocol/low_level.rs | 339 --------------------------------------------------------------------------------
src/protocol/mod.rs | 4 ----
src/protocol/packet_line.rs | 73 -------------------------------------------------------------------------
10 files changed, 43 insertions(+), 764 deletions(-)
@@ -186,9 +186,9 @@
[[package]]
name = "bytes"
version = "1.1.0"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8"
checksum = "ec8a7b6a70fde80372154c65702f00a0f56f3e1c36abbc6c440484be248856db"
[[package]]
name = "camino"
@@ -539,12 +539,12 @@
"cargo-platform",
"cargo_metadata",
"clap",
"flate2",
"futures",
"hex",
"indexmap",
"indoc",
"itoa",
"packfile",
"parking_lot 0.12.1",
"parse_link_header",
"percent-encoding",
@@ -556,7 +556,6 @@
"shlex",
"thrussh",
"thrussh-keys",
"time",
"tokio",
"tokio-util",
"toml",
@@ -906,6 +905,24 @@
version = "6.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21326818e99cfe6ce1e524c2a805c189a99b5ae555a35d19f9a284b427d86afa"
[[package]]
name = "packfile"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3eeb3adcc3e5e1d0bf59cd1a1bc0a663497c7f89a4dc1632d8568c15da66dbef"
dependencies = [
"bytes",
"flate2",
"hex",
"indexmap",
"itoa",
"sha1",
"thiserror",
"time",
"tokio-util",
"tracing",
]
[[package]]
name = "parking_lot"
@@ -1499,9 +1516,9 @@
[[package]]
name = "time"
version = "0.3.11"
version = "0.3.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72c91f41dcb2f096c05f0873d667dceec1087ce5bcf984ec8ffb19acddbb3217"
checksum = "d634a985c4d4238ec39cacaed2e7ae552fbd3c476b552c1deac3021b7d7eaf0c"
dependencies = [
"libc",
"num_threads",
@@ -1722,9 +1739,9 @@
[[package]]
name = "ustr"
version = "0.8.1"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cbd539d8973e229b9d04f15d36e6a8f8d8f85f946b366f06bb001aaed3fa9dd9"
checksum = "371436099f2980de56dc385b615696d3eabbdac9649a72b85f9d75f68474fa9c"
dependencies = [
"ahash",
"byteorder",
@@ -16,12 +16,12 @@
cargo_metadata = "0.15"
cargo-platform = "0.1"
clap = { version = "3.2", features = ["derive", "cargo"] }
flate2 = "1.0"
futures = "0.3"
hex = "0.4"
itoa = "1.0"
indexmap = "1.9"
indoc = "1.0"
packfile = "0.1"
parse_link_header = "0.3"
parking_lot = "0.12"
percent-encoding = "2.1"
@@ -35,11 +35,10 @@
tracing-subscriber = "0.3"
thrussh = "0.33"
thrussh-keys = "0.21"
time = "0.3"
tokio = { version = "1.17", features = ["full"] }
tokio-util = { version = "0.7", features = ["codec"] }
toml = "0.5"
url = { version = "2.2", features = ["serde"] }
urlencoding = "2.1"
ustr = "0.8"
ustr = "0.9"
uuid = { version = "1.1", features = ["v4"] }
@@ -1,22 +1,15 @@
#![deny(clippy::pedantic)]
#![allow(clippy::missing_errors_doc)]
pub mod config;
pub mod git_command_handlers;
pub mod metadata;
pub mod protocol;
pub mod providers;
pub mod util;
use crate::{
config::Args,
metadata::{CargoConfig, CargoIndexCrateMetadata},
protocol::{
codec::{Encoder, GitCodec},
high_level::GitRepository,
low_level::{HashOutput, PackFileEntry},
packet_line::PktLine,
},
providers::{gitlab::Gitlab, PackageProvider, Release, ReleaseName, User, UserProvider},
util::get_crate_folder,
};
@@ -25,6 +18,12 @@
use clap::Parser;
use futures::Future;
use indexmap::IndexMap;
use packfile::{
codec::{Encoder, GitCodec},
high_level::GitRepository,
low_level::{HashOutput, PackFileEntry},
PktLine,
};
use parking_lot::RwLock;
use std::{
borrow::Cow, collections::HashMap, fmt::Write, net::SocketAddr, net::SocketAddrV6, pin::Pin,
@@ -179,7 +178,7 @@
fn write(&mut self, packet: PktLine<'_>) -> Result<(), anyhow::Error> {
Encoder.encode(packet, &mut self.output_bytes)
Ok(Encoder.encode(packet, &mut self.output_bytes)?)
}
@@ -295,7 +294,7 @@
})?);
packfile.insert(&[], "config.json".into(), config_json)?;
packfile.insert(&[], "config.json", config_json)?;
let releases_by_crate = self.fetch_releases_by_crate().await?;
@@ -326,7 +325,7 @@
packfile.insert(
&get_crate_folder(crate_name),
Arc::clone(crate_name).into(),
Arc::clone(crate_name),
buffer.split().freeze(),
)?;
}
@@ -1,14 +1,12 @@
use bytes::Bytes;
use packfile::{
low_level::{PackFile, PackFileEntry},
PktLine,
};
use thrussh::{server::Session, ChannelId};
use tracing::instrument;
use crate::{
protocol::{
low_level::{PackFile, PackFileEntry},
packet_line::PktLine,
},
Handler, PackageProvider, UserProvider,
};
use crate::{Handler, PackageProvider, UserProvider};
#[instrument(skip(handle, session, channel, metadata, packfile_entries), err)]
pub fn handle<U: UserProvider + PackageProvider + Send + Sync + 'static>(
@@ -5,13 +5,11 @@
use bytes::Bytes;
use packfile::{low_level::HashOutput, PktLine};
use thrussh::{server::Session, ChannelId};
use tracing::instrument;
use crate::{
protocol::{low_level::HashOutput, packet_line::PktLine},
Handler, PackageProvider, UserProvider,
};
use crate::{Handler, PackageProvider, UserProvider};
#[instrument(skip(handle, session, channel, _metadata, commit_hash), err)]
pub fn handle<U: UserProvider + PackageProvider + Send + Sync + 'static>(
@@ -1,138 +1,0 @@
#![allow(clippy::module_name_repetitions)]
use bytes::{Buf, Bytes, BytesMut};
use tokio_util::codec;
use tracing::instrument;
use super::packet_line::PktLine;
/// Stateless `tokio_util` encoder that serialises [`PktLine`] values into a `BytesMut`.
pub struct Encoder;
impl codec::Encoder<PktLine<'_>> for Encoder {
    type Error = anyhow::Error;

    /// Appends the wire representation of `item` to `dst`.
    ///
    /// All of the work is delegated to `PktLine::encode_to`; whatever error it
    /// reports is handed back to the caller unchanged.
    fn encode(&mut self, item: PktLine<'_>, dst: &mut BytesMut) -> Result<(), Self::Error> {
        item.encode_to(dst)
    }
}
/// One request assembled by [`GitCodec`] from the data packets received before a flush packet.
#[derive(Debug, Default, PartialEq, Eq)]
pub struct GitCommand {
/// Payload of the first non-empty data packet of the request (trailing `\n` removed).
pub command: Bytes,
/// Payloads of the data packets that followed `command`, in arrival order.
pub metadata: Vec<Bytes>,
}
/// Stateful decoder that accumulates incoming pkt-lines into [`GitCommand`]s.
#[derive(Default)]
pub struct GitCodec {
/// Command currently being accumulated; it is handed out (and reset to its default)
/// when a `0000` packet is decoded.
command: GitCommand,
}
impl codec::Decoder for GitCodec {
    type Item = GitCommand;
    type Error = anyhow::Error;

    /// Consumes as many complete pkt-lines from `src` as are available.
    ///
    /// * `0000` finishes the command accumulated so far and returns it.
    /// * `0001` and `0002` are skipped.
    /// * Any other length must lie in `4..=65520`; its payload (minus one
    ///   trailing `\n`, if present) becomes the command or is appended to
    ///   its metadata.
    ///
    /// Returns `Ok(None)` whenever more bytes are needed to make progress.
    ///
    /// # Errors
    ///
    /// Fails when the length prefix is not valid UTF-8 / hexadecimal, or when
    /// the advertised length is outside the permitted range.
    #[instrument(skip(self, src), err)]
    fn decode(&mut self, src: &mut bytes::BytesMut) -> Result<Option<Self::Item>, Self::Error> {
        // every packet starts with four hexadecimal digits holding its length
        const PREFIX_LEN: usize = 4;

        while src.len() >= PREFIX_LEN {
            let length = usize::from(u16::from_str_radix(
                std::str::from_utf8(&src[..PREFIX_LEN])?,
                16,
            )?);

            match length {
                // flush: the command is complete
                0 => {
                    src.advance(PREFIX_LEN);
                    return Ok(Some(std::mem::take(&mut self.command)));
                }
                // delimiter / response-end carry no payload and are ignored
                1 | 2 => {
                    src.advance(PREFIX_LEN);
                    continue;
                }
                // regular data packet, handled below
                4..=65520 => {}
                _ => {
                    return Err(std::io::Error::new(
                        std::io::ErrorKind::InvalidData,
                        "protocol abuse",
                    )
                    .into());
                }
            }

            if src.len() < length {
                // make room for the remainder of this packet and wait for it
                src.reserve(length - src.len());
                return Ok(None);
            }

            let mut payload = src.split_to(length).freeze();
            payload.advance(PREFIX_LEN);

            if payload.last() == Some(&b'\n') {
                payload.truncate(payload.len() - 1);
            }

            if self.command.command.is_empty() {
                self.command.command = payload;
            } else {
                self.command.metadata.push(payload);
            }
        }

        Ok(None)
    }
}
#[cfg(test)]
mod test {
    use bytes::{Bytes, BytesMut};
    use std::fmt::Write;
    use tokio_util::codec::Decoder;

    /// Feeds one codec a sequence of partial and complete requests.
    #[test]
    fn decode() {
        let mut decoder = super::GitCodec::default();
        let mut input = BytesMut::new();

        // the packet announces 0x15 bytes but its last byte has not arrived yet
        input.write_str("0015agent=git/2.32.0").unwrap();
        assert_eq!(decoder.decode(&mut input).unwrap(), None);

        // the packet is now complete, but no flush has been seen
        input.write_char('\n').unwrap();
        assert_eq!(decoder.decode(&mut input).unwrap(), None);

        // the flush releases the accumulated command
        input.write_str("0000").unwrap();
        let expected = super::GitCommand {
            command: Bytes::from_static(b"agent=git/2.32.0"),
            metadata: vec![],
        };
        assert_eq!(decoder.decode(&mut input).unwrap(), Some(expected));

        // a flush with nothing before it yields an empty command
        input.write_str("0000").unwrap();
        let expected = super::GitCommand {
            command: Bytes::new(),
            metadata: vec![],
        };
        assert_eq!(decoder.decode(&mut input).unwrap(), Some(expected));

        // special packets between data packets are skipped
        for packet in ["0002", "0005a", "0001", "0005b", "0000"] {
            input.write_str(packet).unwrap();
        }
        let expected = super::GitCommand {
            command: Bytes::from_static(b"a"),
            metadata: vec![Bytes::from_static(b"b")],
        };
        assert_eq!(decoder.decode(&mut input).unwrap(), Some(expected));
    }
}
@@ -1,178 +1,0 @@
use crate::instrument;
use crate::util::ArcOrCowStr;
use bytes::Bytes;
use indexmap::IndexMap;
use super::low_level::{
Commit, CommitUserInfo, HashOutput, PackFileEntry, TreeItem as LowLevelTreeItem, TreeItemKind,
};
/// In-memory builder that collects blobs and the directory tree referencing them.
#[derive(Default, Debug)]
pub struct GitRepository {
/// Every object created so far, keyed by its hash, in insertion order.
packfile_entries: IndexMap<HashOutput, PackFileEntry>,
/// Root directory of the tree being built.
tree: Tree,
}
impl GitRepository {
    /// Stores `content` as a blob called `file` inside the directory named by
    /// `path`, creating intermediate directories as required.
    ///
    /// # Errors
    ///
    /// Fails if a component of `path` already exists as a file, or if the
    /// blob cannot be hashed.
    #[instrument(skip(self, file, content), err)]
    pub fn insert(
        &mut self,
        path: &[&'static str],
        file: ArcOrCowStr,
        content: Bytes,
    ) -> Result<(), anyhow::Error> {
        // walk down from the root, one path segment at a time
        let mut current = &mut self.tree;

        for segment in path {
            let child = current
                .0
                .entry((*segment).into())
                .or_insert_with(|| Box::new(TreeItem::Tree(Tree::default())));

            match child.as_mut() {
                TreeItem::Tree(subdirectory) => current = subdirectory,
                TreeItem::Blob(_) => anyhow::bail!("attempted to use a file as a directory"),
            }
        }

        let blob = PackFileEntry::Blob(content);
        let blob_hash = blob.hash()?;

        // reference the blob from its directory, then keep the blob itself
        current.0.insert(file, Box::new(TreeItem::Blob(blob_hash)));
        self.packfile_entries.insert(blob_hash, blob);

        Ok(())
    }

    /// Finalises the repository into a single commit.
    ///
    /// The tree is converted into tree objects, and a commit pointing at the
    /// root tree is created with `name`/`email` as both author and committer,
    /// timestamped with the current UTC time.
    ///
    /// Returns the commit hash together with every object that was created.
    ///
    /// # Errors
    ///
    /// Fails if any tree or the commit cannot be hashed.
    #[instrument(skip(self, name, email, message), err)]
    pub fn commit(
        mut self,
        name: &'static str,
        email: &'static str,
        message: &'static str,
    ) -> Result<(HashOutput, Vec<PackFileEntry>), anyhow::Error> {
        let root_hash = self
            .tree
            .into_packfile_entries(&mut self.packfile_entries)?;

        let signature = CommitUserInfo {
            name,
            email,
            time: time::OffsetDateTime::now_utc(),
        };

        let commit = PackFileEntry::Commit(Commit {
            tree: root_hash,
            author: signature,
            committer: signature,
            message,
        });
        let commit_hash = commit.hash()?;
        self.packfile_entries.insert(commit_hash, commit);

        let entries: Vec<PackFileEntry> = self
            .packfile_entries
            .into_iter()
            .map(|(_hash, entry)| entry)
            .collect();

        Ok((commit_hash, entries))
    }
}
/// A directory: maps each entry name to the file or sub-directory stored under it.
#[derive(Default, Debug)]
struct Tree(IndexMap<ArcOrCowStr, Box<TreeItem>>);
impl Tree {
    /// Recursively turns this directory into tree objects.
    ///
    /// Sub-directories are converted first; every resulting tree object is
    /// inserted into `pack_file` under its hash. Items are ordered by their
    /// sort name, which for directories is the name followed by `/`.
    ///
    /// Returns the hash of the tree object created for this directory.
    #[instrument(skip(self, pack_file), err)]
    fn into_packfile_entries(
        self,
        pack_file: &mut IndexMap<HashOutput, PackFileEntry>,
    ) -> Result<HashOutput, anyhow::Error> {
        let mut items = Vec::with_capacity(self.0.len());

        for (name, child) in self.0 {
            let (kind, sort_name, hash) = match *child {
                TreeItem::Blob(hash) => (TreeItemKind::File, name.to_string(), hash),
                TreeItem::Tree(subtree) => (
                    TreeItemKind::Directory,
                    format!("{}/", name),
                    subtree.into_packfile_entries(pack_file)?,
                ),
            };

            items.push(LowLevelTreeItem {
                kind,
                name,
                hash,
                sort_name,
            });
        }

        items.sort_unstable_by(|lhs, rhs| lhs.sort_name.cmp(&rhs.sort_name));

        let entry = PackFileEntry::Tree(items);
        let entry_hash = entry.hash()?;
        pack_file.insert(entry_hash, entry);

        Ok(entry_hash)
    }
}
/// An entry of a [`Tree`].
#[derive(Debug)]
enum TreeItem {
/// A file, identified by the hash of its blob.
Blob(HashOutput),
/// A nested directory.
Tree(Tree),
}
@@ -1,339 +1,0 @@
use crate::util::ArcOrCowStr;
use bytes::{BufMut, Bytes, BytesMut};
use flate2::{write::ZlibEncoder, Compression};
use sha1::Digest;
use std::{
convert::TryInto,
fmt::{Display, Formatter, Write},
io::Write as IoWrite,
};
use tracing::instrument;
/// Raw 20-byte object hash (produced with SHA-1 elsewhere in this file).
pub type HashOutput = [u8; 20];
/// A borrowed list of objects that can be serialised as one packfile.
pub struct PackFile<'a> {
/// Objects written into the pack, in order.
entries: &'a [PackFileEntry],
}
impl<'a> PackFile<'a> {
#[must_use]
pub fn new(entries: &'a [PackFileEntry]) -> Self {
Self { entries }
}
#[must_use]
pub const fn header_size() -> usize {
"PACK".len() + std::mem::size_of::<u32>() + std::mem::size_of::<u32>()
}
#[must_use]
pub const fn footer_size() -> usize {
20
}
#[instrument(skip(self, original_buf), err)]
pub fn encode_to(&self, original_buf: &mut BytesMut) -> Result<(), anyhow::Error> {
let mut buf = original_buf.split_off(original_buf.len());
buf.reserve(Self::header_size() + Self::footer_size());
buf.extend_from_slice(b"PACK");
buf.put_u32(2);
buf.put_u32(self.entries.len().try_into()?);
for entry in self.entries {
entry.encode_to(&mut buf)?;
}
buf.extend_from_slice(&sha1::Sha1::digest(&buf[..]));
original_buf.unsplit(buf);
Ok(())
}
}
/// The fields serialised into a commit object.
#[derive(Debug, Clone)]
pub struct Commit {
/// Hash of the tree this commit points at.
pub tree: HashOutput,
/// Identity and timestamp written on the `author` line.
pub author: CommitUserInfo,
/// Identity and timestamp written on the `committer` line.
pub committer: CommitUserInfo,
/// Commit message, written after a blank line.
pub message: &'static str,
}
impl Commit {
    /// Writes the textual commit object: a `tree` line with the hex tree
    /// hash, the `author` and `committer` lines, a blank line, and the
    /// message.
    #[instrument(skip(self, out), err)]
    fn encode_to(&self, out: &mut BytesMut) -> Result<(), anyhow::Error> {
        // two hexadecimal characters per hash byte
        let mut hex_tree = [0_u8; 20 * 2];
        hex::encode_to_slice(self.tree, &mut hex_tree)?;

        out.write_str("tree ")?;
        out.extend_from_slice(&hex_tree);
        out.write_char('\n')?;

        let Self {
            author,
            committer,
            message,
            ..
        } = self;

        writeln!(out, "author {}", author)?;
        writeln!(out, "committer {}", committer)?;
        write!(out, "\n{}", message)?;

        Ok(())
    }

    /// Number of bytes `encode_to` writes for this commit.
    #[must_use]
    pub fn size(&self) -> usize {
        let tree_line = "tree ".len() + (self.tree.len() * 2) + "\n".len();
        let author_line = "author ".len() + self.author.size() + "\n".len();
        let committer_line = "committer ".len() + self.committer.size() + "\n".len();
        let body = "\n".len() + self.message.len();

        tree_line + author_line + committer_line + body
    }
}
/// Identity and timestamp used for the author / committer of a [`Commit`].
#[derive(Clone, Copy, Debug)]
pub struct CommitUserInfo {
/// Display name.
pub name: &'static str,
/// Email address, rendered between angle brackets.
pub email: &'static str,
/// Moment of the action; rendered as a Unix timestamp.
pub time: time::OffsetDateTime,
}
impl Display for CommitUserInfo {
    /// Renders `name <email> unix-timestamp +0000`.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self { name, email, time } = self;
        let seconds = time.unix_timestamp();

        write!(f, "{} <{}> {} +0000", name, email, seconds)
    }
}
impl CommitUserInfo {
    /// Number of bytes the `Display` rendering of this value occupies.
    #[must_use]
    pub fn size(&self) -> usize {
        // length of the timestamp once formatted as decimal digits
        let mut digits = itoa::Buffer::new();
        let timestamp = digits.format(self.time.unix_timestamp());

        let name_part = self.name.len() + " <".len();
        let email_part = self.email.len() + "> ".len();
        let time_part = timestamp.len() + " +0000".len();

        name_part + email_part + time_part
    }
}
/// Kind of object a [`TreeItem`] refers to.
#[derive(Debug, Copy, Clone)]
pub enum TreeItemKind {
/// A regular file.
File,
/// A nested directory.
Directory,
}
impl TreeItemKind {
    /// Mode string written in front of a tree item of this kind.
    #[must_use]
    pub const fn mode(&self) -> &'static str {
        match *self {
            Self::Directory => "40000",
            Self::File => "100644",
        }
    }
}
/// One entry of a tree object.
#[derive(Debug)]
pub struct TreeItem {
/// Whether the entry is a file or a directory; determines the mode that is written.
pub kind: TreeItemKind,
/// Name of the entry as written into the tree object.
pub name: ArcOrCowStr,
/// Hash of the object the entry points at.
pub hash: HashOutput,
/// Key available for ordering entries; it is not written by `encode_to`.
pub sort_name: String,
}
impl TreeItem {
    /// Writes `<mode> <name>\0` followed by the raw hash bytes.
    #[instrument(skip(self, out), err)]
    fn encode_to(&self, out: &mut BytesMut) -> Result<(), anyhow::Error> {
        write!(out, "{} {}\0", self.kind.mode(), self.name)?;
        out.extend_from_slice(&self.hash);

        Ok(())
    }

    /// Number of bytes `encode_to` writes for this item.
    #[must_use]
    pub fn size(&self) -> usize {
        let mode = self.kind.mode().len();
        let name = self.name.len();
        let hash = self.hash.len();

        mode + " ".len() + name + "\0".len() + hash
    }
}
/// An object that can be hashed and written into a [`PackFile`].
#[derive(Debug)]
pub enum PackFileEntry {
/// A commit object.
Commit(Commit),
/// A tree object: the list of items it contains.
Tree(Vec<TreeItem>),
/// A blob object: raw file contents.
Blob(Bytes),
}
impl PackFileEntry {
#[instrument(skip(self, buf))]
fn write_header(&self, buf: &mut BytesMut) {
let mut size = self.uncompressed_size();
{
let mut val = 0b1000_0000_u8;
val |= match self {
Self::Commit(_) => 0b001,
Self::Tree(_) => 0b010,
Self::Blob(_) => 0b011,
} << 4;
#[allow(clippy::cast_possible_truncation)]
{
val |= (size & 0b1111) as u8;
}
size >>= 4;
buf.put_u8(val);
}
while size != 0 {
#[allow(clippy::cast_possible_truncation)]
let mut val = (size & 0b111_1111) as u8;
size >>= 7;
if size != 0 {
val |= 1 << 7;
}
buf.put_u8(val);
}
}
#[instrument(skip(self, original_out), err)]
pub fn encode_to(&self, original_out: &mut BytesMut) -> Result<(), anyhow::Error> {
self.write_header(original_out);
let mut out = BytesMut::new();
let size = self.uncompressed_size();
original_out.reserve(size);
out.reserve(size);
match self {
Self::Commit(commit) => {
commit.encode_to(&mut out)?;
}
Self::Tree(items) => {
for item in items {
item.encode_to(&mut out)?;
}
}
Self::Blob(data) => {
out.extend_from_slice(data);
}
}
debug_assert_eq!(out.len(), size);
let mut e = ZlibEncoder::new(Vec::new(), Compression::default());
e.write_all(&out)?;
let compressed_data = e.finish()?;
original_out.extend_from_slice(&compressed_data);
Ok(())
}
#[instrument(skip(self))]
#[must_use]
pub fn uncompressed_size(&self) -> usize {
match self {
Self::Commit(commit) => commit.size(),
Self::Tree(items) => items.iter().map(TreeItem::size).sum(),
Self::Blob(data) => data.len(),
}
}
#[instrument(skip(self), err)]
pub fn hash(&self) -> Result<HashOutput, anyhow::Error> {
let size = self.uncompressed_size();
let file_prefix = match self {
Self::Commit(_) => "commit",
Self::Tree(_) => "tree",
Self::Blob(_) => "blob",
};
let size_len = itoa::Buffer::new().format(size).len();
let mut out =
BytesMut::with_capacity(file_prefix.len() + " ".len() + size_len + "\n".len() + size);
write!(out, "{} {}\0", file_prefix, size)?;
match self {
Self::Commit(commit) => {
commit.encode_to(&mut out)?;
}
Self::Tree(items) => {
for item in items {
item.encode_to(&mut out)?;
}
}
Self::Blob(blob) => {
out.extend_from_slice(blob);
}
}
Ok(sha1::Sha1::digest(&out).into())
}
}
@@ -1,4 +1,0 @@
pub mod codec;
pub mod high_level;
pub mod low_level;
pub mod packet_line;
@@ -1,73 +1,0 @@
use bytes::{BufMut, BytesMut};
use std::fmt::Write;
use tracing::instrument;
use super::low_level::PackFile;
/// A packet that can be encoded in pkt-line framing.
pub enum PktLine<'a> {
/// Raw payload, written after its four-digit hexadecimal length.
Data(&'a [u8]),
/// A packfile, written after a band byte of `1`.
SidebandData(PackFile<'a>),
/// A message, written after a band byte of `2`.
SidebandMsg(&'a [u8]),
/// The special `0000` packet.
Flush,
/// The special `0001` packet.
Delimiter,
/// The special `0002` packet.
ResponseEnd,
}
impl PktLine<'_> {
#[instrument(skip(self, buf), err)]
pub fn encode_to(&self, buf: &mut BytesMut) -> Result<(), anyhow::Error> {
match self {
Self::Data(data) => {
write!(buf, "{:04x}", data.len() + 4)?;
buf.extend_from_slice(data);
}
Self::SidebandData(packfile) => {
let mut data_buf = buf.split_off(buf.len());
data_buf.put_u8(1);
packfile.encode_to(&mut data_buf)?;
write!(buf, "{:04x}", data_buf.len() + 4)?;
buf.unsplit(data_buf);
}
Self::SidebandMsg(msg) => {
write!(buf, "{:04x}", msg.len() + 4 + 1)?;
buf.put_u8(2);
buf.extend_from_slice(msg);
}
Self::Flush => buf.extend_from_slice(b"0000"),
Self::Delimiter => buf.extend_from_slice(b"0001"),
Self::ResponseEnd => buf.extend_from_slice(b"0002"),
}
Ok(())
}
}
impl<'a> From<&'a str> for PktLine<'a> {
fn from(val: &'a str) -> Self {
PktLine::Data(val.as_bytes())
}
}
#[cfg(test)]
mod test {
    use bytes::BytesMut;

    /// A data packet is written as its hexadecimal length followed by the payload.
    #[test]
    fn test_pkt_line() {
        let packet = super::PktLine::Data(b"agent=git/2.32.0\n");

        let mut encoded = BytesMut::new();
        packet.encode_to(&mut encoded).unwrap();

        assert_eq!(encoded.as_ref(), b"0015agent=git/2.32.0\n");
    }
}