From 85a1dd96c94028d9ddcb9d129c97b55bed8f79f9 Mon Sep 17 00:00:00 2001 From: Jordan Doyle Date: Sat, 22 Oct 2022 14:43:31 +0100 Subject: [PATCH] Initial commit --- .github/dependabot.yml | 12 ++++++++++++ .github/workflows/audit.yml | 16 ++++++++++++++++ .github/workflows/audit_cron.yml | 14 ++++++++++++++ .github/workflows/ci.yml | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ .gitignore | 3 +++ Cargo.toml | 27 +++++++++++++++++++++++++++ LICENSE | 13 +++++++++++++ src/codec.rs | 151 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/error.rs | 28 ++++++++++++++++++++++++++++ src/high_level.rs | 180 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 30 ++++++++++++++++++++++++++++++ src/low_level.rs | 351 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/packet_line.rs | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/util.rs | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 14 files changed, 1024 insertions(+) create mode 100644 .github/dependabot.yml create mode 100644 .github/workflows/audit.yml create mode 100644 .github/workflows/audit_cron.yml create mode 100644 .github/workflows/ci.yml create mode 100644 .gitignore create mode 100644 Cargo.toml create mode 100644 LICENSE create mode 100644 src/codec.rs create mode 100644 src/error.rs create mode 100644 src/high_level.rs create mode 100644 src/lib.rs create mode 100644 src/low_level.rs create 
mode 100644 src/packet_line.rs create mode 100644 src/util.rs diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..0dad033 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,12 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates + +version: 2 + +updates: + - package-ecosystem: "cargo" + directory: "/" + schedule: + interval: "monthly" diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml new file mode 100644 index 0000000..f7302be --- /dev/null +++ b/.github/workflows/audit.yml @@ -0,0 +1,16 @@ +name: Security audit + +on: + push: + paths: + - '**/Cargo.toml' + - '**/Cargo.lock' + +jobs: + security_audit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v1 + - uses: actions-rs/audit-check@v1 + with: + token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/audit_cron.yml b/.github/workflows/audit_cron.yml new file mode 100644 index 0000000..56de683 --- /dev/null +++ b/.github/workflows/audit_cron.yml @@ -0,0 +1,14 @@ +name: Security audit (cron) + +on: + schedule: + - cron: '0 0 * * *' + +jobs: + audit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/audit-check@v1 + with: + token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..ead4831 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,62 @@ +on: [push, pull_request] + +name: CI + +jobs: + check: + name: Check + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - uses: actions-rs/cargo@v1 + with: + command: check + + test: + 
name: Test Suite + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - uses: actions-rs/cargo@v1 + with: + command: test + + fmt: + name: Rustfmt + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - run: rustup component add rustfmt + - uses: actions-rs/cargo@v1 + with: + command: fmt + + clippy: + name: Clippy + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - run: rustup component add clippy + - uses: actions-rs/cargo@v1 + with: + command: clippy diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..91b8835 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/target +/Cargo.lock +.idea/ diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..854af54 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "packfile" +authors = ["Jordan Doyle "] +description = "A simple library providing utilities to generate Git Packfiles in memory and send them to clients" +version = "0.1.0" +edition = "2021" +license = "WTFPL" +keywords = ["git", "packfile", "in-memory", "protocol"] +categories = ["development-tools"] +exclude = ["/.github"] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +bytes = "1.2" +flate2 = "1.0" +hex = "0.4" +indexmap = "1.9" +itoa = "1.0" +sha1 = "0.10" +thiserror = "1.0" +time = "0.3.15" +tokio-util = { version = "0.7", features = ["codec"], optional = true } +tracing = "0.1" + +[features] +default = ["tokio-util"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..8b1a9d8 --- /dev/null +++ b/LICENSE @@ -0,0 +1,13 @@ + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + Version 2, December 2004 + 
+Copyright (C) 2004 Sam Hocevar + +Everyone is permitted to copy and distribute verbatim or modified +copies of this license document, and changing it is allowed as long +as the name is changed. + + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. You just DO WHAT THE FUCK YOU WANT TO. diff --git a/src/codec.rs b/src/codec.rs new file mode 100644 index 0000000..f0dd2ed --- /dev/null +++ b/src/codec.rs @@ -0,0 +1,151 @@ +//! A [`tokio_util::codec`] implementation for the [Git wire format]. +//! +//! [Git wire format]: https://git-scm.com/docs/protocol-v2 + +#![allow(clippy::module_name_repetitions)] + +use std::ops::RangeInclusive; + +use bytes::{Buf, Bytes, BytesMut}; +use tokio_util::codec; +use tracing::instrument; + +use crate::{packet_line::PktLine, Error}; + +const ALLOWED_PACKET_LENGTH: RangeInclusive = 4..=65520; + +pub struct Encoder; + +impl codec::Encoder> for Encoder { + type Error = Error; + + fn encode(&mut self, item: PktLine<'_>, dst: &mut BytesMut) -> Result<(), Self::Error> { + item.encode_to(dst)?; + Ok(()) + } +} + +#[derive(Debug, Default, PartialEq, Eq)] +pub struct GitCommand { + pub command: Bytes, + pub metadata: Vec, +} + +#[derive(Default)] +pub struct GitCodec { + command: GitCommand, +} + +impl codec::Decoder for GitCodec { + type Item = GitCommand; + type Error = Error; + + #[instrument(skip(self, src), err)] + fn decode(&mut self, src: &mut bytes::BytesMut) -> Result, Self::Error> { + loop { + if src.len() < 4 { + return Ok(None); + } + + let mut length_bytes = [0_u8; 4]; + length_bytes.copy_from_slice(&src[..4]); + let length = u16::from_str_radix( + std::str::from_utf8(&length_bytes).map_err(Error::ParseLengthBytes)?, + 16, + ) + .map_err(Error::ParseLengthAsHex)? 
as usize; + + if length == 0 { + // flush + src.advance(4); + return Ok(Some(std::mem::take(&mut self.command))); + } else if length == 1 || length == 2 { + src.advance(4); + continue; + } else if !ALLOWED_PACKET_LENGTH.contains(&length) { + return Err(Error::PacketLengthExceedsSpec( + ALLOWED_PACKET_LENGTH, + length, + )); + } + + // not enough bytes in the buffer yet, ask for more + if src.len() < length { + src.reserve(length - src.len()); + return Ok(None); + } + + // length is inclusive of the 4 bytes that makes up itself + let mut data = src.split_to(length).freeze(); + data.advance(4); + + // strip newlines for conformity + if data.ends_with(b"\n") { + data.truncate(data.len() - 1); + } + + if self.command.command.is_empty() { + self.command.command = data; + } else { + self.command.metadata.push(data); + } + } + } +} + +#[cfg(test)] +mod test { + use bytes::{Bytes, BytesMut}; + use std::fmt::Write; + use tokio_util::codec::Decoder; + + #[test] + fn decode() { + let mut codec = super::GitCodec::default(); + + let mut bytes = BytesMut::new(); + + bytes.write_str("0015agent=git/2.32.0").unwrap(); + let res = codec.decode(&mut bytes).unwrap(); + assert_eq!(res, None); + + bytes.write_char('\n').unwrap(); + let res = codec.decode(&mut bytes).unwrap(); + assert_eq!(res, None); + + bytes.write_str("0000").unwrap(); + let res = codec.decode(&mut bytes).unwrap(); + assert_eq!( + res, + Some(super::GitCommand { + command: Bytes::from_static(b"agent=git/2.32.0"), + metadata: vec![], + }) + ); + + bytes.write_str("0000").unwrap(); + let res = codec.decode(&mut bytes).unwrap(); + assert_eq!( + res, + Some(super::GitCommand { + command: Bytes::new(), + metadata: vec![], + }) + ); + + bytes.write_str("0002").unwrap(); + bytes.write_str("0005a").unwrap(); + bytes.write_str("0001").unwrap(); + bytes.write_str("0005b").unwrap(); + bytes.write_str("0000").unwrap(); + + let res = codec.decode(&mut bytes).unwrap(); + assert_eq!( + res, + Some(super::GitCommand { + command: 
Bytes::from_static(b"a"), + metadata: vec![Bytes::from_static(b"b")], + }) + ); + } +} diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..10cf135 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,28 @@ +use std::ops::RangeInclusive; +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum Error { + #[error("Failed to write formatted string to buffer: {0}")] + BufferWrite(#[from] std::fmt::Error), + #[error("{0} is not a directory")] + NotDirectory(&'static str), + #[cfg(feature = "tokio-util")] + #[error("Failed to parse utf-8 encoded prefix: {0}")] + ParseLengthBytes(std::str::Utf8Error), + #[cfg(feature = "tokio-util")] + #[error("Failed to parse length from hex string: {0}")] + ParseLengthAsHex(std::num::ParseIntError), + #[error("Failed to write bytes to compress to zlib: {0}")] + CompressWrite(std::io::Error), + #[error("Failed to compress packfile with zlib: {0}")] + Compress(std::io::Error), + #[error("Failed to encode tree hash to hex: {0}")] + EncodeTreeHash(hex::FromHexError), + #[error("Entries in packfile exceeds a u32: {0}")] + EntriesExceedsU32(std::num::TryFromIntError), + #[error("Packet length is not in the range {0:?} as defined by the spec, got {1}")] + PacketLengthExceedsSpec(RangeInclusive, usize), + #[error("I/O error: {0}")] + Io(#[from] std::io::Error), +} diff --git a/src/high_level.rs b/src/high_level.rs new file mode 100644 index 0000000..b06a1e6 --- /dev/null +++ b/src/high_level.rs @@ -0,0 +1,180 @@ +//! A high-level interface for building packfiles. Wraps the `low_level` module +//! making a much easier interface for writing files and generating the root +//! commit. +//! +//! The output packfile will only have a single commit in it, which is fine +//! for our purposes because `cargo` will `git pull --force` from our Git +//! server, allowing us to ignore any history the client may have. 
+ +use bytes::Bytes; +use indexmap::IndexMap; +use tracing::instrument; + +use crate::{ + low_level::{ + Commit, CommitUserInfo, HashOutput, PackFileEntry, TreeItem as LowLevelTreeItem, + TreeItemKind, + }, + util::ArcOrCowStr, + Error, +}; + +/// The main way of interacting with the high level Packfile builder +/// +/// Builds a whole packfile containing files, directories and commits - essentially +/// building out a full Git repository in memory. +#[derive(Default, Debug)] +pub struct GitRepository { + /// A map containing all the blobs and their corresponding hashes so they're + /// not inserted more than once for any files in the whole tree with the same + /// content. + packfile_entries: IndexMap, + /// An in-progress `Tree` currently being built out, the tree refers to items + /// in `packfile_entries` by hash. + tree: Tree, +} + +impl GitRepository { + /// Inserts a file into the repository, writing a file to the path + /// `path/to/my-file` would require a `path` of `["path", "to"]` + /// and a `file` of `"my-file"`. + #[instrument(skip(self, file, content), err)] + pub fn insert( + &mut self, + path: &[&'static str], + file: impl Into, + content: Bytes, + ) -> Result<(), Error> { + // we'll initialise the directory to the root of the tree, this means + // if a path isn't specified we'll just write it to the root directory + let mut directory = &mut self.tree; + + // loops through the parts in the path, recursing through the `directory` + // `Tree` until we get to our target directory, creating any missing + // directories along the way.
+ for part in path { + let tree_item = directory + .0 + .entry((*part).into()) + .or_insert_with(|| Box::new(TreeItem::Tree(Tree::default()))); + + if let TreeItem::Tree(d) = tree_item.as_mut() { + directory = d; + } else { + return Err(Error::NotDirectory(part)); + } + } + + // wrap the file in a Blob so it's ready for writing into the packfile, and also + // allows us to grab the hash of the file for use in the tree + let entry = PackFileEntry::Blob(content); + let file_hash = entry.hash()?; + + // todo: what should we do on overwrite? + directory + .0 + .insert(file.into(), Box::new(TreeItem::Blob(file_hash))); + + self.packfile_entries.insert(file_hash, entry); + + Ok(()) + } + + /// Finalises this `GitRepository` by writing a commit to the `packfile_entries`, + /// all the files currently in the `tree`, returning all the packfile entries + /// and also the commit hash so it can be referred to by `ls-ref`s. + #[instrument(skip(self, name, email, message), err)] + pub fn commit( + mut self, + name: &'static str, + email: &'static str, + message: &'static str, + ) -> Result<(HashOutput, Vec), Error> { + // gets the hash of the entire tree from the root + let tree_hash = self + .tree + .into_packfile_entries(&mut self.packfile_entries)?; + + // build the commit using the given inputs + let commit_user = CommitUserInfo { + name, + email, + time: time::OffsetDateTime::now_utc(), + }; + + let commit = PackFileEntry::Commit(Commit { + tree: tree_hash, + author: commit_user, + committer: commit_user, + message, + }); + + // write the commit out to the packfile_entries + let commit_hash = commit.hash()?; + self.packfile_entries.insert(commit_hash, commit); + + Ok(( + commit_hash, + self.packfile_entries.into_iter().map(|(_, v)| v).collect(), + )) + } +} + +/// An in-progress tree builder, containing file hashes along with their names or nested trees +#[derive(Default, Debug)] +struct Tree(IndexMap>); + +impl Tree { + /// Recursively writes the whole tree out to the
given `pack_file`, + /// the tree contains pointers to (hashes of) files contained within a + /// directory, and pointers to other directories. + #[instrument(skip(self, pack_file), err)] + fn into_packfile_entries( + self, + pack_file: &mut IndexMap, + ) -> Result { + let mut tree = Vec::with_capacity(self.0.len()); + + for (name, item) in self.0 { + tree.push(match *item { + TreeItem::Blob(hash) => LowLevelTreeItem { + kind: TreeItemKind::File, + sort_name: name.to_string(), + name, + hash, + }, + TreeItem::Tree(tree) => LowLevelTreeItem { + kind: TreeItemKind::Directory, + sort_name: format!("{}/", name), + name, + // we're essentially working through our tree from the bottom up, + // so we can grab the hash of each directory along the way and + // reference it from the parent directory + hash: tree.into_packfile_entries(pack_file)?, + }, + }); + } + + // we need to sort our tree alphabetically, otherwise Git will silently + // stop parsing the rest of the tree once it comes across a non-sorted + // tree entry. + tree.sort_unstable_by(|a, b| a.sort_name.cmp(&b.sort_name)); + + // gets the hash of the tree we've just worked on, and + // pushes it to the packfile + let tree = PackFileEntry::Tree(tree); + let hash = tree.hash()?; + pack_file.insert(hash, tree); + + Ok(hash) + } +} + +/// An item within a `Tree`, this could be a file blob or another directory. +#[derive(Debug)] +enum TreeItem { + /// Refers to a file by hash + Blob(HashOutput), + /// Refers to a nested directory + Tree(Tree), +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..3ec4e32 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,30 @@ +#![deny(clippy::pedantic)] +//! `packfile` is a simple library providing utilities to generate [Git Packfiles] in memory. +//! +//! Usage: +//! +//! ```rust +//! # use packfile::{high_level::GitRepository, low_level::PackFile}; +//! # +//! let mut repo = GitRepository::default(); +//! 
repo.insert(&["path", "to"], "file.txt", "hello world!".into()).unwrap(); +//! let (_commit_hash, entries) = +//! repo.commit("Linus Torvalds", "torvalds@example.com", "Some commit message").unwrap(); +//! +//! let _packfile = PackFile::new(&entries); +//! // ... packfile can then be encoded within a [`SidebandData`] to send the data to a client +//! ``` +//! +//! [Git Packfiles]: https://git-scm.com/book/en/v2/Git-Internals-Packfiles +//! [`SidebandData`]: crate::PktLine::SidebandData + +#[cfg(feature = "tokio-util")] +pub mod codec; +mod error; +pub mod high_level; +pub mod low_level; +mod packet_line; +mod util; + +pub use error::Error; +pub use packet_line::PktLine; diff --git a/src/low_level.rs b/src/low_level.rs new file mode 100644 index 0000000..f9ca68d --- /dev/null +++ b/src/low_level.rs @@ -0,0 +1,351 @@ +//! A low-level Git packfile builder. +//! +//! This implementation requires the caller to push directories to the packfile manually, in the +//! order that Git expects. + +use std::{ + convert::TryInto, + fmt::{Display, Formatter, Write}, + io::Write as IoWrite, +}; + +use bytes::{BufMut, Bytes, BytesMut}; +use flate2::{write::ZlibEncoder, Compression}; +use sha1::Digest; +use tracing::instrument; + +use crate::{util::ArcOrCowStr, Error}; + +pub type HashOutput = [u8; 20]; + +// The packfile itself is a very simple format. There is a header, a +// series of packed objects (each with its own header and body) and +// then a checksum trailer. The first four bytes is the string 'PACK', +// which is sort of used to make sure you're getting the start of the +// packfile correctly. This is followed by a 4-byte packfile version +// number and then a 4-byte number of entries in that file.
+pub struct PackFile<'a> { + entries: &'a [PackFileEntry], +} + +impl<'a> PackFile<'a> { + #[must_use] + pub fn new(entries: &'a [PackFileEntry]) -> Self { + Self { entries } + } + + #[must_use] + pub const fn header_size() -> usize { + "PACK".len() + std::mem::size_of::() + std::mem::size_of::() + } + + #[must_use] + pub const fn footer_size() -> usize { + 20 + } + + #[instrument(skip(self, original_buf), err)] + pub fn encode_to(&self, original_buf: &mut BytesMut) -> Result<(), Error> { + let mut buf = original_buf.split_off(original_buf.len()); + buf.reserve(Self::header_size() + Self::footer_size()); + + // header + buf.extend_from_slice(b"PACK"); // magic header + buf.put_u32(2); // version + buf.put_u32( + self.entries + .len() + .try_into() + .map_err(Error::EntriesExceedsU32)?, + ); // number of entries in the packfile + + // body + for entry in self.entries { + entry.encode_to(&mut buf)?; + } + + // footer + buf.extend_from_slice(&sha1::Sha1::digest(&buf[..])); + + original_buf.unsplit(buf); + + Ok(()) + } +} + +#[derive(Debug, Clone)] +pub struct Commit { + pub tree: HashOutput, + // pub parent: [u8; 20], + pub author: CommitUserInfo, + pub committer: CommitUserInfo, + // pub gpgsig: &str, + pub message: &'static str, +} + +impl Commit { + #[instrument(skip(self, out), err)] + fn encode_to(&self, out: &mut BytesMut) -> Result<(), Error> { + let mut tree_hex = [0_u8; 20 * 2]; + hex::encode_to_slice(self.tree, &mut tree_hex).map_err(Error::EncodeTreeHash)?; + + out.write_str("tree ")?; + out.extend_from_slice(&tree_hex); + out.write_char('\n')?; + + writeln!(out, "author {}", self.author)?; + writeln!(out, "committer {}", self.committer)?; + write!(out, "\n{}", self.message)?; + + Ok(()) + } + + #[must_use] + pub fn size(&self) -> usize { + let mut len = 0; + len += "tree ".len() + (self.tree.len() * 2) + "\n".len(); + len += "author ".len() + self.author.size() + "\n".len(); + len += "committer ".len() + self.committer.size() + "\n".len(); + len += 
"\n".len() + self.message.len(); + len + } +} + +#[derive(Clone, Copy, Debug)] +pub struct CommitUserInfo { + pub name: &'static str, + pub email: &'static str, + pub time: time::OffsetDateTime, +} + +impl Display for CommitUserInfo { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{} <{}> {} +0000", + self.name, + self.email, + self.time.unix_timestamp() + ) + } +} + +impl CommitUserInfo { + #[must_use] + pub fn size(&self) -> usize { + let timestamp_len = itoa::Buffer::new().format(self.time.unix_timestamp()).len(); + + self.name.len() + + "< ".len() + + self.email.len() + + "> ".len() + + timestamp_len + + " +0000".len() + } +} + +#[derive(Debug, Copy, Clone)] +pub enum TreeItemKind { + File, + Directory, +} + +impl TreeItemKind { + #[must_use] + pub const fn mode(&self) -> &'static str { + match self { + Self::File => "100644", + Self::Directory => "40000", + } + } +} + +#[derive(Debug)] +pub struct TreeItem { + pub kind: TreeItemKind, + pub name: ArcOrCowStr, + pub hash: HashOutput, + pub sort_name: String, +} + +// `[mode] [name]\0[hash]` +impl TreeItem { + #[instrument(skip(self, out), err)] + fn encode_to(&self, out: &mut BytesMut) -> Result<(), Error> { + out.write_str(self.kind.mode())?; + write!(out, " {}\0", self.name)?; + out.extend_from_slice(&self.hash); + Ok(()) + } + + #[must_use] + pub fn size(&self) -> usize { + self.kind.mode().len() + " ".len() + self.name.len() + "\0".len() + self.hash.len() + } +} + +#[derive(Debug)] // could be copy but Vec> +pub enum PackFileEntry { + // jordan@Jordans-MacBook-Pro-2 0d % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - f5/473259d9674ed66239766a013f96a3550374e3 | gzip -dc + // commit 1068tree 0d586b48bc42e8591773d3d8a7223551c39d453c + // parent c2a862612a14346ae95234f26efae1ee69b5b7a9 + // author Jordan Doyle 1630244577 +0100 + // committer Jordan Doyle 1630244577 +0100 + // gpgsig -----BEGIN PGP SIGNATURE----- + // + // 
iQIzBAABCAAdFiEEMn1zof7yzaURQBGDHqa65vZtxJoFAmErjuEACgkQHqa65vZt + // xJqhvhAAieKXnGRjT926qzozcvarC8D3TlA+Z1wVXueTAWqfusNIP0zCun/crOb2 + // tOULO+/DXVBmwu5eInAf+t/wvlnIsrzJonhVr1ZT0f0vDX6fs2vflWg4UCVEuTsZ + // tg+aTjcibwnmViIM9XVOzhU8Au2OIqMQLyQOMWSt8NhY0W2WhBCdQvhktvK1V8W6 + // omPs04SrR39xWBDQaxsXYxq/1ZKUYXDwudvEfv14EvrxG1vWumpUVJd7Ib5w4gXX + // fYa95DxYL720ZaiWPIYEG8FMBzSOpo6lUzY9g2/o/wKwSQZJNvpaMGCuouy8Fb+E + // UaqC0XPxqpKG9duXPgCldUr+P7++48CF5zc358RBGz5OCNeTREsIQQo5PUO1k+wO + // FnGOQTT8vvNOrxBgb3QgKu67RVwWDc6JnQCNpUrhUJrXMDWnYLBqo4Y+CdKGSQ4G + // hW8V/hVTOlJZNi8bbU4v53cxh4nXiMM6NKUblUKs65ar3/2dkojwunz7r7GVZ6mG + // QUpr9+ybG61XDqd1ad1A/B/i3WdWixTmJS3K/4uXjFjFX1f3RAk7O0gHc9I8HYOE + // Vd8UsHzLOWAUHeaqbsd6xx3GCXF4D5D++kh9OY9Ov7CXlqbYbHd6Atg+PQ7VnqNf + // bDqWN0Q2qcKX3k4ggtucmkkA6gP+K3+F5ANQj3AsGMQeddowC0Y= + // =fXoH + // -----END PGP SIGNATURE----- + // + // test + Commit(Commit), + // jordan@Jordans-MacBook-Pro-2 0d % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - 0d/586b48bc42e8591773d3d8a7223551c39d453c | gzip -dc + // tree 20940000 .cargo���CYy��Ve�������100644 .gitignore�K��_ow�]����4�n�ݺ100644 Cargo.lock�7�3-�?/�� + // kt��c0C�100644 Cargo.toml�6�&(��]\8@�SHA�]f40000 src0QW��ƅ���b[�!�S&N�100644 test�G2Y�gN�b9vj?��Ut� + Tree(Vec), + // jordan@Jordans-MacBook-Pro-2 objects % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - f5/473259d9674ed66239766a013f96a3550374e3| gzip -dc + // blob 23try and find me in .git + Blob(Bytes), + // Tag, + // OfsDelta, + // RefDelta, +} + +impl PackFileEntry { + #[instrument(skip(self, buf))] + fn write_header(&self, buf: &mut BytesMut) { + let mut size = self.uncompressed_size(); + + // write header + { + let mut val = 0b1000_0000_u8; + + val |= match self { + Self::Commit(_) => 0b001, + Self::Tree(_) => 0b010, + Self::Blob(_) => 0b011, + // Self::Tag => 0b100, + // Self::OfsDelta => 0b110, + // Self::RefDelta => 0b111, + } << 4; + + // pack the 4 LSBs of the size into the header + 
#[allow(clippy::cast_possible_truncation)] // value is masked + { + val |= (size & 0b1111) as u8; + } + size >>= 4; + + buf.put_u8(val); + } + + // write size bytes + while size != 0 { + // read 7 LSBs from the `size` and push them off for the next iteration + #[allow(clippy::cast_possible_truncation)] // value is masked + let mut val = (size & 0b111_1111) as u8; + size >>= 7; + + if size != 0 { + // MSB set to 1 implies there's more size bytes to come, otherwise + // the data starts after this byte + val |= 1 << 7; + } + + buf.put_u8(val); + } + } + + #[instrument(skip(self, original_out), err)] + pub fn encode_to(&self, original_out: &mut BytesMut) -> Result<(), Error> { + self.write_header(original_out); // TODO: this needs space reserving for it + + // todo is there a way to stream through the zlibencoder so we don't have to + // have this intermediate bytesmut and vec? + let mut out = BytesMut::new(); + + let size = self.uncompressed_size(); + original_out.reserve(size); + // the data ends up getting compressed but we'll need at least this many bytes + out.reserve(size); + + match self { + Self::Commit(commit) => { + commit.encode_to(&mut out)?; + } + Self::Tree(items) => { + for item in items { + item.encode_to(&mut out)?; + } + } + Self::Blob(data) => { + out.extend_from_slice(data); + } + } + + debug_assert_eq!(out.len(), size); + + let mut e = ZlibEncoder::new(Vec::new(), Compression::default()); + e.write_all(&out).map_err(Error::CompressWrite)?; + let compressed_data = e.finish().map_err(Error::Compress)?; + + original_out.extend_from_slice(&compressed_data); + + Ok(()) + } + + #[instrument(skip(self))] + #[must_use] + pub fn uncompressed_size(&self) -> usize { + match self { + Self::Commit(commit) => commit.size(), + Self::Tree(items) => items.iter().map(TreeItem::size).sum(), + Self::Blob(data) => data.len(), + } + } + + // wen const generics for RustCrypto? 
:-( + #[instrument(skip(self), err)] + pub fn hash(&self) -> Result { + let size = self.uncompressed_size(); + + let file_prefix = match self { + Self::Commit(_) => "commit", + Self::Tree(_) => "tree", + Self::Blob(_) => "blob", + }; + + let size_len = itoa::Buffer::new().format(size).len(); + + let mut out = + BytesMut::with_capacity(file_prefix.len() + " ".len() + size_len + "\n".len() + size); + + write!(out, "{} {}\0", file_prefix, size)?; + match self { + Self::Commit(commit) => { + commit.encode_to(&mut out)?; + } + Self::Tree(items) => { + for item in items { + item.encode_to(&mut out)?; + } + } + Self::Blob(blob) => { + out.extend_from_slice(blob); + } + } + + Ok(sha1::Sha1::digest(&out).into()) + } +} diff --git a/src/packet_line.rs b/src/packet_line.rs new file mode 100644 index 0000000..5556760 --- /dev/null +++ b/src/packet_line.rs @@ -0,0 +1,78 @@ +use std::fmt::Write; + +use bytes::{BufMut, BytesMut}; +use tracing::instrument; + +use crate::{low_level::PackFile, Error}; + +/// A wrapper containing every possible type of message that can be sent to a Git client. +pub enum PktLine<'a> { + /// General data sent to a client, generally a UTF-8 encoded string. + Data(&'a [u8]), + /// Similar to a data packet, but used during packfile sending to indicate this + /// packet is a block of data by prepending a byte containing the u8 `1`. + SidebandData(PackFile<'a>), + /// Similar to a data packet, but used during packfile sending to indicate this + /// packet is a status message by prepending a byte containing the u8 `2`. + SidebandMsg(&'a [u8]), + /// Indicates the end of a response. + Flush, + /// Separates sections of a response. + Delimiter, + /// Indicates the end of the response, allowing the client to send another request.
+ ResponseEnd, +} + +impl PktLine<'_> { + #[instrument(skip(self, buf), err)] + pub fn encode_to(&self, buf: &mut BytesMut) -> Result<(), Error> { + match self { + Self::Data(data) => { + write!(buf, "{:04x}", data.len() + 4)?; + buf.extend_from_slice(data); + } + Self::SidebandData(packfile) => { + // split the buf off so the cost of counting the bytes to put in the + // data line prefix is just the cost of `unsplit` (an atomic decrement) + let mut data_buf = buf.split_off(buf.len()); + + data_buf.put_u8(1); // sideband, 1 = data + packfile.encode_to(&mut data_buf)?; + + // write into the buf not the data buf so it's at the start of the msg + write!(buf, "{:04x}", data_buf.len() + 4)?; + buf.unsplit(data_buf); + } + Self::SidebandMsg(msg) => { + write!(buf, "{:04x}", msg.len() + 4 + 1)?; + buf.put_u8(2); // sideband, 2 = msg + buf.extend_from_slice(msg); + } + Self::Flush => buf.extend_from_slice(b"0000"), + Self::Delimiter => buf.extend_from_slice(b"0001"), + Self::ResponseEnd => buf.extend_from_slice(b"0002"), + } + + Ok(()) + } +} + +impl<'a> From<&'a str> for PktLine<'a> { + fn from(val: &'a str) -> Self { + PktLine::Data(val.as_bytes()) + } +} + +#[cfg(test)] +mod test { + use bytes::BytesMut; + + #[test] + fn test_pkt_line() { + let mut buffer = BytesMut::new(); + super::PktLine::Data(b"agent=git/2.32.0\n") + .encode_to(&mut buffer) + .unwrap(); + assert_eq!(buffer.as_ref(), b"0015agent=git/2.32.0\n"); + } +} diff --git a/src/util.rs b/src/util.rs new file mode 100644 index 0000000..7375df2 --- /dev/null +++ b/src/util.rs @@ -0,0 +1,59 @@ +use std::{ + borrow::Cow, + fmt::{Display, Formatter}, + ops::Deref, + sync::Arc, +}; + +#[derive(Debug, Hash, PartialEq, Eq)] +pub enum ArcOrCowStr { + Arc(Arc), + Cow(Cow<'static, str>), +} + +impl From> for ArcOrCowStr { + fn from(v: Arc) -> Self { + Self::Arc(v) + } +} + +impl From> for ArcOrCowStr { + fn from(v: Cow<'static, str>) -> Self { + Self::Cow(v) + } +} + +impl From<&'static str> for ArcOrCowStr { + fn 
from(v: &'static str) -> Self { + Self::Cow(Cow::Borrowed(v)) + } +} + +impl From for ArcOrCowStr { + fn from(v: String) -> Self { + Self::Cow(Cow::Owned(v)) + } +} + +impl AsRef for ArcOrCowStr { + fn as_ref(&self) -> &str { + match self { + Self::Arc(v) => v.as_ref(), + Self::Cow(v) => v.as_ref(), + } + } +} + +impl Deref for ArcOrCowStr { + type Target = str; + + fn deref(&self) -> &Self::Target { + self.as_ref() + } +} + +impl Display for ArcOrCowStr { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(&**self, f) + } +} -- libgit2 1.7.2