Initial commit
Diff
.github/dependabot.yml | 12 +-
.github/workflows/audit.yml | 16 ++-
.github/workflows/audit_cron.yml | 14 ++-
.github/workflows/ci.yml | 62 +++++++-
.gitignore | 3 +-
Cargo.toml | 27 +++-
LICENSE | 13 +-
src/codec.rs | 151 +++++++++++++++++-
src/error.rs | 28 +++-
src/high_level.rs | 180 +++++++++++++++++++++-
src/lib.rs | 30 +++-
src/low_level.rs | 351 ++++++++++++++++++++++++++++++++++++++++-
src/packet_line.rs | 78 +++++++++-
src/util.rs | 59 +++++++-
14 files changed, 1024 insertions(+)
@@ -0,0 +1,12 @@
# Dependabot configuration (schema version 2).
version: 2
updates:
# Watch the Cargo manifest located at the repository root.
- package-ecosystem: "cargo"
directory: "/"
schedule:
# Look for dependency updates once a month.
interval: "monthly"
@@ -0,0 +1,16 @@
# Runs a security audit whenever a pushed commit touches a Cargo manifest or lockfile.
name: Security audit
on:
  push:
    paths:
      - '**/Cargo.toml'
      - '**/Cargo.lock'
jobs:
  security_audit:
    runs-on: ubuntu-latest
    steps:
      # Same checkout major version as the other workflows in this repository.
      - uses: actions/checkout@v2
      - uses: actions-rs/audit-check@v1
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
@@ -0,0 +1,14 @@
# Scheduled variant of the security audit, independent of pushes.
name: Security audit (cron)
on:
schedule:
# Minute 0, hour 0, every day.
- cron: '0 0 * * *'
jobs:
audit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions-rs/audit-check@v1
with:
token: ${{ secrets.GITHUB_TOKEN }}
@@ -0,0 +1,62 @@
# Continuous integration: type-check, test, formatting check and lint on every push and PR.
on: [push, pull_request]

name: CI

jobs:
  check:
    name: Check
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: actions-rs/toolchain@v1
        with:
          profile: minimal
          toolchain: stable
          override: true
      - uses: actions-rs/cargo@v1
        with:
          command: check

  test:
    name: Test Suite
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: actions-rs/toolchain@v1
        with:
          profile: minimal
          toolchain: stable
          override: true
      - uses: actions-rs/cargo@v1
        with:
          command: test

  fmt:
    name: Rustfmt
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: actions-rs/toolchain@v1
        with:
          profile: minimal
          toolchain: stable
          override: true
      - run: rustup component add rustfmt
      - uses: actions-rs/cargo@v1
        with:
          command: fmt
          # Without `--check`, `cargo fmt` rewrites the files in the checkout and
          # exits 0, so this job could never fail on badly formatted code.
          args: --all -- --check

  clippy:
    name: Clippy
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: actions-rs/toolchain@v1
        with:
          profile: minimal
          toolchain: stable
          override: true
      - run: rustup component add clippy
      - uses: actions-rs/cargo@v1
        with:
          command: clippy
@@ -0,0 +1,3 @@
/target
/Cargo.lock
.idea/
@@ -0,0 +1,27 @@
# Crate manifest for `packfile`.
[package]
name = "packfile"
authors = ["Jordan Doyle <jordan@doyle.la>"]
description = "A simple library providing utilities to generate Git Packfiles in memory and send them to clients"
version = "0.1.0"
edition = "2021"
license = "WTFPL"
keywords = ["git", "packfile", "in-memory", "protocol"]
categories = ["development-tools"]
# CI configuration is not shipped in the published crate.
exclude = ["/.github"]
[dependencies]
bytes = "1.2"
flate2 = "1.0"
hex = "0.4"
indexmap = "1.9"
itoa = "1.0"
sha1 = "0.10"
thiserror = "1.0"
time = "0.3.15"
# Optional: only the `codec` module (gated on the `tokio-util` feature) uses it.
tokio-util = { version = "0.7", features = ["codec"], optional = true }
tracing = "0.1"
[features]
# The codec is compiled in unless default features are disabled.
default = ["tokio-util"]
@@ -0,0 +1,13 @@
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
Version 2, December 2004
Copyright (C) 2004 Sam Hocevar <sam@hocevar.net>
Everyone is permitted to copy and distribute verbatim or modified
copies of this license document, and changing it is allowed as long
as the name is changed.
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. You just DO WHAT THE FUCK YOU WANT TO.
@@ -0,0 +1,151 @@
#![allow(clippy::module_name_repetitions)]
use std::ops::RangeInclusive;
use bytes::{Buf, Bytes, BytesMut};
use tokio_util::codec;
use tracing::instrument;
use crate::{packet_line::PktLine, Error};
/// Inclusive range of total packet lengths (the four-byte length prefix included) that the
/// decoder accepts for a data packet.
const ALLOWED_PACKET_LENGTH: RangeInclusive<usize> = 4..=65520;
/// Stateless `tokio_util` encoder that serialises [`PktLine`]s into an output buffer.
pub struct Encoder;

impl codec::Encoder<PktLine<'_>> for Encoder {
    type Error = Error;

    /// Appends the wire representation of `item` to `dst`.
    fn encode(&mut self, item: PktLine<'_>, dst: &mut BytesMut) -> Result<(), Self::Error> {
        // `PktLine::encode_to` already yields `Result<(), Error>`, so it is returned as-is.
        item.encode_to(dst)
    }
}
/// A group of data packets received from the peer, terminated by a flush packet.
#[derive(Debug, Default, PartialEq, Eq)]
pub struct GitCommand {
/// Payload of the first non-empty data packet of the group (trailing `\n` removed).
pub command: Bytes,
/// Payloads of the data packets decoded after `command` was set (trailing `\n` removed).
pub metadata: Vec<Bytes>,
}
#[derive(Default)]
pub struct GitCodec {
command: GitCommand,
}
impl codec::Decoder for GitCodec {
type Item = GitCommand;
type Error = Error;
#[instrument(skip(self, src), err)]
fn decode(&mut self, src: &mut bytes::BytesMut) -> Result<Option<Self::Item>, Self::Error> {
loop {
if src.len() < 4 {
return Ok(None);
}
let mut length_bytes = [0_u8; 4];
length_bytes.copy_from_slice(&src[..4]);
let length = u16::from_str_radix(
std::str::from_utf8(&length_bytes).map_err(Error::ParseLengthBytes)?,
16,
)
.map_err(Error::ParseLengthAsHex)? as usize;
if length == 0 {
src.advance(4);
return Ok(Some(std::mem::take(&mut self.command)));
} else if length == 1 || length == 2 {
src.advance(4);
continue;
} else if !ALLOWED_PACKET_LENGTH.contains(&length) {
return Err(Error::PacketLengthExceedsSpec(
ALLOWED_PACKET_LENGTH,
length,
));
}
if src.len() < length {
src.reserve(length - src.len());
return Ok(None);
}
let mut data = src.split_to(length).freeze();
data.advance(4);
if data.ends_with(b"\n") {
data.truncate(data.len() - 1);
}
if self.command.command.is_empty() {
self.command.command = data;
} else {
self.command.metadata.push(data);
}
}
}
}
#[cfg(test)]
mod test {
use bytes::{Bytes, BytesMut};
use std::fmt::Write;
use tokio_util::codec::Decoder;
/// Feeds the decoder incrementally and checks what it yields after each step.
#[test]
fn decode() {
let mut codec = super::GitCodec::default();
let mut bytes = BytesMut::new();
// The prefix announces 0x15 = 21 bytes but only 20 are buffered: nothing can be decoded yet.
bytes.write_str("0015agent=git/2.32.0").unwrap();
let res = codec.decode(&mut bytes).unwrap();
assert_eq!(res, None);
// The packet is now complete, but no flush packet has been seen, so no command is emitted.
bytes.write_char('\n').unwrap();
let res = codec.decode(&mut bytes).unwrap();
assert_eq!(res, None);
// The flush packet releases the accumulated command; its trailing newline was stripped.
bytes.write_str("0000").unwrap();
let res = codec.decode(&mut bytes).unwrap();
assert_eq!(
res,
Some(super::GitCommand {
command: Bytes::from_static(b"agent=git/2.32.0"),
metadata: vec![],
})
);
// A flush with nothing before it yields an empty (default) command.
bytes.write_str("0000").unwrap();
let res = codec.decode(&mut bytes).unwrap();
assert_eq!(
res,
Some(super::GitCommand {
command: Bytes::new(),
metadata: vec![],
})
);
// Packets "0002" and "0001" are skipped; "a" becomes the command and "b" its metadata.
bytes.write_str("0002").unwrap();
bytes.write_str("0005a").unwrap();
bytes.write_str("0001").unwrap();
bytes.write_str("0005b").unwrap();
bytes.write_str("0000").unwrap();
let res = codec.decode(&mut bytes).unwrap();
assert_eq!(
res,
Some(super::GitCommand {
command: Bytes::from_static(b"a"),
metadata: vec![Bytes::from_static(b"b")],
})
);
}
}
@@ -0,0 +1,28 @@
use std::ops::RangeInclusive;
use thiserror::Error;
/// Every error this crate can return.
#[derive(Error, Debug)]
pub enum Error {
/// A formatted write into a buffer failed (converted from `std::fmt::Error`).
#[error("Failed to write formatted string to buffer: {0}")]
BufferWrite(#[from] std::fmt::Error),
/// A path component names an existing entry that is not a directory.
#[error("{0} is not a directory")]
NotDirectory(&'static str),
/// The four-byte packet length prefix was not valid UTF-8.
#[cfg(feature = "tokio-util")]
#[error("Failed to parse utf-8 encoded prefix: {0}")]
ParseLengthBytes(std::str::Utf8Error),
/// The packet length prefix could not be parsed as a hexadecimal number.
#[cfg(feature = "tokio-util")]
#[error("Failed to parse length from hex string: {0}")]
ParseLengthAsHex(std::num::ParseIntError),
/// Feeding uncompressed object data into the zlib encoder failed.
#[error("Failed to write bytes to compress to zlib: {0}")]
CompressWrite(std::io::Error),
/// Finishing the zlib stream failed.
#[error("Failed to compress packfile with zlib: {0}")]
Compress(std::io::Error),
/// Hex-encoding a tree hash failed.
#[error("Failed to encode tree hash to hex: {0}")]
EncodeTreeHash(hex::FromHexError),
/// The number of packfile entries does not fit into a `u32`.
#[error("Entries in packfile exceeds a u32: {0}")]
EntriesExceedsU32(std::num::TryFromIntError),
/// A packet length lies outside the allowed range; holds the range and the offending length.
#[error("Packet length is not in the range {0:?} as defined by the spec, got {1}")]
PacketLengthExceedsSpec(RangeInclusive<usize>, usize),
/// A generic I/O error (converted from `std::io::Error`).
#[error("I/O error: {0}")]
Io(#[from] std::io::Error),
}
@@ -0,0 +1,180 @@
use bytes::Bytes;
use indexmap::IndexMap;
use tracing::instrument;
use crate::{
low_level::{
Commit, CommitUserInfo, HashOutput, PackFileEntry, TreeItem as LowLevelTreeItem,
TreeItemKind,
},
util::ArcOrCowStr,
Error,
};
#[derive(Default, Debug)]
pub struct GitRepository {
packfile_entries: IndexMap<HashOutput, PackFileEntry>,
tree: Tree,
}
impl GitRepository {
#[instrument(skip(self, file, content), err)]
pub fn insert(
&mut self,
path: &[&'static str],
file: impl Into<ArcOrCowStr>,
content: Bytes,
) -> Result<(), Error> {
let mut directory = &mut self.tree;
for part in path {
let tree_item = directory
.0
.entry((*part).into())
.or_insert_with(|| Box::new(TreeItem::Tree(Tree::default())));
if let TreeItem::Tree(d) = tree_item.as_mut() {
directory = d;
} else {
return Err(Error::NotDirectory(part));
}
}
let entry = PackFileEntry::Blob(content);
let file_hash = entry.hash()?;
directory
.0
.insert(file.into(), Box::new(TreeItem::Blob(file_hash)));
self.packfile_entries.insert(file_hash, entry);
Ok(())
}
#[instrument(skip(self, name, email, message), err)]
pub fn commit(
mut self,
name: &'static str,
email: &'static str,
message: &'static str,
) -> Result<(HashOutput, Vec<PackFileEntry>), Error> {
let tree_hash = self
.tree
.into_packfile_entries(&mut self.packfile_entries)?;
let commit_user = CommitUserInfo {
name,
email,
time: time::OffsetDateTime::now_utc(),
};
let commit = PackFileEntry::Commit(Commit {
tree: tree_hash,
author: commit_user,
committer: commit_user,
message,
});
let commit_hash = commit.hash()?;
self.packfile_entries.insert(commit_hash, commit);
Ok((
commit_hash,
self.packfile_entries.into_iter().map(|(_, v)| v).collect(),
))
}
}
#[derive(Default, Debug)]
struct Tree(IndexMap<ArcOrCowStr, Box<TreeItem>>);
impl Tree {
#[instrument(skip(self, pack_file), err)]
fn into_packfile_entries(
self,
pack_file: &mut IndexMap<HashOutput, PackFileEntry>,
) -> Result<HashOutput, Error> {
let mut tree = Vec::with_capacity(self.0.len());
for (name, item) in self.0 {
tree.push(match *item {
TreeItem::Blob(hash) => LowLevelTreeItem {
kind: TreeItemKind::File,
sort_name: name.to_string(),
name,
hash,
},
TreeItem::Tree(tree) => LowLevelTreeItem {
kind: TreeItemKind::Directory,
sort_name: format!("{}/", name),
name,
hash: tree.into_packfile_entries(pack_file)?,
},
});
}
tree.sort_unstable_by(|a, b| a.sort_name.cmp(&b.sort_name));
let tree = PackFileEntry::Tree(tree);
let hash = tree.hash()?;
pack_file.insert(hash, tree);
Ok(hash)
}
}
/// An entry of a directory under construction.
#[derive(Debug)]
enum TreeItem {
/// A file, referenced by the hash of its blob.
Blob(HashOutput),
/// A nested directory.
Tree(Tree),
}
@@ -0,0 +1,30 @@
#![deny(clippy::pedantic)]
#[cfg(feature = "tokio-util")]
pub mod codec;
mod error;
pub mod high_level;
pub mod low_level;
mod packet_line;
mod util;
pub use error::Error;
pub use packet_line::PktLine;
@@ -0,0 +1,351 @@
use std::{
convert::TryInto,
fmt::{Display, Formatter, Write},
io::Write as IoWrite,
};
use bytes::{BufMut, Bytes, BytesMut};
use flate2::{write::ZlibEncoder, Compression};
use sha1::Digest;
use tracing::instrument;
use crate::{util::ArcOrCowStr, Error};
/// Raw 20-byte object hash (this file fills it from a SHA-1 digest).
pub type HashOutput = [u8; 20];
pub struct PackFile<'a> {
entries: &'a [PackFileEntry],
}
impl<'a> PackFile<'a> {
#[must_use]
pub fn new(entries: &'a [PackFileEntry]) -> Self {
Self { entries }
}
#[must_use]
pub const fn header_size() -> usize {
"PACK".len() + std::mem::size_of::<u32>() + std::mem::size_of::<u32>()
}
#[must_use]
pub const fn footer_size() -> usize {
20
}
#[instrument(skip(self, original_buf), err)]
pub fn encode_to(&self, original_buf: &mut BytesMut) -> Result<(), Error> {
let mut buf = original_buf.split_off(original_buf.len());
buf.reserve(Self::header_size() + Self::footer_size());
buf.extend_from_slice(b"PACK"); buf.put_u32(2); buf.put_u32(
self.entries
.len()
.try_into()
.map_err(Error::EntriesExceedsU32)?,
);
for entry in self.entries {
entry.encode_to(&mut buf)?;
}
buf.extend_from_slice(&sha1::Sha1::digest(&buf[..]));
original_buf.unsplit(buf);
Ok(())
}
}
/// A commit object.
#[derive(Debug, Clone)]
pub struct Commit {
    /// Hash of the root tree this commit points at.
    pub tree: HashOutput,
    pub author: CommitUserInfo,
    pub committer: CommitUserInfo,
    pub message: &'static str,
}

impl Commit {
    /// Appends the textual commit body (`tree`, `author` and `committer` lines, a blank line,
    /// then the message) to `out`.
    #[instrument(skip(self, out), err)]
    fn encode_to(&self, out: &mut BytesMut) -> Result<(), Error> {
        let mut tree_hex = [0_u8; 20 * 2];
        hex::encode_to_slice(self.tree, &mut tree_hex).map_err(Error::EncodeTreeHash)?;

        out.write_str("tree ")?;
        out.extend_from_slice(&tree_hex);
        out.write_char('\n')?;

        for (label, who) in [("author", &self.author), ("committer", &self.committer)] {
            writeln!(out, "{} {}", label, who)?;
        }

        write!(out, "\n{}", self.message)?;

        Ok(())
    }

    /// Exact number of bytes `encode_to` writes for this commit.
    #[must_use]
    pub fn size(&self) -> usize {
        let tree_line = "tree ".len() + (self.tree.len() * 2) + "\n".len();
        let author_line = "author ".len() + self.author.size() + "\n".len();
        let committer_line = "committer ".len() + self.committer.size() + "\n".len();
        let message_part = "\n".len() + self.message.len();

        tree_line + author_line + committer_line + message_part
    }
}
/// Identity and timestamp used for the author and committer lines of a commit.
#[derive(Clone, Copy, Debug)]
pub struct CommitUserInfo {
    pub name: &'static str,
    pub email: &'static str,
    pub time: time::OffsetDateTime,
}

impl Display for CommitUserInfo {
    /// Formats as `name <email> unix-timestamp +0000`.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self { name, email, time } = self;
        write!(f, "{} <{}> {} +0000", name, email, time.unix_timestamp())
    }
}

impl CommitUserInfo {
    /// Number of bytes the `Display` output occupies, computed without formatting the whole
    /// string.
    #[must_use]
    pub fn size(&self) -> usize {
        let mut digits = itoa::Buffer::new();
        let timestamp_len = digits.format(self.time.unix_timestamp()).len();

        [
            self.name.len(),
            " <".len(),
            self.email.len(),
            "> ".len(),
            timestamp_len,
            " +0000".len(),
        ]
        .iter()
        .sum()
    }
}
/// The kind of object a tree entry refers to.
#[derive(Debug, Copy, Clone)]
pub enum TreeItemKind {
    File,
    Directory,
}

impl TreeItemKind {
    /// Mode string written in front of the entry name inside a tree object.
    #[must_use]
    pub const fn mode(&self) -> &'static str {
        match *self {
            Self::Directory => "40000",
            Self::File => "100644",
        }
    }
}
/// A single entry of a tree object.
#[derive(Debug)]
pub struct TreeItem {
    pub kind: TreeItemKind,
    pub name: ArcOrCowStr,
    /// Hash of the blob or tree this entry refers to.
    pub hash: HashOutput,
    /// Key used to order the entries of a tree.
    pub sort_name: String,
}

impl TreeItem {
    /// Appends `<mode> <name>\0<raw hash>` to `out`.
    #[instrument(skip(self, out), err)]
    fn encode_to(&self, out: &mut BytesMut) -> Result<(), Error> {
        let mode = self.kind.mode();
        out.write_str(mode)?;
        write!(out, " {}\0", self.name)?;
        out.extend_from_slice(&self.hash);
        Ok(())
    }

    /// Exact number of bytes `encode_to` writes for this entry.
    #[must_use]
    pub fn size(&self) -> usize {
        let header = self.kind.mode().len() + " ".len() + self.name.len() + "\0".len();
        header + self.hash.len()
    }
}
#[derive(Debug)] pub enum PackFileEntry {
Commit(Commit),
Tree(Vec<TreeItem>),
Blob(Bytes),
}
impl PackFileEntry {
#[instrument(skip(self, buf))]
fn write_header(&self, buf: &mut BytesMut) {
let mut size = self.uncompressed_size();
{
let mut val = 0b1000_0000_u8;
val |= match self {
Self::Commit(_) => 0b001,
Self::Tree(_) => 0b010,
Self::Blob(_) => 0b011,
} << 4;
#[allow(clippy::cast_possible_truncation)] {
val |= (size & 0b1111) as u8;
}
size >>= 4;
buf.put_u8(val);
}
while size != 0 {
#[allow(clippy::cast_possible_truncation)] let mut val = (size & 0b111_1111) as u8;
size >>= 7;
if size != 0 {
val |= 1 << 7;
}
buf.put_u8(val);
}
}
#[instrument(skip(self, original_out), err)]
pub fn encode_to(&self, original_out: &mut BytesMut) -> Result<(), Error> {
self.write_header(original_out);
let mut out = BytesMut::new();
let size = self.uncompressed_size();
original_out.reserve(size);
out.reserve(size);
match self {
Self::Commit(commit) => {
commit.encode_to(&mut out)?;
}
Self::Tree(items) => {
for item in items {
item.encode_to(&mut out)?;
}
}
Self::Blob(data) => {
out.extend_from_slice(data);
}
}
debug_assert_eq!(out.len(), size);
let mut e = ZlibEncoder::new(Vec::new(), Compression::default());
e.write_all(&out).map_err(Error::CompressWrite)?;
let compressed_data = e.finish().map_err(Error::Compress)?;
original_out.extend_from_slice(&compressed_data);
Ok(())
}
#[instrument(skip(self))]
#[must_use]
pub fn uncompressed_size(&self) -> usize {
match self {
Self::Commit(commit) => commit.size(),
Self::Tree(items) => items.iter().map(TreeItem::size).sum(),
Self::Blob(data) => data.len(),
}
}
#[instrument(skip(self), err)]
pub fn hash(&self) -> Result<HashOutput, Error> {
let size = self.uncompressed_size();
let file_prefix = match self {
Self::Commit(_) => "commit",
Self::Tree(_) => "tree",
Self::Blob(_) => "blob",
};
let size_len = itoa::Buffer::new().format(size).len();
let mut out =
BytesMut::with_capacity(file_prefix.len() + " ".len() + size_len + "\n".len() + size);
write!(out, "{} {}\0", file_prefix, size)?;
match self {
Self::Commit(commit) => {
commit.encode_to(&mut out)?;
}
Self::Tree(items) => {
for item in items {
item.encode_to(&mut out)?;
}
}
Self::Blob(blob) => {
out.extend_from_slice(blob);
}
}
Ok(sha1::Sha1::digest(&out).into())
}
}
@@ -0,0 +1,78 @@
use std::fmt::Write;
use bytes::{BufMut, BytesMut};
use tracing::instrument;
use crate::{low_level::PackFile, Error};
pub enum PktLine<'a> {
Data(&'a [u8]),
SidebandData(PackFile<'a>),
SidebandMsg(&'a [u8]),
Flush,
Delimiter,
ResponseEnd,
}
impl PktLine<'_> {
#[instrument(skip(self, buf), err)]
pub fn encode_to(&self, buf: &mut BytesMut) -> Result<(), Error> {
match self {
Self::Data(data) => {
write!(buf, "{:04x}", data.len() + 4)?;
buf.extend_from_slice(data);
}
Self::SidebandData(packfile) => {
let mut data_buf = buf.split_off(buf.len());
data_buf.put_u8(1); packfile.encode_to(&mut data_buf)?;
write!(buf, "{:04x}", data_buf.len() + 4)?;
buf.unsplit(data_buf);
}
Self::SidebandMsg(msg) => {
write!(buf, "{:04x}", msg.len() + 4 + 1)?;
buf.put_u8(2); buf.extend_from_slice(msg);
}
Self::Flush => buf.extend_from_slice(b"0000"),
Self::Delimiter => buf.extend_from_slice(b"0001"),
Self::ResponseEnd => buf.extend_from_slice(b"0002"),
}
Ok(())
}
}
impl<'a> From<&'a str> for PktLine<'a> {
fn from(val: &'a str) -> Self {
PktLine::Data(val.as_bytes())
}
}
#[cfg(test)]
mod test {
use bytes::BytesMut;
/// A data packet is prefixed with its total length (payload plus four prefix bytes) in hex.
#[test]
fn test_pkt_line() {
let mut buffer = BytesMut::new();
// 17 payload bytes + 4 prefix bytes = 21 = 0x15.
super::PktLine::Data(b"agent=git/2.32.0\n")
.encode_to(&mut buffer)
.unwrap();
assert_eq!(buffer.as_ref(), b"0015agent=git/2.32.0\n");
}
}
@@ -0,0 +1,59 @@
use std::{
borrow::Cow,
fmt::{Display, Formatter},
ops::Deref,
sync::Arc,
};
/// A string that is either reference-counted or a `Cow<'static, str>`.
///
/// Equality and hashing look at the string content only, never at the variant, so the same
/// name always maps to the same key regardless of how it was supplied.
#[derive(Debug)]
pub enum ArcOrCowStr {
    Arc(Arc<str>),
    Cow(Cow<'static, str>),
}

impl PartialEq for ArcOrCowStr {
    fn eq(&self, other: &Self) -> bool {
        // Compare the underlying `str`s; a derived impl would also compare the variant.
        **self == **other
    }
}

impl Eq for ArcOrCowStr {}

impl std::hash::Hash for ArcOrCowStr {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        // Must agree with `PartialEq`: hash the content, not the discriminant.
        std::hash::Hash::hash(&**self, state);
    }
}

impl From<Arc<str>> for ArcOrCowStr {
    fn from(v: Arc<str>) -> Self {
        Self::Arc(v)
    }
}

impl From<Cow<'static, str>> for ArcOrCowStr {
    fn from(v: Cow<'static, str>) -> Self {
        Self::Cow(v)
    }
}

impl From<&'static str> for ArcOrCowStr {
    fn from(v: &'static str) -> Self {
        Self::Cow(Cow::Borrowed(v))
    }
}

impl From<String> for ArcOrCowStr {
    fn from(v: String) -> Self {
        Self::Cow(Cow::Owned(v))
    }
}

impl AsRef<str> for ArcOrCowStr {
    fn as_ref(&self) -> &str {
        match self {
            Self::Arc(v) => v.as_ref(),
            Self::Cow(v) => v.as_ref(),
        }
    }
}

impl Deref for ArcOrCowStr {
    type Target = str;

    fn deref(&self) -> &Self::Target {
        self.as_ref()
    }
}

impl Display for ArcOrCowStr {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        // Delegate to `str` so width and padding flags keep working.
        std::fmt::Display::fmt(&**self, f)
    }
}