Clean up packfile creation
Diff
 Cargo.lock          |  25 +++++++++++++++++++++++++
 Cargo.toml          |   2 ++
 src/main.rs         | 151 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------
 src/git/codec.rs    |   6 ++++--
 src/git/mod.rs      |  35 ++++++++++++++++++++++++++++-------
 src/git/packfile.rs | 375 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------
 6 files changed, 332 insertions(+), 262 deletions(-)
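At a glance, the change replaces the hand-rolled `format_bytes!`/`const_sha1` object construction in `src/main.rs` with typed `PackFileEntry` values that know how to size, hash, and encode themselves, and teaches `PktLine` to emit side-band frames directly. A minimal sketch of how the new pieces fit together, re-assembled from the `main.rs` hunks below (the free-standing `build_pack` function is illustrative only; in the diff this logic lives in the SSH handler's `data` future):

    use crate::git::packfile::{
        Commit, CommitUserInfo, PackFile, PackFileEntry, TreeItem, TreeItemKind,
    };
    use bytes::BytesMut;

    fn build_pack() -> Result<BytesMut, anyhow::Error> {
        // A blob, a tree that references it, and a commit that references the tree.
        let file = PackFileEntry::Blob(b"this is some text inside my cool test file!");
        let tree = PackFileEntry::Tree(vec![TreeItem {
            kind: TreeItemKind::File,
            name: "test",
            hash: file.hash()?,
        }]);
        let user = CommitUserInfo {
            name: "Jordan Doyle",
            email: "jordan@doyle.la",
            time: chrono::Utc::now(),
        };
        let commit = PackFileEntry::Commit(Commit {
            tree: tree.hash()?,
            author: user,
            committer: user,
            message: "cool commit",
        });

        // PackFile::encode_to writes the "PACK" header, each zlib-deflated entry,
        // and the trailing SHA-1 of the stream; main.rs wraps the result in
        // PktLine::SidebandData before writing it to the SSH channel.
        let mut buf = BytesMut::new();
        PackFile::new(vec![commit, tree, file]).encode_to(&mut buf)?;
        Ok(buf)
    }
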
diff --git a/Cargo.lock b/Cargo.lock
@@ -190,6 +190,7 @@
"async-trait",
"axum",
"bytes",
"chrono",
"const-sha1",
"crc",
"env_logger",
@@ -197,6 +198,7 @@
"format-bytes",
"futures",
"hex",
"itoa",
"sha-1",
"thrussh",
"thrussh-keys",
@@ -204,6 +206,19 @@
"tokio-util",
"tower",
"tower-http",
]
[[package]]
name = "chrono"
version = "0.4.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73"
dependencies = [
"libc",
"num-integer",
"num-traits",
"time",
"winapi",
]
[[package]]
@@ -1192,6 +1207,16 @@
"libsodium-sys",
"pkg-config",
"vcpkg",
]
[[package]]
name = "time"
version = "0.1.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438"
dependencies = [
"libc",
"winapi",
]
[[package]]
diff --git a/Cargo.toml b/Cargo.toml
@@ -23,6 +23,8 @@
sha-1 = "0.9"
const-sha1 = "0.2"
crc = "2"
chrono = "0.4"
itoa = "0.4"
format-bytes = "0.1"
diff --git a/src/main.rs b/src/main.rs
@@ -1,26 +1,32 @@
#![deny(clippy::pedantic)]
#[allow(clippy::missing_errors_doc)]
pub mod git;
use crate::git::PktLine;
use crate::git::{
codec::{Encoder, GitCodec},
packfile::{Commit, CommitUserInfo, PackFileEntry, TreeItem, TreeItemKind},
PktLine,
};
use bytes::BufMut;
use bytes::BytesMut;
use bytes::{BytesMut};
use futures::future::Future;
use git::codec::Encoder;
use git::codec::GitCodec;
use std::{fmt::Write, pin::Pin, sync::Arc};
use thrussh::server::{Auth, Session};
use thrussh::*;
use thrussh_keys::*;
use thrussh::{
ChannelId, CryptoVec, server::{self, Auth, Session},
};
use thrussh_keys::key;
use tokio_util::codec::{Decoder, Encoder as TokioEncoder};
#[tokio::main]
#[allow(clippy::semicolon_if_nothing_returned)]
async fn main() {
env_logger::init();
let mut config = thrussh::server::Config::default();
config
.keys
.push(thrussh_keys::key::KeyPair::generate_ed25519().unwrap());
.push(key::KeyPair::generate_ed25519().unwrap());
let config = Arc::new(config);
thrussh::server::run(config, "127.0.0.1:2233", Server)
.await
@@ -54,14 +60,16 @@
session.data(
channel,
CryptoVec::from_slice(self.output_bytes.split().as_ref()),
)
);
}
}
type AsyncHandlerFn = Pin<Box<dyn Future<Output = Result<(Handler, Session), <Handler as server::Handler>::Error>> + Send>>;
impl server::Handler for Handler {
type Error = anyhow::Error;
type FutureAuth = futures::future::Ready<Result<(Self, server::Auth), anyhow::Error>>;
type FutureUnit = Pin<Box<dyn Future<Output = Result<(Self, Session), Self::Error>> + Send>>;
type FutureUnit = AsyncHandlerFn;
type FutureBool = futures::future::Ready<Result<(Self, Session, bool), anyhow::Error>>;
fn finished_auth(self, auth: Auth) -> Self::FutureAuth {
@@ -158,45 +166,33 @@
}
let tree_bytes = format_bytes::format_bytes!(
b"100644 test\0{}",
const_sha1::sha1(&const_sha1::ConstBuffer::from_slice(
"blob 33\0testing this is a test cool test!".as_bytes()
))
.bytes()
);
let tree = format_bytes::format_bytes!(
b"tree {}\0{}",
tree_bytes.len().to_string().as_bytes(),
tree_bytes
);
let tree_hash = hex::encode(sha1::Sha1::digest(&tree));
let commit_bytes = format!(
"tree {}
author Jordan Doyle <jordan@doyle.la> 1630244577 +0100
committer Jordan Doyle <jordan@doyle.la> 1630244577 +0100

test",
tree_hash
);
let commit = format!("commit {}\0{}", commit_bytes.len(), commit_bytes);
let commit_hash = hex::encode(sha1::Sha1::digest(commit.as_bytes()));
let file = PackFileEntry::Blob(b"this is some text inside my cool test file!");
let tree = PackFileEntry::Tree(vec![TreeItem {
kind: TreeItemKind::File,
name: "test",
hash: file.hash()?,
}]);
let commit_user = CommitUserInfo {
name: "Jordan Doyle",
email: "jordan@doyle.la",
time: chrono::Utc::now(),
};
let commit = PackFileEntry::Commit(Commit {
tree: tree.hash()?,
author: commit_user,
committer: commit_user,
message: "cool commit",
});
use sha1::Digest;
println!(
"commit hash: {} - tree hash: {} - file hash: {}",
commit_hash,
tree_hash,
const_sha1::sha1(&const_sha1::ConstBuffer::from_slice(
"blob 33\0testing this is a test cool test!".as_bytes()
))
hex::encode(&commit.hash()?),
hex::encode(&tree.hash()?),
hex::encode(&file.hash()?),
);
@@ -207,7 +203,11 @@
if ls_refs {
self.write(PktLine::Data(
format!("{} HEAD symref-target:refs/heads/master\n", commit_hash).as_bytes(),
format!(
"{} HEAD symref-target:refs/heads/master\n",
hex::encode(&commit.hash()?)
)
.as_bytes(),
))?;
self.write(PktLine::Flush)?;
self.flush(&mut session, channel);
@@ -224,62 +224,15 @@
if done {
self.write(PktLine::Data(b"packfile\n"))?;
{
let mut buf = BytesMut::new();
buf.put_u8(2);
buf.extend_from_slice(b"Hello from chartered!\n");
self.write(PktLine::Data(buf.as_ref()))?;
self.flush(&mut session, channel);
}
let packfile = git::packfile::PackFile::new(vec![
git::packfile::PackFileEntry::new(
git::packfile::PackFileEntryType::Commit,
commit_bytes.as_bytes(),
)?,
git::packfile::PackFileEntry::new(
git::packfile::PackFileEntryType::Tree,
&tree_bytes,
)?,
git::packfile::PackFileEntry::new(
git::packfile::PackFileEntryType::Blob,
b"testing this is a test cool test!",
)?,
]);
{
let mut buf = BytesMut::new();
buf.put_u8(1);
packfile.encode_to(&mut buf)?;
self.write(PktLine::Data(buf.as_ref()))?;
}
self.write(PktLine::SidebandMsg(b"Hello from chartered!\n"))?;
self.flush(&mut session, channel);
let packfile = git::packfile::PackFile::new(vec![commit, tree, file]);
self.write(PktLine::SidebandData(packfile))?;
self.write(PktLine::Flush)?;
self.flush(&mut session, channel);
session.exit_status_request(channel, 0);
session.eof(channel);
session.close(channel);
diff --git a/src/git/codec.rs b/src/git/codec.rs
@@ -1,3 +1,5 @@
#![allow(clippy::module_name_repetitions)]
use bytes::{Buf, Bytes, BytesMut};
use tokio_util::codec;
@@ -28,7 +30,7 @@
return Ok(None);
}
let mut length_bytes = [0u8; 4];
let mut length_bytes = [0_u8; 4];
length_bytes.copy_from_slice(&src[..4]);
let length = u16::from_str_radix(std::str::from_utf8(&length_bytes)?, 16)? as usize;
@@ -42,7 +44,7 @@
return self.decode(src);
}
if length > 65520 || length < 4 {
if !(4..=65520).contains(&length) {
return Err(
std::io::Error::new(std::io::ErrorKind::InvalidData, "protocol abuse").into(),
);
diff --git a/src/git/mod.rs b/src/git/mod.rs
@@ -1,11 +1,19 @@
pub mod codec;
pub mod packfile;
use bytes::BytesMut;
use bytes::{BufMut, BytesMut};
use std::fmt::Write;
use self::packfile::PackFile;
pub enum PktLine<'a> {
Data(&'a [u8]),
SidebandData(PackFile<'a>),
SidebandMsg(&'a [u8]),
Flush,
Delimiter,
ResponseEnd,
@@ -16,8 +24,25 @@
match self {
Self::Data(data) => {
write!(buf, "{:04x}", data.len() + 4)?;
buf.extend_from_slice(&data);
buf.extend_from_slice(data);
}
Self::SidebandData(packfile) => {
let mut data_buf = buf.split_off(buf.len());
data_buf.put_u8(1);
packfile.encode_to(&mut data_buf)?;
write!(buf, "{:04x}", data_buf.len() + 4)?;
buf.unsplit(data_buf);
}
Self::SidebandMsg(msg) => {
write!(buf, "{:04x}", msg.len() + 4 + 1)?;
buf.put_u8(2);
buf.extend_from_slice(msg);
}
Self::Flush => buf.extend_from_slice(b"0000"),
Self::Delimiter => buf.extend_from_slice(b"0001"),
Self::ResponseEnd => buf.extend_from_slice(b"0002"),
@@ -26,12 +51,6 @@
Ok(())
}
}
impl<'a> From<&'a str> for PktLine<'a> {
fn from(val: &'a str) -> Self {
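For reference, the framing these `PktLine` variants produce (the four-hex-digit length counts the length prefix itself, and side-band payloads carry a one-byte band prefix):

    Data(b"packfile\n")                       -> "000d" "packfile\n"
    SidebandMsg(b"Hello from chartered!\n")   -> "001b" 0x02 "Hello from chartered!\n"
    SidebandData(packfile)                    -> "<len>" 0x01 <encoded packfile>
    Flush                                     -> "0000"

Band 1 carries pack data and band 2 carries progress text, which is why the old `main.rs` code pushed a literal `1` or `2` onto its buffers before the payload. `SidebandData` splits the buffer because the length prefix can only be written once the packfile's encoded size is known.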
diff --git a/src/git/packfile.rs b/src/git/packfile.rs
@@ -1,138 +1,164 @@
use bytes::{BufMut, BytesMut};
use const_sha1::{sha1, ConstBuffer};
use flate2::{write::ZlibEncoder, Compression};
use sha1::{Digest, Sha1};
use std::convert::TryInto;
use std::io::Write as IoWrite;
pub struct PackFileIndex<'a> {
pub packfile: &'a PackFile,
use sha1::{
digest::{generic_array::GenericArray, FixedOutputDirty},
Digest, Sha1,
};
use std::{convert::TryInto, fmt::Write, io::Write as IoWrite};
pub struct PackFile<'a> {
entries: Vec<PackFileEntry<'a>>,
}
impl<'a> PackFileIndex<'a> {
pub fn encode_to(self, original_buf: &mut BytesMut) -> Result<(), anyhow::Error> {
let mut buf = original_buf.split_off(original_buf.len());
impl<'a> PackFile<'a> {
#[must_use]
pub fn new(entries: Vec<PackFileEntry<'a>>) -> Self {
Self { entries }
}
buf.extend_from_slice(b"\xfftOc");
buf.put_u32(2);
#[must_use]
pub const fn header_size() -> usize {
"PACK".len() + std::mem::size_of::<u32>() + std::mem::size_of::<u32>()
}
let mut totals_by_first_byte = [0u32; 256];
for entry in &self.packfile.entries {
totals_by_first_byte[entry.uncompressed_sha1[0] as usize] += 1;
}
#[must_use]
pub const fn footer_size() -> usize {
20
}
let mut cumulative = 0;
for i in 0..256usize {
cumulative += totals_by_first_byte[i];
buf.put_u32(cumulative);
}
pub fn encode_to(&self, original_buf: &mut BytesMut) -> Result<(), anyhow::Error> {
let mut buf = original_buf.split_off(original_buf.len());
buf.reserve(Self::header_size() + Self::footer_size());
for entry in &self.packfile.entries {
buf.extend_from_slice(&entry.uncompressed_sha1);
}
buf.extend_from_slice(b"PACK");
buf.put_u32(2);
buf.put_u32(self.entries.len().try_into()?);
for entry in &self.packfile.entries {
buf.put_u32(entry.compressed_crc32);
for entry in &self.entries {
entry.encode_to(&mut buf)?;
}
let mut offset = PackFile::header_size();
for entry in &self.packfile.entries {
offset += entry.compressed_data.len();
let mut offset_be = offset.to_be();
buf.extend_from_slice(&sha1::Sha1::digest(&buf[..]));
while offset_be != 0 {
let mut val = (offset_be & 0b1111111) as u8;
offset_be >>= 7;
original_buf.unsplit(buf);
if offset_be != 0 {
val |= 1 << 7;
}
Ok(())
}
}
buf.put_u8(val);
}
}
pub struct Commit<'a> {
pub tree: GenericArray<u8, <Sha1 as FixedOutputDirty>::OutputSize>,
pub author: CommitUserInfo<'a>,
pub committer: CommitUserInfo<'a>,
pub message: &'a str,
}
buf.extend_from_slice(&self.packfile.hash);
impl Commit<'_> {
fn encode_to(&self, out: &mut BytesMut) -> Result<(), anyhow::Error> {
let mut tree_hex = [0_u8; 20 * 2];
hex::encode_to_slice(self.tree, &mut tree_hex)?;
let mut hasher = Sha1::new();
hasher.update(&buf);
let result = hasher.finalize();
buf.extend_from_slice(result.as_ref());
out.write_str("tree ")?;
out.extend_from_slice(&tree_hex);
out.write_char('\n')?;
original_buf.unsplit(buf);
writeln!(out, "author {}", self.author.encode())?;
writeln!(out, "committer {}", self.committer.encode())?;
write!(out, "\n{}", self.message)?;
Ok(())
}
#[must_use]
pub fn size(&self) -> usize {
let mut len = 0;
len += "tree ".len() + (self.tree.len() * 2) + "\n".len();
len += "author ".len() + self.author.size() + "\n".len();
len += "committer ".len() + self.committer.size() + "\n".len();
len += "\n".len() + self.message.len();
len
}
}
pub struct PackFile {
entries: Vec<PackFileEntry>,
hash: [u8; 20],
#[derive(Copy, Clone, Debug)]
pub struct CommitUserInfo<'a> {
pub name: &'a str,
pub email: &'a str,
pub time: chrono::DateTime<chrono::Utc>,
}
impl PackFile {
pub fn new(mut entries: Vec<PackFileEntry>) -> Self {
entries.sort_unstable_by_key(|v| v.uncompressed_sha1[0]);
let hash_buffer = entries.iter().fold(ConstBuffer::new(), |acc, curr| {
acc.push_slice(&curr.uncompressed_sha1)
});
Self {
entries,
hash: sha1(&hash_buffer).bytes(),
}
impl CommitUserInfo<'_> {
fn encode(&self) -> String {
format!(
"{} <{}> {} +0000",
self.name,
self.email,
self.time.timestamp()
)
}
pub const fn header_size() -> usize {
4 + std::mem::size_of::<u32>() + std::mem::size_of::<u32>()
#[must_use]
pub fn size(&self) -> usize {
let timestamp_len = itoa::Buffer::new().format(self.time.timestamp()).len();
self.name.len()
+ "< ".len()
+ self.email.len()
+ "> ".len()
+ timestamp_len
+ " +0000".len()
}
pub fn encode_to(&self, original_buf: &mut BytesMut) -> Result<(), anyhow::Error> {
let mut buf = original_buf.split_off(original_buf.len());
}
buf.extend_from_slice(b"PACK");
buf.put_u32(2);
buf.put_u32(self.entries.len().try_into().unwrap());
pub enum TreeItemKind {
File,
Directory,
}
for entry in &self.entries {
entry.encode_to(&mut buf)?;
impl TreeItemKind {
#[must_use]
pub const fn mode(&self) -> &'static str {
match self {
Self::File => "100644",
Self::Directory => "0000",
}
buf.extend_from_slice(&sha1::Sha1::digest(&buf[..]));
}
}
original_buf.unsplit(buf);
pub struct TreeItem<'a> {
pub kind: TreeItemKind,
pub name: &'a str,
pub hash: GenericArray<u8, <Sha1 as FixedOutputDirty>::OutputSize>,
}
impl TreeItem<'_> {
fn encode_to(&self, out: &mut BytesMut) -> Result<(), anyhow::Error> {
out.write_str(self.kind.mode())?;
write!(out, " {}\0", self.name)?;
out.extend_from_slice(&self.hash);
Ok(())
}
#[must_use]
pub fn size(&self) -> usize {
self.kind.mode().len() + " ".len() + self.name.len() + "\0".len() + self.hash.len()
}
}
pub enum PackFileEntryType {
pub enum PackFileEntry<'a> {
@@ -156,73 +182,41 @@
Commit,
Commit(Commit<'a>),
Tree,
Tree(Vec<TreeItem<'a>>),
Blob,
Blob(&'a [u8]),
}
pub struct PackFileEntry {
entry_type: PackFileEntryType,
compressed_data: Vec<u8>,
compressed_crc32: u32,
pub uncompressed_sha1: [u8; 20],
uncompressed_size: usize,
}
impl PackFileEntry {
pub fn new(entry_type: PackFileEntryType, data: &[u8]) -> Result<Self, anyhow::Error> {
let mut e = ZlibEncoder::new(Vec::new(), Compression::default());
e.write_all(data)?;
let compressed_data = e.finish()?;
let compressed_crc32 = crc::Crc::<u32>::new(&crc::CRC_32_CKSUM).checksum(&compressed_data);
Ok(Self {
entry_type,
compressed_data,
compressed_crc32,
uncompressed_sha1: sha1(&ConstBuffer::new().push_slice(data)).bytes(),
uncompressed_size: data.len(),
})
}
impl PackFileEntry<'_> {
fn write_header(&self, buf: &mut BytesMut) {
let mut size = self.uncompressed_size;
let mut size = self.uncompressed_size();
{
let mut val = 0b10000000u8;
val |= match self.entry_type {
PackFileEntryType::Commit => 0b001,
PackFileEntryType::Tree => 0b010,
PackFileEntryType::Blob => 0b011,
let mut val = 0b1000_0000_u8;
val |= match self {
Self::Commit(_) => 0b001,
Self::Tree(_) => 0b010,
Self::Blob(_) => 0b011,
} << 4;
val |= (size & 0b1111) as u8;
#[allow(clippy::cast_possible_truncation)]
{
val |= (size & 0b1111) as u8;
}
size >>= 4;
buf.put_u8(val);
@@ -231,7 +225,8 @@
while size != 0 {
let mut val = (size & 0b1111111) as u8;
#[allow(clippy::cast_possible_truncation)]
let mut val = (size & 0b111_1111) as u8;
size >>= 7;
if size != 0 {
@@ -244,10 +239,84 @@
}
}
pub fn encode_to(&self, buf: &mut BytesMut) -> Result<(), anyhow::Error> {
self.write_header(buf);
buf.extend_from_slice(&self.compressed_data);
pub fn encode_to(&self, original_out: &mut BytesMut) -> Result<(), anyhow::Error> {
self.write_header(original_out);
let mut out = BytesMut::new();
let size = self.uncompressed_size();
original_out.reserve(size);
out.reserve(size);
match self {
Self::Commit(commit) => {
commit.encode_to(&mut out)?;
}
Self::Tree(items) => {
for item in items {
item.encode_to(&mut out)?;
}
}
Self::Blob(data) => {
out.extend_from_slice(data);
}
}
debug_assert_eq!(out.len(), size);
let mut e = ZlibEncoder::new(Vec::new(), Compression::default());
e.write_all(&out)?;
let compressed_data = e.finish()?;
original_out.extend_from_slice(&compressed_data);
Ok(())
}
#[must_use]
pub fn uncompressed_size(&self) -> usize {
match self {
Self::Commit(commit) => commit.size(),
Self::Tree(items) => items.iter().map(TreeItem::size).sum(),
Self::Blob(data) => data.len(),
}
}
pub fn hash(
&self,
) -> Result<GenericArray<u8, <Sha1 as FixedOutputDirty>::OutputSize>, anyhow::Error> {
let size = self.uncompressed_size();
let file_prefix = match self {
Self::Commit(_) => "commit",
Self::Tree(_) => "tree",
Self::Blob(_) => "blob",
};
let size_len = itoa::Buffer::new().format(size).len();
let mut out =
BytesMut::with_capacity(file_prefix.len() + " ".len() + size_len + "\n".len() + size);
write!(out, "{} {}\0", file_prefix, size)?;
match self {
Self::Commit(commit) => {
commit.encode_to(&mut out)?;
}
Self::Tree(items) => {
for item in items {
item.encode_to(&mut out)?;
}
}
Self::Blob(blob) => {
out.extend_from_slice(blob);
}
}
Ok(sha1::Sha1::digest(&out))
}
}
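The `hash` method above builds git's loose-object preimage, `"<type> <size>\0"` followed by the object body, before taking the SHA-1, so the values printed in `main.rs` should line up with what `git hash-object` reports. A minimal standalone sketch of the same computation, using the `sha-1` and `hex` crates already in `Cargo.toml` (`object_hash` is a hypothetical helper, not part of this diff):

    use sha1::{Digest, Sha1};

    // SHA-1 over "<type> <size>\0<body>", mirroring PackFileEntry::hash().
    fn object_hash(kind: &str, body: &[u8]) -> String {
        let mut hasher = Sha1::new();
        hasher.update(format!("{} {}\0", kind, body.len()).as_bytes());
        hasher.update(body);
        hex::encode(hasher.finalize())
    }

    #[test]
    fn empty_blob_matches_git() {
        // `git hash-object -t blob --stdin </dev/null` prints the same value.
        assert_eq!(
            object_hash("blob", b""),
            "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"
        );
    }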