From 39329a3fc67f11814a1277f842bf7b07d728bd7d Mon Sep 17 00:00:00 2001
From: Jordan Doyle <jordan@doyle.la>
Date: Mon, 30 Aug 2021 22:32:56 +0100
Subject: [PATCH] Clean up packfile creation

---
 Cargo.lock          |  25 +++++++++++++++++++++++++
 Cargo.toml          |   2 ++
 src/main.rs         | 151 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------
 src/git/codec.rs    |   6 ++++--
 src/git/mod.rs      |  35 ++++++++++++++++++++++++++++-------
 src/git/packfile.rs | 375 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------
 6 files changed, 332 insertions(+), 262 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 9fd759c..82ecddb 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -190,6 +190,7 @@
  "async-trait",
  "axum",
  "bytes",
+ "chrono",
  "const-sha1",
  "crc",
  "env_logger",
@@ -197,6 +198,7 @@
  "format-bytes",
  "futures",
  "hex",
+ "itoa",
  "sha-1",
  "thrussh",
  "thrussh-keys",
@@ -204,6 +206,19 @@
  "tokio-util",
  "tower",
  "tower-http",
+]
+
+[[package]]
+name = "chrono"
+version = "0.4.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73"
+dependencies = [
+ "libc",
+ "num-integer",
+ "num-traits",
+ "time",
+ "winapi",
 ]
@@ -1192,6 +1207,16 @@
  "libsodium-sys",
  "pkg-config",
  "vcpkg",
+]
+
+[[package]]
+name = "time"
+version = "0.1.43"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438"
+dependencies = [
+ "libc",
+ "winapi",
 ]

diff --git a/Cargo.toml b/Cargo.toml
index b65ea86..c2a9079 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -23,6 +23,8 @@
 sha-1 = "0.9"
 const-sha1 = "0.2"
 crc = "2"
+chrono = "0.4"
+itoa = "0.4"
 format-bytes = "0.1"
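
The two new dependencies earn their keep in the packfile code below: `chrono` supplies the commit author/committer timestamp, and `itoa` is used to count how many bytes an integer will occupy once rendered, without allocating a `String`. A minimal standalone sketch of that counting trick (not part of the patch; `decimal_width` is an illustrative helper):

    fn decimal_width(n: i64) -> usize {
        // itoa formats into a stack buffer and hands back a &str,
        // so measuring the rendered width never touches the heap
        itoa::Buffer::new().format(n).len()
    }

    fn main() {
        // e.g. the unix timestamp from the old hard-coded commit below
        assert_eq!(decimal_width(1_630_244_577), 10);
        assert_eq!(decimal_width(0), 1);
    }
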
diff --git a/src/main.rs b/src/main.rs
index 06cb08f..31b7a0e 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,26 +1,32 @@
+#![deny(clippy::pedantic)]
+#![allow(clippy::missing_errors_doc)]
+
 pub mod git;
 
-use crate::git::PktLine;
+use crate::git::{
+    codec::{Encoder, GitCodec},
+    packfile::{Commit, CommitUserInfo, PackFileEntry, TreeItem, TreeItemKind},
+    PktLine,
+};
 
-use bytes::BufMut;
-use bytes::BytesMut;
+use bytes::{BytesMut};
 use futures::future::Future;
-use git::codec::Encoder;
-use git::codec::GitCodec;
 use std::{fmt::Write, pin::Pin, sync::Arc};
-use thrussh::server::{Auth, Session};
-use thrussh::*;
-use thrussh_keys::*;
+use thrussh::{
+    ChannelId, CryptoVec,
+    server::{self, Auth, Session},
+};
+use thrussh_keys::key;
 use tokio_util::codec::{Decoder, Encoder as TokioEncoder};
 
 #[tokio::main]
+#[allow(clippy::semicolon_if_nothing_returned)] // broken clippy lint
 async fn main() {
     env_logger::init();
 
     let mut config = thrussh::server::Config::default();
     config
         .keys
-        .push(thrussh_keys::key::KeyPair::generate_ed25519().unwrap());
+        .push(key::KeyPair::generate_ed25519().unwrap());
     let config = Arc::new(config);
     thrussh::server::run(config, "127.0.0.1:2233", Server)
         .await
@@ -54,14 +60,16 @@
         session.data(
             channel,
             CryptoVec::from_slice(self.output_bytes.split().as_ref()),
-        )
+        );
     }
 }
+
+type AsyncHandlerFn = Pin<Box<dyn Future<Output = Result<(Handler, Session), <Handler as server::Handler>::Error>> + Send>>;
 
 impl server::Handler for Handler {
     type Error = anyhow::Error;
     type FutureAuth = futures::future::Ready<Result<(Self, Auth), anyhow::Error>>;
-    type FutureUnit = Pin<Box<dyn Future<Output = Result<(Self, Session), anyhow::Error>> + Send>>;
+    type FutureUnit = AsyncHandlerFn;
     type FutureBool = futures::future::Ready<Result<(Self, Session, bool), anyhow::Error>>;
 
     fn finished_auth(self, auth: Auth) -> Self::FutureAuth {
@@ -158,45 +166,33 @@
         }
 
         // echo -ne "0012command=fetch\n0001000ethin-pack\n0010include-tag\n000eofs-delta\n0032want d24d8020163b5fee57c9babfd0c595b8c90ba253\n0009done\n"
-        // echo -ne
-
-        let tree_bytes = format_bytes::format_bytes!(
-            b"100644 test\0{}",
-            const_sha1::sha1(&const_sha1::ConstBuffer::from_slice(
-                "blob 33\0testing this is a test cool test!".as_bytes()
-            ))
-            .bytes()
-        );
-
-        let tree = format_bytes::format_bytes!(
-            b"tree {}\0{}",
-            tree_bytes.len().to_string().as_bytes(),
-            tree_bytes
-        );
-
-        let tree_hash = hex::encode(sha1::Sha1::digest(&tree));
-
-        let commit_bytes = format!(
-            "tree {}
-author Jordan Doyle <jordan@doyle.la> 1630244577 +0100
-committer Jordan Doyle <jordan@doyle.la> 1630244577 +0100
-
-test",
-            tree_hash
-        );
-
-        let commit = format!("commit {}\0{}", commit_bytes.len(), commit_bytes);
-        let commit_hash = hex::encode(sha1::Sha1::digest(commit.as_bytes()));
+        let file = PackFileEntry::Blob(b"this is some text inside my cool test file!");
+
+        let tree = PackFileEntry::Tree(vec![TreeItem {
+            kind: TreeItemKind::File,
+            name: "test",
+            hash: file.hash()?,
+        }]);
+
+        let commit_user = CommitUserInfo {
+            name: "Jordan Doyle",
+            email: "jordan@doyle.la",
+            time: chrono::Utc::now(),
+        };
+
+        let commit = PackFileEntry::Commit(Commit {
+            tree: tree.hash()?,
+            author: commit_user,
+            committer: commit_user,
+            message: "cool commit",
+        });
 
-        use sha1::Digest;
         println!(
             "commit hash: {} - tree hash: {} - file hash: {}",
-            commit_hash,
-            tree_hash,
-            const_sha1::sha1(&const_sha1::ConstBuffer::from_slice(
-                "blob 33\0testing this is a test cool test!".as_bytes()
-            ))
+            hex::encode(&commit.hash()?),
+            hex::encode(&tree.hash()?),
+            hex::encode(&file.hash()?),
         );
 
         // echo -ne "0014command=ls-refs\n0014agent=git/2.321\n00010008peel000bsymrefs000aunborn0014ref-prefix HEAD\n0000"
@@ -207,7 +203,11 @@
         // https://shafiul.github.io/gitbook/7_the_packfile.html
         if ls_refs {
             self.write(PktLine::Data(
-                format!("{} HEAD symref-target:refs/heads/master\n", commit_hash).as_bytes(),
+                format!(
+                    "{} HEAD symref-target:refs/heads/master\n",
+                    hex::encode(&commit.hash()?)
+                )
+                .as_bytes(),
             ))?;
             self.write(PktLine::Flush)?;
             self.flush(&mut session, channel);
@@ -224,62 +224,15 @@
         if done {
             self.write(PktLine::Data(b"packfile\n"))?;
-
-            {
-                let mut buf = BytesMut::new();
-                buf.put_u8(2); // sideband, 1 = msg
-                buf.extend_from_slice(b"Hello from chartered!\n");
-                self.write(PktLine::Data(buf.as_ref()))?;
-                self.flush(&mut session, channel);
-            }
-
             // fatal: bad object 4ff484817ca2f1a10183da210a6e74f29764857d
             // error: ssh://127.0.0.1:2233/ did not send all necessary objects
-            let packfile = git::packfile::PackFile::new(vec![
-                git::packfile::PackFileEntry::new(
-                    git::packfile::PackFileEntryType::Commit,
-                    commit_bytes.as_bytes(),
-                )?,
-                git::packfile::PackFileEntry::new(
-                    git::packfile::PackFileEntryType::Tree,
-                    &tree_bytes,
-                )?,
-                git::packfile::PackFileEntry::new(
-                    git::packfile::PackFileEntryType::Blob,
-                    b"testing this is a test cool test!",
-                )?,
-            ]);
-
-            // {
-            //     let mut buf = BytesMut::new();
-            //     buf.put_u8(1);
-            //     git::packfile::PackFileIndex {
-            //         packfile: &packfile,
-            //     }
-            //     .encode_to(&mut buf)?;
-            //     self.write(PktLine::Data(buf.as_ref()))?;
-            //     self.write(PktLine::Flush)?;
-            // }
-
-            {
-                let mut buf = BytesMut::new();
-                buf.put_u8(1); // sideband, 1 = continue
-                packfile.encode_to(&mut buf)?;
-                self.write(PktLine::Data(buf.as_ref()))?;
-            }
-
-            // {
-            //     let mut buf = BytesMut::new();
-            //     buf.put_u8(2); // sideband, 1 = msg
-            //     buf.extend_from_slice(
-            //         b"Total 3 (delta 0), reused 0 (delta 0), pack-reused 0\n",
-            //     );
-            //     self.write(PktLine::Data(buf.as_ref()))?;
-            //     self.flush(&mut session, channel);
-            // }
+            self.write(PktLine::SidebandMsg(b"Hello from chartered!\n"))?;
+            self.flush(&mut session, channel);
 
+            let packfile = git::packfile::PackFile::new(vec![commit, tree, file]);
+            self.write(PktLine::SidebandData(packfile))?;
             self.write(PktLine::Flush)?;
             self.flush(&mut session, channel);
 
+            session.exit_status_request(channel, 0);
             session.eof(channel);
             session.close(channel);
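
All three `hash()` calls above produce ordinary git object IDs: SHA-1 over a `"{type} {size}\0"` prefix followed by the raw object body. A standalone sanity check of that framing, using the same `sha-1` and `hex` crates the patch already depends on (the expected digest is what `echo -n test | git hash-object --stdin` prints):

    use sha1::{Digest, Sha1};

    fn main() {
        let body = b"test";
        // git object ids hash the header and body together
        let mut obj = format!("blob {}\0", body.len()).into_bytes();
        obj.extend_from_slice(body);

        assert_eq!(
            hex::encode(Sha1::digest(&obj)),
            "30d74d258442c7c65512eafab474568dd706c430"
        );
    }
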
diff --git a/src/git/codec.rs b/src/git/codec.rs
index fc12996..96ce563 100644
--- a/src/git/codec.rs
+++ b/src/git/codec.rs
@@ -1,3 +1,5 @@
+#![allow(clippy::module_name_repetitions)]
+
 use bytes::{Buf, Bytes, BytesMut};
 use tokio_util::codec;
 
@@ -28,7 +30,7 @@
             return Ok(None);
         }
 
-        let mut length_bytes = [0u8; 4];
+        let mut length_bytes = [0_u8; 4];
         length_bytes.copy_from_slice(&src[..4]);
         let length = u16::from_str_radix(std::str::from_utf8(&length_bytes)?, 16)? as usize;
 
@@ -42,7 +44,7 @@
             return self.decode(src);
         }
 
-        if length > 65520 || length < 4 {
+        if !(4..=65520).contains(&length) {
             return Err(
                 std::io::Error::new(std::io::ErrorKind::InvalidData, "protocol abuse").into(),
             );
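
The guard rewritten above encodes the pkt-line rules: the four hex digits give the total line length including the prefix itself, `0000`–`0002` are control packets rather than data, and 65520 is the largest legal line. A hedged standalone sketch of the same arithmetic (`payload_len` is an illustrative helper, not the decoder's API):

    fn payload_len(pkt: &[u8]) -> Option<usize> {
        let digits = std::str::from_utf8(pkt.get(..4)?).ok()?;
        let total = usize::from_str_radix(digits, 16).ok()?;
        match total {
            0..=3 => None, // 0000/0001/0002 control packets carry no payload
            _ => Some(total - 4),
        }
    }

    fn main() {
        assert_eq!(payload_len(b"000dpackfile\n"), Some(9));
        assert_eq!(payload_len(b"0000"), None); // flush-pkt
    }
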
diff --git a/src/git/mod.rs b/src/git/mod.rs
index 6844310..e06d861 100644
--- a/src/git/mod.rs
+++ b/src/git/mod.rs
@@ -1,11 +1,19 @@
 pub mod codec;
 pub mod packfile;
 
-use bytes::BytesMut;
+use bytes::{BufMut, BytesMut};
 use std::fmt::Write;
 
+use self::packfile::PackFile;
+
 pub enum PktLine<'a> {
     Data(&'a [u8]),
+    /// Similar to a data packet, but used during packfile sending to indicate this
+    /// packet is a block of data by appending a byte containing the u8 `1`.
+    SidebandData(PackFile<'a>),
+    /// Similar to a data packet, but used during packfile sending to indicate this
+    /// packet is a status message by appending a byte containing the u8 `2`.
+    SidebandMsg(&'a [u8]),
     Flush,
     Delimiter,
     ResponseEnd,
@@ -16,8 +24,25 @@
         match self {
             Self::Data(data) => {
                 write!(buf, "{:04x}", data.len() + 4)?;
-                buf.extend_from_slice(&data);
+                buf.extend_from_slice(data);
+            }
+            Self::SidebandData(packfile) => {
+                // split the buf off so the cost of counting the bytes to put in the
+                // data line prefix is just the cost of `unsplit` (an atomic decrement)
+                let mut data_buf = buf.split_off(buf.len());
+
+                data_buf.put_u8(1); // sideband, 1 = data
+                packfile.encode_to(&mut data_buf)?;
+
+                // write into the buf not the data buf so it's at the start of the msg
+                write!(buf, "{:04x}", data_buf.len() + 4)?;
+                buf.unsplit(data_buf);
             }
+            Self::SidebandMsg(msg) => {
+                write!(buf, "{:04x}", msg.len() + 4 + 1)?;
+                buf.put_u8(2); // sideband, 2 = msg
+                buf.extend_from_slice(msg);
+            }
             Self::Flush => buf.extend_from_slice(b"0000"),
             Self::Delimiter => buf.extend_from_slice(b"0001"),
             Self::ResponseEnd => buf.extend_from_slice(b"0002"),
@@ -26,12 +51,6 @@
         Ok(())
     }
 }
-
-// impl From<PktLine<'_>> for CryptoVec {
-//     fn from(val: PktLine<'_>) -> Self {
-//         Self::from(val.encode())
-//     }
-// }
 
 impl<'a> From<&'a str> for PktLine<'a> {
     fn from(val: &'a str) -> Self {
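
Both sideband variants keep ordinary pkt-line framing; the extra byte after the length prefix selects the band (`1` carries pack data, `2` carries progress text shown to the user). Mirroring the encoder above, the `SidebandMsg(b"Hello from chartered!\n")` sent from main.rs should leave the wire as a 27-byte line; a sketch (assuming the same `bytes` crate and band constants as the match arms above):

    use bytes::{BufMut, BytesMut};
    use std::fmt::Write;

    fn main() -> Result<(), std::fmt::Error> {
        let msg: &[u8] = b"Hello from chartered!\n";

        let mut buf = BytesMut::new();
        // the length counts the 4 prefix digits plus the band byte
        write!(buf, "{:04x}", msg.len() + 4 + 1)?;
        buf.put_u8(2); // band 2 = human-readable progress
        buf.extend_from_slice(msg);

        // 22-byte message + 4 + 1 = 27 = 0x1b
        assert_eq!(&buf[..5], b"001b\x02");
        Ok(())
    }
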
diff --git a/src/git/packfile.rs b/src/git/packfile.rs
index 48c06f0..befdcfc 100644
--- a/src/git/packfile.rs
+++ b/src/git/packfile.rs
@@ -1,138 +1,164 @@
 use bytes::{BufMut, BytesMut};
-use const_sha1::{sha1, ConstBuffer};
 use flate2::{write::ZlibEncoder, Compression};
-use sha1::{Digest, Sha1};
-use std::convert::TryInto;
-use std::io::Write as IoWrite;
-
-// The offset/sha1[] tables are sorted by sha1[] values (this is to
-// allow binary search of this table), and fanout[] table points at
-// the offset/sha1[] table in a specific way (so that part of the
-// latter table that covers all hashes that start with a given byte
-// can be found to avoid 8 iterations of the binary search).
-pub struct PackFileIndex<'a> {
-    pub packfile: &'a PackFile,
+use sha1::{
+    digest::{generic_array::GenericArray, FixedOutputDirty},
+    Digest, Sha1,
+};
+use std::{convert::TryInto, fmt::Write, io::Write as IoWrite};
+
+// The packfile itself is a very simple format. There is a header, a
+// series of packed objects (each with its own header and body) and
+// then a checksum trailer. The first four bytes is the string 'PACK',
+// which is sort of used to make sure you're getting the start of the
+// packfile correctly. This is followed by a 4-byte packfile version
+// number and then a 4-byte number of entries in that file.
+pub struct PackFile<'a> {
+    entries: Vec<PackFileEntry<'a>>,
 }
 
-impl<'a> PackFileIndex<'a> {
-    pub fn encode_to(self, original_buf: &mut BytesMut) -> Result<(), anyhow::Error> {
-        // split the buffer so we can hash only what we're currently generating at the
-        // end of this function
-        let mut buf = original_buf.split_off(original_buf.len());
+impl<'a> PackFile<'a> {
+    #[must_use]
+    pub fn new(entries: Vec<PackFileEntry<'a>>) -> Self {
+        Self { entries }
+    }
 
-        buf.extend_from_slice(b"\xfftOc"); // magic header
-        buf.put_u32(2); // version
+    #[must_use]
+    pub const fn header_size() -> usize {
+        "PACK".len() + std::mem::size_of::<u32>() + std::mem::size_of::<u32>()
+    }
 
-        // calculate total `PackFileEntry` hashes beginning with the same first byte
-        let mut totals_by_first_byte = [0u32; 256];
-        for entry in &self.packfile.entries {
-            totals_by_first_byte[entry.uncompressed_sha1[0] as usize] += 1;
-        }
+    #[must_use]
+    pub const fn footer_size() -> usize {
+        20
+    }
 
-        // calculate fanout value by taking cumulative totals of first byte counts
-        let mut cumulative = 0;
-        for i in 0..256usize {
-            cumulative += totals_by_first_byte[i];
-            buf.put_u32(cumulative);
-        }
+    pub fn encode_to(&self, original_buf: &mut BytesMut) -> Result<(), anyhow::Error> {
+        let mut buf = original_buf.split_off(original_buf.len());
+        buf.reserve(Self::header_size() + Self::footer_size());
 
-        // write all the sha hashes out, this needs to be sorted by the hash which should've
-        // been done by `PackFile::new()`
-        for entry in &self.packfile.entries {
-            buf.extend_from_slice(&entry.uncompressed_sha1);
-        }
+        // header
+        buf.extend_from_slice(b"PACK"); // magic header
+        buf.put_u32(2); // version
+        buf.put_u32(self.entries.len().try_into()?); // number of entries in the packfile
 
-        for entry in &self.packfile.entries {
-            buf.put_u32(entry.compressed_crc32);
+        // body
+        for entry in &self.entries {
+            entry.encode_to(&mut buf)?;
         }
-
-        let mut offset = PackFile::header_size();
-        // encode offsets into the packfile
-        for entry in &self.packfile.entries {
-            offset += entry.compressed_data.len();
-
-            let mut offset_be = offset.to_be();
+        // footer
+        buf.extend_from_slice(&sha1::Sha1::digest(&buf[..]));
 
-            while offset_be != 0 {
-                // read 7 LSBs from the `offset_be` and push them off for the next iteration
-                let mut val = (offset_be & 0b1111111) as u8;
-                offset_be >>= 7;
+        original_buf.unsplit(buf);
 
-                if offset_be != 0 {
-                    // MSB set to 1 implies there's more offset_be bytes to come, otherwise
-                    // the data starts after this byte
-                    val |= 1 << 7;
-                }
+        Ok(())
+    }
+}
writeln!(out, "committer {}", self.committer.encode())?; + write!(out, "\n{}", self.message)?; Ok(()) } + + #[must_use] + pub fn size(&self) -> usize { + let mut len = 0; + len += "tree ".len() + (self.tree.len() * 2) + "\n".len(); + len += "author ".len() + self.author.size() + "\n".len(); + len += "committer ".len() + self.committer.size() + "\n".len(); + len += "\n".len() + self.message.len(); + len + } } -// The packfile itself is a very simple format. There is a header, a -// series of packed objects (each with it's own header and body) and -// then a checksum trailer. The first four bytes is the string 'PACK', -// which is sort of used to make sure you're getting the start of the -// packfile correctly. This is followed by a 4-byte packfile version -// number and then a 4-byte number of entries in that file. -pub struct PackFile { - entries: Vec, - hash: [u8; 20], +#[derive(Copy, Clone, Debug)] +pub struct CommitUserInfo<'a> { + pub name: &'a str, + pub email: &'a str, + pub time: chrono::DateTime, } -impl PackFile { - pub fn new(mut entries: Vec) -> Self { - entries.sort_unstable_by_key(|v| v.uncompressed_sha1[0]); - let hash_buffer = entries.iter().fold(ConstBuffer::new(), |acc, curr| { - acc.push_slice(&curr.uncompressed_sha1) - }); - - Self { - entries, - hash: sha1(&hash_buffer).bytes(), - } +impl CommitUserInfo<'_> { + fn encode(&self) -> String { + // TODO: remove `format!`, `format_args!`? + format!( + "{} <{}> {} +0000", + self.name, + self.email, + self.time.timestamp() + ) } - pub const fn header_size() -> usize { - 4 + std::mem::size_of::() + std::mem::size_of::() + #[must_use] + pub fn size(&self) -> usize { + let timestamp_len = itoa::Buffer::new().format(self.time.timestamp()).len(); + + self.name.len() + + "< ".len() + + self.email.len() + + "> ".len() + + timestamp_len + + " +0000".len() } - - pub fn encode_to(&self, original_buf: &mut BytesMut) -> Result<(), anyhow::Error> { - let mut buf = original_buf.split_off(original_buf.len()); +} - buf.extend_from_slice(b"PACK"); // magic header - buf.put_u32(2); // version - buf.put_u32(self.entries.len().try_into().unwrap()); // number of entries in the packfile +pub enum TreeItemKind { + File, + Directory, +} - for entry in &self.entries { - entry.encode_to(&mut buf)?; +impl TreeItemKind { + #[must_use] + pub const fn mode(&self) -> &'static str { + match self { + Self::File => "100644", + Self::Directory => "0000", } - - buf.extend_from_slice(&sha1::Sha1::digest(&buf[..])); + } +} - original_buf.unsplit(buf); +pub struct TreeItem<'a> { + pub kind: TreeItemKind, + pub name: &'a str, + pub hash: GenericArray::OutputSize>, // [u8; 20] - but we have to deal with GenericArrays +} +// `[mode] [name]\0[hash]` +impl TreeItem<'_> { + fn encode_to(&self, out: &mut BytesMut) -> Result<(), anyhow::Error> { + out.write_str(self.kind.mode())?; + write!(out, " {}\0", self.name)?; + out.extend_from_slice(&self.hash); Ok(()) + } + + #[must_use] + pub fn size(&self) -> usize { + self.kind.mode().len() + " ".len() + self.name.len() + "\0".len() + self.hash.len() } } -pub enum PackFileEntryType { +pub enum PackFileEntry<'a> { // jordan@Jordans-MacBook-Pro-2 0d % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - f5/473259d9674ed66239766a013f96a3550374e3 | gzip -dc // commit 1068tree 0d586b48bc42e8591773d3d8a7223551c39d453c // parent c2a862612a14346ae95234f26efae1ee69b5b7a9 @@ -156,73 +182,41 @@ // -----END PGP SIGNATURE----- // // test - Commit, + Commit(Commit<'a>), // jordan@Jordans-MacBook-Pro-2 0d % printf 
"\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - 0d/586b48bc42e8591773d3d8a7223551c39d453c | gzip -dc // tree 20940000 .cargo���CYy��Ve�������100644 .gitignore�K��_ow�]����4�n�ݺ100644 Cargo.lock�7�3-�?/�� // kt��c0C�100644 Cargo.toml�6�&(��]\8@�SHA�]f40000 src0QW��ƅ���b[�!�S&N�100644 test�G2Y�gN�b9vj?��Ut� - Tree, + Tree(Vec>), // jordan@Jordans-MacBook-Pro-2 objects % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - f5/473259d9674ed66239766a013f96a3550374e3| gzip -dc // blob 23try and find me in .git - Blob, + Blob(&'a [u8]), // Tag, // OfsDelta, // RefDelta, -} - -pub struct PackFileEntry { - entry_type: PackFileEntryType, - compressed_data: Vec, - compressed_crc32: u32, - pub uncompressed_sha1: [u8; 20], - uncompressed_size: usize, } -impl PackFileEntry { - pub fn new(entry_type: PackFileEntryType, data: &[u8]) -> Result { - let mut e = ZlibEncoder::new(Vec::new(), Compression::default()); - e.write_all(data)?; - let compressed_data = e.finish()?; - - let compressed_crc32 = crc::Crc::::new(&crc::CRC_32_CKSUM).checksum(&compressed_data); - - Ok(Self { - entry_type, - compressed_data, - compressed_crc32, - uncompressed_sha1: sha1(&ConstBuffer::new().push_slice(data)).bytes(), - uncompressed_size: data.len(), - }) - } - - // fn size_of_data_be(&self) -> usize { - // self.uncompressed_size.to_be() - // } - - // The object header is a series of one or more 1 byte (8 bit) hunks - // that specify the type of object the following data is, and the size - // of the data when expanded. Each byte is really 7 bits of data, with - // the first bit being used to say if that hunk is the last one or not - // before the data starts. If the first bit is a 1, you will read another - // byte, otherwise the data starts next. The first 3 bits in the first - // byte specifies the type of data, according to the table below. +impl PackFileEntry<'_> { fn write_header(&self, buf: &mut BytesMut) { - let mut size = self.uncompressed_size; + let mut size = self.uncompressed_size(); // write header { - let mut val = 0b10000000u8; - - val |= match self.entry_type { - PackFileEntryType::Commit => 0b001, - PackFileEntryType::Tree => 0b010, - PackFileEntryType::Blob => 0b011, - // PackFileEntryType::Tag => 0b100, - // PackFileEntryType::OfsDelta => 0b110, - // PackFileEntryType::RefDelta => 0b111, + let mut val = 0b1000_0000_u8; + + val |= match self { + Self::Commit(_) => 0b001, + Self::Tree(_) => 0b010, + Self::Blob(_) => 0b011, + // Self::Tag => 0b100, + // Self::OfsDelta => 0b110, + // Self::RefDelta => 0b111, } << 4; // pack the 4 LSBs of the size into the header - val |= (size & 0b1111) as u8; + #[allow(clippy::cast_possible_truncation)] // value is masked + { + val |= (size & 0b1111) as u8; + } size >>= 4; buf.put_u8(val); @@ -231,7 +225,8 @@ // write size bytes while size != 0 { // read 7 LSBs from the `size` and push them off for the next iteration - let mut val = (size & 0b1111111) as u8; + #[allow(clippy::cast_possible_truncation)] // value is masked + let mut val = (size & 0b111_1111) as u8; size >>= 7; if size != 0 { @@ -244,10 +239,84 @@ } } - pub fn encode_to(&self, buf: &mut BytesMut) -> Result<(), anyhow::Error> { - self.write_header(buf); - buf.extend_from_slice(&self.compressed_data); + pub fn encode_to(&self, original_out: &mut BytesMut) -> Result<(), anyhow::Error> { + self.write_header(original_out); // TODO: this needs space reserving for it + // todo is there a way to stream through the zlibencoder so we don't have to + // have this intermediate bytesmut and vec? 
+        let mut out = BytesMut::new();
+
+        let size = self.uncompressed_size();
+        original_out.reserve(size);
+        // the data ends up getting compressed but we'll need at least this many bytes
+        out.reserve(size);
+
+        match self {
+            Self::Commit(commit) => {
+                commit.encode_to(&mut out)?;
+            }
+            Self::Tree(items) => {
+                for item in items {
+                    item.encode_to(&mut out)?;
+                }
+            }
+            Self::Blob(data) => {
+                out.extend_from_slice(data);
+            }
+        }
+
+        debug_assert_eq!(out.len(), size);
+
+        let mut e = ZlibEncoder::new(Vec::new(), Compression::default());
+        e.write_all(&out)?;
+        let compressed_data = e.finish()?;
+
+        original_out.extend_from_slice(&compressed_data);
 
         Ok(())
     }
+
+    #[must_use]
+    pub fn uncompressed_size(&self) -> usize {
+        match self {
+            Self::Commit(commit) => commit.size(),
+            Self::Tree(items) => items.iter().map(TreeItem::size).sum(),
+            Self::Blob(data) => data.len(),
+        }
+    }
+
+    // wen const generics for RustCrypto? :-(
+    pub fn hash(
+        &self,
+    ) -> Result<GenericArray<u8, <Sha1 as FixedOutputDirty>::OutputSize>, anyhow::Error> {
+        let size = self.uncompressed_size();
+
+        let file_prefix = match self {
+            Self::Commit(_) => "commit",
+            Self::Tree(_) => "tree",
+            Self::Blob(_) => "blob",
+        };
+
+        let size_len = itoa::Buffer::new().format(size).len();
+
+        let mut out =
+            BytesMut::with_capacity(file_prefix.len() + " ".len() + size_len + "\n".len() + size);
+
+        write!(out, "{} {}\0", file_prefix, size)?;
+        match self {
+            Self::Commit(commit) => {
+                commit.encode_to(&mut out)?;
+            }
+            Self::Tree(items) => {
+                for item in items {
+                    item.encode_to(&mut out)?;
+                }
+            }
+            Self::Blob(blob) => {
+                out.extend_from_slice(blob);
+            }
+        }
+
+        Ok(sha1::Sha1::digest(&out))
+    }
 }
-- 
rgit 0.1.3
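
A worked example of the object header written by `write_header`: the 1068-byte commit quoted in the enum comments gets type bits `001`, the low four size bits in the first byte, and the rest of the size in little-endian base-128. A standalone sketch of that encoding (it sets the continuation bit only when another size byte actually follows):

    fn header(kind: u8, mut size: usize) -> Vec<u8> {
        let mut out = Vec::new();
        // 1 continuation bit, 3 type bits, 4 size bits
        let mut first = (kind << 4) | (size & 0b1111) as u8;
        size >>= 4;
        if size != 0 {
            first |= 0b1000_0000; // more size bytes follow
        }
        out.push(first);
        while size != 0 {
            let mut val = (size & 0b111_1111) as u8;
            size >>= 7;
            if size != 0 {
                val |= 0b1000_0000;
            }
            out.push(val);
        }
        out
    }

    fn main() {
        // commit (type 0b001), 1068 bytes => 0x9c 0x42
        assert_eq!(header(0b001, 1068), vec![0x9c, 0x42]);
    }
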