From f09b9d182e2331820debbd7eb1636148137efa2c Mon Sep 17 00:00:00 2001 From: Jordan Doyle Date: Sun, 29 Aug 2021 14:55:15 +0100 Subject: [PATCH] some packfile reverse engineering --- Cargo.lock | 15 +++++++++++++++ Cargo.toml | 2 ++ src/main.rs | 19 ++++++++++++++++++- src/git/mod.rs | 4 +++- src/git/packfile.rs | 148 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 182 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 656b3c2..9723d11 100644 --- a/Cargo.lock +++ a/Cargo.lock @@ -191,7 +191,9 @@ "axum", "bytes", "env_logger", + "flate2", "futures", + "sha-1", "thrussh", "thrussh-keys", "tokio", @@ -961,6 +963,19 @@ "itoa", "ryu", "serde", +] + +[[package]] +name = "sha-1" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99cd6713db3cf16b6c84e06321e049a9b9f699826e16096d23bbcc44d15d51a6" +dependencies = [ + "block-buffer", + "cfg-if", + "cpufeatures", + "digest", + "opaque-debug", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 76ddaef..4c6566a 100644 --- a/Cargo.toml +++ a/Cargo.toml @@ -19,3 +19,5 @@ env_logger = "0.9" tokio-util = { version = "0.6", features = ["codec"] } bytes = "1" +flate2 = "1.0" +sha-1 = "0.9"diff --git a/src/main.rs b/src/main.rs index 1befe48..9929608 100644 --- a/src/main.rs +++ a/src/main.rs @@ -1,8 +1,8 @@ pub mod git; use crate::git::PktLine; -use crate::git::codec::GitCodec; +use git::codec::GitCodec; use bytes::BytesMut; use futures::future::Future; use git::codec::Encoder; @@ -140,12 +140,15 @@ Box::pin(async move { let mut ls_refs = false; + let mut fetch = false; while let Some(frame) = self.codec.decode(&mut self.input_bytes)? 
{ eprintln!("data: {:x?}", frame); if frame.as_ref() == "command=ls-refs".as_bytes() { ls_refs = true; + } else if frame.as_ref() == "command=fetch".as_bytes() { + fetch = true; } } @@ -159,8 +162,20 @@ self.write(PktLine::Data(b"1a1b25ae7c87a0e87b7a9aa478a6bc4331c6b954 HEAD symref-target:refs/heads/master\n"))?; self.write(PktLine::Flush)?; self.flush(&mut session, channel); + } - // next command will be a fetch like above + if fetch { + git::packfile::PackFile { + entries: vec![ + git::packfile::PackFileEntry { + entry_type: git::packfile::PackFileEntryType::Commit, + data: vec![0, 1, 2, 3, 4], + sha1: [0; 20], + } + ] + }.encode_to(&mut self.output_bytes).unwrap(); + + self.flush(&mut session, channel); } Ok((self, session)) diff --git a/src/git/mod.rs b/src/git/mod.rs index 405c092..6844310 100644 --- a/src/git/mod.rs +++ a/src/git/mod.rs @@ -46,7 +46,9 @@ #[test] fn test_pkt_line() { let mut buffer = BytesMut::new(); - super::PktLine::Data(b"agent=git/2.32.0\n").encode_to(&mut buffer).unwrap(); + super::PktLine::Data(b"agent=git/2.32.0\n") + .encode_to(&mut buffer) + .unwrap(); assert_eq!(buffer.as_ref(), b"0015agent=git/2.32.0\n"); } } diff --git a/src/git/packfile.rs b/src/git/packfile.rs index b83c539..6665bab 100644 --- a/src/git/packfile.rs +++ a/src/git/packfile.rs @@ -1,3 +1,9 @@ +use bytes::{BufMut, BytesMut}; +use flate2::{write::ZlibEncoder, Compression}; +use sha1::{Digest, Sha1}; +use std::fmt::Write; +use std::io::Write as IoWrite; + // The offset/sha1[] tables are sorted by sha1[] values (this is to // allow binary search of this table), and fanout[] table points at // the offset/sha1[] table in a specific way (so that part of the @@ -6,9 +12,6 @@ // // packfile indexes are not neccesary to extract objects from a packfile pub struct PackFileIndex { - // S should be u16 - pub magic: [u8; 4], // "\x337t0c" - header magic value - pub version: [u8; 4], // "0002", - header version pub fanout: [[u8; 4]; 255], pub size: u16, // fanout[256] => size == S 
pub sha1: [[u8; 20]; S], // sha listing @@ -17,6 +20,16 @@ // 64b_offset: [[u8; 8]; N], // for packfiles over 2gb pub packfile_checksum: [u8; 20], // sha1 pub idxfiel_checksum: [u8; 20], // sha1 +} + +impl PackFileIndex { + pub fn encode_to(self, buf: &mut BytesMut) -> Result<(), anyhow::Error> { + buf.extend_from_slice(b"\xFFtOc"); // magic header + buf.put_u8(2); // version + + + Ok(()) + } } // The packfile itself is a very simple format. There is a header, a @@ -25,3 +38,132 @@ // which is sort of used to make sure you're getting the start of the // packfile correctly. This is followed by a 4-byte packfile version // number and then a 4-byte number of entries in that file. +pub struct PackFile { + pub entries: Vec, +} + +impl PackFile { + pub fn encode_to(self, buf: &mut BytesMut) -> Result<(), anyhow::Error> { + buf.extend_from_slice(b"PACK"); // magic header + buf.extend_from_slice(b"0002"); // version + write!(buf, "{:04x}", self.entries.len())?; // number of entries in the packfile + + for entry in &self.entries { + entry.encode_to(buf)?; + } + + let mut hasher = Sha1::new(); + for entry in &self.entries { + hasher.update(entry.sha1); + } + let hash = hasher.finalize(); + buf.extend_from_slice(&hash.as_slice()); + + Ok(()) + } +} + +pub enum PackFileEntryType { + // jordan@Jordans-MacBook-Pro-2 0d % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - f5/473259d9674ed66239766a013f96a3550374e3-test | gzip -dc + // commit 1068tree 0d586b48bc42e8591773d3d8a7223551c39d453c + // parent c2a862612a14346ae95234f26efae1ee69b5b7a9 + // author Jordan Doyle 1630244577 +0100 + // committer Jordan Doyle 1630244577 +0100 + // gpgsig -----BEGIN PGP SIGNATURE----- + // + // iQIzBAABCAAdFiEEMn1zof7yzaURQBGDHqa65vZtxJoFAmErjuEACgkQHqa65vZt + // xJqhvhAAieKXnGRjT926qzozcvarC8D3TlA+Z1wVXueTAWqfusNIP0zCun/crOb2 + // tOULO+/DXVBmwu5eInAf+t/wvlnIsrzJonhVr1ZT0f0vDX6fs2vflWg4UCVEuTsZ + // tg+aTjcibwnmViIM9XVOzhU8Au2OIqMQLyQOMWSt8NhY0W2WhBCdQvhktvK1V8W6 + // 
omPs04SrR39xWBDQaxsXYxq/1ZKUYXDwudvEfv14EvrxG1vWumpUVJd7Ib5w4gXX + // fYa95DxYL720ZaiWPIYEG8FMBzSOpo6lUzY9g2/o/wKwSQZJNvpaMGCuouy8Fb+E + // UaqC0XPxqpKG9duXPgCldUr+P7++48CF5zc358RBGz5OCNeTREsIQQo5PUO1k+wO + // FnGOQTT8vvNOrxBgb3QgKu67RVwWDc6JnQCNpUrhUJrXMDWnYLBqo4Y+CdKGSQ4G + // hW8V/hVTOlJZNi8bbU4v53cxh4nXiMM6NKUblUKs65ar3/2dkojwunz7r7GVZ6mG + // QUpr9+ybG61XDqd1ad1A/B/i3WdWixTmJS3K/4uXjFjFX1f3RAk7O0gHc9I8HYOE + // Vd8UsHzLOWAUHeaqbsd6xx3GCXF4D5D++kh9OY9Ov7CXlqbYbHd6Atg+PQ7VnqNf + // bDqWN0Q2qcKX3k4ggtucmkkA6gP+K3+F5ANQj3AsGMQeddowC0Y= + // =fXoH + // -----END PGP SIGNATURE----- + // + // test + Commit, + // jordan@Jordans-MacBook-Pro-2 0d % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - 0d/586b48bc42e8591773d3d8a7223551c39d453c | gzip -dc + // tree 20940000 .cargo���CYy��Ve�������100644 .gitignore�K��_ow�]����4�n�ݺ100644 Cargo.lock�7�3-�?/�� + // kt��c0C�100644 Cargo.toml�6�&(��]\8@�SHA�]f40000 src0QW��ƅ���b[�!�S&N�100644 test�G2Y�gN�b9vj?��Ut� + Tree, + // jordan@Jordans-MacBook-Pro-2 objects % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - f5/473259d9674ed66239766a013f96a3550374e3| gzip -dc + // blob 23try and find me in .git + Blob, + // Tag, + // OfsDelta, + // RefDelta, +} + +pub struct PackFileEntry { + pub entry_type: PackFileEntryType, + pub data: Vec, + pub sha1: [u8; 20], +} + +impl PackFileEntry { + fn size_of_data(&self) -> usize { + self.data.len() as usize + } + + // The object header is a series of one or more 1 byte (8 bit) hunks + // that specify the type of object the following data is, and the size + // of the data when expanded. Each byte is really 7 bits of data, with + // the first bit being used to say if that hunk is the last one or not + // before the data starts. If the first bit is a 1, you will read another + // byte, otherwise the data starts next. The first 3 bits in the first + // byte specifies the type of data, according to the table below. 
+    /// Appends this entry's variable-length object header to `buf`.
+    ///
+    /// The first byte carries, from the most significant bit down: a
+    /// continuation flag, the 3-bit object type and the low 4 bits of the
+    /// size reported by `size_of_data`. Every following byte carries the next
+    /// 7 bits of the size (least significant group first) plus its own
+    /// continuation flag in the top bit.
+    fn write_header(&self, buf: &mut BytesMut) {
+        let mut size = self.size_of_data();
+
+        // write header
+        {
+            let type_bits: u8 = match self.entry_type {
+                PackFileEntryType::Commit => 0b001,
+                PackFileEntryType::Tree => 0b010,
+                PackFileEntryType::Blob => 0b011,
+                // PackFileEntryType::Tag => 0b100,
+                // PackFileEntryType::OfsDelta => 0b110,
+                // PackFileEntryType::RefDelta => 0b111,
+            };
+
+            // pack the last 4 bits of the size into the header
+            let mut val = (type_bits << 4) | (size & 0b1111) as u8;
+            size >>= 4;
+
+            // Only flag a continuation when size bytes really follow. Setting
+            // the bit unconditionally makes a reader treat the first byte of
+            // the compressed data as part of the size for sizes below 16.
+            if size != 0 {
+                val |= 1 << 7;
+            }
+
+            buf.put_u8(val);
+        }
+
+        // write size bytes
+        while size != 0 {
+            // take 7 bits from `size` and leave the rest for the next iteration
+            let mut val = (size & 0b1111111) as u8;
+            size >>= 7;
+
+            if size != 0 {
+                // top bit set means more size bytes follow, otherwise the
+                // data starts after this byte
+                val |= 1 << 7;
+            }
+
+            buf.put_u8(val);
+        }
+    }
+
+    /// Appends the encoded entry to `buf`: the object header followed by the
+    /// zlib-compressed contents of `data`.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if compressing or writing the data fails.
+    pub fn encode_to(&self, buf: &mut BytesMut) -> Result<(), anyhow::Error> {
+        self.write_header(buf);
+
+        // Write through `BufMut::writer` so the compressed stream is appended
+        // and the buffer grows as needed. `buf.as_mut()` would instead expose
+        // a fixed-length `&mut [u8]` over the bytes already written, so the
+        // encoder would overwrite them from the start and fail when full.
+        let mut encoder = ZlibEncoder::new((&mut *buf).writer(), Compression::default());
+        encoder.write_all(self.data.as_ref())?;
+        encoder.finish()?;
+
+        Ok(())
+    }
+}
-- 
rgit 0.1.3