From 3046db9472a95d5945c22dee1c17bad3c266fb68 Mon Sep 17 00:00:00 2001 From: jordan Date: Fri, 23 Feb 2024 10:12:55 +0000 Subject: [PATCH] Merge pull request #3 from alexheretic/chunk-pkt-lines Fix pkt-line encoding to handle encoding of data larger than 65516 bytes & release 0.1.2 --- CHANGELOG.md | 3 +++ Cargo.toml | 2 +- src/high_level.rs | 2 +- src/low_level.rs | 2 +- src/packet_line.rs | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------- 5 files changed, 72 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 76683db..f3352dc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +# v0.1.2 +* Fix pkt-line encoding to handle encoding of data larger than 65516 bytes. + # v0.1.1 This release makes packfile generation deterministic by setting the committed and authored time to the unix epoch. Thanks @david-monroe for the contribution. diff --git a/Cargo.toml b/Cargo.toml index 9f07e28..47f8a4a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ name = "packfile" authors = ["Jordan Doyle "] description = "A simple library providing utilities to generate Git Packfiles in memory and send them to clients" -version = "0.1.1" +version = "0.1.2" edition = "2021" license = "WTFPL" keywords = ["git", "packfile", "in-memory", "protocol"] diff --git a/src/high_level.rs b/src/high_level.rs index 2f46e9d..cb32827 100644 --- a/src/high_level.rs +++ b/src/high_level.rs @@ -226,7 +226,7 @@ mod test { let stdout = crate::test::verify_pack_file(output.freeze()); insta::with_settings!({filters => vec![ - (r#"/(.*)/example.pack"#, "/path/to/example.pack") + (r"/(.*)/example.pack", "/path/to/example.pack") ]}, { insta::assert_snapshot!(stdout); }); diff --git a/src/low_level.rs b/src/low_level.rs index 4a36227..ef7158b 100644 --- a/src/low_level.rs +++ b/src/low_level.rs @@ -411,7 +411,7 @@ mod test { let stdout = crate::test::verify_pack_file(example()); insta::with_settings!({filters => vec![ - (r#"/(.*)/example.pack"#, "/path/to/example.pack") + (r"/(.*)/example.pack", "/path/to/example.pack") ]}, { insta::assert_snapshot!(stdout); }); diff --git a/src/packet_line.rs b/src/packet_line.rs index 40c8c9f..fbd4ff5 100644 --- a/src/packet_line.rs +++ b/src/packet_line.rs @@ -1,8 +1,13 @@ +use crate::{low_level::PackFile, Error}; +use bytes::{BufMut, BytesMut}; use std::fmt::Write; -use bytes::{BufMut, BytesMut}; - -use crate::{low_level::PackFile, Error}; +/// The maximum length of a pkt-line's data component is 65516 bytes. +/// Implementations MUST NOT send pkt-line whose length exceeds 65520 +/// (65516 bytes of payload + 4 bytes of length data). +/// +/// +const MAX_DATA_LEN: usize = 65516; /// A wrapper containing every possible type of message that can be sent to a Git client. pub enum PktLine<'a> { @@ -27,25 +32,37 @@ impl PktLine<'_> { pub fn encode_to(&self, buf: &mut BytesMut) -> Result<(), Error> { match self { Self::Data(data) => { - write!(buf, "{:04x}", data.len() + 4)?; - buf.extend_from_slice(data); + for chunk in data.chunks(MAX_DATA_LEN) { + write!(buf, "{:04x}", chunk.len() + 4)?; + buf.extend_from_slice(chunk); + } } Self::SidebandData(packfile) => { // split the buf off so the cost of counting the bytes to put in the // data line prefix is just the cost of `unsplit` (an atomic decrement) let mut data_buf = buf.split_off(buf.len()); - data_buf.put_u8(1); // sideband, 1 = data packfile.encode_to(&mut data_buf)?; // write into the buf not the data buf so it's at the start of the msg - write!(buf, "{:04x}", data_buf.len() + 4)?; - buf.unsplit(data_buf); + if data_buf.len() + 5 <= MAX_DATA_LEN - 1 { + write!(buf, "{:04x}", data_buf.len() + 5)?; + buf.put_u8(1); // sideband, 1 = data + buf.unsplit(data_buf); + } else { + for chunk in data_buf.chunks(MAX_DATA_LEN - 1) { + write!(buf, "{:04x}", chunk.len() + 5)?; + buf.put_u8(1); // sideband, 1 = data + buf.extend_from_slice(chunk); + } + } } Self::SidebandMsg(msg) => { - write!(buf, "{:04x}", msg.len() + 4 + 1)?; - buf.put_u8(2); // sideband, 2 = msg - buf.extend_from_slice(msg); + for chunk in msg.chunks(MAX_DATA_LEN - 1) { + write!(buf, "{:04x}", chunk.len() + 5)?; + buf.put_u8(2); // sideband, 2 = msg + buf.extend_from_slice(chunk); + } } Self::Flush => buf.extend_from_slice(b"0000"), Self::Delimiter => buf.extend_from_slice(b"0001"), @@ -64,6 +81,7 @@ impl<'a> From<&'a str> for PktLine<'a> { #[cfg(test)] mod test { + use crate::packet_line::MAX_DATA_LEN; use bytes::BytesMut; #[test] @@ -74,4 +92,41 @@ mod test { .unwrap(); assert_eq!(buffer.as_ref(), b"0015agent=git/2.32.0\n"); } + + #[test] + fn test_large_pkt_line() { + let mut buffer = BytesMut::new(); + super::PktLine::from("a".repeat(70000).as_str()) + .encode_to(&mut buffer) + .unwrap(); + assert_eq!( + buffer.len(), + 70008, + "should be two chunks each with a 4-byte len header" + ); + + // chunk 1 + assert_eq!( + std::str::from_utf8(&buffer[..4]).unwrap(), + format!("{:04x}", 4 + MAX_DATA_LEN) + ); + assert!( + &buffer[4..4 + MAX_DATA_LEN] + .iter() + .all(|b| char::from(*b) == 'a'), + "data should be all 'a's" + ); + + // chunk 2 + assert_eq!( + std::str::from_utf8(&buffer[4 + MAX_DATA_LEN..][..4]).unwrap(), + format!("{:04x}", 4 + (70000 - MAX_DATA_LEN)) + ); + assert!( + &buffer[4 + MAX_DATA_LEN + 4..] + .iter() + .all(|b| char::from(*b) == 'a'), + "data should be all 'a's" + ); + } } -- libgit2 1.7.2