🏡 index : ~doyle/packfile.git

author jordan <jordan@doyle.la> 2024-02-23 10:12:55.0 +00:00:00
committer GitHub <noreply@github.com> 2024-02-23 10:12:55.0 +00:00:00
commit
3046db9472a95d5945c22dee1c17bad3c266fb68 [patch]
tree
0443653c442634aa1182fc55f26b67af465815c1
parent
019a0525cdf8372ab56e34aab9dc04e037cade89
parent
01fac7ff9a04896a8e7248e209e6f3d3a9d78b10
download
3046db9472a95d5945c22dee1c17bad3c266fb68.tar.gz

Merge pull request #3 from alexheretic/chunk-pkt-lines

Fix pkt-line encoding to handle encoding of data larger than 65516 bytes & release 0.1.2

Diff

 CHANGELOG.md       |  3 ++-
 Cargo.toml         |  2 +-
 src/high_level.rs  |  2 +-
 src/low_level.rs   |  2 +-
 src/packet_line.rs | 77 +++++++++++++++++++++++++++++++++++++++++++++++--------
 5 files changed, 72 insertions(+), 14 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 76683db..f3352dc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,6 @@
# v0.1.2
* Fix pkt-line encoding to handle encoding of data larger than 65516 bytes.

# v0.1.1

This release makes packfile generation deterministic by setting the committed and authored time to the unix epoch. Thanks @david-monroe for the contribution.
diff --git a/Cargo.toml b/Cargo.toml
index 9f07e28..47f8a4a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -2,7 +2,7 @@
name = "packfile"
authors = ["Jordan Doyle <jordan@doyle.la>"]
description = "A simple library providing utilities to generate Git Packfiles in memory and send them to clients"
version = "0.1.1"
version = "0.1.2"
edition = "2021"
license = "WTFPL"
keywords = ["git", "packfile", "in-memory", "protocol"]
diff --git a/src/high_level.rs b/src/high_level.rs
index 2f46e9d..cb32827 100644
--- a/src/high_level.rs
+++ b/src/high_level.rs
@@ -226,7 +226,7 @@ mod test {
        let stdout = crate::test::verify_pack_file(output.freeze());

        insta::with_settings!({filters => vec![
            (r#"/(.*)/example.pack"#, "/path/to/example.pack")
            (r"/(.*)/example.pack", "/path/to/example.pack")
        ]}, {
            insta::assert_snapshot!(stdout);
        });
diff --git a/src/low_level.rs b/src/low_level.rs
index 4a36227..ef7158b 100644
--- a/src/low_level.rs
+++ b/src/low_level.rs
@@ -411,7 +411,7 @@ mod test {
            let stdout = crate::test::verify_pack_file(example());

            insta::with_settings!({filters => vec![
                (r#"/(.*)/example.pack"#, "/path/to/example.pack")
                (r"/(.*)/example.pack", "/path/to/example.pack")
            ]}, {
                insta::assert_snapshot!(stdout);
            });
diff --git a/src/packet_line.rs b/src/packet_line.rs
index 40c8c9f..fbd4ff5 100644
--- a/src/packet_line.rs
+++ b/src/packet_line.rs
@@ -1,8 +1,13 @@
use crate::{low_level::PackFile, Error};
use bytes::{BufMut, BytesMut};
use std::fmt::Write;

use bytes::{BufMut, BytesMut};

use crate::{low_level::PackFile, Error};
/// The maximum length of a pkt-line's data component is 65516 bytes.
/// Implementations MUST NOT send pkt-line whose length exceeds 65520
/// (65516 bytes of payload + 4 bytes of length data).
///
/// <https://git-scm.com/docs/protocol-common#_pkt_line_format>
const MAX_DATA_LEN: usize = 65516;

/// A wrapper containing every possible type of message that can be sent to a Git client.
pub enum PktLine<'a> {
@@ -27,25 +32,37 @@ impl PktLine<'_> {
    pub fn encode_to(&self, buf: &mut BytesMut) -> Result<(), Error> {
        match self {
            Self::Data(data) => {
                write!(buf, "{:04x}", data.len() + 4)?;
                buf.extend_from_slice(data);
                for chunk in data.chunks(MAX_DATA_LEN) {
                    write!(buf, "{:04x}", chunk.len() + 4)?;
                    buf.extend_from_slice(chunk);
                }
            }
            Self::SidebandData(packfile) => {
                // split the buf off so the cost of counting the bytes to put in the
                // data line prefix is just the cost of `unsplit` (an atomic decrement)
                let mut data_buf = buf.split_off(buf.len());

                data_buf.put_u8(1); // sideband, 1 = data
                packfile.encode_to(&mut data_buf)?;

                // write into the buf not the data buf so it's at the start of the msg
                write!(buf, "{:04x}", data_buf.len() + 4)?;
                buf.unsplit(data_buf);
                if data_buf.len() + 5 <= MAX_DATA_LEN - 1 {
                    write!(buf, "{:04x}", data_buf.len() + 5)?;
                    buf.put_u8(1); // sideband, 1 = data
                    buf.unsplit(data_buf);
                } else {
                    for chunk in data_buf.chunks(MAX_DATA_LEN - 1) {
                        write!(buf, "{:04x}", chunk.len() + 5)?;
                        buf.put_u8(1); // sideband, 1 = data
                        buf.extend_from_slice(chunk);
                    }
                }
            }
            Self::SidebandMsg(msg) => {
                write!(buf, "{:04x}", msg.len() + 4 + 1)?;
                buf.put_u8(2); // sideband, 2 = msg
                buf.extend_from_slice(msg);
                for chunk in msg.chunks(MAX_DATA_LEN - 1) {
                    write!(buf, "{:04x}", chunk.len() + 5)?;
                    buf.put_u8(2); // sideband, 2 = msg
                    buf.extend_from_slice(chunk);
                }
            }
            Self::Flush => buf.extend_from_slice(b"0000"),
            Self::Delimiter => buf.extend_from_slice(b"0001"),
@@ -64,6 +81,7 @@ impl<'a> From<&'a str> for PktLine<'a> {

#[cfg(test)]
mod test {
    use crate::packet_line::MAX_DATA_LEN;
    use bytes::BytesMut;

    #[test]
@@ -74,4 +92,41 @@ mod test {
            .unwrap();
        assert_eq!(buffer.as_ref(), b"0015agent=git/2.32.0\n");
    }

    /// A payload longer than `MAX_DATA_LEN` must be emitted as several
    /// pkt-lines, each one prefixed with its own 4-byte hex length header.
    #[test]
    fn test_large_pkt_line() {
        // Big enough to need exactly two pkt-lines.
        const PAYLOAD_LEN: usize = 70000;

        let payload = "a".repeat(PAYLOAD_LEN);
        let mut buffer = BytesMut::new();
        super::PktLine::from(payload.as_str())
            .encode_to(&mut buffer)
            .unwrap();
        assert_eq!(
            buffer.len(),
            PAYLOAD_LEN + 8,
            "should be two chunks each with a 4-byte len header"
        );

        // The first pkt-line is completely full: header + MAX_DATA_LEN bytes.
        let encoded: &[u8] = &buffer;
        let (first_line, second_line) = encoded.split_at(4 + MAX_DATA_LEN);

        // chunk 1
        let (first_header, first_data) = first_line.split_at(4);
        assert_eq!(
            std::str::from_utf8(first_header).unwrap(),
            format!("{:04x}", 4 + MAX_DATA_LEN)
        );
        assert!(
            first_data.iter().all(|&byte| byte == b'a'),
            "data should be all 'a's"
        );

        // chunk 2 carries whatever did not fit into the first pkt-line
        let (second_header, second_data) = second_line.split_at(4);
        assert_eq!(
            std::str::from_utf8(second_header).unwrap(),
            format!("{:04x}", 4 + (PAYLOAD_LEN - MAX_DATA_LEN))
        );
        assert!(
            second_data.iter().all(|&byte| byte == b'a'),
            "data should be all 'a's"
        );
    }
}