From bdce7122ccf598ff86bd18ec28d4fb37d0827cf8 Mon Sep 17 00:00:00 2001 From: Jordan Doyle Date: Sat, 24 Jul 2021 21:11:12 +0100 Subject: [PATCH] Reserve bytes up front since we can know the size of it quickly --- src/byte.rs | 140 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 24 ++++++++++++++++++++++-- src/ser.rs | 101 ++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------------------------- 3 files changed, 212 insertions(+), 53 deletions(-) create mode 100644 src/byte.rs diff --git a/src/byte.rs b/src/byte.rs new file mode 100644 index 0000000..595d30f --- /dev/null +++ b/src/byte.rs @@ -0,0 +1,140 @@ +use bytes::{BufMut, BytesMut}; + +pub trait BytesLikeBuf { + type Out: BytesLikeBuf; + + fn put_u8(&mut self, v: u8); + fn put_i32_le(&mut self, v: i32); + fn put_i64_le(&mut self, v: i64); + fn put_f64_le(&mut self, v: f64); + fn put_slice(&mut self, s: &[u8]); + fn split_off(&mut self, at: usize) -> Self::Out; + fn unsplit(&mut self, other: Self::Out); + fn len(&mut self) -> usize; + fn byte_mut(&mut self, at: usize) -> &mut u8; +} + +macro_rules! deref_impl { + ( + impl $trait:ident for $ty:ident { + $(fn $func:ident(&mut self, $($param_name:ident$(: $param_ty:ty)?),*)$( -> $ret:ty)?$( where Self: $deref:ident)?;)* + } + ) => { + impl $trait for $ty { + type Out = $ty; + + $( + fn $func(&mut self, $($param_name$(: $param_ty)?,)*)$( -> $ret)? { + ::$func(self, $($param_name,)*) + } + )* + + fn byte_mut(&mut self, at: usize) -> &mut u8 { + &mut self[at] + } + } + }; +} + +deref_impl!( + impl BytesLikeBuf for BytesMut { + fn put_u8(&mut self, v: u8) where Self: BufMut; + fn put_i32_le(&mut self, v: i32) where Self: BufMut; + fn put_i64_le(&mut self, v: i64) where Self: BufMut; + fn put_f64_le(&mut self, v: f64) where Self: BufMut; + fn put_slice(&mut self, s: &[u8]) where Self: BufMut; + fn split_off(&mut self, at: usize) -> BytesMut; + fn unsplit(&mut self, other: Self); + fn len(&mut self,) -> usize; + } +); + +impl<'a, B: BytesLikeBuf> BytesLikeBuf for &mut B { + type Out = ::Out; + + fn put_u8(&mut self, v: u8) { + B::put_u8(self, v) + } + + fn put_i32_le(&mut self, v: i32) { + B::put_i32_le(self, v) + } + + fn put_i64_le(&mut self, v: i64) { + B::put_i64_le(self, v) + } + + fn put_f64_le(&mut self, v: f64) { + B::put_f64_le(self, v) + } + + fn put_slice(&mut self, s: &[u8]) { + B::put_slice(self, s) + } + + fn split_off(&mut self, at: usize) -> Self::Out { + B::split_off(self, at) + } + + fn unsplit(&mut self, other: Self::Out) { + B::unsplit(self, other) + } + + fn len(&mut self) -> usize { + B::len(self) + } + + fn byte_mut(&mut self, at: usize) -> &mut u8 { + B::byte_mut(self, at) + } +} + +#[derive(Default)] +pub struct CountingBytes { + pub bytes: usize, + fake_byte: u8, +} + +impl BytesLikeBuf for CountingBytes { + type Out = CountingBytes; + + fn put_u8(&mut self, _v: u8) { + self.bytes += std::mem::size_of::(); + } + + fn put_i32_le(&mut self, _v: i32) { + self.bytes += std::mem::size_of::(); + } + + fn put_i64_le(&mut self, _v: i64) { + self.bytes += std::mem::size_of::(); + } + + fn put_f64_le(&mut self, _v: f64) { + self.bytes += std::mem::size_of::(); + } + + fn put_slice(&mut self, s: &[u8]) { + self.bytes += std::mem::size_of::() * s.len(); + } + + fn split_off(&mut self, _at: usize) -> Self { + CountingBytes { + bytes: 0, + fake_byte: 0, + } + } + + fn unsplit(&mut self, other: Self) { + self.bytes += other.bytes; + } + + fn len(&mut self) -> usize { + self.bytes + } + + fn byte_mut(&mut self, _at: usize) -> &mut u8 { + self.fake_byte = 0; + &mut self.fake_byte + } +} diff --git a/src/lib.rs b/src/lib.rs index b6eeaba..0168b00 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,18 +1,33 @@ +mod byte; mod error; pub mod ser; pub use error::Error; +use byte::CountingBytes; use bytes::BytesMut; use serde::Serialize; pub fn to_string(val: &T, output: &mut BytesMut) -> Result<(), Error> { + // do a quick pass over the value using our `CountingBytes` impl so we can do + // one big allocation rather than multiple smaller ones. + output.reserve(serialised_size_of(val)?); + val.serialize(ser::Serializer { key: None, output }) } +pub fn serialised_size_of(val: &T) -> Result { + let mut counting_bytes = CountingBytes::default(); + val.serialize(ser::Serializer { + key: None, + output: &mut counting_bytes, + })?; + Ok(counting_bytes.bytes) +} + #[cfg(test)] mod test { - use super::to_string; + use super::{serialised_size_of, to_string}; use bytes::{BufMut, BytesMut}; use serde::Serialize; @@ -75,6 +90,11 @@ mod test { .to_writer(&mut theirs) .unwrap(); - assert_eq!(ours, theirs.into_inner()); + let theirs = theirs.into_inner(); + assert_eq!(ours, theirs); + + let calculated_size = serialised_size_of(&test).unwrap(); + assert_eq!(calculated_size, ours.len()); + assert_eq!(calculated_size, theirs.len()); } } diff --git a/src/ser.rs b/src/ser.rs index 32e4665..d22f015 100644 --- a/src/ser.rs +++ b/src/ser.rs @@ -1,14 +1,13 @@ -use crate::Error; -use bytes::{BufMut, BytesMut}; +use crate::{byte::BytesLikeBuf, Error}; use serde::{ ser::{SerializeSeq, SerializeStruct}, Serialize, }; use std::convert::TryFrom; -pub struct Serializer<'a> { +pub struct Serializer<'a, B: BytesLikeBuf> { pub key: Option, - pub output: &'a mut BytesMut, + pub output: &'a mut B, } macro_rules! write_key_or_error { @@ -23,17 +22,17 @@ macro_rules! write_key_or_error { }; } -impl<'a> serde::Serializer for Serializer<'a> { +impl<'a, B: BytesLikeBuf> serde::Serializer for Serializer<'a, B> { type Ok = (); type Error = Error; - type SerializeSeq = SeqSerializer<'a>; - type SerializeTuple = TupleSerializer<'a>; - type SerializeTupleStruct = TupleStructSerializer<'a>; - type SerializeTupleVariant = TupleVariantSerializer<'a>; + type SerializeSeq = SeqSerializer<'a, B>; + type SerializeTuple = TupleSerializer<'a, B>; + type SerializeTupleStruct = TupleStructSerializer<'a, B>; + type SerializeTupleVariant = TupleVariantSerializer<'a, B>; type SerializeMap = serde::ser::Impossible; - type SerializeStruct = StructSerializer<'a>; - type SerializeStructVariant = StructVariantSerializer<'a>; + type SerializeStruct = StructSerializer<'a, B>; + type SerializeStructVariant = StructVariantSerializer<'a, B>; fn serialize_bool(self, v: bool) -> Result { write_key_or_error!(0x01, self.key, self.output); @@ -287,13 +286,13 @@ impl<'a> serde::Serializer for Serializer<'a> { } } -pub struct TupleSerializer<'a> { - inner: SeqSerializer<'a>, +pub struct TupleSerializer<'a, B: BytesLikeBuf> { + inner: SeqSerializer<'a, B>, } -impl<'a> serde::ser::SerializeTuple for TupleSerializer<'a> { +impl<'a, B: BytesLikeBuf> serde::ser::SerializeTuple for TupleSerializer<'a, B> { type Ok = (); - type Error = as serde::Serializer>::Error; + type Error = as serde::Serializer>::Error; fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> where @@ -307,16 +306,16 @@ impl<'a> serde::ser::SerializeTuple for TupleSerializer<'a> { } } -pub struct TupleVariantSerializer<'a> { - original_output: &'a mut BytesMut, - array_output: BytesMut, - doc_output: BytesMut, +pub struct TupleVariantSerializer<'a, B: BytesLikeBuf> { + original_output: &'a mut B, + array_output: ::Out, + doc_output: B::Out, key: usize, } -impl<'a> serde::ser::SerializeTupleVariant for TupleVariantSerializer<'a> { +impl<'a, B: BytesLikeBuf> serde::ser::SerializeTupleVariant for TupleVariantSerializer<'a, B> { type Ok = (); - type Error = as serde::Serializer>::Error; + type Error = as serde::Serializer>::Error; fn serialize_field(&mut self, value: &T) -> Result<(), Self::Error> where @@ -341,15 +340,15 @@ impl<'a> serde::ser::SerializeTupleVariant for TupleVariantSerializer<'a> { } } -pub struct StructVariantSerializer<'a> { - original_output: &'a mut BytesMut, - nested_doc_output: BytesMut, - doc_output: BytesMut, +pub struct StructVariantSerializer<'a, B: BytesLikeBuf> { + original_output: &'a mut B, + nested_doc_output: ::Out, + doc_output: B::Out, } -impl<'a> serde::ser::SerializeStructVariant for StructVariantSerializer<'a> { +impl<'a, B: BytesLikeBuf> serde::ser::SerializeStructVariant for StructVariantSerializer<'a, B> { type Ok = (); - type Error = as serde::Serializer>::Error; + type Error = as serde::Serializer>::Error; fn serialize_field( &mut self, @@ -379,13 +378,13 @@ impl<'a> serde::ser::SerializeStructVariant for StructVariantSerializer<'a> { } } -pub struct TupleStructSerializer<'a> { - inner: SeqSerializer<'a>, +pub struct TupleStructSerializer<'a, B: BytesLikeBuf> { + inner: SeqSerializer<'a, B>, } -impl<'a> serde::ser::SerializeTupleStruct for TupleStructSerializer<'a> { +impl<'a, B: BytesLikeBuf> serde::ser::SerializeTupleStruct for TupleStructSerializer<'a, B> { type Ok = (); - type Error = as serde::Serializer>::Error; + type Error = as serde::Serializer>::Error; fn serialize_field(&mut self, value: &T) -> Result<(), Self::Error> where @@ -399,15 +398,15 @@ impl<'a> serde::ser::SerializeTupleStruct for TupleStructSerializer<'a> { } } -pub struct SeqSerializer<'a> { - original_output: &'a mut BytesMut, - doc_output: BytesMut, +pub struct SeqSerializer<'a, B: BytesLikeBuf> { + original_output: &'a mut B, + doc_output: B::Out, key: usize, } -impl<'a> serde::ser::SerializeSeq for SeqSerializer<'a> { +impl<'a, B: BytesLikeBuf> serde::ser::SerializeSeq for SeqSerializer<'a, B> { type Ok = (); - type Error = as serde::Serializer>::Error; + type Error = as serde::Serializer>::Error; fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> where @@ -427,14 +426,14 @@ impl<'a> serde::ser::SerializeSeq for SeqSerializer<'a> { } } -pub struct StructSerializer<'a> { - original_output: &'a mut BytesMut, - doc_output: BytesMut, +pub struct StructSerializer<'a, B: BytesLikeBuf> { + original_output: &'a mut B, + doc_output: B::Out, } -impl<'a> serde::ser::SerializeStruct for StructSerializer<'a> { +impl<'a, B: BytesLikeBuf> serde::ser::SerializeStruct for StructSerializer<'a, B> { type Ok = (); - type Error = as serde::Serializer>::Error; + type Error = as serde::Serializer>::Error; fn serialize_field( &mut self, @@ -462,7 +461,7 @@ pub enum DocumentKey { } impl DocumentKey { - pub fn write_to_buf(&self, buf: &mut BytesMut) { + pub fn write_to_buf(&self, buf: &mut B) { match self { Self::Str(s) => buf.put_slice(s.as_bytes()), Self::Int(i) => { @@ -473,27 +472,27 @@ impl DocumentKey { } } -pub fn start_document(buffer: &mut BytesMut) -> BytesMut { +pub fn start_document(buffer: &mut B) -> B::Out { + let len = buffer.len(); + // splits the output for the doc to be written to, this is appended back onto to the // output when `StructSerializer::close` is called. - let mut doc_output = buffer.split_off(buffer.len()); + let mut doc_output = buffer.split_off(len); // reserves a i32 we can write the document size to later - doc_output.put_i32(0); + doc_output.put_i32_le(0); doc_output } -pub fn terminate_document(original_buffer: &mut BytesMut, mut document: BytesMut) { +pub fn terminate_document(original_buffer: &mut B, mut document: B::Out) { document.put_u8(0x00); // doc terminator // writes the total length of the output to the i32 we reserved earlier for (i, byte) in (document.len() as i32).to_le_bytes().iter().enumerate() { - debug_assert_eq!( - document[i], 0, - "document didn't reserve bytes for the length" - ); - document[i] = *byte; + let byte_ref = document.byte_mut(i); + debug_assert_eq!(*byte_ref, 0, "document didn't reserve bytes for the length"); + *byte_ref = *byte; } original_buffer.unsplit(document); -- libgit2 1.7.2