🏡 index : ~doyle/serde_bson.git

author Jordan Doyle <jordan@doyle.la> 2021-07-24 20:11:12.0 +00:00:00
committer Jordan Doyle <jordan@doyle.la> 2021-07-24 20:11:12.0 +00:00:00
commit
bdce7122ccf598ff86bd18ec28d4fb37d0827cf8 [patch]
tree
00bddda8d9ddaff386b3edbd8c70c99e76d60788
parent
a3fbfc26cc12d4602d09dc93425e7e241c1dfa47
download
bdce7122ccf598ff86bd18ec28d4fb37d0827cf8.tar.gz

Reserve bytes up front since we can know the size of it quickly



Diff

 src/byte.rs | 140 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 src/lib.rs  |  24 +++++++++-
 src/ser.rs  | 101 +++++++++++++++++++++-----------------------
 3 files changed, 212 insertions(+), 53 deletions(-)

diff --git a/src/byte.rs b/src/byte.rs
new file mode 100644
index 0000000..595d30f
--- /dev/null
+++ b/src/byte.rs
@@ -0,0 +1,140 @@
use bytes::{BufMut, BytesMut};

/// Buffer operations the serializer relies on, abstracted so the same code can
/// drive either a real `BytesMut` or a size-only counter such as `CountingBytes`.
pub trait BytesLikeBuf {
    /// Buffer type handed back by [`split_off`](Self::split_off) and accepted by
    /// [`unsplit`](Self::unsplit). It must itself be a `BytesLikeBuf` so it can be
    /// written to and split again.
    type Out: BytesLikeBuf;

    /// Appends a single byte (a counting implementation only accounts for it).
    fn put_u8(&mut self, v: u8);
    /// Appends an `i32` in little-endian byte order.
    fn put_i32_le(&mut self, v: i32);
    /// Appends an `i64` in little-endian byte order.
    fn put_i64_le(&mut self, v: i64);
    /// Appends an `f64` in little-endian byte order.
    fn put_f64_le(&mut self, v: f64);
    /// Appends every byte of `s`.
    fn put_slice(&mut self, s: &[u8]);
    /// Splits the buffer at index `at` and returns the detached part.
    fn split_off(&mut self, at: usize) -> Self::Out;
    /// Re-attaches a buffer previously produced by [`split_off`](Self::split_off).
    fn unsplit(&mut self, other: Self::Out);
    /// Current length of the buffer in bytes.
    // NOTE(review): takes `&mut self` even though no visible implementation mutates here;
    // the signature is kept because the forwarding macro matches on `&mut self`.
    fn len(&mut self) -> usize;
    /// Mutable access to the byte at index `at`, used to patch already-written data.
    fn byte_mut(&mut self, at: usize) -> &mut u8;
}

// Generates an `impl $trait for $ty` whose methods simply forward to functions the
// type already has, so the forwarding boilerplate is written once.
//
// Each listed method has the form
//     fn name(&mut self, args...) [-> Ret] [where Self: SomeTrait];
// The comma after `&mut self` is part of the pattern, so it is required even when the
// method takes no further arguments.
macro_rules! deref_impl {
    (
        impl $trait:ident for $ty:ident {
            $(fn $func:ident(&mut self, $($param_name:ident$(: $param_ty:ty)?),*)$( -> $ret:ty)?$( where Self: $deref:ident)?;)*
        }
    ) => {
        impl $trait for $ty {
            // Splitting the concrete type yields the same concrete type.
            type Out = $ty;

            $(
                fn $func(&mut self, $($param_name$(: $param_ty)?,)*)$( -> $ret)? {
                    // With a `where Self: Trait` clause this expands to
                    // `<Self as Trait>::func(..)`; without one it is `<Self>::func(..)`,
                    // i.e. a function looked up on the type itself.
                    <Self$( as $deref)?>::$func(self, $($param_name,)*)
                }
            )*

            // `byte_mut` is not forwarded; it is implemented via mutable indexing, so
            // `$ty` must support `self[usize]` yielding a `u8`.
            fn byte_mut(&mut self, at: usize) -> &mut u8 {
                &mut self[at]
            }
        }
    };
}

// Implements `BytesLikeBuf` for `BytesMut` by forwarding: the `put_*` writers go through
// the `BufMut` trait, while `split_off`, `unsplit` and `len` carry no `where` clause and
// therefore resolve on `BytesMut` itself.
deref_impl!(
    impl BytesLikeBuf for BytesMut {
        fn put_u8(&mut self, v: u8) where Self: BufMut;
        fn put_i32_le(&mut self, v: i32) where Self: BufMut;
        fn put_i64_le(&mut self, v: i64) where Self: BufMut;
        fn put_f64_le(&mut self, v: f64) where Self: BufMut;
        fn put_slice(&mut self, s: &[u8]) where Self: BufMut;
        fn split_off(&mut self, at: usize) -> BytesMut;
        fn unsplit(&mut self, other: Self);
        // The trailing comma is required by the macro's `(&mut self, ...)` pattern.
        fn len(&mut self,) -> usize;
    }
);

/// Lets a mutable reference to any [`BytesLikeBuf`] be used as a `BytesLikeBuf` itself.
///
/// Every method forwards to the referenced buffer `B`; nothing is buffered or altered
/// on the way through.
impl<B: BytesLikeBuf> BytesLikeBuf for &mut B {
    // Splitting through the reference produces exactly what splitting `B` produces.
    type Out = <B as BytesLikeBuf>::Out;

    // The calls below name `B::method` explicitly so they dispatch to the underlying
    // buffer's implementation rather than to this forwarding impl.

    fn put_u8(&mut self, v: u8) {
        B::put_u8(self, v)
    }

    fn put_i32_le(&mut self, v: i32) {
        B::put_i32_le(self, v)
    }

    fn put_i64_le(&mut self, v: i64) {
        B::put_i64_le(self, v)
    }

    fn put_f64_le(&mut self, v: f64) {
        B::put_f64_le(self, v)
    }

    fn put_slice(&mut self, s: &[u8]) {
        B::put_slice(self, s)
    }

    fn split_off(&mut self, at: usize) -> Self::Out {
        B::split_off(self, at)
    }

    fn unsplit(&mut self, other: Self::Out) {
        B::unsplit(self, other)
    }

    fn len(&mut self) -> usize {
        B::len(self)
    }

    fn byte_mut(&mut self, at: usize) -> &mut u8 {
        B::byte_mut(self, at)
    }
}

/// A [`BytesLikeBuf`] that stores nothing and only tallies how many bytes would have
/// been written, so a value's serialised size can be measured without allocating.
#[derive(Debug, Default)]
pub struct CountingBytes {
    /// Number of bytes accounted for so far.
    pub bytes: usize,
    /// Scratch byte handed out by `byte_mut`; writes to it are discarded.
    fake_byte: u8,
}

impl BytesLikeBuf for CountingBytes {
    type Out = CountingBytes;

    fn put_u8(&mut self, _v: u8) {
        self.bytes += std::mem::size_of::<u8>();
    }

    fn put_i32_le(&mut self, _v: i32) {
        self.bytes += std::mem::size_of::<i32>();
    }

    fn put_i64_le(&mut self, _v: i64) {
        self.bytes += std::mem::size_of::<i64>();
    }

    fn put_f64_le(&mut self, _v: f64) {
        self.bytes += std::mem::size_of::<f64>();
    }

    fn put_slice(&mut self, s: &[u8]) {
        // One counted byte per slice element.
        self.bytes += s.len();
    }

    /// Splits the count at `at`: this counter keeps the first `at` bytes and the
    /// returned counter takes whatever lies beyond them.
    ///
    /// Splitting at the current length (or past it) returns an empty counter and leaves
    /// this one unchanged. In every case the two halves still sum to the original
    /// total, so a later `unsplit` restores it.
    fn split_off(&mut self, at: usize) -> Self {
        let tail = self.bytes.saturating_sub(at);
        self.bytes -= tail;

        CountingBytes {
            bytes: tail,
            fake_byte: 0,
        }
    }

    fn unsplit(&mut self, other: Self) {
        self.bytes += other.bytes;
    }

    fn len(&mut self) -> usize {
        self.bytes
    }

    /// Returns a scratch byte that is reset to zero on every call, so callers that
    /// expect to find a zeroed placeholder before overwriting it always do.
    fn byte_mut(&mut self, _at: usize) -> &mut u8 {
        self.fake_byte = 0;
        &mut self.fake_byte
    }
}
diff --git a/src/lib.rs b/src/lib.rs
index b6eeaba..0168b00 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,18 +1,33 @@
mod byte;
mod error;
pub mod ser;

pub use error::Error;

use byte::CountingBytes;
use bytes::BytesMut;
use serde::Serialize;

/// Serialises `val` into `output`.
///
/// The value is walked twice: first with a byte counter to learn the exact encoded
/// size, so `output` can grow once up front instead of repeatedly while writing, and
/// then again to write the bytes.
///
/// # Errors
///
/// Returns any [`Error`] raised by either serialisation pass.
pub fn to_string<T: Serialize>(val: &T, output: &mut BytesMut) -> Result<(), Error> {
    let required = serialised_size_of(val)?;
    output.reserve(required);

    let serializer = ser::Serializer { key: None, output };
    val.serialize(serializer)
}

/// Computes how many bytes `val` occupies once serialised, without writing them anywhere.
///
/// The value is run through the regular serializer with a `CountingBytes` sink, which
/// only adds up the sizes of the writes it receives.
///
/// # Errors
///
/// Returns any [`Error`] raised while serialising `val`.
pub fn serialised_size_of<T: Serialize>(val: &T) -> Result<usize, Error> {
    let mut counter = CountingBytes::default();
    let serializer = ser::Serializer {
        key: None,
        output: &mut counter,
    };

    val.serialize(serializer).map(|()| counter.bytes)
}

#[cfg(test)]
mod test {
    use super::to_string;
    use super::{serialised_size_of, to_string};
    use bytes::{BufMut, BytesMut};
    use serde::Serialize;

@@ -75,6 +90,11 @@ mod test {
            .to_writer(&mut theirs)
            .unwrap();

        assert_eq!(ours, theirs.into_inner());
        let theirs = theirs.into_inner();
        assert_eq!(ours, theirs);

        let calculated_size = serialised_size_of(&test).unwrap();
        assert_eq!(calculated_size, ours.len());
        assert_eq!(calculated_size, theirs.len());
    }
}
diff --git a/src/ser.rs b/src/ser.rs
index 32e4665..d22f015 100644
--- a/src/ser.rs
+++ b/src/ser.rs
@@ -1,14 +1,13 @@
use crate::Error;
use bytes::{BufMut, BytesMut};
use crate::{byte::BytesLikeBuf, Error};
use serde::{
    ser::{SerializeSeq, SerializeStruct},
    Serialize,
};
use std::convert::TryFrom;

pub struct Serializer<'a> {
pub struct Serializer<'a, B: BytesLikeBuf> {
    pub key: Option<DocumentKey>,
    pub output: &'a mut BytesMut,
    pub output: &'a mut B,
}

macro_rules! write_key_or_error {
@@ -23,17 +22,17 @@ macro_rules! write_key_or_error {
    };
}

impl<'a> serde::Serializer for Serializer<'a> {
impl<'a, B: BytesLikeBuf> serde::Serializer for Serializer<'a, B> {
    type Ok = ();
    type Error = Error;

    type SerializeSeq = SeqSerializer<'a>;
    type SerializeTuple = TupleSerializer<'a>;
    type SerializeTupleStruct = TupleStructSerializer<'a>;
    type SerializeTupleVariant = TupleVariantSerializer<'a>;
    type SerializeSeq = SeqSerializer<'a, B>;
    type SerializeTuple = TupleSerializer<'a, B>;
    type SerializeTupleStruct = TupleStructSerializer<'a, B>;
    type SerializeTupleVariant = TupleVariantSerializer<'a, B>;
    type SerializeMap = serde::ser::Impossible<Self::Ok, Self::Error>;
    type SerializeStruct = StructSerializer<'a>;
    type SerializeStructVariant = StructVariantSerializer<'a>;
    type SerializeStruct = StructSerializer<'a, B>;
    type SerializeStructVariant = StructVariantSerializer<'a, B>;

    fn serialize_bool(self, v: bool) -> Result<Self::Ok, Self::Error> {
        write_key_or_error!(0x01, self.key, self.output);
@@ -287,13 +286,13 @@ impl<'a> serde::Serializer for Serializer<'a> {
    }
}

pub struct TupleSerializer<'a> {
    inner: SeqSerializer<'a>,
pub struct TupleSerializer<'a, B: BytesLikeBuf> {
    inner: SeqSerializer<'a, B>,
}

impl<'a> serde::ser::SerializeTuple for TupleSerializer<'a> {
impl<'a, B: BytesLikeBuf> serde::ser::SerializeTuple for TupleSerializer<'a, B> {
    type Ok = ();
    type Error = <Serializer<'a> as serde::Serializer>::Error;
    type Error = <Serializer<'a, B> as serde::Serializer>::Error;

    fn serialize_element<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
    where
@@ -307,16 +306,16 @@ impl<'a> serde::ser::SerializeTuple for TupleSerializer<'a> {
    }
}

pub struct TupleVariantSerializer<'a> {
    original_output: &'a mut BytesMut,
    array_output: BytesMut,
    doc_output: BytesMut,
pub struct TupleVariantSerializer<'a, B: BytesLikeBuf> {
    original_output: &'a mut B,
    array_output: <B::Out as BytesLikeBuf>::Out,
    doc_output: B::Out,
    key: usize,
}

impl<'a> serde::ser::SerializeTupleVariant for TupleVariantSerializer<'a> {
impl<'a, B: BytesLikeBuf> serde::ser::SerializeTupleVariant for TupleVariantSerializer<'a, B> {
    type Ok = ();
    type Error = <Serializer<'a> as serde::Serializer>::Error;
    type Error = <Serializer<'a, B> as serde::Serializer>::Error;

    fn serialize_field<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
    where
@@ -341,15 +340,15 @@ impl<'a> serde::ser::SerializeTupleVariant for TupleVariantSerializer<'a> {
    }
}

pub struct StructVariantSerializer<'a> {
    original_output: &'a mut BytesMut,
    nested_doc_output: BytesMut,
    doc_output: BytesMut,
pub struct StructVariantSerializer<'a, B: BytesLikeBuf> {
    original_output: &'a mut B,
    nested_doc_output: <B::Out as BytesLikeBuf>::Out,
    doc_output: B::Out,
}

impl<'a> serde::ser::SerializeStructVariant for StructVariantSerializer<'a> {
impl<'a, B: BytesLikeBuf> serde::ser::SerializeStructVariant for StructVariantSerializer<'a, B> {
    type Ok = ();
    type Error = <Serializer<'a> as serde::Serializer>::Error;
    type Error = <Serializer<'a, B> as serde::Serializer>::Error;

    fn serialize_field<T: ?Sized>(
        &mut self,
@@ -379,13 +378,13 @@ impl<'a> serde::ser::SerializeStructVariant for StructVariantSerializer<'a> {
    }
}

pub struct TupleStructSerializer<'a> {
    inner: SeqSerializer<'a>,
pub struct TupleStructSerializer<'a, B: BytesLikeBuf> {
    inner: SeqSerializer<'a, B>,
}

impl<'a> serde::ser::SerializeTupleStruct for TupleStructSerializer<'a> {
impl<'a, B: BytesLikeBuf> serde::ser::SerializeTupleStruct for TupleStructSerializer<'a, B> {
    type Ok = ();
    type Error = <Serializer<'a> as serde::Serializer>::Error;
    type Error = <Serializer<'a, B> as serde::Serializer>::Error;

    fn serialize_field<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
    where
@@ -399,15 +398,15 @@ impl<'a> serde::ser::SerializeTupleStruct for TupleStructSerializer<'a> {
    }
}

pub struct SeqSerializer<'a> {
    original_output: &'a mut BytesMut,
    doc_output: BytesMut,
pub struct SeqSerializer<'a, B: BytesLikeBuf> {
    original_output: &'a mut B,
    doc_output: B::Out,
    key: usize,
}

impl<'a> serde::ser::SerializeSeq for SeqSerializer<'a> {
impl<'a, B: BytesLikeBuf> serde::ser::SerializeSeq for SeqSerializer<'a, B> {
    type Ok = ();
    type Error = <Serializer<'a> as serde::Serializer>::Error;
    type Error = <Serializer<'a, B> as serde::Serializer>::Error;

    fn serialize_element<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
    where
@@ -427,14 +426,14 @@ impl<'a> serde::ser::SerializeSeq for SeqSerializer<'a> {
    }
}

pub struct StructSerializer<'a> {
    original_output: &'a mut BytesMut,
    doc_output: BytesMut,
pub struct StructSerializer<'a, B: BytesLikeBuf> {
    original_output: &'a mut B,
    doc_output: B::Out,
}

impl<'a> serde::ser::SerializeStruct for StructSerializer<'a> {
impl<'a, B: BytesLikeBuf> serde::ser::SerializeStruct for StructSerializer<'a, B> {
    type Ok = ();
    type Error = <Serializer<'a> as serde::Serializer>::Error;
    type Error = <Serializer<'a, B> as serde::Serializer>::Error;

    fn serialize_field<T: ?Sized>(
        &mut self,
@@ -462,7 +461,7 @@ pub enum DocumentKey {
}

impl DocumentKey {
    pub fn write_to_buf(&self, buf: &mut BytesMut) {
    pub fn write_to_buf<B: BytesLikeBuf>(&self, buf: &mut B) {
        match self {
            Self::Str(s) => buf.put_slice(s.as_bytes()),
            Self::Int(i) => {
@@ -473,27 +472,27 @@ impl DocumentKey {
    }
}

pub fn start_document(buffer: &mut BytesMut) -> BytesMut {
pub fn start_document<B: BytesLikeBuf>(buffer: &mut B) -> B::Out {
    let len = buffer.len();

    // splits the output for the doc to be written to, this is appended back onto the
    // output when `StructSerializer::close` is called.
    let mut doc_output = buffer.split_off(buffer.len());
    let mut doc_output = buffer.split_off(len);

    // reserves a i32 we can write the document size to later
    doc_output.put_i32(0);
    doc_output.put_i32_le(0);

    doc_output
}

pub fn terminate_document(original_buffer: &mut BytesMut, mut document: BytesMut) {
pub fn terminate_document<B: BytesLikeBuf>(original_buffer: &mut B, mut document: B::Out) {
    document.put_u8(0x00); // doc terminator

    // writes the total length of the output to the i32 we reserved earlier
    for (i, byte) in (document.len() as i32).to_le_bytes().iter().enumerate() {
        debug_assert_eq!(
            document[i], 0,
            "document didn't reserve bytes for the length"
        );
        document[i] = *byte;
        let byte_ref = document.byte_mut(i);
        debug_assert_eq!(*byte_ref, 0, "document didn't reserve bytes for the length");
        *byte_ref = *byte;
    }

    original_buffer.unsplit(document);