🏡 index : ~doyle/serde_bson.git

author Jordan Doyle <jordan@doyle.la> 2021-07-23 1:50:01.0 +00:00:00
committer Jordan Doyle <jordan@doyle.la> 2021-07-23 2:04:37.0 +00:00:00
commit
fd15ebb44b65ce7a8aa81250edbef294c79e3592 [patch]
tree
4b9206230fde3eb97b84a75962b32bbe311eacfb
download
fd15ebb44b65ce7a8aa81250edbef294c79e3592.tar.gz

initial commit



Diff

 .gitignore          |   3 +-
 Cargo.toml          |  30 ++++++-
 LICENSE             |  12 ++-
 README.md           |  23 +++++-
 benches/borrowed.rs |  58 ++++++++++++-
 benches/owned.rs    |  60 ++++++++++++-
 src/error.rs        |  31 ++++++-
 src/lib.rs          |  71 +++++++++++++++-
 src/ser.rs          | 256 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 9 files changed, 544 insertions(+)

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..77147e2
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
/target
Cargo.lock
.idea/
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..f43bd78
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,30 @@
[package]
name = "serde_bson"
authors = ["Jordan D. <jordan@doyle.la>"]
description = "Fast bson serde implementation"
repository = "https://github.com/w4/serde_bson"
version = "0.0.1"
edition = "2018"
license = "0BSD"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
serde = "1"
bytes = "1"
take_mut = "0.2"

[dev-dependencies]
serde = { version = "1", features = ["derive"] }
serde_bytes = "0.11"
bson = "1.2"
criterion = "0.3"
rand = "0.8"

[[bench]]
name = "borrowed"
harness = false

[[bench]]
name = "owned"
harness = false
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..e9d84ae
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,12 @@
Copyright (C) 2006 by Rob Landley <rob@landley.net>

Permission to use, copy, modify, and/or distribute this software for any purpose
with or without fee is hereby granted.

THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..1016dda
--- /dev/null
+++ b/README.md
@@ -0,0 +1,23 @@
## serde_bson

Originally implemented as a workaround for the `bson` crate cloning every value it
comes across, this crate looks to show a significant improvement across the board
for serialisation (~80% improvement).

```
mongodb's bson  time:   [1.1160 us 1.1171 us 1.1183 us]
Found 2 outliers among 100 measurements (2.00%)
  2 (2.00%) high mild

serde_bson      time:   [201.99 ns 202.17 ns 202.38 ns]                                 
Found 10 outliers among 100 measurements (10.00%)
  4 (4.00%) low mild
  4 (4.00%) high mild
  2 (2.00%) high severe
```

There are a few pieces missing, such as arrays and nested documents, but they're not
too difficult to add; it's just that it's 2:38am and I've smashed this out in an
hour.

Pull requests welcome as always.
\ No newline at end of file
diff --git a/benches/borrowed.rs b/benches/borrowed.rs
new file mode 100644
index 0000000..44f382a
--- /dev/null
+++ b/benches/borrowed.rs
@@ -0,0 +1,58 @@
use bytes::BufMut;
use criterion::{black_box, criterion_group, criterion_main, Criterion};

/// Benchmark fixture whose string and byte fields borrow their data.
///
/// Holds three string slices, an `i64`, an `f64` and a byte slice.
#[derive(serde::Serialize)]
pub struct A<'a> {
    a: &'a str,
    b: &'a str,
    c: &'a str,
    d: i64,
    e: f64,
    // routed through `serde_bytes` rather than the default slice serialisation
    #[serde(with = "serde_bytes")]
    f: &'a [u8],
}

/// Registers two benchmarks that serialise the same borrowed fixture: one using
/// the `bson` crate and one using `serde_bson`.
fn benchmark(c: &mut Criterion) {
    let val = A {
        a: "Now this is a story all about how
            My life got flipped turned upside down
            And I'd like to take a minute, just sit right there
            I'll tell you how I became the prince of a town called Bel-Air",
        b: "In West Philadelphia born and raised
            On the playground is where I spent most of my days
            Chillin' out, maxin', relaxin' all cool
            And all shootin' some b-ball outside of the school
            When a couple of guys who were up to no good
            Started makin' trouble in my neighborhood",
        c: "I got in one little fight and my mom got scared
            And said 'You're movin' with your auntie and uncle in Bel-Air'",
        d: 420,
        e: 420.69696969696969,
        f: "Above are some popular 'pop culture' references for your perusal and enjoyment"
            .as_bytes(),
    };

    c.bench_function("borrowed: mongodb's bson", |bencher| {
        // the sink is allocated once and cleared after every iteration
        let mut sink = Vec::new();

        bencher.iter(|| {
            let document = bson::ser::to_document(black_box(&val)).unwrap();
            document.to_writer(&mut sink).unwrap();
            sink.clear();
        })
    });

    c.bench_function("borrowed: serde_bson", |bencher| {
        let mut buffer = bytes::BytesMut::new();

        bencher.iter(|| {
            serde_bson::to_string(black_box(&val), &mut buffer).unwrap();
            // split the written bytes off and drop them so `buffer` is empty again
            drop(buffer.split());
        });
    });
}

criterion_group!(benches, benchmark);
criterion_main!(benches);
diff --git a/benches/owned.rs b/benches/owned.rs
new file mode 100644
index 0000000..773567e
--- /dev/null
+++ b/benches/owned.rs
@@ -0,0 +1,60 @@
use bytes::BufMut;
use criterion::{black_box, criterion_group, criterion_main, Criterion};

/// Benchmark fixture that owns all of its data.
///
/// Holds three `String`s, an `i64`, an `f64` and a `Vec<u8>`.
#[derive(serde::Serialize)]
pub struct A {
    a: String,
    b: String,
    c: String,
    d: i64,
    e: f64,
    // routed through `serde_bytes` rather than the default `Vec<u8>` serialisation
    #[serde(with = "serde_bytes")]
    f: Vec<u8>,
}

/// Registers two benchmarks that serialise the same owned fixture: one using
/// the `bson` crate and one using `serde_bson`.
fn benchmark(c: &mut Criterion) {
    let val = A {
        a: "Now this is a story all about how
            My life got flipped turned upside down
            And I'd like to take a minute, just sit right there
            I'll tell you how I became the prince of a town called Bel-Air"
            .to_string(),
        b: "In West Philadelphia born and raised
            On the playground is where I spent most of my days
            Chillin' out, maxin', relaxin' all cool
            And all shootin' some b-ball outside of the school
            When a couple of guys who were up to no good
            Started makin' trouble in my neighborhood"
            .to_string(),
        c: "I got in one little fight and my mom got scared
            And said 'You're movin' with your auntie and uncle in Bel-Air'"
            .to_string(),
        d: 420,
        e: 420.69696969696969,
        f: "Above are some popular 'pop culture' references for your perusal and enjoyment".into(),
    };

    c.bench_function("owned: mongodb's bson", |bencher| {
        // the sink is allocated once and cleared after every iteration
        let mut sink = Vec::new();

        bencher.iter(|| {
            let document = bson::ser::to_document(black_box(&val)).unwrap();
            document.to_writer(&mut sink).unwrap();
            sink.clear();
        })
    });

    c.bench_function("owned: serde_bson", |bencher| {
        let mut buffer = bytes::BytesMut::new();

        bencher.iter(|| {
            serde_bson::to_string(black_box(&val), &mut buffer).unwrap();
            // split the written bytes off and drop them so `buffer` is empty again
            drop(buffer.split());
        });
    });
}

criterion_group!(benches, benchmark);
criterion_main!(benches);
diff --git a/src/error.rs b/src/error.rs
new file mode 100644
index 0000000..8bf71b9
--- /dev/null
+++ b/src/error.rs
@@ -0,0 +1,31 @@
use std::fmt::{Display, Formatter};

/// Errors that can be produced while serialising a value.
#[derive(Debug)]
pub enum Error {
    /// A value was serialised without a field key, i.e. outside of a struct.
    NotSerializingStruct,
    /// A custom error message raised through `serde::ser::Error::custom`.
    Serde(String),
    /// An unsigned integer was encountered; the Display text notes these are not in the bson spec.
    UnsignedIntNotInSpec,
}

impl Display for Error {
    /// Writes the human-readable description of the error to `f`.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        // only the `Serde` variant carries data that needs formatting; the
        // other variants map to fixed messages
        let message = match self {
            Self::Serde(context) => {
                return write!(f, "error from value serialiser: {}", context);
            }
            Self::NotSerializingStruct => {
                "individual values cannot be serialised, try serialising a struct instead"
            }
            Self::UnsignedIntNotInSpec => "unsigned ints are not supported in the bson spec",
        };

        f.write_str(message)
    }
}

// Marks `Error` as a standard error type; the trait's default methods are used as-is.
impl std::error::Error for Error {}

impl serde::ser::Error for Error {
    /// Wraps an arbitrary displayable message in the `Serde` variant.
    fn custom<T>(msg: T) -> Self
    where
        T: Display,
    {
        let rendered = msg.to_string();
        Self::Serde(rendered)
    }
}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..f42503f
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,71 @@
mod error;
pub mod ser;

pub use error::Error;

use bytes::{BufMut, BytesMut};
use serde::Serialize;

pub fn to_string<T: Serialize>(val: &T, output: &mut BytesMut) -> Result<(), Error> {
    const SIZE_OF_SIZE: usize = std::mem::size_of::<i32>();

    // essentially reserves a i32 we can prepend back onto the BytesMut later
    // at the cost of an atomic increment
    output.put_i32(0);
    let mut size = output.split_to(SIZE_OF_SIZE);

    val.serialize(ser::Serializer { key: None, output })?;

    // writes the total length of the output to the i32 we split off before
    for (i, byte) in ((output.len() + SIZE_OF_SIZE) as i32)
        .to_le_bytes()
        .iter()
        .enumerate()
    {
        size[i] = *byte;
    }

    // this is safe because `unsplit` can't panic
    take_mut::take(output, move |output| {
        // O(1) prepend since `size` originally came from `output`.
        size.unsplit(output);
        size
    });

    Ok(())
}

#[cfg(test)]
mod test {
    use super::to_string;
    use bytes::{BufMut, BytesMut};
    use serde::Serialize;

    /// Serialises the same struct with this crate and with the `bson` crate and
    /// checks that both produce identical bytes.
    #[test]
    pub fn test_basic() {
        #[derive(Serialize)]
        pub struct A<'a> {
            cool: i32,
            #[serde(with = "serde_bytes")]
            beans: &'a [u8],
            bro: &'a str,
        }

        let test = &A {
            cool: 999,
            beans: "so there was this one time at bandcamp".as_bytes(),
            bro: "the craziest thing happened",
        };

        let mut ours = BytesMut::new();
        // surface serialisation errors directly instead of discarding the `Result`
        to_string(&test, &mut ours).unwrap();

        let mut theirs = BytesMut::new().writer();
        bson::ser::to_document(&test)
            .unwrap()
            .to_writer(&mut theirs)
            .unwrap();

        assert_eq!(ours, theirs.into_inner());
    }
}
diff --git a/src/ser.rs b/src/ser.rs
new file mode 100644
index 0000000..8fdb2e0
--- /dev/null
+++ b/src/ser.rs
@@ -0,0 +1,256 @@
use crate::Error;
use bytes::BufMut;
use serde::Serialize;
use std::convert::TryFrom;

/// Serialiser that writes a single value into `output`.
pub struct Serializer<'a, B: BufMut> {
    /// Name written in front of the value; when `None`, scalar values are
    /// rejected with `Error::NotSerializingStruct`.
    pub key: Option<&'static str>,
    /// Buffer that the encoded bytes are appended to.
    pub output: &'a mut B,
}

// Writes an element header to `$output`: the type byte `$id`, the key bytes and
// a terminating NUL. When `$key` is `None` nothing is written and the enclosing
// function returns `Err(Error::NotSerializingStruct)`.
macro_rules! write_key_or_error {
    ($id:literal, $key:expr, $output:expr) => {
        match $key {
            Some(name) => {
                $output.put_u8($id);
                $output.put_slice(name.as_bytes());
                $output.put_u8(0x00);
            }
            None => return Err(Error::NotSerializingStruct),
        }
    };
}

/// Writes a single value to `output` as a BSON element.
///
/// Scalar values are written as `type byte, key, NUL, payload` and therefore
/// need `key` to be set; without it they fail with
/// [`Error::NotSerializingStruct`]. A struct is only accepted when `key` is
/// unset.
///
/// # Panics
///
/// Sequences, tuples, maps, enum variants carrying data and nested structs are
/// not implemented yet and panic via `todo!`. Strings and byte arrays whose
/// length does not fit in an `i32` also panic.
impl<'a, B: BufMut> serde::Serializer for Serializer<'a, B> {
    type Ok = ();
    type Error = Error;

    // structs are the only compound type with a serialiser so far
    type SerializeSeq = serde::ser::Impossible<Self::Ok, Self::Error>;
    type SerializeTuple = serde::ser::Impossible<Self::Ok, Self::Error>;
    type SerializeTupleStruct = serde::ser::Impossible<Self::Ok, Self::Error>;
    type SerializeTupleVariant = serde::ser::Impossible<Self::Ok, Self::Error>;
    type SerializeMap = serde::ser::Impossible<Self::Ok, Self::Error>;
    type SerializeStruct = StructSerializer<'a, B>;
    type SerializeStructVariant = serde::ser::Impossible<Self::Ok, Self::Error>;

    /// Writes a boolean element: a single byte, `0x00` or `0x01`.
    fn serialize_bool(self, v: bool) -> Result<Self::Ok, Self::Error> {
        // 0x08 is the BSON boolean element type (0x01 would declare a double)
        write_key_or_error!(0x08, self.key, self.output);
        self.output.put_u8(v as u8);
        Ok(())
    }

    /// Widens to `i32`, the smallest integer this serialiser writes.
    fn serialize_i8(self, v: i8) -> Result<Self::Ok, Self::Error> {
        self.serialize_i32(i32::from(v))
    }

    /// Widens to `i32`, the smallest integer this serialiser writes.
    fn serialize_i16(self, v: i16) -> Result<Self::Ok, Self::Error> {
        self.serialize_i32(i32::from(v))
    }

    /// Writes a 32-bit integer element (type `0x10`), little-endian.
    fn serialize_i32(self, v: i32) -> Result<Self::Ok, Self::Error> {
        write_key_or_error!(0x10, self.key, self.output);
        self.output.put_i32_le(v);
        Ok(())
    }

    /// Writes a 64-bit integer element (type `0x12`), little-endian.
    fn serialize_i64(self, v: i64) -> Result<Self::Ok, Self::Error> {
        write_key_or_error!(0x12, self.key, self.output);
        self.output.put_i64_le(v);
        Ok(())
    }

    /// Always fails with [`Error::UnsignedIntNotInSpec`].
    fn serialize_u8(self, _v: u8) -> Result<Self::Ok, Self::Error> {
        Err(Error::UnsignedIntNotInSpec)
    }

    /// Always fails with [`Error::UnsignedIntNotInSpec`].
    fn serialize_u16(self, _v: u16) -> Result<Self::Ok, Self::Error> {
        Err(Error::UnsignedIntNotInSpec)
    }

    /// Always fails with [`Error::UnsignedIntNotInSpec`].
    fn serialize_u32(self, _v: u32) -> Result<Self::Ok, Self::Error> {
        Err(Error::UnsignedIntNotInSpec)
    }

    /// Always fails with [`Error::UnsignedIntNotInSpec`].
    fn serialize_u64(self, _v: u64) -> Result<Self::Ok, Self::Error> {
        Err(Error::UnsignedIntNotInSpec)
    }

    /// Widens to `f64`, the only floating point width this serialiser writes.
    fn serialize_f32(self, v: f32) -> Result<Self::Ok, Self::Error> {
        self.serialize_f64(f64::from(v))
    }

    /// Writes a double element (type `0x01`), little-endian.
    fn serialize_f64(self, v: f64) -> Result<Self::Ok, Self::Error> {
        write_key_or_error!(0x01, self.key, self.output);
        self.output.put_f64_le(v);
        Ok(())
    }

    /// Writes the character as a one-character string element.
    fn serialize_char(self, v: char) -> Result<Self::Ok, Self::Error> {
        // there is no dedicated char element, so reuse the string encoding;
        // four bytes is the longest UTF-8 encoding of a single `char`
        let mut utf8 = [0_u8; 4];
        self.serialize_str(v.encode_utf8(&mut utf8))
    }

    /// Writes a string element (type `0x02`): an `i32` length that includes the
    /// trailing NUL, the UTF-8 bytes, then the NUL.
    fn serialize_str(self, v: &str) -> Result<Self::Ok, Self::Error> {
        write_key_or_error!(0x02, self.key, self.output);

        let v = v.as_bytes();
        let len = i32::try_from(v.len() + 1) // `+ 1` for the null byte at the end of the str
            .unwrap_or_else(|_| panic!(
                "encoded string exceeds max size: {}",
                i32::MAX - 1
            ));

        self.output.put_i32_le(len);
        self.output.put_slice(v);
        self.output.put_u8(0x00);

        Ok(())
    }

    /// Writes a binary element (type `0x05`): an `i32` length, a subtype byte,
    /// then the raw bytes.
    fn serialize_bytes(self, v: &[u8]) -> Result<Self::Ok, Self::Error> {
        write_key_or_error!(0x05, self.key, self.output);

        // we don't need the + 1 here since there's no null terminator
        let len = i32::try_from(v.len())
            .unwrap_or_else(|_| panic!("bytes exceeds max size: {}", i32::MAX));

        self.output.put_i32_le(len);
        self.output.put_u8(0x00); // subtype, we'll just assume 0x00
        self.output.put_slice(v);

        Ok(())
    }

    /// Writes a null element (type `0x0A`), which has no payload.
    fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
        write_key_or_error!(0x0A, self.key, self.output);
        Ok(())
    }

    /// Serialises the wrapped value as if it were not optional.
    fn serialize_some<T: ?Sized>(self, value: &T) -> Result<Self::Ok, Self::Error>
    where
        T: Serialize,
    {
        value.serialize(self)
    }

    /// Writes `()` as a null element.
    fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
        self.serialize_none()
    }

    /// Writes a unit struct as a null element, the same as `()`.
    fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
        self.serialize_unit()
    }

    /// Writes a unit variant as a string element holding the variant name.
    fn serialize_unit_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        variant: &'static str,
    ) -> Result<Self::Ok, Self::Error> {
        self.serialize_str(variant)
    }

    /// Serialises the wrapped value, ignoring the newtype wrapper.
    fn serialize_newtype_struct<T: ?Sized>(
        self,
        _name: &'static str,
        value: &T,
    ) -> Result<Self::Ok, Self::Error>
    where
        T: Serialize,
    {
        value.serialize(self)
    }

    /// Not implemented yet; panics.
    fn serialize_newtype_variant<T: ?Sized>(
        self,
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _value: &T,
    ) -> Result<Self::Ok, Self::Error>
    where
        T: Serialize,
    {
        todo!("newtype variant")
    }

    /// Not implemented yet; panics.
    fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
        todo!("seq")
    }

    /// Not implemented yet; panics.
    fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
        todo!("tuple")
    }

    /// Not implemented yet; panics.
    fn serialize_tuple_struct(
        self,
        _name: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeTupleStruct, Self::Error> {
        todo!("tuple struct")
    }

    /// Not implemented yet; panics.
    fn serialize_tuple_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeTupleVariant, Self::Error> {
        todo!("tuple variant")
    }

    /// Not implemented yet; panics.
    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
        todo!("map")
    }

    /// Starts serialising a struct whose fields are written straight to `output`.
    ///
    /// Panics when a key is set, since nested structs are not implemented yet.
    fn serialize_struct(
        self,
        _name: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeStruct, Self::Error> {
        if self.key.is_some() {
            todo!("nested struct: {:?}", self.key);
        }

        Ok(StructSerializer {
            output: self.output,
        })
    }

    /// Not implemented yet; panics.
    fn serialize_struct_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeStructVariant, Self::Error> {
        todo!("struct variant")
    }
}

/// Serialises the fields of a struct one after another into `output`.
pub struct StructSerializer<'a, B: BufMut> {
    // buffer that every field, and finally the terminator byte, is written to
    output: &'a mut B,
}

impl<'a, B: BufMut> serde::ser::SerializeStruct for StructSerializer<'a, B> {
    type Ok = ();
    type Error = <Serializer<'a, B> as serde::Serializer>::Error;

    /// Serialises one field by handing `value` a [`Serializer`] that carries the
    /// field name as its key.
    fn serialize_field<T: ?Sized>(
        &mut self,
        key: &'static str,
        value: &T,
    ) -> Result<(), Self::Error>
    where
        T: Serialize,
    {
        value.serialize(Serializer {
            key: Some(key),
            // reborrow the underlying buffer instead of borrowing the `&mut B`
            // field itself, so the field serialiser is built over `B` directly
            // and no extra layer of indirection is added
            output: &mut *self.output,
        })
    }

    /// Finishes the struct by writing the terminating NUL byte.
    fn end(self) -> Result<Self::Ok, Self::Error> {
        self.output.put_u8(0x00); // doc terminator
        Ok(())
    }
}