From fd15ebb44b65ce7a8aa81250edbef294c79e3592 Mon Sep 17 00:00:00 2001 From: Jordan Doyle Date: Fri, 23 Jul 2021 02:50:01 +0100 Subject: [PATCH] initial commit --- .gitignore | 3 +++ Cargo.toml | 30 ++++++++++++++++++++++++++++++ LICENSE | 12 ++++++++++++ README.md | 23 +++++++++++++++++++++++ benches/borrowed.rs | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ benches/owned.rs | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/error.rs | 31 +++++++++++++++++++++++++++++++ src/lib.rs | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/ser.rs | 256 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 544 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.toml create mode 100644 LICENSE create mode 100644 README.md create mode 100644 benches/borrowed.rs create mode 100644 benches/owned.rs create mode 100644 src/error.rs create mode 100644 src/lib.rs create mode 100644 src/ser.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..77147e2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/target +Cargo.lock +.idea/ diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..f43bd78 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = "serde_bson" +authors = ["Jordan D. 
"] +description = "Fast bson serde implementation" +repository = "https://github.com/w4/serde_bson" +version = "0.0.1" +edition = "2018" +license = "0BSD" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +serde = "1" +bytes = "1" +take_mut = "0.2" + +[dev-dependencies] +serde = { version = "1", features = ["derive"] } +serde_bytes = "0.11" +bson = "1.2" +criterion = "0.3" +rand = "0.8" + +[[bench]] +name = "borrowed" +harness = false + +[[bench]] +name = "owned" +harness = false diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..e9d84ae --- /dev/null +++ b/LICENSE @@ -0,0 +1,12 @@ +Copyright (C) 2006 by Rob Landley + +Permission to use, copy, modify, and/or distribute this software for any purpose +with or without fee is hereby granted. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH +REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, +INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM +LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THIS SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..1016dda --- /dev/null +++ b/README.md @@ -0,0 +1,23 @@ +## serde_bson + +Originally implemented as a workaround to the `bson` crate cloning every value it +comes across and it's looking like it shows significant improvement across the board +for serialisation (~80% improvement). 
+
+```
+mongodb's bson          time:   [1.1160 us 1.1171 us 1.1183 us]
+Found 2 outliers among 100 measurements (2.00%)
+  2 (2.00%) high mild
+
+serde_bson              time:   [201.99 ns 202.17 ns 202.38 ns]
+Found 10 outliers among 100 measurements (10.00%)
+  4 (4.00%) low mild
+  4 (4.00%) high mild
+  2 (2.00%) high severe
+```
+
+There's a few pieces missing such as arrays and nested documents but they're not
+too difficult to add, it's just that it's 2:38am and I've smashed this out in an
+hour.
+
+Pull requests welcome as always.
\ No newline at end of file
diff --git a/benches/borrowed.rs b/benches/borrowed.rs
new file mode 100644
index 0000000..44f382a
--- /dev/null
+++ b/benches/borrowed.rs
@@ -0,0 +1,62 @@
+//! Benchmarks `serde_bson` against the `bson` crate using a struct of borrowed data.
+
+use bytes::BufMut;
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+
+/// Payload whose string and byte fields are borrowed.
+#[derive(serde::Serialize)]
+pub struct A<'a> {
+    a: &'a str,
+    b: &'a str,
+    c: &'a str,
+    d: i64,
+    e: f64,
+    #[serde(with = "serde_bytes")]
+    f: &'a [u8],
+}
+
+/// Serialises the same value with both crates, emptying the output buffer after each run.
+fn benchmark(c: &mut Criterion) {
+    let val = A {
+        a: "Now this is a story all about how
+            My life got flipped turned upside down
+            And I'd like to take a minute, just sit right there
+            I'll tell you how I became the prince of a town called Bel-Air",
+        b: "In West Philadelphia born and raised
+            On the playground is where I spent most of my days
+            Chillin' out, maxin', relaxin' all cool
+            And all shootin' some b-ball outside of the school
+            When a couple of guys who were up to no good
+            Started makin' trouble in my neighborhood",
+        c: "I got in one little fight and my mom got scared
+            And said 'You're movin' with your auntie and uncle in Bel-Air'",
+        d: 420,
+        e: 420.69696969696969,
+        f: "Above are some popular 'pop culture' references for your perusal and enjoyment"
+            .as_bytes(),
+    };
+
+    c.bench_function("borrowed: mongodb's bson", |b| {
+        let mut theirs = Vec::new();
+
+        b.iter(|| {
+            bson::ser::to_document(black_box(&val))
+                .unwrap()
+                .to_writer(&mut theirs)
+                .unwrap();
+            theirs.clear();
+        })
+    });
+
+    c.bench_function("borrowed: serde_bson", |b| {
+        let mut out = bytes::BytesMut::new();
+
+        b.iter(|| {
+            serde_bson::to_string(black_box(&val), &mut out).unwrap();
+            drop(out.split());
+        });
+    });
+}
+
+criterion_group!(benches, benchmark);
+criterion_main!(benches);
diff --git a/benches/owned.rs b/benches/owned.rs
new file mode 100644
index 0000000..773567e
--- /dev/null
+++ b/benches/owned.rs
@@ -0,0 +1,64 @@
+//! Benchmarks `serde_bson` against the `bson` crate using a struct of owned data.
+
+use bytes::BufMut;
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+
+/// Payload whose string and byte fields are owned.
+#[derive(serde::Serialize)]
+pub struct A {
+    a: String,
+    b: String,
+    c: String,
+    d: i64,
+    e: f64,
+    #[serde(with = "serde_bytes")]
+    f: Vec<u8>,
+}
+
+/// Serialises the same value with both crates, emptying the output buffer after each run.
+fn benchmark(c: &mut Criterion) {
+    let val = A {
+        a: "Now this is a story all about how
+            My life got flipped turned upside down
+            And I'd like to take a minute, just sit right there
+            I'll tell you how I became the prince of a town called Bel-Air"
+            .to_string(),
+        b: "In West Philadelphia born and raised
+            On the playground is where I spent most of my days
+            Chillin' out, maxin', relaxin' all cool
+            And all shootin' some b-ball outside of the school
+            When a couple of guys who were up to no good
+            Started makin' trouble in my neighborhood"
+            .to_string(),
+        c: "I got in one little fight and my mom got scared
+            And said 'You're movin' with your auntie and uncle in Bel-Air'"
+            .to_string(),
+        d: 420,
+        e: 420.69696969696969,
+        f: "Above are some popular 'pop culture' references for your perusal and enjoyment".into(),
+    };
+
+    c.bench_function("owned: mongodb's bson", |b| {
+        let mut theirs = Vec::new();
+
+        b.iter(|| {
+            bson::ser::to_document(black_box(&val))
+                .unwrap()
+                .to_writer(&mut theirs)
+                .unwrap();
+            theirs.clear();
+        })
+    });
+
+    c.bench_function("owned: serde_bson", |b| {
+        let mut out = bytes::BytesMut::new();
+
+        b.iter(|| {
+            serde_bson::to_string(black_box(&val), &mut out).unwrap();
+            drop(out.split());
+        });
+    });
+}
+
+criterion_group!(benches, benchmark);
+criterion_main!(benches);
diff --git
a/src/error.rs b/src/error.rs
new file mode 100644
index 0000000..8bf71b9
--- /dev/null
+++ b/src/error.rs
@@ -0,0 +1,37 @@
+use std::fmt::{Display, Formatter};
+
+/// Errors that can occur while serialising a value to BSON.
+#[derive(Debug)]
+pub enum Error {
+    /// A bare value was serialised outside of a struct field, so there is no
+    /// key to write it under.
+    NotSerializingStruct,
+    /// A custom error raised by a value's `Serialize` implementation.
+    Serde(String),
+    /// An unsigned integer was encountered; BSON has no unsigned integer types.
+    UnsignedIntNotInSpec,
+}
+
+impl Display for Error {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::NotSerializingStruct => write!(
+                f,
+                "individual values cannot be serialised, try serialising a struct instead"
+            ),
+            Self::Serde(context) => write!(f, "error from value serialiser: {}", context),
+            Self::UnsignedIntNotInSpec => {
+                write!(f, "unsigned ints are not supported in the bson spec")
+            }
+        }
+    }
+}
+
+impl std::error::Error for Error {}
+
+impl serde::ser::Error for Error {
+    /// Wraps an arbitrary message from a `Serialize` implementation.
+    fn custom<T: Display>(msg: T) -> Self {
+        Error::Serde(msg.to_string())
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..f42503f
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,119 @@
+//! A fast BSON serialiser built on top of `serde`.
+
+mod error;
+pub mod ser;
+
+pub use error::Error;
+
+use bytes::{BufMut, BytesMut};
+use serde::Serialize;
+use std::convert::TryFrom;
+
+/// Serialises `val` as a BSON document, appending the encoded bytes to `output`.
+///
+/// Any bytes already present in `output` are left untouched; the document is
+/// written after them.
+///
+/// # Errors
+///
+/// Returns an [`Error`] if `val` is not a struct or contains a value that can't
+/// be represented. On failure `output` is truncated back to its original length.
+///
+/// # Panics
+///
+/// Panics if the encoded document is larger than `i32::MAX` bytes, or if `val`
+/// contains a construct that hasn't been implemented yet.
+pub fn to_string<T: Serialize>(val: &T, output: &mut BytesMut) -> Result<(), Error> {
+    const SIZE_OF_SIZE: usize = std::mem::size_of::<i32>();
+
+    // remember where this document starts so we only ever touch our own bytes,
+    // even when the caller hands us a buffer that already contains data
+    let start = output.len();
+
+    // reserve space for the document length, it's patched in below once known
+    output.put_i32_le(0);
+
+    let res = val.serialize(ser::Serializer {
+        key: None,
+        output: &mut *output,
+    });
+
+    if let Err(e) = res {
+        // don't leave a half-written document behind
+        output.truncate(start);
+        return Err(e);
+    }
+
+    // the length covers the whole document, including the length field itself
+    let len = i32::try_from(output.len() - start)
+        .unwrap_or_else(|_| panic!("encoded document exceeds max size: {}", i32::MAX));
+    output[start..start + SIZE_OF_SIZE].copy_from_slice(&len.to_le_bytes());
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod test {
+    use super::to_string;
+    use bytes::{BufMut, BytesMut};
+    use serde::Serialize;
+
+    #[derive(Serialize)]
+    pub struct A<'a> {
+        cool: i32,
+        #[serde(with = "serde_bytes")]
+        beans: &'a [u8],
+        bro: &'a str,
+        yes: bool,
+    }
+
+    fn sample() -> A<'static> {
+        A {
+            cool: 999,
+            beans: "so there was this one time at bandcamp".as_bytes(),
+            bro: "the craziest thing happened",
+            yes: true,
+        }
+    }
+
+    // encodes `val` with the `bson` crate to get the bytes we're expected to produce
+    fn reference(val: &A<'_>) -> BytesMut {
+        let mut theirs = BytesMut::new().writer();
+        bson::ser::to_document(val)
+            .unwrap()
+            .to_writer(&mut theirs)
+            .unwrap();
+        theirs.into_inner()
+    }
+
+    #[test]
+    pub fn test_basic() {
+        let test = sample();
+
+        let mut ours = BytesMut::new();
+        to_string(&test, &mut ours).unwrap();
+
+        assert_eq!(ours, reference(&test));
+    }
+
+    #[test]
+    pub fn test_appends_to_existing_output() {
+        let test = sample();
+
+        let mut ours = BytesMut::new();
+        ours.put_slice(b"prefix");
+        to_string(&test, &mut ours).unwrap();
+
+        assert_eq!(&ours[..6], &b"prefix"[..]);
+        assert_eq!(&ours[6..], &reference(&test)[..]);
+    }
+
+    #[test]
+    pub fn test_error_leaves_output_untouched() {
+        let mut ours = BytesMut::new();
+        ours.put_slice(b"prefix");
+
+        assert!(to_string(&1_i32, &mut ours).is_err());
+        assert_eq!(&ours[..], &b"prefix"[..]);
+    }
+}
diff --git a/src/ser.rs b/src/ser.rs
new file mode 100644
index 0000000..8fdb2e0
--- /dev/null
+++ b/src/ser.rs
@@ -0,0 +1,266 @@
+use crate::Error;
+use bytes::BufMut;
+use serde::Serialize;
+use std::convert::TryFrom;
+
+/// Serialises a single value into `output` using the BSON wire format.
+///
+/// Every BSON element carries its key, so `key` must be `Some` for anything
+/// other than the top-level struct.
+pub struct Serializer<'a, B: BufMut> {
+    pub key: Option<&'static str>,
+    pub output: &'a mut B,
+}
+
+// writes the element header (type id followed by the null-terminated key) or
+// bails out with `Error::NotSerializingStruct` if there's no key to write
+macro_rules! write_key_or_error {
+    ($id:literal, $key:expr, $output:expr) => {
+        if let Some(key) = $key {
+            $output.put_u8($id);
+            $output.put_slice(key.as_bytes());
+            $output.put_u8(0x00);
+        } else {
+            return Err(Error::NotSerializingStruct);
+        }
+    };
+}
+
+impl<'a, B: BufMut> serde::Serializer for Serializer<'a, B> {
+    type Ok = ();
+    type Error = Error;
+
+    type SerializeSeq = serde::ser::Impossible<Self::Ok, Self::Error>;
+    type SerializeTuple = serde::ser::Impossible<Self::Ok, Self::Error>;
+    type SerializeTupleStruct = serde::ser::Impossible<Self::Ok, Self::Error>;
+    type SerializeTupleVariant = serde::ser::Impossible<Self::Ok, Self::Error>;
+    type SerializeMap = serde::ser::Impossible<Self::Ok, Self::Error>;
+    type SerializeStruct = StructSerializer<'a, B>;
+    type SerializeStructVariant = serde::ser::Impossible<Self::Ok, Self::Error>;
+
+    fn serialize_bool(self, v: bool) -> Result<Self::Ok, Self::Error> {
+        // 0x08 is the boolean element type, 0x01 would be read back as a double
+        write_key_or_error!(0x08, self.key, self.output);
+        self.output.put_u8(v as u8);
+        Ok(())
+    }
+
+    fn serialize_i8(self, v: i8) -> Result<Self::Ok, Self::Error> {
+        self.serialize_i32(v as i32)
+    }
+
+    fn serialize_i16(self, v: i16) -> Result<Self::Ok, Self::Error> {
+        self.serialize_i32(v as i32)
+    }
+
+    fn serialize_i32(self, v: i32) -> Result<Self::Ok, Self::Error> {
+        write_key_or_error!(0x10, self.key, self.output);
+        self.output.put_i32_le(v);
+        Ok(())
+    }
+
+    fn serialize_i64(self, v: i64) -> Result<Self::Ok, Self::Error> {
+        write_key_or_error!(0x12, self.key, self.output);
+        self.output.put_i64_le(v);
+        Ok(())
+    }
+
+    fn serialize_u8(self, _v: u8) -> Result<Self::Ok, Self::Error> {
+        Err(Error::UnsignedIntNotInSpec)
+    }
+
+    fn serialize_u16(self, _v: u16) -> Result<Self::Ok, Self::Error> {
+        Err(Error::UnsignedIntNotInSpec)
+    }
+
+    fn serialize_u32(self, _v: u32) -> Result<Self::Ok, Self::Error> {
+        Err(Error::UnsignedIntNotInSpec)
+    }
+
+    fn serialize_u64(self, _v: u64) -> Result<Self::Ok, Self::Error> {
+        Err(Error::UnsignedIntNotInSpec)
+    }
+
+    fn serialize_f32(self, v: f32) -> Result<Self::Ok, Self::Error> {
+        self.serialize_f64(v as f64)
+    }
+
+    fn serialize_f64(self, v: f64) -> Result<Self::Ok, Self::Error> {
+        write_key_or_error!(0x01, self.key, self.output);
+        self.output.put_f64_le(v);
+        Ok(())
+    }
+
+    fn serialize_char(self, v: char) -> Result<Self::Ok, Self::Error> {
+        // bson has no char type, so encode it as a single-character string
+        self.serialize_str(v.encode_utf8(&mut [0; 4]))
+    }
+
+    fn serialize_str(self, v: &str) -> Result<Self::Ok, Self::Error> {
+        write_key_or_error!(0x02, self.key, self.output);
+
+        let v = v.as_bytes();
+        let len = i32::try_from(v.len() + 1) // `+ 1` for the null byte at the end of the str
+            .unwrap_or_else(|_| panic!(
+                "encoded string exceeds max size: {}",
+                i32::MAX - 1
+            ));
+
+        self.output.put_i32_le(len);
+        self.output.put_slice(v);
+        self.output.put_u8(0x00);
+
+        Ok(())
+    }
+
+    fn serialize_bytes(self, v: &[u8]) -> Result<Self::Ok, Self::Error> {
+        write_key_or_error!(0x05, self.key, self.output);
+
+        // we don't need the + 1 here since there's no null terminator
+        let len = i32::try_from(v.len())
+            .unwrap_or_else(|_| panic!("bytes exceeds max size: {}", i32::MAX));
+
+        self.output.put_i32_le(len);
+        self.output.put_u8(0x00); // subtype, we'll just assume 0x00
+        self.output.put_slice(v);
+
+        Ok(())
+    }
+
+    fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
+        write_key_or_error!(0x0A, self.key, self.output);
+        Ok(())
+    }
+
+    fn serialize_some<T: ?Sized>(self, value: &T) -> Result<Self::Ok, Self::Error>
+    where
+        T: Serialize,
+    {
+        value.serialize(self)
+    }
+
+    fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
+        self.serialize_none()
+    }
+
+    fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
+        unimplemented!("unit struct")
+    }
+
+    fn serialize_unit_variant(
+        self,
+        _name: &'static str,
+        _variant_index: u32,
+        _variant: &'static str,
+    ) -> Result<Self::Ok, Self::Error> {
+        unimplemented!("unit variant")
+    }
+
+    fn serialize_newtype_struct<T: ?Sized>(
+        self,
+        _name: &'static str,
+        value: &T,
+    ) -> Result<Self::Ok, Self::Error>
+    where
+        T: Serialize,
+    {
+        value.serialize(self)
+    }
+
+    fn serialize_newtype_variant<T: ?Sized>(
+        self,
+        _name: &'static str,
+        _variant_index: u32,
+        _variant: &'static str,
+        _value: &T,
+    ) -> Result<Self::Ok, Self::Error>
+    where
+        T: Serialize,
+    {
+        todo!("newtype variant")
+    }
+
+    fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
+        todo!("seq")
+    }
+
+    fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
+        todo!("tuple")
+    }
+
+    fn serialize_tuple_struct(
+        self,
+        _name: &'static str,
+        _len: usize,
+    ) -> Result<Self::SerializeTupleStruct, Self::Error> {
+        todo!("tuple struct")
+    }
+
+    fn serialize_tuple_variant(
+        self,
+        _name: &'static str,
+        _variant_index: u32,
+        _variant: &'static str,
+        _len: usize,
+    ) -> Result<Self::SerializeTupleVariant, Self::Error> {
+        todo!("tuple variant")
+    }
+
+    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
+        todo!("map")
+    }
+
+    fn serialize_struct(
+        self,
+        _name: &'static str,
+        _len: usize,
+    ) -> Result<Self::SerializeStruct, Self::Error> {
+        if self.key.is_some() {
+            todo!("nested struct: {:?}", self.key);
+        }
+
+        Ok(StructSerializer {
+            output: self.output,
+        })
+    }
+
+    fn serialize_struct_variant(
+        self,
+        _name: &'static str,
+        _variant_index: u32,
+        _variant: &'static str,
+        _len: usize,
+    ) -> Result<Self::SerializeStructVariant, Self::Error> {
+        todo!("struct variant")
+    }
+}
+
+/// Serialises the fields of a struct as the elements of a BSON document.
+pub struct StructSerializer<'a, B: BufMut> {
+    output: &'a mut B,
+}
+
+impl<'a, B: BufMut> serde::ser::SerializeStruct for StructSerializer<'a, B> {
+    type Ok = ();
+    type Error = <Serializer<'a, B> as serde::Serializer>::Error;
+
+    fn serialize_field<T: ?Sized>(
+        &mut self,
+        key: &'static str,
+        value: &T,
+    ) -> Result<(), Self::Error>
+    where
+        T: Serialize,
+    {
+        value.serialize(Serializer {
+            key: Some(key),
+            // reborrow so nested serialisers keep using `B` rather than `&mut B`
+            output: &mut *self.output,
+        })
+    }
+
+    fn end(self) -> Result<Self::Ok, Self::Error> {
+        self.output.put_u8(0x00); // doc terminator
+        Ok(())
+    }
+}
--
libgit2 1.7.2