initial commit
Diff
.gitignore | 3 +-
Cargo.toml | 30 ++++++-
LICENSE | 12 ++-
README.md | 23 +++++-
benches/borrowed.rs | 58 ++++++++++++-
benches/owned.rs | 60 ++++++++++++-
src/error.rs | 31 ++++++-
src/lib.rs | 71 +++++++++++++++-
src/ser.rs | 256 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
9 files changed, 544 insertions(+)
@@ -0,0 +1,3 @@
/target
Cargo.lock
.idea/
@@ -0,0 +1,30 @@
[package]
name = "serde_bson"
authors = ["Jordan D. <jordan@doyle.la>"]
description = "Fast bson serde implementation"
repository = "https://github.com/w4/serde_bson"
version = "0.0.1"
edition = "2018"
license = "0BSD"
[dependencies]
serde = "1"
bytes = "1"
take_mut = "0.2"
[dev-dependencies]
serde = { version = "1", features = ["derive"] }
serde_bytes = "0.11"
bson = "1.2"
criterion = "0.3"
rand = "0.8"
[[bench]]
name = "borrowed"
harness = false
[[bench]]
name = "owned"
harness = false
@@ -0,0 +1,12 @@
Copyright (C) 2006 by Rob Landley <rob@landley.net>
Permission to use, copy, modify, and/or distribute this software for any purpose
with or without fee is hereby granted.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
@@ -0,0 +1,23 @@
## serde_bson
Originally implemented as a workaround for the `bson` crate cloning every value it
comes across, and it looks like it shows a significant improvement across the board
for serialisation (~80% improvement).
```
mongodb's bson time: [1.1160 us 1.1171 us 1.1183 us]
Found 2 outliers among 100 measurements (2.00%)
2 (2.00%) high mild
serde_bson time: [201.99 ns 202.17 ns 202.38 ns]
Found 10 outliers among 100 measurements (10.00%)
4 (4.00%) low mild
4 (4.00%) high mild
2 (2.00%) high severe
```
There are a few pieces missing, such as arrays and nested documents, but they're not
too difficult to add; it's just that it's 2:38am and I've smashed this out in an
hour.
Pull requests welcome as always.
\ No newline at end of file
@@ -0,0 +1,58 @@
use bytes::BufMut;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
/// Benchmark input whose string and byte fields are all borrowed.
#[derive(serde::Serialize)]
pub struct A<'a> {
// Three borrowed text fields.
a: &'a str,
b: &'a str,
c: &'a str,
// One 64-bit integer field and one double-precision float field.
d: i64,
e: f64,
// Routed through `serde_bytes` so the slice is handed to the serialiser as bytes
// rather than as a sequence of `u8` elements.
#[serde(with = "serde_bytes")]
f: &'a [u8],
}
/// Registers two Criterion benchmarks that serialise the same borrowed `A` value:
/// one using mongodb's `bson` crate and one using `serde_bson`.
fn benchmark(c: &mut Criterion) {
// The value under test; built once and shared by both benchmarks.
let val = A {
a: "Now this is a story all about how
My life got flipped turned upside down
And I'd like to take a minute, just sit right there
I'll tell you how I became the prince of a town called Bel-Air",
b: "In West Philadelphia born and raised
On the playground is where I spent most of my days
Chillin' out, maxin', relaxin' all cool
And all shootin' some b-ball outside of the school
When a couple of guys who were up to no good
Started makin' trouble in my neighborhood",
c: "I got in one little fight and my mom got scared
And said 'You're movin' with your auntie and uncle in Bel-Air'",
d: 420,
e: 420.69696969696969,
f: "Above are some popular 'pop culture' references for your perusal and enjoyment"
.as_bytes(),
};
// Baseline: convert `val` into a `bson` document, then write it into a reused `Vec`.
c.bench_function("borrowed: mongodb's bson", |b| {
let mut theirs = Vec::new();
b.iter(|| {
bson::ser::to_document(black_box(&val))
.unwrap()
.to_writer(&mut theirs)
.unwrap();
// Empty the buffer so every iteration starts from the same state.
theirs.clear();
})
});
// Candidate: serialise `val` directly into a reused `BytesMut`.
c.bench_function("borrowed: serde_bson", |b| {
let mut out = bytes::BytesMut::new();
b.iter(|| {
serde_bson::to_string(black_box(&val), &mut out).unwrap();
// `split` takes the written bytes out of `out`; dropping them leaves `out`
// empty for the next iteration.
drop(out.split());
});
});
}
// Register `benchmark` with Criterion and generate the benchmark binary's entry point
// (the bench target is declared with `harness = false` in Cargo.toml).
criterion_group!(benches, benchmark);
criterion_main!(benches);
@@ -0,0 +1,60 @@
use bytes::BufMut;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
/// Benchmark input whose string and byte fields are all owned.
#[derive(serde::Serialize)]
pub struct A {
// Three owned text fields.
a: String,
b: String,
c: String,
// One 64-bit integer field and one double-precision float field.
d: i64,
e: f64,
// Routed through `serde_bytes` so the buffer is handed to the serialiser as bytes
// rather than as a sequence of `u8` elements.
#[serde(with = "serde_bytes")]
f: Vec<u8>,
}
/// Registers two Criterion benchmarks that serialise the same owned `A` value:
/// one using mongodb's `bson` crate and one using `serde_bson`.
fn benchmark(c: &mut Criterion) {
// The value under test; built once and shared by both benchmarks.
let val = A {
a: "Now this is a story all about how
My life got flipped turned upside down
And I'd like to take a minute, just sit right there
I'll tell you how I became the prince of a town called Bel-Air"
.to_string(),
b: "In West Philadelphia born and raised
On the playground is where I spent most of my days
Chillin' out, maxin', relaxin' all cool
And all shootin' some b-ball outside of the school
When a couple of guys who were up to no good
Started makin' trouble in my neighborhood"
.to_string(),
c: "I got in one little fight and my mom got scared
And said 'You're movin' with your auntie and uncle in Bel-Air'"
.to_string(),
d: 420,
e: 420.69696969696969,
f: "Above are some popular 'pop culture' references for your perusal and enjoyment".into(),
};
// Baseline: convert `val` into a `bson` document, then write it into a reused `Vec`.
c.bench_function("owned: mongodb's bson", |b| {
let mut theirs = Vec::new();
b.iter(|| {
bson::ser::to_document(black_box(&val))
.unwrap()
.to_writer(&mut theirs)
.unwrap();
// Empty the buffer so every iteration starts from the same state.
theirs.clear();
})
});
// Candidate: serialise `val` directly into a reused `BytesMut`.
c.bench_function("owned: serde_bson", |b| {
let mut out = bytes::BytesMut::new();
b.iter(|| {
serde_bson::to_string(black_box(&val), &mut out).unwrap();
// `split` takes the written bytes out of `out`; dropping them leaves `out`
// empty for the next iteration.
drop(out.split());
});
});
}
// Register `benchmark` with Criterion and generate the benchmark binary's entry point
// (the bench target is declared with `harness = false` in Cargo.toml).
criterion_group!(benches, benchmark);
criterion_main!(benches);
@@ -0,0 +1,31 @@
use std::fmt::{Display, Formatter};
/// Errors that can be returned while serialising a value.
#[derive(Debug)]
pub enum Error {
    /// A value was serialised without a field key, i.e. outside of a struct.
    NotSerializingStruct,
    /// A free-form message raised through `serde::ser::Error::custom`.
    Serde(String),
    /// An unsigned integer was encountered.
    UnsignedIntNotInSpec,
}

impl Display for Error {
    /// Writes the human-readable description of the error.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        // Only `Serde` needs formatting; the other variants map to fixed text.
        let fixed_message = match self {
            Self::Serde(context) => {
                return write!(f, "error from value serialiser: {}", context);
            }
            Self::NotSerializingStruct => {
                "individual values cannot be serialised, try serialising a struct instead"
            }
            Self::UnsignedIntNotInSpec => "unsigned ints are not supported in the bson spec",
        };
        f.write_str(fixed_message)
    }
}

impl std::error::Error for Error {}
/// Lets serde and `Serialize` implementations report free-form failures,
/// which are stored as [`Error::Serde`].
impl serde::ser::Error for Error {
    fn custom<T>(msg: T) -> Self
    where
        T: Display,
    {
        let context = msg.to_string();
        Self::Serde(context)
    }
}
@@ -0,0 +1,71 @@
mod error;
pub mod ser;
pub use error::Error;
use bytes::{BufMut, BytesMut};
use serde::Serialize;
pub fn to_string<T: Serialize>(val: &T, output: &mut BytesMut) -> Result<(), Error> {
const SIZE_OF_SIZE: usize = std::mem::size_of::<i32>();
output.put_i32(0);
let mut size = output.split_to(SIZE_OF_SIZE);
val.serialize(ser::Serializer { key: None, output })?;
for (i, byte) in ((output.len() + SIZE_OF_SIZE) as i32)
.to_le_bytes()
.iter()
.enumerate()
{
size[i] = *byte;
}
take_mut::take(output, move |output| {
size.unsplit(output);
size
});
Ok(())
}
#[cfg(test)]
mod test {
    use super::to_string;
    use bytes::{BufMut, BytesMut};
    use serde::Serialize;

    /// Serialises a small struct with both this crate and mongodb's `bson` crate and
    /// checks that the two byte streams are identical.
    #[test]
    pub fn test_basic() {
        #[derive(Serialize)]
        pub struct A<'a> {
            cool: i32,
            #[serde(with = "serde_bytes")]
            beans: &'a [u8],
            bro: &'a str,
        }

        let test = &A {
            cool: 999,
            beans: "so there was this one time at bandcamp".as_bytes(),
            bro: "the craziest thing happened",
        };

        // Fail right here if serialisation errors, instead of surfacing later as an
        // unexplained byte mismatch.
        let mut ours = BytesMut::new();
        to_string(&test, &mut ours).unwrap();

        // Reference encoding produced by the `bson` crate.
        let mut theirs = BytesMut::new().writer();
        bson::ser::to_document(&test)
            .unwrap()
            .to_writer(&mut theirs)
            .unwrap();

        assert_eq!(ours, theirs.into_inner());
    }
}
@@ -0,0 +1,256 @@
use crate::Error;
use bytes::BufMut;
use serde::Serialize;
use std::convert::TryFrom;
/// Serialiser for a single value, writing into a borrowed output buffer.
pub struct Serializer<'a, B: BufMut> {
// Name of the field being written; `None` when no struct field is being serialised,
// in which case scalar values are rejected with `Error::NotSerializingStruct`.
pub key: Option<&'static str>,
// Buffer that the encoded bytes are appended to.
pub output: &'a mut B,
}
/// Writes an element header to `$output`: the type byte `$id`, the bytes of the key
/// and a terminating `0x00`.
///
/// If `$key` is `None`, nothing is written and the enclosing function returns
/// `Err(Error::NotSerializingStruct)`.
macro_rules! write_key_or_error {
    ($id:literal, $key:expr, $output:expr) => {
        match $key {
            Some(key) => {
                $output.put_u8($id);
                $output.put_slice(key.as_bytes());
                $output.put_u8(0x00);
            }
            None => return Err(Error::NotSerializingStruct),
        }
    };
}
/// Encodes one value as a BSON element: a type byte, the NUL-terminated key and the
/// value payload.
///
/// Scalar values need a key, so serialising them with `key: None` fails with
/// [`Error::NotSerializingStruct`]. Sequences, tuples, maps, enum variants, unit structs
/// and nested structs are not implemented yet and panic when reached.
impl<'a, B: BufMut> serde::Serializer for Serializer<'a, B> {
    type Ok = ();
    type Error = Error;

    // Compound types other than structs are not supported yet.
    type SerializeSeq = serde::ser::Impossible<Self::Ok, Self::Error>;
    type SerializeTuple = serde::ser::Impossible<Self::Ok, Self::Error>;
    type SerializeTupleStruct = serde::ser::Impossible<Self::Ok, Self::Error>;
    type SerializeTupleVariant = serde::ser::Impossible<Self::Ok, Self::Error>;
    type SerializeMap = serde::ser::Impossible<Self::Ok, Self::Error>;
    type SerializeStruct = StructSerializer<'a, B>;
    type SerializeStructVariant = serde::ser::Impossible<Self::Ok, Self::Error>;

    /// Writes a BSON boolean (type `0x08`): a single byte, `0x00` or `0x01`.
    fn serialize_bool(self, v: bool) -> Result<Self::Ok, Self::Error> {
        // 0x08 is the boolean element type; 0x01 is reserved for doubles.
        write_key_or_error!(0x08, self.key, self.output);
        self.output.put_u8(v as u8);
        Ok(())
    }

    /// Widens to `i32`; BSON has no 8-bit integer type.
    fn serialize_i8(self, v: i8) -> Result<Self::Ok, Self::Error> {
        self.serialize_i32(v as i32)
    }

    /// Widens to `i32`; BSON has no 16-bit integer type.
    fn serialize_i16(self, v: i16) -> Result<Self::Ok, Self::Error> {
        self.serialize_i32(v as i32)
    }

    /// Writes a BSON int32 (type `0x10`) in little-endian order.
    fn serialize_i32(self, v: i32) -> Result<Self::Ok, Self::Error> {
        write_key_or_error!(0x10, self.key, self.output);
        self.output.put_i32_le(v);
        Ok(())
    }

    /// Writes a BSON int64 (type `0x12`) in little-endian order.
    fn serialize_i64(self, v: i64) -> Result<Self::Ok, Self::Error> {
        write_key_or_error!(0x12, self.key, self.output);
        self.output.put_i64_le(v);
        Ok(())
    }

    /// Always fails with [`Error::UnsignedIntNotInSpec`].
    fn serialize_u8(self, _v: u8) -> Result<Self::Ok, Self::Error> {
        Err(Error::UnsignedIntNotInSpec)
    }

    /// Always fails with [`Error::UnsignedIntNotInSpec`].
    fn serialize_u16(self, _v: u16) -> Result<Self::Ok, Self::Error> {
        Err(Error::UnsignedIntNotInSpec)
    }

    /// Always fails with [`Error::UnsignedIntNotInSpec`].
    fn serialize_u32(self, _v: u32) -> Result<Self::Ok, Self::Error> {
        Err(Error::UnsignedIntNotInSpec)
    }

    /// Always fails with [`Error::UnsignedIntNotInSpec`].
    fn serialize_u64(self, _v: u64) -> Result<Self::Ok, Self::Error> {
        Err(Error::UnsignedIntNotInSpec)
    }

    /// Widens to `f64`; BSON only has a 64-bit floating point type.
    fn serialize_f32(self, v: f32) -> Result<Self::Ok, Self::Error> {
        self.serialize_f64(v as f64)
    }

    /// Writes a BSON double (type `0x01`) in little-endian order.
    fn serialize_f64(self, v: f64) -> Result<Self::Ok, Self::Error> {
        write_key_or_error!(0x01, self.key, self.output);
        self.output.put_f64_le(v);
        Ok(())
    }

    /// Writes the character as a one-character BSON string.
    fn serialize_char(self, v: char) -> Result<Self::Ok, Self::Error> {
        // BSON has no character type, so encode the char as UTF-8 on the stack and
        // reuse the string encoding.
        let mut utf8 = [0u8; 4];
        self.serialize_str(v.encode_utf8(&mut utf8))
    }

    /// Writes a BSON string (type `0x02`): an `i32` byte length that counts the
    /// trailing NUL, the UTF-8 bytes, then the NUL terminator.
    ///
    /// # Panics
    ///
    /// Panics if the string plus its terminator does not fit in an `i32` length.
    fn serialize_str(self, v: &str) -> Result<Self::Ok, Self::Error> {
        write_key_or_error!(0x02, self.key, self.output);

        let v = v.as_bytes();
        // The encoded length includes the trailing NUL byte.
        let len = i32::try_from(v.len() + 1).unwrap_or_else(|_| {
            panic!("encoded string exceeds max size: {}", i32::MAX - 1)
        });

        self.output.put_i32_le(len);
        self.output.put_slice(v);
        self.output.put_u8(0x00);
        Ok(())
    }

    /// Writes BSON binary data (type `0x05`): an `i32` payload length, the subtype
    /// byte `0x00`, then the payload.
    ///
    /// # Panics
    ///
    /// Panics if the payload length does not fit in an `i32`.
    fn serialize_bytes(self, v: &[u8]) -> Result<Self::Ok, Self::Error> {
        write_key_or_error!(0x05, self.key, self.output);

        let len = i32::try_from(v.len())
            .unwrap_or_else(|_| panic!("bytes exceeds max size: {}", i32::MAX));

        self.output.put_i32_le(len);
        // Subtype byte, written between the length and the payload.
        self.output.put_u8(0x00);
        self.output.put_slice(v);
        Ok(())
    }

    /// Writes a BSON null (type `0x0A`), which has no payload.
    fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
        write_key_or_error!(0x0A, self.key, self.output);
        Ok(())
    }

    /// Serialises the wrapped value as if it were not optional.
    fn serialize_some<T: ?Sized>(self, value: &T) -> Result<Self::Ok, Self::Error>
    where
        T: Serialize,
    {
        value.serialize(self)
    }

    /// Encoded the same way as `None`, i.e. as a BSON null.
    fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
        self.serialize_none()
    }

    /// Not implemented yet; panics.
    fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
        unimplemented!("unit struct")
    }

    /// Not implemented yet; panics.
    fn serialize_unit_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
    ) -> Result<Self::Ok, Self::Error> {
        unimplemented!("unit variant")
    }

    /// Newtype structs are transparent: only the inner value is written.
    fn serialize_newtype_struct<T: ?Sized>(
        self,
        _name: &'static str,
        value: &T,
    ) -> Result<Self::Ok, Self::Error>
    where
        T: Serialize,
    {
        value.serialize(self)
    }

    /// Not implemented yet; panics.
    fn serialize_newtype_variant<T: ?Sized>(
        self,
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _value: &T,
    ) -> Result<Self::Ok, Self::Error>
    where
        T: Serialize,
    {
        todo!("newtype variant")
    }

    /// Not implemented yet; panics.
    fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
        todo!("seq")
    }

    /// Not implemented yet; panics.
    fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
        todo!("tuple")
    }

    /// Not implemented yet; panics.
    fn serialize_tuple_struct(
        self,
        _name: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeTupleStruct, Self::Error> {
        todo!("tuple struct")
    }

    /// Not implemented yet; panics.
    fn serialize_tuple_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeTupleVariant, Self::Error> {
        todo!("tuple variant")
    }

    /// Not implemented yet; panics.
    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
        todo!("map")
    }

    /// Starts serialising a top-level struct; its fields are written by the returned
    /// [`StructSerializer`].
    ///
    /// # Panics
    ///
    /// Panics if a key is set, i.e. the struct is nested inside another struct, since
    /// nested documents are not implemented yet.
    fn serialize_struct(
        self,
        _name: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeStruct, Self::Error> {
        if self.key.is_some() {
            todo!("nested struct: {:?}", self.key);
        }

        Ok(StructSerializer {
            output: self.output,
        })
    }

    /// Not implemented yet; panics.
    fn serialize_struct_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeStructVariant, Self::Error> {
        todo!("struct variant")
    }
}
/// Serialises the fields of a struct into a borrowed output buffer; created by
/// `Serializer::serialize_struct`.
pub struct StructSerializer<'a, B: BufMut> {
// Buffer that each field, and finally the terminating byte, is appended to.
output: &'a mut B,
}
impl<'a, B: BufMut> serde::ser::SerializeStruct for StructSerializer<'a, B> {
type Ok = ();
type Error = <Serializer<'a, B> as serde::Serializer>::Error;
fn serialize_field<T: ?Sized>(
&mut self,
key: &'static str,
value: &T,
) -> Result<(), Self::Error>
where
T: Serialize,
{
value.serialize(Serializer {
key: Some(key),
output: &mut self.output,
})
}
fn end(self) -> Result<Self::Ok, Self::Error> {
self.output.put_u8(0x00); Ok(())
}
}