// Patch overview (review notes placed ahead of the first `diff --git` header).
//
// Cargo.toml
//   - Adds the `bson = "2.7"` dependency and ends the file with a newline.
//
// src/structure/tfc/block.rs
//   - record_size_decoding: codes 5 and 6 now decode to Some(12) and Some(16).
//   - record_size_encoding: Some(12) and Some(16) now encode to `5 << 3` and `6 << 3`.
//   - Any other code or size still reaches the existing panic arms.
//
// src/structure/tfc/datatypes.rs
//   - Imports `bson::Decimal128`.
//   - `Datatype` gains the variants Decimal128, BSONObjectId, TimeStamp64, BSONTimeStamp,
//     Regex, Javascript and BSONBinary.
//   - The size match inside `impl Datatype` (its function header is outside this hunk;
//     presumably the fixed record size, TODO confirm) returns Some(16) for Decimal128,
//     Some(12) for BSONObjectId and Some(8) for TimeStamp64 / BSONTimeStamp. The other new
//     variants fall through to `_ => None`.
//   - `BSONObjectId` wraps `[u8; 12]`: from_lexical copies 12 bytes out of the buffer,
//     to_lexical copies the 12 bytes back out unchanged. `[u8; 12]` gets the same to_lexical.
//   - Decimal128 to_lexical reads `self.bytes()` as a little-endian u128; when the top bit
//     (DEC128_SIGN_MASK) is set it flips every bit, otherwise it flips only the top bit, and
//     it writes the result big-endian. from_lexical reads a big-endian u128 and undoes the
//     same transform.
//     NOTE(review): this orders entries by raw bit pattern. If the exponent field sits above
//     the coefficient, two values with different exponents (e.g. "1" vs "9.5") may sort in
//     non-numeric order. Confirm against the Decimal128 encoding before relying on range order.
//   - `u64y_type!` generates a `pub struct $ty(pub u64)` newtype deriving PartialEq and Debug,
//     with TdbDataType, FromLexical and ToLexical impls that delegate to the inner u64. It is
//     invoked for TimeStamp64 and BSONTimeStamp.
//   - `stringy_type!` is invoked for Regex and Javascript.
//   - NOTE(review): BSONBinary is added to the enum but no impl or record size for it is
//     visible in this patch. TODO confirm whether that is intentional.
//
// src/structure/tfc/typed.rs
//   - Adds the tests test_bson_objectid, test_bson_objectid_overlap and test_decimal128, plus
//     one earlier test whose header is not legible in this copy.
//   - NOTE(review): every visible test only prints with eprintln! and then calls
//     `panic!("wah")`, so each one fails unconditionally. Replace the panics with assertions
//     on the round-tripped entries before merging.
//
// NOTE(review): this copy of the patch appears to have lost its line breaks and every
// `<...>` span (e.g. `-> Option {`, `FromLexical::::from_lexical`, `let numbers: Vec = [`),
// and the typed.rs hunk is truncated. Recover the original patch before applying it.
diff --git a/Cargo.toml b/Cargo.toml index 2ae7bfce..a4e3757c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,7 +36,8 @@ regex = "1.5" lru = "0.10" bitvec = "1.0" tempfile = "3.1" +bson = "2.7" [features] noreadlock = [] -eprint_log = [] \ No newline at end of file +eprint_log = [] diff --git a/src/structure/tfc/block.rs b/src/structure/tfc/block.rs index 9172a8f4..3e1cbbcd 100644 --- a/src/structure/tfc/block.rs +++ b/src/structure/tfc/block.rs @@ -751,6 +751,8 @@ fn record_size_decoding(enc: u8) -> Option { 0 => None, 3 => Some(4), 4 => Some(8), + 5 => Some(12), + 6 => Some(16), _ => panic!("Ok, this is not known"), } } @@ -760,6 +762,8 @@ fn record_size_encoding(record_size: Option) -> u8 { None => 0, Some(4) => 3 << 3, Some(8) => 4 << 3, + Some(12) => 5 << 3, + Some(16) => 6 << 3, _ => { panic!("This is really bad!") } diff --git a/src/structure/tfc/datatypes.rs b/src/structure/tfc/datatypes.rs index a9b11234..d0dccf91 100644 --- a/src/structure/tfc/datatypes.rs +++ b/src/structure/tfc/datatypes.rs @@ -5,6 +5,7 @@ use super::{ TypedDictEntry, }; use base64::display::Base64Display; +use bson::Decimal128; use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; use bytes::{Buf, BufMut, Bytes, BytesMut}; use chrono::{NaiveDateTime, NaiveTime}; @@ -59,6 +60,14 @@ pub enum Datatype { Base64Binary, HexBinary, AnySimpleType, + + Decimal128, + BSONObjectId, + TimeStamp64, + BSONTimeStamp, + Regex, + Javascript, + BSONBinary, } impl Datatype { @@ -84,6 +93,10 @@ impl Datatype { Datatype::BigInt => None, Datatype::Token => None, Datatype::LangString => None, + Datatype::Decimal128 => Some(16), + Datatype::BSONObjectId => Some(12), + Datatype::TimeStamp64 => Some(8), + Datatype::BSONTimeStamp => Some(8), _ => None, } } @@ -998,6 +1011,67 @@ impl TdbDataType for HexBinary { } } +pub struct BSONObjectId([u8; 12]); + +impl FromLexical for BSONObjectId { + fn from_lexical(mut b: B) -> Self { + let mut result = [0; 12]; + b.copy_to_slice(&mut result); + + BSONObjectId(result) 
+ } +} + +impl ToLexical for BSONObjectId { + fn to_lexical(&self) -> Bytes { + Bytes::copy_from_slice(&self.0) + } +} + +impl ToLexical for [u8; 12] { + fn to_lexical(&self) -> Bytes { + Bytes::copy_from_slice(self) + } +} + +impl TdbDataType for BSONObjectId { + fn datatype() -> Datatype { + Datatype::BSONObjectId + } +} + +const DEC128_SIGN_MASK: u128 = 0x8000_0000_0000_0000_0000_0000_0000_0000; +const DEC128_COMPLEMENT: u128 = 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff; + +impl ToLexical for Decimal128 { + fn to_lexical(&self) -> Bytes { + let bits: u128 = u128::from_le_bytes(self.bytes()); + let transformed = if bits & DEC128_SIGN_MASK > 0 { + bits ^ DEC128_COMPLEMENT + } else { + bits ^ DEC128_SIGN_MASK + }; + Bytes::copy_from_slice(&transformed.to_be_bytes()) + } +} + +impl FromLexical for Decimal128 { + fn from_lexical(mut b: B) -> Self { + let i = b.get_u128(); + if i & DEC128_SIGN_MASK > 0 { + Decimal128::from_bytes((i ^ DEC128_SIGN_MASK).to_le_bytes()) + } else { + Decimal128::from_bytes((i ^ DEC128_COMPLEMENT).to_le_bytes()) + } + } +} + +impl TdbDataType for Decimal128 { + fn datatype() -> Datatype { + Datatype::Decimal128 + } +} + macro_rules! stringy_type { ($ty:ident) => { stringy_type!($ty, $ty); @@ -1082,6 +1156,39 @@ macro_rules! biginty_type { }; } +macro_rules! 
u64y_type { + ($ty:ident) => { + u64y_type!($ty, $ty); + }; + ($ty:ident, $datatype:ident) => { + #[derive(PartialEq, Debug)] + pub struct $ty(pub u64); + + impl TdbDataType for $ty { + fn datatype() -> Datatype { + Datatype::$datatype + } + } + + impl FromLexical<$ty> for $ty { + fn from_lexical(b: B) -> Self { + $ty(FromLexical::::from_lexical(b)) + } + } + impl FromLexical<$ty> for u64 { + fn from_lexical(b: B) -> Self { + FromLexical::::from_lexical(b) + } + } + + impl ToLexical<$ty> for $ty { + fn to_lexical(&self) -> Bytes { + self.0.to_lexical() + } + } + }; +} + stringy_type!(LangString); stringy_type!(NCName); stringy_type!(Name); @@ -1098,7 +1205,13 @@ stringy_type!(Entity); stringy_type!(AnySimpleType); +stringy_type!(Regex); +stringy_type!(Javascript); + biginty_type!(PositiveInteger); biginty_type!(NonNegativeInteger); biginty_type!(NegativeInteger); biginty_type!(NonPositiveInteger); + +u64y_type!(TimeStamp64); +u64y_type!(BSONTimeStamp); diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index 2494d07d..b3ab57c9 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -498,11 +498,12 @@ impl TypedDictBufBuilder = dict.iter().collect(); + eprintln!("{entries:?}"); + eprintln!("{data:?}"); + panic!("wah"); + } + + #[test] + fn test_bson_objectid() { + let used_types_buf = BytesMut::new(); + let type_offsets_buf = BytesMut::new(); + let block_offsets_buf = BytesMut::new(); + let data_buf = BytesMut::new(); + + let mut typed_builder = TypedDictBufBuilder::new( + used_types_buf, + type_offsets_buf, + block_offsets_buf, + data_buf, + ); + + let mut vec = vec![ + BSONObjectId::make_entry(&[42; 12]), + BSONObjectId::make_entry(&[43; 12]), + BSONObjectId::make_entry(&[44; 12]), + BSONObjectId::make_entry(&[41; 12]), + BSONObjectId::make_entry(&[25; 12]), + ]; + vec.sort(); + typed_builder.add_all(vec.into_iter()); + let (b1, b2, b3, b4) = typed_builder.finalize(); + let data = b4.freeze(); + let dict = 
TypedDict::from_parts(b1.freeze(), b2.freeze(), b3.freeze(), data.clone()); + + let entries: Vec<_> = dict.iter().collect(); + eprintln!("{entries:?}"); + eprintln!("{data:?}"); + panic!("wah"); + } + + #[test] + fn test_bson_objectid_overlap() { + let used_types_buf = BytesMut::new(); + let type_offsets_buf = BytesMut::new(); + let block_offsets_buf = BytesMut::new(); + let data_buf = BytesMut::new(); + + let mut typed_builder = TypedDictBufBuilder::new( + used_types_buf, + type_offsets_buf, + block_offsets_buf, + data_buf, + ); + + let mut ids = [[42; 12], [43; 12], [44; 12], [41; 12], [25; 12]]; + for id in ids.iter_mut() { + id[0] = 42; + } + + let mut vec: Vec<_> = ids.iter().map(BSONObjectId::make_entry).collect(); + vec.sort(); + typed_builder.add_all(vec.into_iter()); + let (b1, b2, b3, b4) = typed_builder.finalize(); + let data = b4.freeze(); + let dict = TypedDict::from_parts(b1.freeze(), b2.freeze(), b3.freeze(), data.clone()); + + let entries: Vec<_> = dict.iter().collect(); + eprintln!("{entries:?}"); + eprintln!("{data:?}"); + panic!("wah"); + } + + #[test] + fn test_decimal128() { + let used_types_buf = BytesMut::new(); + let type_offsets_buf = BytesMut::new(); + let block_offsets_buf = BytesMut::new(); + let data_buf = BytesMut::new(); + + let mut typed_builder = TypedDictBufBuilder::new( + used_types_buf, + type_offsets_buf, + block_offsets_buf, + data_buf, + ); + + let numbers: Vec = [ + "0.1", + "2.3", + "0.00000028", + "1000000", + "4.2", + "-1.3", + "-12", + "-0.0000005", + ] + .iter() + .map(|n| n.parse().unwrap()) + .collect(); + + let mut entries: Vec<_> = numbers.iter().map(Decimal128::make_entry).collect(); + entries.sort(); + + typed_builder.add_all(entries.into_iter()); + let (b1, b2, b3, b4) = typed_builder.finalize(); + let data = b4.freeze(); + let dict = TypedDict::from_parts(b1.freeze(), b2.freeze(), b3.freeze(), data.clone()); + + let entries: Vec<_> = dict.iter().collect(); + eprintln!("{entries:?}"); + eprintln!("{data:?}"); + 
eprintln!( + "{:?}", + entries + .iter() + .map(|e| e.as_val::().to_string()) + .collect::>() + ); + panic!("wah"); + } }