Skip to content

Commit db3e839

Browse files
committed
Improve the Checksum implementation
1 parent 85cc8de commit db3e839

File tree

5 files changed

+183
-34
lines changed

5 files changed

+183
-34
lines changed

heed/src/envs/env_open_options.rs

Lines changed: 126 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#[cfg(master3)]
2+
use std::any::TypeId;
13
use std::ffi::CString;
24
#[cfg(windows)]
35
use std::ffi::OsStr;
@@ -14,10 +16,12 @@ use std::{io, ptr};
1416
use aead::{generic_array::typenum::Unsigned, AeadCore, AeadMutInPlace, Key, KeyInit};
1517
use synchronoise::SignalEvent;
1618

19+
#[cfg(master3)]
20+
use super::checksum_func_wrapper;
1721
#[cfg(master3)]
1822
use super::encrypted_env::{encrypt_func_wrapper, EncryptedEnv};
1923
use super::env::Env;
20-
use super::{canonicalize_path, OPENED_ENV};
24+
use super::{canonicalize_path, Checksum, NoChecksum, OPENED_ENV};
2125
#[cfg(windows)]
2226
use crate::envs::OsStrExtLmdb as _;
2327
use crate::mdb::error::mdb_result;
@@ -28,28 +32,28 @@ use crate::{EnvFlags, Error, Result};
2832
/// Options and flags which can be used to configure how an environment is opened.
2933
#[derive(Debug, PartialEq, Eq)]
3034
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
31-
pub struct EnvOpenOptions<T: TlsUsage> {
35+
pub struct EnvOpenOptions<T: TlsUsage, C: Checksum> {
3236
map_size: Option<usize>,
3337
max_readers: Option<u32>,
3438
max_dbs: Option<u32>,
3539
flags: EnvFlags,
36-
_tls_marker: PhantomData<T>,
40+
_marker: PhantomData<(T, C)>,
3741
}
3842

39-
impl EnvOpenOptions<WithTls> {
43+
impl EnvOpenOptions<WithTls, NoChecksum> {
4044
/// Creates a blank new set of options ready for configuration.
41-
pub fn new() -> EnvOpenOptions<WithTls> {
45+
pub fn new() -> EnvOpenOptions<WithTls, NoChecksum> {
4246
EnvOpenOptions {
4347
map_size: None,
4448
max_readers: None,
4549
max_dbs: None,
4650
flags: EnvFlags::empty(),
47-
_tls_marker: PhantomData,
51+
_marker: PhantomData,
4852
}
4953
}
5054
}
5155

52-
impl<T: TlsUsage> EnvOpenOptions<T> {
56+
impl<T: TlsUsage, C: Checksum + 'static> EnvOpenOptions<T, C> {
5357
/// Make the read transactions `!Send` by specifying they will
5458
/// use Thread Local Storage (TLS). It is often faster to open
5559
/// TLS-backed transactions.
@@ -81,9 +85,9 @@ impl<T: TlsUsage> EnvOpenOptions<T> {
8185
/// is_sendable(rtxn);
8286
/// # Ok(()) }
8387
/// ```
84-
pub fn read_txn_with_tls(self) -> EnvOpenOptions<WithTls> {
85-
let Self { map_size, max_readers, max_dbs, flags, _tls_marker: _ } = self;
86-
EnvOpenOptions { map_size, max_readers, max_dbs, flags, _tls_marker: PhantomData }
88+
pub fn read_txn_with_tls(self) -> EnvOpenOptions<WithTls, C> {
89+
let Self { map_size, max_readers, max_dbs, flags, _marker: _ } = self;
90+
EnvOpenOptions { map_size, max_readers, max_dbs, flags, _marker: PhantomData }
8791
}
8892

8993
/// Make the read transactions `Send` by specifying they will
@@ -126,9 +130,105 @@ impl<T: TlsUsage> EnvOpenOptions<T> {
126130
/// is_sendable(rtxn);
127131
/// # Ok(()) }
128132
/// ```
129-
pub fn read_txn_without_tls(self) -> EnvOpenOptions<WithoutTls> {
130-
let Self { map_size, max_readers, max_dbs, flags, _tls_marker: _ } = self;
131-
EnvOpenOptions { map_size, max_readers, max_dbs, flags, _tls_marker: PhantomData }
133+
pub fn read_txn_without_tls(self) -> EnvOpenOptions<WithoutTls, C> {
134+
let Self { map_size, max_readers, max_dbs, flags, _marker: _ } = self;
135+
EnvOpenOptions { map_size, max_readers, max_dbs, flags, _marker: PhantomData }
136+
}
137+
138+
#[cfg(master3)]
139+
/// Changes the checksum algorithm to use.
140+
///
141+
/// # Basic Example
142+
///
143+
/// Creates and opens a database. The [`Env`] uses a [`crc`](https://github.com/mrhooray/crc-rs)
144+
/// algorithm.
145+
///
146+
/// Note that you can use **any** type of crc algorithm as it is possible to tell
147+
/// the size of the crc to LMDB.
148+
///
149+
/// ```
150+
/// use std::fs;
151+
/// use std::path::Path;
152+
/// use memchr::memmem::find;
153+
/// use argon2::Argon2;
154+
/// use chacha20poly1305::{ChaCha20Poly1305, Key};
155+
/// use heed3::types::*;
156+
/// use heed3::{EnvOpenOptions, Checksum, Database, Error, MdbError};
157+
///
158+
/// /// A checksum algorithm based on the well-known CRC_32_BZIP2.
159+
/// enum Crc32Bzip2 {}
160+
///
161+
/// impl Checksum for Crc32Bzip2 {
162+
/// // Be careful the size is in bytes not bits.
163+
/// const SIZE: u32 = 32 / 8;
164+
///
165+
/// fn checksum(input: &[u8], output: &mut [u8], _key: Option<&[u8]>) {
166+
/// let sum = crc::Crc::<u32>::new(&crc::CRC_32_BZIP2).checksum(input);
167+
/// output.copy_from_slice(&sum.to_ne_bytes());
168+
/// }
169+
/// }
170+
///
171+
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
172+
/// let env_path = tempfile::tempdir()?;
173+
///
174+
/// fs::create_dir_all(&env_path)?;
175+
///
176+
/// // We open the environment
177+
/// let mut options = EnvOpenOptions::new().checksum::<Crc32Bzip2>();
178+
/// let env = unsafe {
179+
/// options
180+
/// .map_size(10 * 1024 * 1024) // 10MB
181+
/// .max_dbs(3)
182+
/// .open(&env_path)?
183+
/// };
184+
///
185+
/// let key1 = "first-key";
186+
/// let val1 = "this is my first value";
187+
/// let key2 = "second-key";
188+
/// let val2 = "this is a second information";
189+
///
190+
/// // We create a database and write values in it
191+
/// let mut wtxn = env.write_txn()?;
192+
/// let db = env.create_database::<Str, Str>(&mut wtxn, Some("first"))?;
193+
/// db.put(&mut wtxn, key1, val1)?;
194+
/// db.put(&mut wtxn, key2, val2)?;
195+
/// wtxn.commit()?;
196+
///
197+
/// // We check that we can read the values back
198+
/// let rtxn = env.read_txn()?;
199+
/// assert_eq!(db.get(&rtxn, key1)?, Some(val1));
200+
/// assert_eq!(db.get(&rtxn, key2)?, Some(val2));
201+
/// drop(rtxn);
202+
///
203+
/// // We close the env so that we can tamper with the data file
204+
/// env.prepare_for_closing().wait();
205+
///
206+
/// // We modify the content of the data file
207+
/// let mut content = fs::read(env_path.path().join("data.mdb"))?;
208+
/// let pos = find(&content, b"value").unwrap();
209+
/// content[pos..pos + 5].copy_from_slice(b"thing");
210+
/// fs::write(env_path.path().join("data.mdb"), content)?;
211+
///
212+
/// // We reopen the environment
213+
/// let mut options = EnvOpenOptions::new().checksum::<Crc32Bzip2>();
214+
/// let env = unsafe {
215+
/// options
216+
/// .map_size(10 * 1024 * 1024) // 10MB
217+
/// .max_dbs(3)
218+
/// .open(&env_path)?
219+
/// };
220+
///
221+
/// // We check that reading the corrupted value fails with a checksum error
222+
/// let rtxn = env.read_txn()?;
223+
/// let db = env.open_database::<Str, Str>(&rtxn, Some("first"))?.unwrap();
224+
/// assert!(matches!(db.get(&rtxn, key1).unwrap_err(), Error::Mdb(MdbError::BadChecksum)));
225+
/// drop(rtxn);
226+
///
227+
/// # Ok(()) }
228+
/// ```
229+
pub fn checksum<NC: Checksum>(self) -> EnvOpenOptions<T, NC> {
230+
let Self { map_size, max_readers, max_dbs, flags, _marker } = self;
231+
EnvOpenOptions { map_size, max_readers, max_dbs, flags, _marker: PhantomData }
132232
}
133233

134234
/// Set the size of the memory map to use for this environment.
@@ -237,18 +337,6 @@ impl<T: TlsUsage> EnvOpenOptions<T> {
237337
path.as_ref(),
238338
#[cfg(master3)]
239339
None,
240-
#[cfg(master3)]
241-
None,
242-
)
243-
}
244-
245-
pub unsafe fn open_checksummed<P: AsRef<Path>>(&self, path: P) -> Result<Env<T>> {
246-
self.raw_open_with_checksum_and_encryption(
247-
path.as_ref(),
248-
#[cfg(master3)]
249-
None,
250-
#[cfg(master3)]
251-
None,
252340
)
253341
}
254342

@@ -404,7 +492,6 @@ impl<T: TlsUsage> EnvOpenOptions<T> {
404492
{
405493
self.raw_open_with_checksum_and_encryption(
406494
path.as_ref(),
407-
None,
408495
Some((Some(encrypt_func_wrapper::<E>), &key, <E as AeadCore>::TagSize::U32)),
409496
)
410497
.map(|inner| EncryptedEnv { inner })
@@ -413,7 +500,6 @@ impl<T: TlsUsage> EnvOpenOptions<T> {
413500
fn raw_open_with_checksum_and_encryption(
414501
&self,
415502
path: &Path,
416-
#[cfg(master3)] sum: Option<(ffi::MDB_sum_func, u32)>,
417503
#[cfg(master3)] enc: Option<(ffi::MDB_enc_func, &[u8], u32)>,
418504
) -> Result<Env<T>> {
419505
let mut lock = OPENED_ENV.write().unwrap();
@@ -451,6 +537,15 @@ impl<T: TlsUsage> EnvOpenOptions<T> {
451537
))?;
452538
}
453539

540+
#[cfg(master3)]
541+
if TypeId::of::<C>() != TypeId::of::<NoChecksum>() {
542+
mdb_result(ffi::mdb_env_set_checksum(
543+
env,
544+
Some(checksum_func_wrapper::<C>),
545+
C::SIZE,
546+
))?;
547+
}
548+
454549
if let Some(size) = self.map_size {
455550
if size % page_size::get() != 0 {
456551
let msg = format!(
@@ -496,15 +591,15 @@ impl<T: TlsUsage> EnvOpenOptions<T> {
496591
}
497592
}
498593

499-
impl Default for EnvOpenOptions<WithTls> {
594+
impl Default for EnvOpenOptions<WithTls, NoChecksum> {
500595
fn default() -> Self {
501596
Self::new()
502597
}
503598
}
504599

505-
impl<T: TlsUsage> Clone for EnvOpenOptions<T> {
600+
impl<T: TlsUsage, C: Checksum> Clone for EnvOpenOptions<T, C> {
506601
fn clone(&self) -> Self {
507-
let Self { map_size, max_readers, max_dbs, flags, _tls_marker } = *self;
508-
EnvOpenOptions { map_size, max_readers, max_dbs, flags, _tls_marker }
602+
let Self { map_size, max_readers, max_dbs, flags, _marker } = *self;
603+
EnvOpenOptions { map_size, max_readers, max_dbs, flags, _marker }
509604
}
510605
}

heed/src/envs/mod.rs

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,3 +288,55 @@ impl FlagSetMode {
288288
}
289289
}
290290
}
291+
292+
/// A trait defining how to calculate checksums within the environment.
293+
///
294+
/// Enabling checksumming is not supported in the heed crate and
295+
/// is only available within the heed3 crate.
296+
pub trait Checksum {
297+
/// The size of computed checksum values, in bytes.
298+
const SIZE: u32;
299+
300+
/// Compute the checksum of the data in input and store the
301+
/// result in output, an optional key may be used with keyed
302+
/// hash algorithms.
303+
///
304+
/// The key parameter is an encryption key, if encryption was
305+
/// configured. This parameter will be `None` if there is no key.
306+
fn checksum(input: &[u8], output: &mut [u8], key: Option<&[u8]>);
307+
}
308+
309+
/// Deactivate environment checksumming.
310+
///
311+
/// Enabling checksumming is not supported in the heed crate and
312+
/// is only available within the heed3 crate.
313+
pub enum NoChecksum {}
314+
315+
impl Checksum for NoChecksum {
316+
const SIZE: u32 = 0;
317+
fn checksum(_input: &[u8], _output: &mut [u8], _key: Option<&[u8]>) {}
318+
}
319+
320+
/// The wrapper function that is called by LMDB that directly calls
321+
/// the Rust idiomatic function internally.
322+
#[cfg(master3)]
323+
unsafe extern "C" fn checksum_func_wrapper<C: Checksum>(
324+
src: *const ffi::MDB_val,
325+
dst: *mut ffi::MDB_val,
326+
key_ptr: *const ffi::MDB_val,
327+
) {
328+
let result = std::panic::catch_unwind(|| {
329+
let input = std::slice::from_raw_parts((*src).mv_data as *const u8, (*src).mv_size);
330+
let output = std::slice::from_raw_parts_mut((*dst).mv_data as *mut u8, (*dst).mv_size);
331+
let key = if key_ptr.is_null() {
332+
None
333+
} else {
334+
Some(std::slice::from_raw_parts((*key_ptr).mv_data as *const u8, (*key_ptr).mv_size))
335+
};
336+
C::checksum(input, output, key)
337+
});
338+
339+
if result.is_err() {
340+
std::process::abort();
341+
}
342+
}

heed/src/lib.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,8 @@ pub use self::databases::{EncryptedDatabase, EncryptedDatabaseOpenOptions};
9292
#[cfg(master3)]
9393
pub use self::envs::EncryptedEnv;
9494
pub use self::envs::{
95-
env_closing_event, CompactionOption, DefaultComparator, Env, EnvClosingEvent, EnvInfo,
96-
EnvOpenOptions, FlagSetMode, IntegerComparator,
95+
env_closing_event, Checksum, CompactionOption, DefaultComparator, Env, EnvClosingEvent,
96+
EnvInfo, EnvOpenOptions, FlagSetMode, IntegerComparator, NoChecksum,
9797
};
9898
pub use self::iterator::{
9999
RoIter, RoPrefix, RoRange, RoRevIter, RoRevPrefix, RoRevRange, RwIter, RwPrefix, RwRange,

heed/src/mdb/lmdb_ffi.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ pub use ffi::{
1111
MDB_RDONLY, MDB_RESERVE,
1212
};
1313
#[cfg(master3)]
14-
pub use ffi::{mdb_env_set_encrypt, MDB_enc_func, MDB_sum_func};
14+
pub use ffi::{mdb_env_set_checksum, mdb_env_set_encrypt, MDB_enc_func};
1515
#[cfg(master3)]
1616
use lmdb_master3_sys as ffi;
1717
#[cfg(not(master3))]

heed3/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ synchronoise = "1.0.1"
3232
[dev-dependencies]
3333
# TODO update dependencies
3434
argon2 = { version = "0.5.3", features = ["std"] }
35+
crc = "3.2.1"
36+
memchr = "2.7.4"
3537
serde = { version = "1.0.215", features = ["derive"] }
3638
chacha20poly1305 = "0.10.1"
3739
tempfile = "3.14.0"

0 commit comments

Comments
 (0)