Skip to content

Commit 5496b9b

Browse files
committed
Improve the Checksum implementation
1 parent 85cc8de commit 5496b9b

File tree

5 files changed

+185
-34
lines changed

5 files changed

+185
-34
lines changed

Diff for: heed/src/envs/env_open_options.rs

+128-31
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#[cfg(master3)]
2+
use std::any::TypeId;
13
use std::ffi::CString;
24
#[cfg(windows)]
35
use std::ffi::OsStr;
@@ -14,10 +16,12 @@ use std::{io, ptr};
1416
use aead::{generic_array::typenum::Unsigned, AeadCore, AeadMutInPlace, Key, KeyInit};
1517
use synchronoise::SignalEvent;
1618

19+
#[cfg(master3)]
20+
use super::checksum_func_wrapper;
1721
#[cfg(master3)]
1822
use super::encrypted_env::{encrypt_func_wrapper, EncryptedEnv};
1923
use super::env::Env;
20-
use super::{canonicalize_path, OPENED_ENV};
24+
use super::{canonicalize_path, Checksum, NoChecksum, OPENED_ENV};
2125
#[cfg(windows)]
2226
use crate::envs::OsStrExtLmdb as _;
2327
use crate::mdb::error::mdb_result;
@@ -28,28 +32,28 @@ use crate::{EnvFlags, Error, Result};
2832
/// Options and flags which can be used to configure how an environment is opened.
2933
#[derive(Debug, PartialEq, Eq)]
3034
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
31-
pub struct EnvOpenOptions<T: TlsUsage> {
35+
pub struct EnvOpenOptions<T: TlsUsage, C: Checksum> {
3236
map_size: Option<usize>,
3337
max_readers: Option<u32>,
3438
max_dbs: Option<u32>,
3539
flags: EnvFlags,
36-
_tls_marker: PhantomData<T>,
40+
_marker: PhantomData<(T, C)>,
3741
}
3842

39-
impl EnvOpenOptions<WithTls> {
43+
impl EnvOpenOptions<WithTls, NoChecksum> {
4044
/// Creates a blank new set of options ready for configuration.
41-
pub fn new() -> EnvOpenOptions<WithTls> {
45+
pub fn new() -> EnvOpenOptions<WithTls, NoChecksum> {
4246
EnvOpenOptions {
4347
map_size: None,
4448
max_readers: None,
4549
max_dbs: None,
4650
flags: EnvFlags::empty(),
47-
_tls_marker: PhantomData,
51+
_marker: PhantomData,
4852
}
4953
}
5054
}
5155

52-
impl<T: TlsUsage> EnvOpenOptions<T> {
56+
impl<T: TlsUsage, C: Checksum + 'static> EnvOpenOptions<T, C> {
5357
/// Make the read transactions `!Send` by specifying they will
5458
/// use Thread Local Storage (TLS). It is often faster to open
5559
/// TLS-backed transactions.
@@ -81,9 +85,9 @@ impl<T: TlsUsage> EnvOpenOptions<T> {
8185
/// is_sendable(rtxn);
8286
/// # Ok(()) }
8387
/// ```
84-
pub fn read_txn_with_tls(self) -> EnvOpenOptions<WithTls> {
85-
let Self { map_size, max_readers, max_dbs, flags, _tls_marker: _ } = self;
86-
EnvOpenOptions { map_size, max_readers, max_dbs, flags, _tls_marker: PhantomData }
88+
pub fn read_txn_with_tls(self) -> EnvOpenOptions<WithTls, C> {
89+
let Self { map_size, max_readers, max_dbs, flags, _marker: _ } = self;
90+
EnvOpenOptions { map_size, max_readers, max_dbs, flags, _marker: PhantomData }
8791
}
8892

8993
/// Make the read transactions `Send` by specifying they will
@@ -126,9 +130,106 @@ impl<T: TlsUsage> EnvOpenOptions<T> {
126130
/// is_sendable(rtxn);
127131
/// # Ok(()) }
128132
/// ```
129-
pub fn read_txn_without_tls(self) -> EnvOpenOptions<WithoutTls> {
130-
let Self { map_size, max_readers, max_dbs, flags, _tls_marker: _ } = self;
131-
EnvOpenOptions { map_size, max_readers, max_dbs, flags, _tls_marker: PhantomData }
133+
pub fn read_txn_without_tls(self) -> EnvOpenOptions<WithoutTls, C> {
134+
let Self { map_size, max_readers, max_dbs, flags, _marker: _ } = self;
135+
EnvOpenOptions { map_size, max_readers, max_dbs, flags, _marker: PhantomData }
136+
}
137+
138+
#[cfg(master3)]
139+
/// Changes the checksum algorithm to use.
140+
///
141+
/// # Basic Example
142+
///
143+
/// Creates and opens a database. The [`Env`] is using a [`crc`](https://github.com/mrhooray/crc-rs)
144+
/// algorithm.
145+
///
146+
/// Note that you can use **any** type of crc algorithm as it is possible to tell
147+
/// the size of the crc to LMDB.
148+
///
149+
/// ```
150+
/// use std::fs;
151+
/// use std::path::Path;
152+
/// use memchr::memmem::find;
153+
/// use argon2::Argon2;
154+
/// use chacha20poly1305::{ChaCha20Poly1305, Key};
155+
/// use heed3::types::*;
156+
/// use heed3::{EnvOpenOptions, Checksum, Database, Error, MdbError};
157+
///
158+
/// /// A checksum algorithm based on the well-known CRC_32_BZIP2.
159+
/// enum Crc32Bzip2 {}
160+
///
161+
/// impl Checksum for Crc32Bzip2 {
162+
/// // Be careful: the size is in bytes, not bits.
163+
/// const SIZE: u32 = 32 / 8;
164+
///
165+
/// fn checksum(input: &[u8], output: &mut [u8], _key: Option<&[u8]>) {
166+
/// let sum = crc::Crc::<u32>::new(&crc::CRC_32_BZIP2).checksum(input);
167+
/// eprintln!("checksumming {input:?} which gives {sum:?}");
168+
/// output.copy_from_slice(&sum.to_ne_bytes());
169+
/// }
170+
/// }
171+
///
172+
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
173+
/// let env_path = tempfile::tempdir()?;
174+
///
175+
/// fs::create_dir_all(&env_path)?;
176+
///
177+
/// // We open the environment
178+
/// let mut options = EnvOpenOptions::new().checksum::<Crc32Bzip2>();
179+
/// let env = unsafe {
180+
/// options
181+
/// .map_size(10 * 1024 * 1024) // 10MB
182+
/// .max_dbs(3)
183+
/// .open(&env_path)?
184+
/// };
185+
///
186+
/// let key1 = "first-key";
187+
/// let val1 = "this is my first value";
188+
/// let key2 = "second-key";
189+
/// let val2 = "this is a second information";
190+
///
191+
/// // We create a database and write values in it
192+
/// let mut wtxn = env.write_txn()?;
193+
/// let db = env.create_database::<Str, Str>(&mut wtxn, Some("first"))?;
194+
/// db.put(&mut wtxn, key1, val1)?;
195+
/// db.put(&mut wtxn, key2, val2)?;
196+
/// wtxn.commit()?;
197+
///
198+
/// // We check that we can read the values back
199+
/// let rtxn = env.read_txn()?;
200+
/// assert_eq!(db.get(&rtxn, key1)?, Some(val1));
201+
/// assert_eq!(db.get(&rtxn, key2)?, Some(val2));
202+
/// drop(rtxn);
203+
///
204+
/// // We close the env before modifying its data file on disk
205+
/// env.prepare_for_closing().wait();
206+
///
207+
/// // We modify the content of the data file
208+
/// let mut content = fs::read(env_path.path().join("data.mdb"))?;
209+
/// let pos = find(&content, b"value").unwrap();
210+
/// content[pos..pos + 5].copy_from_slice(b"thing");
211+
/// fs::write(env_path.path().join("data.mdb"), content)?;
212+
///
213+
/// // We reopen the environment
214+
/// let mut options = EnvOpenOptions::new().checksum::<Crc32Bzip2>();
215+
/// let env = unsafe {
216+
/// options
217+
/// .map_size(10 * 1024 * 1024) // 10MB
218+
/// .max_dbs(3)
219+
/// .open(&env_path)?
220+
/// };
221+
///
222+
/// // We check that reading the corrupted value now fails with a checksum error
223+
/// let rtxn = env.read_txn()?;
224+
/// let db = env.open_database::<Str, Str>(&rtxn, Some("first"))?.unwrap();
225+
/// assert!(matches!(db.get(&rtxn, key1).unwrap_err(), Error::Mdb(MdbError::BadChecksum)));
226+
/// drop(rtxn);
227+
///
228+
/// # Ok(()) }
229+
/// ```
230+
pub fn checksum<NC: Checksum>(self) -> EnvOpenOptions<T, NC> {
231+
let Self { map_size, max_readers, max_dbs, flags, _marker } = self;
232+
EnvOpenOptions { map_size, max_readers, max_dbs, flags, _marker: PhantomData }
132233
}
133234

134235
/// Set the size of the memory map to use for this environment.
@@ -237,18 +338,6 @@ impl<T: TlsUsage> EnvOpenOptions<T> {
237338
path.as_ref(),
238339
#[cfg(master3)]
239340
None,
240-
#[cfg(master3)]
241-
None,
242-
)
243-
}
244-
245-
pub unsafe fn open_checksummed<P: AsRef<Path>>(&self, path: P) -> Result<Env<T>> {
246-
self.raw_open_with_checksum_and_encryption(
247-
path.as_ref(),
248-
#[cfg(master3)]
249-
None,
250-
#[cfg(master3)]
251-
None,
252341
)
253342
}
254343

@@ -404,7 +493,6 @@ impl<T: TlsUsage> EnvOpenOptions<T> {
404493
{
405494
self.raw_open_with_checksum_and_encryption(
406495
path.as_ref(),
407-
None,
408496
Some((Some(encrypt_func_wrapper::<E>), &key, <E as AeadCore>::TagSize::U32)),
409497
)
410498
.map(|inner| EncryptedEnv { inner })
@@ -413,7 +501,6 @@ impl<T: TlsUsage> EnvOpenOptions<T> {
413501
fn raw_open_with_checksum_and_encryption(
414502
&self,
415503
path: &Path,
416-
#[cfg(master3)] sum: Option<(ffi::MDB_sum_func, u32)>,
417504
#[cfg(master3)] enc: Option<(ffi::MDB_enc_func, &[u8], u32)>,
418505
) -> Result<Env<T>> {
419506
let mut lock = OPENED_ENV.write().unwrap();
@@ -451,6 +538,16 @@ impl<T: TlsUsage> EnvOpenOptions<T> {
451538
))?;
452539
}
453540

541+
#[cfg(master3)]
542+
if TypeId::of::<C>() != TypeId::of::<NoChecksum>() {
543+
eprintln!("Doing some checksumming stuff");
544+
mdb_result(ffi::mdb_env_set_checksum(
545+
env,
546+
Some(checksum_func_wrapper::<C>),
547+
C::SIZE,
548+
))?;
549+
}
550+
454551
if let Some(size) = self.map_size {
455552
if size % page_size::get() != 0 {
456553
let msg = format!(
@@ -496,15 +593,15 @@ impl<T: TlsUsage> EnvOpenOptions<T> {
496593
}
497594
}
498595

499-
impl Default for EnvOpenOptions<WithTls> {
596+
impl Default for EnvOpenOptions<WithTls, NoChecksum> {
500597
fn default() -> Self {
501598
Self::new()
502599
}
503600
}
504601

505-
impl<T: TlsUsage> Clone for EnvOpenOptions<T> {
602+
impl<T: TlsUsage, C: Checksum> Clone for EnvOpenOptions<T, C> {
506603
fn clone(&self) -> Self {
507-
let Self { map_size, max_readers, max_dbs, flags, _tls_marker } = *self;
508-
EnvOpenOptions { map_size, max_readers, max_dbs, flags, _tls_marker }
604+
let Self { map_size, max_readers, max_dbs, flags, _marker } = *self;
605+
EnvOpenOptions { map_size, max_readers, max_dbs, flags, _marker }
509606
}
510607
}

Diff for: heed/src/envs/mod.rs

+52
Original file line numberDiff line numberDiff line change
@@ -288,3 +288,55 @@ impl FlagSetMode {
288288
}
289289
}
290290
}
291+
292+
/// A trait defining how to calculate checksum within the environment.
293+
///
294+
/// Checksumming is not supported in the heed crate and
295+
/// can only be enabled within the heed3 crate.
296+
pub trait Checksum {
297+
/// The size of computed checksum values, in bytes.
298+
const SIZE: u32;
299+
300+
/// Compute the checksum of the data in input and store the
301+
/// result in output. An optional key may be used with keyed
302+
/// hash algorithms.
303+
///
304+
/// The key parameter is an encryption key, if encryption was
305+
/// configured. This parameter will be `None` if there is no key.
306+
fn checksum(input: &[u8], output: &mut [u8], key: Option<&[u8]>);
307+
}
308+
309+
/// Deactivate environment checksumming.
310+
///
311+
/// Checksumming is not supported in the heed crate and
312+
/// can only be enabled within the heed3 crate.
313+
pub enum NoChecksum {}
314+
315+
impl Checksum for NoChecksum {
316+
const SIZE: u32 = 0;
317+
fn checksum(_input: &[u8], _output: &mut [u8], _key: Option<&[u8]>) {}
318+
}
319+
320+
/// The wrapper function that is called by LMDB that directly calls
321+
/// the Rust idiomatic function internally.
322+
#[cfg(master3)]
323+
unsafe extern "C" fn checksum_func_wrapper<C: Checksum>(
324+
src: *const ffi::MDB_val,
325+
dst: *mut ffi::MDB_val,
326+
key_ptr: *const ffi::MDB_val,
327+
) {
328+
let result = std::panic::catch_unwind(|| {
329+
let input = std::slice::from_raw_parts((*src).mv_data as *const u8, (*src).mv_size);
330+
let output = std::slice::from_raw_parts_mut((*dst).mv_data as *mut u8, (*dst).mv_size);
331+
let key = if key_ptr.is_null() {
332+
None
333+
} else {
334+
Some(std::slice::from_raw_parts((*key_ptr).mv_data as *const u8, (*key_ptr).mv_size))
335+
};
336+
C::checksum(input, output, key)
337+
});
338+
339+
if result.is_err() {
340+
std::process::abort();
341+
}
342+
}

Diff for: heed/src/lib.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,8 @@ pub use self::databases::{EncryptedDatabase, EncryptedDatabaseOpenOptions};
9292
#[cfg(master3)]
9393
pub use self::envs::EncryptedEnv;
9494
pub use self::envs::{
95-
env_closing_event, CompactionOption, DefaultComparator, Env, EnvClosingEvent, EnvInfo,
96-
EnvOpenOptions, FlagSetMode, IntegerComparator,
95+
env_closing_event, Checksum, CompactionOption, DefaultComparator, Env, EnvClosingEvent,
96+
EnvInfo, EnvOpenOptions, FlagSetMode, IntegerComparator, NoChecksum,
9797
};
9898
pub use self::iterator::{
9999
RoIter, RoPrefix, RoRange, RoRevIter, RoRevPrefix, RoRevRange, RwIter, RwPrefix, RwRange,

Diff for: heed/src/mdb/lmdb_ffi.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ pub use ffi::{
1111
MDB_RDONLY, MDB_RESERVE,
1212
};
1313
#[cfg(master3)]
14-
pub use ffi::{mdb_env_set_encrypt, MDB_enc_func, MDB_sum_func};
14+
pub use ffi::{mdb_env_set_checksum, mdb_env_set_encrypt, MDB_enc_func};
1515
#[cfg(master3)]
1616
use lmdb_master3_sys as ffi;
1717
#[cfg(not(master3))]

Diff for: heed3/Cargo.toml

+2
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ synchronoise = "1.0.1"
3232
[dev-dependencies]
3333
# TODO update dependencies
3434
argon2 = { version = "0.5.3", features = ["std"] }
35+
crc = "3.2.1"
36+
memchr = "2.7.4"
3537
serde = { version = "1.0.215", features = ["derive"] }
3638
chacha20poly1305 = "0.10.1"
3739
tempfile = "3.14.0"

0 commit comments

Comments
 (0)