Skip to content

Commit e85a8ad

Browse files
0xdeafbeef authored and Rexagon committed
perf(core): mt cell store
1 parent 8a872b1 commit e85a8ad

File tree

4 files changed

+152
-47
lines changed

4 files changed

+152
-47
lines changed

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ bytesize = { version = "1.3.0", features = ["serde"] }
4141
castaway = "0.2"
4242
clap = { version = "4.5.3", features = ["derive"] }
4343
crc32c = "0.6"
44+
crossbeam-deque = "0.8.5"
45+
crossbeam-utils = "0.8.20"
4446
dashmap = "5.5.3"
4547
dirs = "5.0.1"
4648
ed25519 = "2.0"

storage/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ bumpalo = { workspace = true }
1818
bytes = { workspace = true }
1919
bytesize = { workspace = true }
2020
crc32c = { workspace = true }
21+
crossbeam-deque = { workspace = true }
22+
crossbeam-utils = { workspace = true }
2123
dashmap = { workspace = true }
2224
everscale-types = { workspace = true, features = ["tycho", "stats"] }
2325
fdlimit = { workspace = true }

storage/src/store/shard_state/cell_storage.rs

Lines changed: 146 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
use std::cell::UnsafeCell;
2-
use std::collections::hash_map;
2+
use std::collections::{hash_map, VecDeque};
33
use std::mem::{ManuallyDrop, MaybeUninit};
44
use std::sync::atomic::{AtomicI64, AtomicU8, Ordering};
55
use std::sync::{Arc, Weak};
66

77
use anyhow::{Context, Result};
88
use bumpalo::Bump;
9+
use crossbeam_deque::{Steal, Stealer, Worker};
10+
use crossbeam_utils::Backoff;
911
use dashmap::mapref::entry::Entry;
12+
use dashmap::Map;
1013
use everscale_types::cell::*;
1114
use parking_lot::Mutex;
1215
use quick_cache::sync::{Cache, DefaultLifecycle};
@@ -242,40 +245,59 @@ impl CellStorage {
242245
) -> Result<(PendingOperation<'_>, usize), CellStorageError> {
243246
let pending_op = self.pending.begin();
244247

248+
let walk_hist = HistogramGuard::begin("tycho_storage_walk_tree_time");
245249
let ctx = StoreContext::new(&self.db, &self.raw_cells_cache, estimated_cell_count);
246250

247-
// Check root cell
251+
let mut queue = VecDeque::new();
252+
queue.push_back((root, 0usize));
248253

249-
let key = root.repr_hash();
250-
if !ctx.insert_cell(key, root.as_ref(), 0)? {
251-
return Ok((pending_op, 0));
252-
}
253-
254-
let mut stack = Vec::with_capacity(16);
255-
stack.push(root.references());
254+
while let Some((current_cell, current_depth)) = queue.pop_front() {
255+
if !ctx.insert_cell(
256+
current_cell.repr_hash(),
257+
current_cell.as_ref(),
258+
Some(current_depth),
259+
)? {
260+
continue;
261+
}
262+
for next_cell in current_cell.references().cloned() {
263+
queue.push_back((next_cell, current_depth + 1));
264+
}
256265

257-
// Check other cells
258-
'outer: loop {
259-
let depth = stack.len();
260-
let Some(iter) = stack.last_mut() else {
266+
if current_depth == 6 {
261267
break;
262-
};
268+
}
269+
}
263270

264-
for child in &mut *iter {
265-
let key = child.repr_hash();
271+
let num_cpus = std::thread::available_parallelism()
272+
.expect("We don't use platforms where it's not supported")
273+
.get();
274+
if !queue.is_empty() {
275+
let queues = (0..num_cpus)
276+
.map(|_| Worker::new_lifo())
277+
.collect::<Vec<_>>();
266278

267-
if ctx.insert_cell(key, child, depth)? {
268-
stack.push(child.references());
269-
continue 'outer;
270-
}
279+
for (idx, cell) in queue.into_iter().enumerate() {
280+
queues[idx % num_cpus].push(cell.0);
271281
}
272282

273-
stack.pop();
274-
}
283+
let stealers: Vec<_> = queues.iter().map(|w| w.stealer()).collect();
275284

276-
// Clear big chunks of data before finalization
277-
drop(stack);
285+
std::thread::scope(|s| {
286+
for (index, worker) in queues.into_iter().enumerate() {
287+
let mut stealers = stealers.clone();
288+
stealers.remove(index); // we don't want to steal from ourselves
289+
290+
let ctxt = ctx.clone();
291+
s.spawn(move || {
292+
process_worker_queue(worker, stealers, &ctxt)
293+
.expect("todo somehow propagate error");
294+
});
295+
}
296+
});
297+
}
298+
drop(walk_hist);
278299

300+
let ctx = Arc::into_inner(ctx).unwrap();
279301
// Write transaction to the `WriteBatch`
280302
Ok((pending_op, ctx.finalize(batch)))
281303
}
@@ -419,6 +441,53 @@ impl CellStorage {
419441
}
420442
}
421443

444+
fn process_worker_queue(
445+
worker: Worker<Cell>,
446+
stealers: Vec<Stealer<Cell>>,
447+
ctx: &StoreContext,
448+
) -> Result<(), CellStorageError> {
449+
loop {
450+
let Some(cell) = find_task(&worker, &stealers) else {
451+
break Ok(());
452+
};
453+
454+
let cell_hash = *cell.repr_hash();
455+
if !ctx.insert_cell(&cell_hash, cell.as_ref(), None)? {
456+
continue;
457+
}
458+
459+
for c in cell.references().cloned() {
460+
worker.push(c);
461+
}
462+
}
463+
}
464+
465+
fn find_task<T>(local: &Worker<T>, stealers: &[Stealer<T>]) -> Option<T> {
466+
if let Some(task) = local.pop() {
467+
return Some(task);
468+
};
469+
470+
let backoff = Backoff::new();
471+
for stealer in stealers {
472+
'inner: loop {
473+
match stealer.steal_batch_and_pop(local) {
474+
Steal::Empty => {
475+
// todo : always skip it
476+
break 'inner;
477+
}
478+
Steal::Success(t) => {
479+
return Some(t);
480+
}
481+
Steal::Retry => {
482+
backoff.snooze();
483+
continue 'inner;
484+
}
485+
}
486+
}
487+
}
488+
None
489+
}
490+
422491
struct CellWithRefs {
423492
rc: u32,
424493
data: Option<Vec<u8>>,
@@ -431,23 +500,23 @@ struct StoreContext {
431500
}
432501

433502
impl StoreContext {
434-
fn new(db: &BaseDb, raw_cache: &Arc<RawCellsCache>, capacity: usize) -> Self {
435-
Self {
503+
fn new(db: &BaseDb, raw_cache: &Arc<RawCellsCache>, capacity: usize) -> Arc<Self> {
504+
Arc::new(Self {
436505
db: db.clone(),
437506
raw_cache: raw_cache.clone(),
438507
transaction: FastDashMap::with_capacity_and_hasher_and_shard_amount(
439508
capacity,
440509
Default::default(),
441510
512,
442511
),
443-
}
512+
})
444513
}
445514

446515
fn insert_cell(
447516
&self,
448517
key: &HashBytes,
449518
cell: &DynCell,
450-
depth: usize,
519+
depth: Option<usize>,
451520
) -> Result<bool, CellStorageError> {
452521
let mut buffer = [0; 512];
453522
Ok(match self.transaction.entry(*key) {
@@ -464,13 +533,16 @@ impl StoreContext {
464533
const NEW_CELLS_DEPTH_THRESHOLD: usize = 4;
465534

466535
let (old_rc, has_value) = 'value: {
467-
if depth >= NEW_CELLS_DEPTH_THRESHOLD {
468-
// NOTE: `get` here is used to affect a "hotness" of the value, because
469-
// there is a big chance that we will need it soon during state processing
470-
if let Some(entry) = self.raw_cache.0.get(key) {
471-
let rc = entry.header.header.load(Ordering::Acquire);
472-
break 'value (rc, rc > 0);
536+
match depth {
537+
Some(d) if d >= NEW_CELLS_DEPTH_THRESHOLD => {
538+
// NOTE: `get` here is used to affect a "hotness" of the value, because
539+
// there is a big chance that we will need it soon during state processing
540+
if let Some(entry) = self.raw_cache.0.get(key) {
541+
let rc = entry.header.header.load(Ordering::Acquire);
542+
break 'value (rc, rc > 0);
543+
}
473544
}
545+
_ => {}
474546
}
475547

476548
match self.db.cells.get(key).map_err(CellStorageError::Internal)? {
@@ -500,20 +572,47 @@ impl StoreContext {
500572
}
501573

502574
fn finalize(self, batch: &mut WriteBatch) -> usize {
503-
let mut buffer = Vec::with_capacity(512);
504-
let total = self.transaction.len();
505-
let cells_cf = &self.db.cells.cf();
506-
for (key, CellWithRefs { rc, data }) in self.transaction {
507-
buffer.clear();
508-
refcount::add_positive_refount(rc, data.as_deref(), &mut buffer);
509-
if let Some(data) = data {
510-
self.raw_cache.insert(&key, rc, &data);
511-
} else {
512-
self.raw_cache.add_refs(&key, rc);
575+
std::thread::scope(|s| {
576+
let number_shards = self.transaction._shard_count();
577+
// safety: we hold only read locks
578+
let shards = unsafe { (0..number_shards).map(|i| self.transaction._get_read_shard(i)) };
579+
let cache = &self.raw_cache;
580+
581+
// todo: clamp to number of cpus x2
582+
for shard in shards {
583+
// spawned threads will be joined at the end of the scope, so we don't need to store them
584+
s.spawn(move || {
585+
for (key, value) in shard {
586+
let value = value.get();
587+
let rc = value.rc;
588+
if let Some(data) = &value.data {
589+
cache.insert(key, rc, data);
590+
} else {
591+
cache.add_refs(key, rc);
592+
}
593+
}
594+
});
513595
}
514-
batch.merge_cf(cells_cf, key.as_slice(), &buffer);
515-
}
516-
total
596+
597+
let batch_update = s.spawn(|| {
598+
let mut buffer = Vec::with_capacity(512);
599+
let total = self.transaction.len();
600+
let cells_cf = &self.db.cells.cf();
601+
for kv in self.transaction.iter() {
602+
let key = kv.key();
603+
let value = kv.value();
604+
let rc = value.rc;
605+
let data = value.data.as_deref();
606+
607+
buffer.clear();
608+
refcount::add_positive_refount(rc, data, &mut buffer);
609+
batch.merge_cf(cells_cf, key.as_slice(), &buffer);
610+
}
611+
total
612+
});
613+
614+
batch_update.join().expect("thread panicked")
615+
})
517616
}
518617
}
519618

0 commit comments

Comments
 (0)