Skip to content

Commit 5e7ac00

Browse files
refactor: address PR feedback
- when grouping persist jobs, check whether adding the next persist job would tip the group over the maximum allowed size before adding it - take cgroup memory limits into account - fix typos - add extra documentation to one of the tests
1 parent f4d3d73 commit 5e7ac00

File tree

2 files changed

+53
-31
lines changed

2 files changed

+53
-31
lines changed

influxdb3_write/src/write_buffer/queryable_buffer.rs

+46-20
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use crate::persister::Persister;
33
use crate::write_buffer::persisted_files::PersistedFiles;
44
use crate::write_buffer::table_buffer::TableBuffer;
55
use crate::{ChunkFilter, ParquetFile, ParquetFileId, PersistedSnapshot};
6-
use crate::{chunk::BufferChunk, write_buffer::table_buffer::SnaphotChunkIter};
6+
use crate::{chunk::BufferChunk, write_buffer::table_buffer::SnapshotChunkIter};
77
use anyhow::Context;
88
use arrow::record_batch::RecordBatch;
99
use arrow_util::util::ensure_schema;
@@ -215,7 +215,7 @@ impl QueryableBuffer {
215215

216216
let chunk_time_to_chunk = &mut table_buffer.chunk_time_to_chunks;
217217
let snapshot_chunks = &mut table_buffer.snapshotting_chunks;
218-
let snapshot_chunks_iter = SnaphotChunkIter {
218+
let snapshot_chunks_iter = SnapshotChunkIter {
219219
keys_to_remove: all_keys_to_remove.iter(),
220220
map: chunk_time_to_chunk,
221221
table_def,
@@ -453,7 +453,6 @@ impl QueryableBuffer {
453453
}
454454
}
455455

456-
#[allow(clippy::too_many_arguments)]
457456
async fn sort_dedupe_parallel<I: Iterator<Item = PersistJob>>(
458457
iterator: I,
459458
persister: &Arc<Persister>,
@@ -505,7 +504,6 @@ async fn sort_dedupe_parallel<I: Iterator<Item = PersistJob>>(
505504
}
506505
}
507506

508-
#[allow(clippy::too_many_arguments)]
509507
async fn sort_dedupe_serial<I: Iterator<Item = PersistJob>>(
510508
iterator: I,
511509
persister: &Arc<Persister>,
@@ -839,6 +837,19 @@ impl<'a> PersistJobGroupedIterator<'a> {
839837
),
840838
}
841839
}
840+
841+
fn free_mem_hint(&mut self) -> (u64, u64) {
842+
self.system.refresh_memory();
843+
let system_mem_bytes = self.system.free_memory() - 100_000_000;
844+
let cgroup_free_mem_bytes = self
845+
.system
846+
.cgroup_limits()
847+
.map(|limit| limit.free_memory)
848+
.unwrap_or(u64::MAX);
849+
let system_mem_bytes = system_mem_bytes.min(cgroup_free_mem_bytes);
850+
let max_size_bytes = self.max_size_bytes.min(system_mem_bytes);
851+
(system_mem_bytes, max_size_bytes)
852+
}
842853
}
843854

844855
impl Iterator for PersistJobGroupedIterator<'_> {
@@ -856,18 +867,18 @@ impl Iterator for PersistJobGroupedIterator<'_> {
856867
let mut min_chunk_time = current_data.chunk_time;
857868
let mut current_size_bytes = current_data.total_batch_size();
858869
debug!(?current_size_bytes, table_name = ?current_data.table_name, ">>> current_size_bytes for table");
859-
self.system.refresh_memory();
860-
let system_mem_bytes = self.system.free_memory() - 100_000_000;
861-
let max_size_bytes = self.max_size_bytes.min(system_mem_bytes);
870+
let (system_mem_bytes, max_size_bytes) = self.free_mem_hint();
862871
debug!(
863872
max_size_bytes,
864873
system_mem_bytes, ">>> max size bytes/system mem bytes"
865874
);
866875

867876
while all_batches.len() < self.chunk_size && current_size_bytes < max_size_bytes {
868-
debug!(?current_size_bytes, ">>> current_size_bytes");
877+
trace!(?current_size_bytes, ">>> current_size_bytes");
869878
if let Some(next_data) = self.iter.peek() {
870-
if next_data.table_id == *current_table_id {
879+
if next_data.table_id == *current_table_id
880+
&& (current_size_bytes + next_data.total_batch_size()) < max_size_bytes
881+
{
871882
let next = self.iter.next().unwrap();
872883
ts_min_max = ts_min_max.union(&next.timestamp_min_max);
873884
min_chunk_time = min_chunk_time.min(next.chunk_time);
@@ -880,18 +891,17 @@ impl Iterator for PersistJobGroupedIterator<'_> {
880891
break;
881892
}
882893
}
894+
debug!(?current_size_bytes, ">>> final batch size in bytes");
883895

884896
let table_defn = self
885897
.catalog
886898
.db_schema_by_id(&current_data.database_id)?
887899
.table_definition_by_id(&current_data.table_id)?;
888900

901+
let arrow = table_defn.schema.as_arrow();
889902
let all_schema_aligned_batches: Vec<RecordBatch> = all_batches
890903
.iter()
891-
.map(|batch| {
892-
ensure_schema(&table_defn.schema.as_arrow(), batch)
893-
.expect("batches should have same schema")
894-
})
904+
.map(|batch| ensure_schema(&arrow, batch).expect("batches should have same schema"))
895905
.collect();
896906

897907
Some(PersistJob {
@@ -918,8 +928,8 @@ impl Iterator for PersistJobGroupedIterator<'_> {
918928
}
919929

920930
pub(crate) struct SortDedupePersistSummary {
921-
pub file_size_bytes: u64,
922-
pub file_meta_data: FileMetaData,
931+
pub(crate) file_size_bytes: u64,
932+
pub(crate) file_meta_data: FileMetaData,
923933
}
924934

925935
impl SortDedupePersistSummary {
@@ -1528,7 +1538,7 @@ mod tests {
15281538
persisted_files: Arc::new(PersistedFiles::new()),
15291539
parquet_cache: None,
15301540
gen1_duration: Gen1Duration::new_1m(),
1531-
max_size_per_parquet_file_bytes: 100_000,
1541+
max_size_per_parquet_file_bytes: 150_000,
15321542
};
15331543
let queryable_buffer = QueryableBuffer::new(queryable_buffer_args);
15341544

@@ -1537,7 +1547,9 @@ mod tests {
15371547
for i in 0..2 {
15381548
// create another write, this time with two tags, in a different gen1 block
15391549
let ts = Gen1Duration::new_1m().as_duration().as_nanos() as i64 + (i * 240_000_000_000);
1540-
let lp = format!("foo,t1=a,t2=b f1=3i,f2=3 {}", ts);
1550+
// keep these tags different to bar so that it's easier to spot the byte differences
1551+
// in the logs, otherwise foo and bar report exact same usage in bytes
1552+
let lp = format!("foo,t1=foo_a f1={}i,f2={} {}", i, i, ts);
15411553
debug!(?lp, ">>> writing line");
15421554
let val = WriteValidator::initialize(db.clone(), Arc::clone(&catalog), 0).unwrap();
15431555

@@ -1566,9 +1578,8 @@ mod tests {
15661578
for i in 0..10 {
15671579
// create another write, this time with two tags, in a different gen1 block
15681580
let ts = Gen1Duration::new_1m().as_duration().as_nanos() as i64 + (i * 240_000_000_000);
1569-
// let line = format!("bar,t1=a,t2=b f1=3i,f2=3 {}", ts);
15701581
let lp = format!(
1571-
"bar,t1=a,t2=b f1=3i,f2=3 {}\nbar,t1=a,t2=c f1=4i,f2=3 {}\nbar,t1=ab,t2=b f1=5i,f2=3 {}",
1582+
"bar,t1=br_a,t2=br_b f1=3i,f2=3 {}\nbar,t1=br_a,t2=br_c f1=4i,f2=3 {}\nbar,t1=ab,t2=bb f1=5i,f2=3 {}",
15721583
ts, ts, ts
15731584
);
15741585
debug!(?lp, ">>> writing line");
@@ -1637,12 +1648,27 @@ mod tests {
16371648
assert_eq!(2, foo_file.row_count);
16381649
}
16391650

1640-
// bar had 10 writes with 3 lines, should write 4 files each with 9 rows or 3 row in them
1651+
// bar had 10 writes (each in separate chunk) with 3 lines in each write,
1652+
// so these are grouped but because of the larger writes and max memory
1653+
// is set to 150_000 bytes at the top, we end up with 4 files.
16411654
let table = db.table_definition("bar").unwrap();
16421655
let files = queryable_buffer
16431656
.persisted_files
16441657
.get_files(db.id, table.table_id);
16451658
debug!(?files, ">>> test: queryable buffer persisted files");
1659+
1660+
// Below is the growth in memory (bytes) as reported by arrow record batches
1661+
//
1662+
// >>> current_size_bytes for table current_size_bytes=43952 table_name="bar"
1663+
// >>> final batch size in bytes current_size_bytes=131856
1664+
// >>> current_size_bytes for table current_size_bytes=43952 table_name="bar"
1665+
// >>> final batch size in bytes current_size_bytes=131856
1666+
// >>> current_size_bytes for table current_size_bytes=43952 table_name="bar"
1667+
// >>> final batch size in bytes current_size_bytes=131856
1668+
// >>> current_size_bytes for table current_size_bytes=43952 table_name="bar"
1669+
// >>> final batch size in bytes current_size_bytes=43952
1670+
// >>> current_size_bytes for table current_size_bytes=34408 table_name="foo"
1671+
// >>> final batch size in bytes current_size_bytes=68816
16461672
assert_eq!(4, files.len());
16471673
for bar_file in files {
16481674
debug!(?bar_file, ">>> test: bar_file");

influxdb3_write/src/write_buffer/table_buffer.rs

+7-11
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,9 @@ use influxdb3_wal::{FieldData, Row};
1414
use observability_deps::tracing::error;
1515
use schema::sort::SortKey;
1616
use schema::{InfluxColumnType, InfluxFieldType, Schema, SchemaBuilder};
17+
use std::collections::BTreeMap;
1718
use std::mem::size_of;
1819
use std::sync::Arc;
19-
use std::{
20-
collections::BTreeMap,
21-
mem::{self},
22-
};
2320
use std::{collections::btree_map::Entry, slice::Iter};
2421
use thiserror::Error;
2522

@@ -191,18 +188,17 @@ impl TableBuffer {
191188
}
192189

193190
pub fn clear_snapshots(&mut self) {
194-
// vec clear still holds the mem (capacity), so use take
195-
let _ = mem::take(&mut self.snapshotting_chunks);
191+
self.snapshotting_chunks.clear();
196192
}
197193
}
198194

199-
pub(crate) struct SnaphotChunkIter<'a> {
200-
pub keys_to_remove: Iter<'a, i64>,
201-
pub map: &'a mut BTreeMap<i64, MutableTableChunk>,
202-
pub table_def: Arc<TableDefinition>,
195+
pub(crate) struct SnapshotChunkIter<'a> {
196+
pub(crate) keys_to_remove: Iter<'a, i64>,
197+
pub(crate) map: &'a mut BTreeMap<i64, MutableTableChunk>,
198+
pub(crate) table_def: Arc<TableDefinition>,
203199
}
204200

205-
impl Iterator for SnaphotChunkIter<'_> {
201+
impl Iterator for SnapshotChunkIter<'_> {
206202
type Item = SnapshotChunk;
207203

208204
fn next(&mut self) -> Option<Self::Item> {

0 commit comments

Comments
 (0)