Skip to content

Commit a9fe234

Browse files
authored
feat: ClickHouse batch size config (#406)
1 parent 97a3063 commit a9fe234

File tree

5 files changed

+68
-5
lines changed

5 files changed

+68
-5
lines changed

core/src/database/clickhouse/batch_operations/dynamic.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ pub async fn execute_dynamic_batch_operation(
2222
return Ok(());
2323
}
2424

25-
for batch in rows.chunks(1000) {
25+
for batch in rows.chunks(database.batch_size()) {
2626
execute_batch(database, table_name, op_type, batch).await.map_err(|e| {
2727
tracing::error!("{} - Batch operation failed: {}", event_name, e);
2828
e

core/src/database/clickhouse/batch_operations/macros.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ macro_rules! create_batch_clickhouse_operation {
211211
Ok(())
212212
}
213213

214-
for batch in filtered_results.chunks(1000) {
214+
for batch in filtered_results.chunks(database.batch_size()) {
215215
if let Err(e) = execute_batch(database, batch).await {
216216
rindexer_error!("{} - Batch operation failed: {}", $event_name, e);
217217
return Err(e);

core/src/database/clickhouse/client.rs

Lines changed: 60 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ use tracing::info;
88
use crate::metrics::database::{self as db_metrics, ops};
99
use crate::EthereumSqlTypeWrapper;
1010

11+
const DEFAULT_CLICKHOUSE_BATCH_SIZE: usize = 1000;
12+
const CLICKHOUSE_BATCH_SIZE_ENV: &str = "RINDEXER_CLICKHOUSE_BATCH_SIZE";
13+
1114
pub struct ClickhouseConnection {
1215
url: String,
1316
user: String,
@@ -45,11 +48,35 @@ pub enum ClickhouseError {
4548

4649
pub struct ClickhouseClient {
4750
pub(crate) conn: Client,
51+
batch_size: usize,
52+
}
53+
54+
fn parse_clickhouse_batch_size() -> usize {
55+
parse_clickhouse_batch_size_value(env::var(CLICKHOUSE_BATCH_SIZE_ENV).ok())
56+
}
57+
58+
fn parse_clickhouse_batch_size_value(value: Option<String>) -> usize {
59+
match value {
60+
Some(raw) => match raw.parse::<usize>() {
61+
Ok(parsed) if parsed > 0 => parsed,
62+
_ => {
63+
tracing::warn!(
64+
"{} is invalid (value: {:?}); using default {}",
65+
CLICKHOUSE_BATCH_SIZE_ENV,
66+
raw,
67+
DEFAULT_CLICKHOUSE_BATCH_SIZE
68+
);
69+
DEFAULT_CLICKHOUSE_BATCH_SIZE
70+
}
71+
},
72+
None => DEFAULT_CLICKHOUSE_BATCH_SIZE,
73+
}
4874
}
4975

5076
impl ClickhouseClient {
5177
pub async fn new() -> Result<Self, ClickhouseConnectionError> {
5278
let connection = clickhouse_connection()?;
79+
let batch_size = parse_clickhouse_batch_size();
5380

5481
let client = Client::default()
5582
.with_url(connection.url)
@@ -58,9 +85,13 @@ impl ClickhouseClient {
5885
.with_password(connection.password);
5986

6087
client.query("select 1").execute().await?;
61-
info!("Clickhouse client connected successfully!");
88+
info!("Clickhouse client connected successfully! dynamic batch size={}", batch_size);
89+
90+
Ok(ClickhouseClient { conn: client, batch_size })
91+
}
6292

63-
Ok(ClickhouseClient { conn: client })
93+
pub fn batch_size(&self) -> usize {
94+
self.batch_size
6495
}
6596

6697
pub async fn query_one<T>(&self, sql: &str) -> Result<T, ClickhouseError>
@@ -177,3 +208,30 @@ impl ClickhouseClient {
177208
self.bulk_insert_via_query(table_name, column_names, bulk_data).await
178209
}
179210
}
211+
212+
#[cfg(test)]
213+
mod tests {
214+
use super::{parse_clickhouse_batch_size_value, DEFAULT_CLICKHOUSE_BATCH_SIZE};
215+
216+
#[test]
217+
fn clickhouse_batch_size_defaults_when_env_missing() {
218+
assert_eq!(parse_clickhouse_batch_size_value(None), DEFAULT_CLICKHOUSE_BATCH_SIZE);
219+
}
220+
221+
#[test]
222+
fn clickhouse_batch_size_reads_positive_env_override() {
223+
assert_eq!(parse_clickhouse_batch_size_value(Some("5000".to_string())), 5000);
224+
}
225+
226+
#[test]
227+
fn clickhouse_batch_size_rejects_zero_or_invalid_values() {
228+
assert_eq!(
229+
parse_clickhouse_batch_size_value(Some("0".to_string())),
230+
DEFAULT_CLICKHOUSE_BATCH_SIZE
231+
);
232+
assert_eq!(
233+
parse_clickhouse_batch_size_value(Some("invalid".to_string())),
234+
DEFAULT_CLICKHOUSE_BATCH_SIZE
235+
);
236+
}
237+
}

documentation/docs/pages/docs/changelog.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
### Features
1010
-------------------------------------------------
1111
- feat: Add Twilio SMS alerts support — send SMS notifications for on-chain events via the Twilio API
12+
- feat: **`RINDEXER_CLICKHOUSE_BATCH_SIZE` env var** — configure the ClickHouse batch chunk size used for no-code/custom table writes. Defaults to `1000`. Increasing it reduces the number of sequential `INSERT` statements for high-volume streams, at the cost of larger per-request payloads.
1213

1314
## Releases
1415
-------------------------------------------------
@@ -36,7 +37,6 @@ github branch - https://github.com/joshstevens19/rindexer/tree/release/0.38.0
3637

3738
- feat: **`database` field on custom tables** — optional YAML field that directs a custom table to a specific ClickHouse database (or PostgreSQL schema) instead of the default `{project}_{contract}` naming. Enables multiple contracts to write to a shared table (e.g., `database: indexer` → `indexer.events`).
3839

39-
4040
# 0.37.2-beta - 1st April 2026
4141

4242
github branch - https://github.com/joshstevens19/rindexer/tree/release/0.37.2

documentation/docs/pages/docs/start-building/yaml-config/storage.mdx

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1010,8 +1010,13 @@ CLICKHOUSE_URL="http://[host]:[port]"
10101010
CLICKHOUSE_DB="default"
10111011
CLICKHOUSE_USER="default"
10121012
CLICKHOUSE_PASSWORD="default"
1013+
RINDEXER_CLICKHOUSE_BATCH_SIZE="1000"
10131014
```
10141015

1016+
`RINDEXER_CLICKHOUSE_BATCH_SIZE` controls the chunk size used when rindexer writes dynamic/no-code ClickHouse batches. The default is `1000`.
1017+
1018+
For high-volume streams, increasing this value reduces the number of sequential ClickHouse `INSERT` requests. The tradeoff is that each request becomes larger, so values should be increased carefully based on the workload and ClickHouse capacity.
1019+
10151020
### enabled
10161021

10171022
Controls whether ClickHouse is enabled. If you do not wish to use ClickHouse, you can set this to `false` or remove the `clickhouse` section from the storage configuration entirely.

0 commit comments

Comments
 (0)