Skip to content

Commit 261dd8d

Browse files
committed
Merge branch 'main' of https://github.com/2010YOUY01/datafusion-fuzzer into TLP-having
2 parents fec4647 + 4f9068d commit 261dd8d

9 files changed

Lines changed: 790 additions & 199 deletions

File tree

.github/workflows/test.yml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
name: Test Suite
2+
3+
on:
4+
push:
5+
pull_request:
6+
workflow_dispatch:
7+
8+
jobs:
9+
integration-test:
10+
runs-on: ubuntu-latest
11+
permissions:
12+
contents: read
13+
steps:
14+
- name: Checkout
15+
uses: actions/checkout@v4
16+
17+
- name: Install Rust
18+
uses: dtolnay/rust-toolchain@stable
19+
20+
- name: Cache Rust artifacts
21+
uses: Swatinem/rust-cache@v2
22+
23+
- name: Run unit tests
24+
run: cargo test --lib
25+
26+
- name: Run integration snapshot test
27+
run: cargo test --test integration_test

Cargo.lock

Lines changed: 38 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ async-trait = "0.1"
2424
regex = "1.0"
2525
signal-hook = "0.3"
2626

27+
[dev-dependencies]
28+
insta = { version = "1", features = ["yaml"] }
29+
2730
[profile.release]
2831
lto = "thin"
2932
codegen-units = 1

datafusion-fuzzer.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,8 @@ max_row_count = 100
3030
max_expr_level = 3
3131
max_table_count = 3
3232
max_insert_per_table = 20
33+
34+
# Supported oracles: NoCrash, NestedQueries, TlpWhere.
35+
# Randomly select one oracle from the configured set for each query.
36+
oracles = ["NoCrash"]
37+
# oracles = ["NoCrash", "NestedQueries", "TlpWhere"]

src/cli/runner.rs

Lines changed: 100 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ use datafusion::arrow::record_batch::RecordBatch;
22
use datafusion::common::instant::Instant;
33
use rand::rngs::StdRng;
44
use rand::{Rng, SeedableRng};
5+
use std::fs::OpenOptions;
6+
use std::io::Write;
57
use std::sync::Arc;
68
use std::time::Duration;
79
use tracing::{error, info, warn};
@@ -11,9 +13,7 @@ use crate::common::{InclusionConfig, LogicalTable, Result};
1113
use crate::datasource_generator::dataset_generator::DatasetGenerator;
1214
use crate::fuzz_context::{GlobalContext, ctx_observability::display_all_tables};
1315
use crate::fuzz_runner::{record_query_with_time, update_stat_for_round_completion};
14-
use crate::oracle::{
15-
NoCrashOracle, Oracle, QueryContext, QueryExecutionResult, TlpHavingOracle, TlpWhereOracle,
16-
};
16+
use crate::oracle::{Oracle, QueryContext, QueryExecutionResult};
1717
use crate::query_generator::stmt_select_def::SelectStatementBuilder;
1818

1919
use super::error_whitelist;
@@ -51,7 +51,7 @@ pub async fn run_fuzzer(ctx: Arc<GlobalContext>) -> Result<()> {
5151
let query_seed = query_base_seed.wrapping_add(i as u64);
5252

5353
// >>> CORE LOGIC <<<
54-
execute_oracle_test(query_seed, &ctx).await;
54+
let _ = execute_oracle_test(round, i, query_seed, &ctx).await?;
5555
}
5656

5757
update_stat_for_round_completion(&ctx.fuzzer_stats);
@@ -208,40 +208,42 @@ async fn create_and_register_view(
208208
Ok(())
209209
}
210210

211-
async fn execute_oracle_test(seed: u64, ctx: &Arc<GlobalContext>) -> bool {
212-
// Create a deterministic RNG instance for this test
213-
let mut rng = StdRng::seed_from_u64(seed);
214-
215-
// === Select a random oracle ===
216-
// TODO: disabled views since joining too many table is slow
217-
let available_oracles: Vec<Box<dyn Oracle + Send>> = vec![
218-
Box::new(NoCrashOracle::new(seed, Arc::clone(ctx))),
219-
Box::new(TlpWhereOracle::new(seed, Arc::clone(ctx))),
220-
Box::new(TlpHavingOracle::new(seed, Arc::clone(ctx))),
221-
// Box::new(NestedQueriesOracle::new(seed, Arc::clone(ctx))),
222-
];
223-
let oracle_index = rng.random_range(0..available_oracles.len());
224-
let mut selected_oracle = available_oracles.into_iter().nth(oracle_index).unwrap();
211+
async fn execute_oracle_test(
212+
round: u32,
213+
query_index: u32,
214+
seed: u64,
215+
ctx: &Arc<GlobalContext>,
216+
) -> Result<bool> {
217+
let mut randomly_selected_oracle = select_random_configured_oracle(seed, ctx);
225218

226-
info!("Selected oracle: {}", selected_oracle);
219+
info!("Selected oracle: {}", randomly_selected_oracle);
227220

228221
// === Generate query group ===
229-
let query_group = match selected_oracle.generate_query_group() {
222+
let query_group = match randomly_selected_oracle.generate_query_group() {
230223
Ok(group) => group,
231224
Err(e) => {
232225
let err_msg = format!("Failed to generate query group: {}", e);
233226
if !is_error_whitelisted(&err_msg, None) {
234227
error!(err_msg)
235228
}
236-
return false;
229+
return Ok(false);
237230
}
238231
};
239232

240233
if query_group.is_empty() {
241234
warn!("Oracle generated empty query group");
242-
return false;
235+
return Ok(false);
243236
}
244237

238+
append_query_log(
239+
ctx,
240+
round,
241+
query_index,
242+
seed,
243+
randomly_selected_oracle.name(),
244+
&query_group,
245+
)?;
246+
245247
// === Execute queries and collect results ===
246248
let mut execution_results = Vec::new();
247249
for query_context in query_group {
@@ -257,26 +259,95 @@ async fn execute_oracle_test(seed: u64, ctx: &Arc<GlobalContext>) -> bool {
257259
}
258260

259261
// === Validate execution results ===
260-
match selected_oracle
262+
match randomly_selected_oracle
261263
.validate_consistency(&execution_results)
262264
.await
263265
{
264266
Ok(_) => {
265267
info!("Oracle test passed");
266-
true
268+
Ok(true)
267269
}
268270
Err(e) => {
269271
error!("Oracle test failed: {}", e);
270272

271273
// Log error report if available
272-
if let Ok(error_report) = selected_oracle.create_error_report(&execution_results) {
274+
if let Ok(error_report) =
275+
randomly_selected_oracle.create_error_report(&execution_results)
276+
{
273277
error!("Error Report:\n{}", error_report);
274278
}
275-
false
279+
Ok(false)
276280
}
277281
}
278282
}
279283

284+
fn select_random_configured_oracle(seed: u64, ctx: &Arc<GlobalContext>) -> Box<dyn Oracle + Send> {
285+
// Randomly pick one oracle for this query; the configured oracle set bounds the choice.
286+
let available_oracles: Vec<Box<dyn Oracle + Send>> = ctx
287+
.runner_config
288+
.oracles
289+
.iter()
290+
.copied()
291+
.map(|oracle| oracle.build(seed, Arc::clone(ctx)))
292+
.collect();
293+
294+
let mut rng = StdRng::seed_from_u64(seed);
295+
let oracle_index = rng.random_range(0..available_oracles.len());
296+
available_oracles.into_iter().nth(oracle_index).unwrap()
297+
}
298+
299+
fn append_query_log(
300+
ctx: &Arc<GlobalContext>,
301+
round: u32,
302+
query_index: u32,
303+
query_seed: u64,
304+
oracle_name: &str,
305+
query_group: &[QueryContext],
306+
) -> Result<()> {
307+
let Some(log_dir) = &ctx.runner_config.log_path else {
308+
return Ok(());
309+
};
310+
311+
let query_log_path = log_dir.join("queries.log");
312+
let mut file = OpenOptions::new()
313+
.create(true)
314+
.append(true)
315+
.open(&query_log_path)
316+
.map_err(|e| {
317+
crate::common::fuzzer_err(&format!(
318+
"Failed to open query log '{}': {}",
319+
query_log_path.display(),
320+
e
321+
))
322+
})?;
323+
324+
writeln!(
325+
file,
326+
"=== round={} query={} oracle={} query_seed={} ===",
327+
round + 1,
328+
query_index + 1,
329+
oracle_name,
330+
query_seed
331+
)?;
332+
333+
for (statement_index, query_context) in query_group.iter().enumerate() {
334+
match &query_context.context_description {
335+
Some(description) => writeln!(
336+
file,
337+
"--- statement={} context={} ---",
338+
statement_index + 1,
339+
description
340+
)?,
341+
None => writeln!(file, "--- statement={} ---", statement_index + 1)?,
342+
}
343+
344+
writeln!(file, "{}", query_context.query)?;
345+
writeln!(file)?;
346+
}
347+
348+
Ok(())
349+
}
350+
280351
/// Query execution result that tracks both the outcome and whether it timed out
281352
#[derive(Debug)]
282353
struct QueryExecutionOutcome {
@@ -411,6 +482,7 @@ mod tests {
411482
max_expr_level: 2,
412483
max_table_count: 3,
413484
max_insert_per_table: 20,
485+
oracles: vec![crate::oracle::ConfiguredOracle::NoCrash],
414486
};
415487

416488
// Collect results from multiple runs
@@ -529,6 +601,7 @@ mod tests {
529601
max_expr_level: 2,
530602
max_table_count: 3,
531603
max_insert_per_table: 20,
604+
oracles: vec![crate::oracle::ConfiguredOracle::NoCrash],
532605
};
533606

534607
let mut results_by_seed = Vec::new();
@@ -627,8 +700,7 @@ mod tests {
627700
for i in 0..ctx.runner_config.queries_per_round {
628701
let query_seed = query_base_seed.wrapping_add(i as u64);
629702

630-
// Generate a query using the same logic as execute_oracle_test
631-
let mut oracle = NoCrashOracle::new(query_seed, Arc::clone(&ctx));
703+
let mut oracle = select_random_configured_oracle(query_seed, &ctx);
632704
if let Ok(query_group) = oracle.generate_query_group() {
633705
if let Some(query_context) = query_group.first() {
634706
captured_queries

0 commit comments

Comments
 (0)