Skip to content

Commit 5326340

Browse files
authored
Merge pull request #3 from sdf-jkl/TLP-having
Add TLP Having Oracle
2 parents 4f9068d + f525ce1 commit 5326340

11 files changed

Lines changed: 823 additions & 147 deletions

File tree

README.md

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,19 @@ To run with command-line options:
2727
cargo run --release -- --config datafusion-fuzzer.toml --rounds 5 --queries-per-round 20
2828
```
2929

30+
To run with verbose oracle/query logs to stdout:
31+
```bash
32+
RUST_LOG=info cargo run -- --config datafusion-fuzzer.toml --display-logs
33+
```
34+
35+
## Oracles
36+
37+
For each test case, the runner picks one oracle at random from the configured `oracles` list. Available oracles include:
38+
39+
- `NoCrashOracle`: checks for non-whitelisted crashes/errors.
40+
- `TlpWhereOracle`: validates TLP partitioning over `WHERE` (`p`, `NOT p`, `p IS NULL`) via value-level multiset comparison.
41+
- `TlpHavingOracle`: validates TLP partitioning over `HAVING` (`p`, `NOT p`, `p IS NULL`) via value-level multiset comparison.
42+
3043
## Configuration
3144

3245
The fuzzer supports extensive configuration options to customize the fuzzing process.
@@ -45,18 +58,21 @@ See `datafusion-fuzzer.toml` for an example configuration file:
4558
seed = 42
4659
rounds = 3
4760
queries_per_round = 10
48-
timeout_seconds = 30
61+
timeout_seconds = 2
4962

5063
# Logging settings
51-
display_logs = true
52-
enable_tui = false
53-
# log_path = "logs/datafusion-fuzzer.log"
64+
display_logs = false
65+
enable_tui = true
66+
log_path = "logs"
67+
sample_interval_secs = 5
5468

5569
# Table generation parameters
5670
max_column_count = 5
5771
max_row_count = 100
5872
max_expr_level = 3
73+
max_group_by_count = 3
5974
max_table_count = 3
75+
max_insert_per_table = 20
6076
```
6177

6278
### Command Line Options
@@ -69,6 +85,8 @@ Options:
6985
-q, --queries-per-round <QUERIES> Number of queries per round
7086
-t, --timeout <TIMEOUT> Query timeout in seconds
7187
-l, --log-path <LOG_PATH> Path to log file
88+
-d, --display-logs Display logs
89+
--enable-tui Enable TUI display
7290
-h, --help Print help
7391
-V, --version Print version
7492
```
@@ -79,13 +97,14 @@ Options:
7997
- `max_column_count`: Maximum number of columns per generated table (default: 5)
8098
- `max_row_count`: Maximum number of rows per generated table (default: 100)
8199
- `max_expr_level`: Maximum expression nesting level (default: 3)
100+
- `max_group_by_count`: Maximum number of `GROUP BY` expressions (default: 3)
82101

83102
## Progress Tracker
84103
### SQL Features
85104
- [x] where
86105
- [ ] sort + limit, offset
87106
- [ ] aggregate
88-
- [ ] having
107+
- [x] having
89108
- [ ] join
90109
- [ ] union/union all/intersect/except
91110

@@ -112,4 +131,4 @@ Options:
112131

113132
## License
114133

115-
[MIT](LICENSE)
134+
[MIT](LICENSE)

datafusion-fuzzer.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,11 @@ sample_interval_secs = 5
2828
max_column_count = 5
2929
max_row_count = 100
3030
max_expr_level = 3
31+
max_group_by_count = 3
3132
max_table_count = 3
3233
max_insert_per_table = 20
3334

34-
# Supported oracles: NoCrash, NestedQueries, TlpWhere.
35+
# Supported oracles: NoCrash, NestedQueries, TlpWhere, TlpHaving.
3536
# Randomly select one oracle from the configured set for each query.
3637
oracles = ["NoCrash"]
37-
# oracles = ["NoCrash", "NestedQueries", "TlpWhere"]
38+
# oracles = ["NoCrash", "NestedQueries", "TlpWhere", "TlpHaving"]

src/cli/runner.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,7 @@ mod tests {
480480
max_column_count: 3,
481481
max_row_count: 10,
482482
max_expr_level: 2,
483+
max_group_by_count: 2,
483484
max_table_count: 3,
484485
max_insert_per_table: 20,
485486
oracles: vec![crate::oracle::ConfiguredOracle::NoCrash],
@@ -599,6 +600,7 @@ mod tests {
599600
max_column_count: 3,
600601
max_row_count: 10,
601602
max_expr_level: 2,
603+
max_group_by_count: 2,
602604
max_table_count: 3,
603605
max_insert_per_table: 20,
604606
oracles: vec![crate::oracle::ConfiguredOracle::NoCrash],

src/fuzz_context/runner_config.rs

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ pub struct RunnerConfig {
3030
pub max_column_count: u64,
3131
pub max_row_count: u64,
3232
pub max_expr_level: u32,
33+
#[serde(default = "RunnerConfig::default_max_group_by_count")]
34+
pub max_group_by_count: u32,
3335
pub max_table_count: u32,
3436
pub max_insert_per_table: u32,
3537
#[serde(default = "RunnerConfig::default_oracles", alias = "oracle")]
@@ -100,6 +102,10 @@ impl RunnerConfig {
100102
fn default_oracles() -> Vec<ConfiguredOracle> {
101103
vec![ConfiguredOracle::NoCrash]
102104
}
105+
106+
/// Serde fallback for `max_group_by_count`, also reused by the `Default`
/// implementation so both paths agree on the same value.
fn default_max_group_by_count() -> u32 {
    const FALLBACK_GROUP_BY_COUNT: u32 = 3;
    FALLBACK_GROUP_BY_COUNT
}
103109
}
104110

105111
impl Default for RunnerConfig {
@@ -116,6 +122,7 @@ impl Default for RunnerConfig {
116122
max_column_count: 5,
117123
max_row_count: 100,
118124
max_expr_level: 3,
125+
max_group_by_count: Self::default_max_group_by_count(),
119126
max_table_count: 3,
120127
max_insert_per_table: 20,
121128
oracles: Self::default_oracles(),
@@ -142,6 +149,7 @@ sample_interval_secs = 5
142149
max_column_count = 5
143150
max_row_count = 100
144151
max_expr_level = 3
152+
max_group_by_count = 3
145153
max_table_count = 3
146154
max_insert_per_table = 20
147155
oracles = ["NoCrash"]
@@ -167,9 +175,10 @@ sample_interval_secs = 5
167175
max_column_count = 5
168176
max_row_count = 100
169177
max_expr_level = 3
178+
max_group_by_count = 3
170179
max_table_count = 3
171180
max_insert_per_table = 20
172-
oracles = ["NoCrash", "NestedQueries", "TlpWhere"]
181+
oracles = ["NoCrash", "NestedQueries", "TlpWhere", "TlpHaving"]
173182
"#,
174183
)
175184
.unwrap();
@@ -179,7 +188,8 @@ oracles = ["NoCrash", "NestedQueries", "TlpWhere"]
179188
vec![
180189
ConfiguredOracle::NoCrash,
181190
ConfiguredOracle::NestedQueries,
182-
ConfiguredOracle::TlpWhere
191+
ConfiguredOracle::TlpWhere,
192+
ConfiguredOracle::TlpHaving
183193
]
184194
);
185195
}
@@ -199,6 +209,7 @@ sample_interval_secs = 5
199209
max_column_count = 5
200210
max_row_count = 100
201211
max_expr_level = 3
212+
max_group_by_count = 3
202213
max_table_count = 3
203214
max_insert_per_table = 20
204215
oracles = []

src/oracle/mod.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,11 @@
33
pub(crate) mod oracle_common;
44
pub mod oracle_impl_nested_queries;
55
pub mod oracle_impl_no_crash;
6+
pub mod oracle_impl_tlp_having;
67
pub mod oracle_impl_tlp_where;
78
pub mod oracle_trait;
9+
#[cfg(test)]
10+
pub(crate) mod test_helpers;
811

912
use std::sync::Arc;
1013

@@ -15,6 +18,7 @@ use crate::fuzz_context::GlobalContext;
1518
// Re-export main types and traits
1619
pub use oracle_impl_nested_queries::NestedQueriesOracle;
1720
pub use oracle_impl_no_crash::NoCrashOracle;
21+
pub use oracle_impl_tlp_having::TlpHavingOracle;
1822
pub use oracle_impl_tlp_where::TlpWhereOracle;
1923
pub use oracle_trait::{Oracle, QueryContext, QueryExecutionResult};
2024

@@ -26,6 +30,8 @@ pub enum ConfiguredOracle {
2630
NestedQueries,
2731
#[serde(rename = "TlpWhere", alias = "TlpWhereOracle")]
2832
TlpWhere,
33+
#[serde(rename = "TlpHaving", alias = "TlpHavingOracle")]
34+
TlpHaving,
2935
}
3036

3137
impl ConfiguredOracle {
@@ -34,6 +40,7 @@ impl ConfiguredOracle {
3440
Self::NoCrash => Box::new(NoCrashOracle::new(seed, ctx)),
3541
Self::NestedQueries => Box::new(NestedQueriesOracle::new(seed, ctx)),
3642
Self::TlpWhere => Box::new(TlpWhereOracle::new(seed, ctx)),
43+
Self::TlpHaving => Box::new(TlpHavingOracle::new(seed, ctx)),
3744
}
3845
}
3946
}

src/oracle/oracle_common.rs

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,29 @@
11
use crate::common::{Result, fuzzer_err, util};
22
use crate::oracle::QueryExecutionResult;
33

4+
pub(crate) fn validate_binary_tlp_consistency(
5+
results: &[QueryExecutionResult],
6+
oracle_name: &str,
7+
) -> Result<()> {
8+
let result_count = results.len();
9+
if result_count != 2 {
10+
return Err(fuzzer_err(&format!(
11+
"{oracle_name} expects 2 query results, got {result_count}"
12+
)));
13+
}
14+
15+
let num_ok = results.iter().filter(|r| r.result.is_ok()).count();
16+
let num_err = result_count - num_ok;
17+
18+
match num_ok {
19+
2 => validate_value_equivalence(results, 0, 1, oracle_name),
20+
0 => Ok(()),
21+
_ => Err(fuzzer_err(&format!(
22+
"{oracle_name} consistency requires all queries to either succeed or fail; got mixed outcomes (ok={num_ok}, err={num_err})"
23+
))),
24+
}
25+
}
26+
427
pub(crate) fn validate_value_equivalence(
528
results: &[QueryExecutionResult],
629
left_idx: usize,
@@ -25,3 +48,67 @@ pub(crate) fn validate_value_equivalence(
2548

2649
util::validate_batches_value_equivalence(left_batches, right_batches, oracle_name)
2750
}
51+
52+
pub(crate) fn append_labeled_query_results(
53+
report: &mut String,
54+
results: &[QueryExecutionResult],
55+
labels: &[&str],
56+
) {
57+
for (idx, result) in results.iter().enumerate() {
58+
let label = labels.get(idx).copied().unwrap_or("unknown");
59+
report.push_str(&format!(
60+
"Q{} [{}]:\n{}\n",
61+
idx + 1,
62+
label,
63+
result.query_context.query
64+
));
65+
66+
match &result.result {
67+
Ok(batches) => report.push_str(&format!(
68+
" status: ok, rows={}\n\n",
69+
util::count_total_rows(batches)
70+
)),
71+
Err(e) => report.push_str(&format!(" status: error, details={}\n\n", e)),
72+
}
73+
}
74+
}
75+
76+
pub(crate) fn append_binary_value_equivalence_report(
77+
report: &mut String,
78+
results: &[QueryExecutionResult],
79+
) -> Result<()> {
80+
if results.len() != 2 || results.iter().any(|r| r.result.is_err()) {
81+
return Ok(());
82+
}
83+
84+
let q_all_batches = results[0]
85+
.result
86+
.as_ref()
87+
.map_err(|e| fuzzer_err(&e.to_string()))?;
88+
let q_union_batches = results[1]
89+
.result
90+
.as_ref()
91+
.map_err(|e| fuzzer_err(&e.to_string()))?;
92+
93+
report.push_str(&format!(
94+
"Row counts: all={}, partition_union={}\n",
95+
util::count_total_rows(q_all_batches),
96+
util::count_total_rows(q_union_batches)
97+
));
98+
99+
let all_multiset = util::batches_to_row_multiset(q_all_batches)?;
100+
let partition_multiset = util::batches_to_row_multiset(q_union_batches)?;
101+
102+
if all_multiset != partition_multiset {
103+
report.push_str("\nTop multiset differences:\n");
104+
report.push_str(&util::format_row_multiset_diff(
105+
&all_multiset,
106+
&partition_multiset,
107+
));
108+
report.push('\n');
109+
} else {
110+
report.push_str("Multiset equivalence: true\n");
111+
}
112+
113+
Ok(())
114+
}

0 commit comments

Comments
 (0)