Skip to content

Commit 9396efb

Browse files
committed
add integration test
1 parent 261dd8d commit 9396efb

6 files changed

Lines changed: 292 additions & 6 deletions

File tree

datafusion-fuzzer.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ max_expr_level = 3
3131
max_table_count = 3
3232
max_insert_per_table = 20
3333

34-
# Supported oracles: NoCrash, NestedQueries, TlpWhere.
34+
# Supported oracles: NoCrash, NestedQueries, TlpWhere, TlpHaving.
3535
# Randomly select one oracle from the configured set for each query.
3636
oracles = ["NoCrash"]
37-
# oracles = ["NoCrash", "NestedQueries", "TlpWhere"]
37+
# oracles = ["NoCrash", "NestedQueries", "TlpWhere", "TlpHaving"]

src/fuzz_context/runner_config.rs

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -169,7 +169,7 @@ max_row_count = 100
169169
max_expr_level = 3
170170
max_table_count = 3
171171
max_insert_per_table = 20
172-
oracles = ["NoCrash", "NestedQueries", "TlpWhere"]
172+
oracles = ["NoCrash", "NestedQueries", "TlpWhere", "TlpHaving"]
173173
"#,
174174
)
175175
.unwrap();
@@ -179,7 +179,8 @@ oracles = ["NoCrash", "NestedQueries", "TlpWhere"]
179179
vec![
180180
ConfiguredOracle::NoCrash,
181181
ConfiguredOracle::NestedQueries,
182-
ConfiguredOracle::TlpWhere
182+
ConfiguredOracle::TlpWhere,
183+
ConfiguredOracle::TlpHaving
183184
]
184185
);
185186
}

src/oracle/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,8 @@ pub enum ConfiguredOracle {
2929
NestedQueries,
3030
#[serde(rename = "TlpWhere", alias = "TlpWhereOracle")]
3131
TlpWhere,
32+
#[serde(rename = "TlpHaving", alias = "TlpHavingOracle")]
33+
TlpHaving,
3234
}
3335

3436
impl ConfiguredOracle {
@@ -37,6 +39,7 @@ impl ConfiguredOracle {
3739
Self::NoCrash => Box::new(NoCrashOracle::new(seed, ctx)),
3840
Self::NestedQueries => Box::new(NestedQueriesOracle::new(seed, ctx)),
3941
Self::TlpWhere => Box::new(TlpWhereOracle::new(seed, ctx)),
42+
Self::TlpHaving => Box::new(TlpHavingOracle::new(seed, ctx)),
4043
}
4144
}
4245
}

src/oracle/oracle_impl_tlp_having.rs

Lines changed: 41 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -128,7 +128,7 @@ impl Oracle for TlpHavingOracle {
128128
#[cfg(test)]
129129
mod tests {
130130
use super::*;
131-
use crate::common::{FuzzerDataType, LogicalColumn, LogicalTable};
131+
use crate::common::{FuzzerDataType, LogicalColumn, LogicalTable, init_available_data_types};
132132
use datafusion::arrow::array::{Array, Int64Array};
133133
use datafusion::arrow::datatypes::{DataType, Field, Schema};
134134
use datafusion::arrow::record_batch::RecordBatch;
@@ -199,8 +199,48 @@ mod tests {
199199
assert!(oracle.validate_consistency(&results).await.is_ok());
200200
}
201201

202+
#[tokio::test]
203+
async fn tlp_having_validate_fails_for_schema_mismatch() {
204+
let one_col_schema = Arc::new(Schema::new(vec![Field::new("c1", DataType::Int64, false)]));
205+
let one_col_batch = RecordBatch::try_new(
206+
one_col_schema,
207+
vec![Arc::new(Int64Array::from(vec![1, 2])) as Arc<dyn Array>],
208+
)
209+
.unwrap();
210+
211+
let two_col_schema = Arc::new(Schema::new(vec![
212+
Field::new("c1", DataType::Int64, false),
213+
Field::new("c2", DataType::Int64, false),
214+
]));
215+
let two_col_batch = RecordBatch::try_new(
216+
two_col_schema,
217+
vec![
218+
Arc::new(Int64Array::from(vec![1])) as Arc<dyn Array>,
219+
Arc::new(Int64Array::from(vec![9])) as Arc<dyn Array>,
220+
],
221+
)
222+
.unwrap();
223+
224+
let oracle =
225+
TlpHavingOracle::new(1, Arc::new(crate::fuzz_context::GlobalContext::default()));
226+
let results = vec![
227+
QueryExecutionResult {
228+
query_context: make_query_context("all"),
229+
result: Ok(vec![one_col_batch.clone()]),
230+
},
231+
QueryExecutionResult {
232+
query_context: make_query_context("partition_union"),
233+
result: Ok(vec![two_col_batch]),
234+
},
235+
];
236+
237+
let err = oracle.validate_consistency(&results).await.unwrap_err();
238+
assert!(err.to_string().contains("value equivalence violated"));
239+
}
240+
202241
#[test]
203242
fn tlp_having_generates_expected_query_group_shape() {
243+
init_available_data_types();
204244
let ctx = Arc::new(crate::fuzz_context::GlobalContext::default());
205245
ctx.runtime_context
206246
.registered_tables

src/oracle/oracle_impl_tlp_where.rs

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -158,7 +158,7 @@ impl Oracle for TlpWhereOracle {
158158
#[cfg(test)]
159159
mod tests {
160160
use super::*;
161-
use crate::common::LogicalTable;
161+
use crate::common::{LogicalTable, init_available_data_types};
162162
use datafusion::arrow::array::{Array, Int64Array, RecordBatch};
163163
use datafusion::arrow::datatypes::{DataType, Field, Schema};
164164
use datafusion::prelude::SessionContext;
@@ -269,6 +269,7 @@ mod tests {
269269

270270
#[test]
271271
fn tlp_where_generates_expected_query_group_shape() {
272+
init_available_data_types();
272273
let ctx = Arc::new(crate::fuzz_context::GlobalContext::default());
273274
ctx.runtime_context
274275
.registered_tables

tests/integration_test.rs

Lines changed: 241 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -283,6 +283,247 @@ fn full_run_logs_expected_queries_for_tlp_where_oracle() -> Result<(), Box<dyn E
283283
Ok(())
284284
}
285285

286+
#[test]
287+
fn full_run_logs_expected_queries_for_tlp_having_oracle() -> Result<(), Box<dyn Error>> {
288+
let log_dir = make_temp_log_dir("integration-tlp-having")?;
289+
let config_path =
290+
generate_default_config_with_oracles(&log_dir, &[ConfiguredOracle::TlpHaving])?;
291+
let run_output = run_fuzzer_once(&config_path)?;
292+
293+
insta::assert_snapshot!(run_output.query_log, @r#"
294+
=== round=1 query=1 oracle=TlpHavingOracle query_seed=310304 ===
295+
--- statement=1 context=TLP-HAVING all groups ---
296+
SELECT t0.col_t0_3_date32, t0.col_t0_1_decimal128
297+
FROM t0
298+
GROUP BY t0.col_t0_3_date32, t0.col_t0_1_decimal128
299+
300+
--- statement=2 context=TLP-HAVING p UNION ALL NOT p UNION ALL p IS NULL ---
301+
SELECT t0.col_t0_3_date32, t0.col_t0_1_decimal128
302+
FROM t0
303+
GROUP BY t0.col_t0_3_date32, t0.col_t0_1_decimal128
304+
HAVING ((INTERVAL '-7 MONS 16 DAYS -0.000000001 SECS' = 1193.6000000000000000000000000000000))
305+
UNION ALL
306+
SELECT t0.col_t0_3_date32, t0.col_t0_1_decimal128
307+
FROM t0
308+
GROUP BY t0.col_t0_3_date32, t0.col_t0_1_decimal128
309+
HAVING NOT ((INTERVAL '-7 MONS 16 DAYS -0.000000001 SECS' = 1193.6000000000000000000000000000000))
310+
UNION ALL
311+
SELECT t0.col_t0_3_date32, t0.col_t0_1_decimal128
312+
FROM t0
313+
GROUP BY t0.col_t0_3_date32, t0.col_t0_1_decimal128
314+
HAVING ((INTERVAL '-7 MONS 16 DAYS -0.000000001 SECS' = 1193.6000000000000000000000000000000)) IS NULL
315+
316+
=== round=1 query=2 oracle=TlpHavingOracle query_seed=310305 ===
317+
--- statement=1 context=TLP-HAVING all groups ---
318+
SELECT t1.col_t1_1_int64, t1.col_t1_3_date32, t1.col_t1_2_int64
319+
FROM t1
320+
GROUP BY t1.col_t1_1_int64, t1.col_t1_3_date32, t1.col_t1_2_int64
321+
322+
--- statement=2 context=TLP-HAVING p UNION ALL NOT p UNION ALL p IS NULL ---
323+
SELECT t1.col_t1_1_int64, t1.col_t1_3_date32, t1.col_t1_2_int64
324+
FROM t1
325+
GROUP BY t1.col_t1_1_int64, t1.col_t1_3_date32, t1.col_t1_2_int64
326+
HAVING (NULL)
327+
UNION ALL
328+
SELECT t1.col_t1_1_int64, t1.col_t1_3_date32, t1.col_t1_2_int64
329+
FROM t1
330+
GROUP BY t1.col_t1_1_int64, t1.col_t1_3_date32, t1.col_t1_2_int64
331+
HAVING NOT (NULL)
332+
UNION ALL
333+
SELECT t1.col_t1_1_int64, t1.col_t1_3_date32, t1.col_t1_2_int64
334+
FROM t1
335+
GROUP BY t1.col_t1_1_int64, t1.col_t1_3_date32, t1.col_t1_2_int64
336+
HAVING (NULL) IS NULL
337+
338+
=== round=1 query=3 oracle=TlpHavingOracle query_seed=310306 ===
339+
--- statement=1 context=TLP-HAVING all groups ---
340+
SELECT t2.col_t2_1_string
341+
FROM t2
342+
WHERE NULL
343+
GROUP BY t2.col_t2_1_string
344+
345+
--- statement=2 context=TLP-HAVING p UNION ALL NOT p UNION ALL p IS NULL ---
346+
SELECT t2.col_t2_1_string
347+
FROM t2
348+
WHERE NULL
349+
GROUP BY t2.col_t2_1_string
350+
HAVING (false)
351+
UNION ALL
352+
SELECT t2.col_t2_1_string
353+
FROM t2
354+
WHERE NULL
355+
GROUP BY t2.col_t2_1_string
356+
HAVING NOT (false)
357+
UNION ALL
358+
SELECT t2.col_t2_1_string
359+
FROM t2
360+
WHERE NULL
361+
GROUP BY t2.col_t2_1_string
362+
HAVING (false) IS NULL
363+
364+
=== round=1 query=5 oracle=TlpHavingOracle query_seed=310308 ===
365+
--- statement=1 context=TLP-HAVING all groups ---
366+
SELECT t0.col_t0_2_float32, t0.col_t0_1_decimal128, t0.col_t0_3_date32
367+
FROM t0
368+
WHERE (to_char(CAST('2052-04-28' AS DATE), '=B 2v') !~* to_char(INTERVAL '1 MONS -11 DAYS -0.658344865 SECS', to_char(CAST('2056-06-17 08:39:22.305135405 -09:00' AS TIMESTAMP), '9L4l6.-bG6dPLWk-7 ~9azH0^V;7q0S#|%@?MyX"')))
369+
GROUP BY t0.col_t0_2_float32, t0.col_t0_1_decimal128, t0.col_t0_3_date32
370+
371+
--- statement=2 context=TLP-HAVING p UNION ALL NOT p UNION ALL p IS NULL ---
372+
SELECT t0.col_t0_2_float32, t0.col_t0_1_decimal128, t0.col_t0_3_date32
373+
FROM t0
374+
WHERE (to_char(CAST('2052-04-28' AS DATE), '=B 2v') !~* to_char(INTERVAL '1 MONS -11 DAYS -0.658344865 SECS', to_char(CAST('2056-06-17 08:39:22.305135405 -09:00' AS TIMESTAMP), '9L4l6.-bG6dPLWk-7 ~9azH0^V;7q0S#|%@?MyX"')))
375+
GROUP BY t0.col_t0_2_float32, t0.col_t0_1_decimal128, t0.col_t0_3_date32
376+
HAVING (false)
377+
UNION ALL
378+
SELECT t0.col_t0_2_float32, t0.col_t0_1_decimal128, t0.col_t0_3_date32
379+
FROM t0
380+
WHERE (to_char(CAST('2052-04-28' AS DATE), '=B 2v') !~* to_char(INTERVAL '1 MONS -11 DAYS -0.658344865 SECS', to_char(CAST('2056-06-17 08:39:22.305135405 -09:00' AS TIMESTAMP), '9L4l6.-bG6dPLWk-7 ~9azH0^V;7q0S#|%@?MyX"')))
381+
GROUP BY t0.col_t0_2_float32, t0.col_t0_1_decimal128, t0.col_t0_3_date32
382+
HAVING NOT (false)
383+
UNION ALL
384+
SELECT t0.col_t0_2_float32, t0.col_t0_1_decimal128, t0.col_t0_3_date32
385+
FROM t0
386+
WHERE (to_char(CAST('2052-04-28' AS DATE), '=B 2v') !~* to_char(INTERVAL '1 MONS -11 DAYS -0.658344865 SECS', to_char(CAST('2056-06-17 08:39:22.305135405 -09:00' AS TIMESTAMP), '9L4l6.-bG6dPLWk-7 ~9azH0^V;7q0S#|%@?MyX"')))
387+
GROUP BY t0.col_t0_2_float32, t0.col_t0_1_decimal128, t0.col_t0_3_date32
388+
HAVING (false) IS NULL
389+
390+
=== round=2 query=1 oracle=TlpHavingOracle query_seed=311304 ===
391+
--- statement=1 context=TLP-HAVING all groups ---
392+
SELECT t0.col_t0_4_interval_month_day_nano, t0.col_t0_1_float64, t0.col_t0_2_time64_nanosecond
393+
FROM t0
394+
WHERE false
395+
GROUP BY t0.col_t0_4_interval_month_day_nano, t0.col_t0_1_float64, t0.col_t0_2_time64_nanosecond
396+
397+
--- statement=2 context=TLP-HAVING p UNION ALL NOT p UNION ALL p IS NULL ---
398+
SELECT t0.col_t0_4_interval_month_day_nano, t0.col_t0_1_float64, t0.col_t0_2_time64_nanosecond
399+
FROM t0
400+
WHERE false
401+
GROUP BY t0.col_t0_4_interval_month_day_nano, t0.col_t0_1_float64, t0.col_t0_2_time64_nanosecond
402+
HAVING (false)
403+
UNION ALL
404+
SELECT t0.col_t0_4_interval_month_day_nano, t0.col_t0_1_float64, t0.col_t0_2_time64_nanosecond
405+
FROM t0
406+
WHERE false
407+
GROUP BY t0.col_t0_4_interval_month_day_nano, t0.col_t0_1_float64, t0.col_t0_2_time64_nanosecond
408+
HAVING NOT (false)
409+
UNION ALL
410+
SELECT t0.col_t0_4_interval_month_day_nano, t0.col_t0_1_float64, t0.col_t0_2_time64_nanosecond
411+
FROM t0
412+
WHERE false
413+
GROUP BY t0.col_t0_4_interval_month_day_nano, t0.col_t0_1_float64, t0.col_t0_2_time64_nanosecond
414+
HAVING (false) IS NULL
415+
416+
=== round=2 query=2 oracle=TlpHavingOracle query_seed=311305 ===
417+
--- statement=1 context=TLP-HAVING all groups ---
418+
SELECT t0.col_t0_4_interval_month_day_nano, t0.col_t0_3_boolean
419+
FROM t0
420+
WHERE true
421+
GROUP BY t0.col_t0_4_interval_month_day_nano, t0.col_t0_3_boolean
422+
423+
--- statement=2 context=TLP-HAVING p UNION ALL NOT p UNION ALL p IS NULL ---
424+
SELECT t0.col_t0_4_interval_month_day_nano, t0.col_t0_3_boolean
425+
FROM t0
426+
WHERE true
427+
GROUP BY t0.col_t0_4_interval_month_day_nano, t0.col_t0_3_boolean
428+
HAVING (t0.col_t0_3_boolean)
429+
UNION ALL
430+
SELECT t0.col_t0_4_interval_month_day_nano, t0.col_t0_3_boolean
431+
FROM t0
432+
WHERE true
433+
GROUP BY t0.col_t0_4_interval_month_day_nano, t0.col_t0_3_boolean
434+
HAVING NOT (t0.col_t0_3_boolean)
435+
UNION ALL
436+
SELECT t0.col_t0_4_interval_month_day_nano, t0.col_t0_3_boolean
437+
FROM t0
438+
WHERE true
439+
GROUP BY t0.col_t0_4_interval_month_day_nano, t0.col_t0_3_boolean
440+
HAVING (t0.col_t0_3_boolean) IS NULL
441+
442+
=== round=2 query=3 oracle=TlpHavingOracle query_seed=311306 ===
443+
--- statement=1 context=TLP-HAVING all groups ---
444+
SELECT t2.col_t2_1_float32
445+
FROM t2
446+
WHERE false
447+
GROUP BY t2.col_t2_1_float32
448+
449+
--- statement=2 context=TLP-HAVING p UNION ALL NOT p UNION ALL p IS NULL ---
450+
SELECT t2.col_t2_1_float32
451+
FROM t2
452+
WHERE false
453+
GROUP BY t2.col_t2_1_float32
454+
HAVING (false)
455+
UNION ALL
456+
SELECT t2.col_t2_1_float32
457+
FROM t2
458+
WHERE false
459+
GROUP BY t2.col_t2_1_float32
460+
HAVING NOT (false)
461+
UNION ALL
462+
SELECT t2.col_t2_1_float32
463+
FROM t2
464+
WHERE false
465+
GROUP BY t2.col_t2_1_float32
466+
HAVING (false) IS NULL
467+
468+
=== round=2 query=4 oracle=TlpHavingOracle query_seed=311307 ===
469+
--- statement=1 context=TLP-HAVING all groups ---
470+
SELECT t1.col_t1_3_decimal128
471+
FROM t1
472+
GROUP BY t1.col_t1_3_decimal128
473+
474+
--- statement=2 context=TLP-HAVING p UNION ALL NOT p UNION ALL p IS NULL ---
475+
SELECT t1.col_t1_3_decimal128
476+
FROM t1
477+
GROUP BY t1.col_t1_3_decimal128
478+
HAVING (true)
479+
UNION ALL
480+
SELECT t1.col_t1_3_decimal128
481+
FROM t1
482+
GROUP BY t1.col_t1_3_decimal128
483+
HAVING NOT (true)
484+
UNION ALL
485+
SELECT t1.col_t1_3_decimal128
486+
FROM t1
487+
GROUP BY t1.col_t1_3_decimal128
488+
HAVING (true) IS NULL
489+
490+
=== round=2 query=5 oracle=TlpHavingOracle query_seed=311308 ===
491+
--- statement=1 context=TLP-HAVING all groups ---
492+
SELECT t2.col_t2_1_float32
493+
FROM t2
494+
GROUP BY t2.col_t2_1_float32
495+
496+
--- statement=2 context=TLP-HAVING p UNION ALL NOT p UNION ALL p IS NULL ---
497+
SELECT t2.col_t2_1_float32
498+
FROM t2
499+
GROUP BY t2.col_t2_1_float32
500+
HAVING (('ro' ~ to_char((INTERVAL '4 MONS -11 DAYS -0.961066195 SECS' + INTERVAL '10 MONS -17 DAYS -0.349920790 SECS'), to_char(NULL, '0,YK#Mw'))))
501+
UNION ALL
502+
SELECT t2.col_t2_1_float32
503+
FROM t2
504+
GROUP BY t2.col_t2_1_float32
505+
HAVING NOT (('ro' ~ to_char((INTERVAL '4 MONS -11 DAYS -0.961066195 SECS' + INTERVAL '10 MONS -17 DAYS -0.349920790 SECS'), to_char(NULL, '0,YK#Mw'))))
506+
UNION ALL
507+
SELECT t2.col_t2_1_float32
508+
FROM t2
509+
GROUP BY t2.col_t2_1_float32
510+
HAVING (('ro' ~ to_char((INTERVAL '4 MONS -11 DAYS -0.961066195 SECS' + INTERVAL '10 MONS -17 DAYS -0.349920790 SECS'), to_char(NULL, '0,YK#Mw')))) IS NULL
511+
"#);
512+
insta::assert_snapshot!(run_output.stats_summary, @r#"
513+
============================================================
514+
🎯 DataFusion Fuzzer - Final Statistics
515+
============================================================
516+
📊 Execution Summary:
517+
• Rounds Completed: 2
518+
• Queries Executed: 18
519+
• Query Success Rate: 77.78%
520+
"#);
521+
522+
fs::remove_dir_all(&log_dir)?;
523+
524+
Ok(())
525+
}
526+
286527
struct RunOutput {
287528
query_log: String,
288529
stats_summary: String,

0 commit comments

Comments
 (0)