Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions src/cli/error_whitelist.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,12 +103,24 @@ static ERROR_PATTERNS: LazyLock<Vec<ErrorPattern>> = LazyLock::new(|| {
query_sub: "to_date(",
error_sub: "Casting from",
},
ErrorPattern::QueryAndErrorContains {
query_sub: "to_date(",
error_sub: "Error parsing timestamp from",
},
ErrorPattern::QueryAndErrorContains {
query_sub: "to_char(",
error_sub: "Cannot cast",
},
ErrorPattern::Contains("Regular expression did not compile"),
ErrorPattern::Contains("to_unixtime function unsupported data type"),
ErrorPattern::QueryAndErrorContains {
query_sub: "to_unixtime(",
error_sub: "Error parsing timestamp from",
},
ErrorPattern::QueryAndErrorContains {
query_sub: "to_timestamp",
error_sub: "Error parsing timestamp from",
},
// =========================
// Known Issues
// =========================
Expand Down Expand Up @@ -233,3 +245,32 @@ pub fn get_configured_patterns() -> Vec<String> {
})
.collect()
}

#[cfg(test)]
mod tests {
use super::is_error_whitelisted;

#[test]
fn whitelists_timestamp_parse_errors_for_to_timestamp_queries() {
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Follow-up idea: an alternative testing strategy would be to run this query on DataFusion and assert that the expected error is returned.
Once DataFusion is updated with different error messages, we can catch them in unit tests and update the whitelist directly, which is easier to investigate than fuzzer oracle inconsistencies.

let error = "Query execution failed: Execution error: Error parsing timestamp from 'abc' using format 'fmt': input contains invalid characters";
let query = "SELECT to_timestamp_seconds(66, 'fmt')";

assert!(is_error_whitelisted(error, Some(query)));
}

#[test]
fn does_not_whitelist_timestamp_parse_errors_without_to_timestamp_query() {
    // Negative case: the same timestamp-parse error text, paired with a query
    // that calls no timestamp function, must not be reported as whitelisted.
    let sql = "SELECT 1";
    let message = "Query execution failed: Execution error: Error parsing timestamp from 'abc' using format 'fmt': input contains invalid characters";

    let whitelisted = is_error_whitelisted(message, Some(sql));

    assert!(!whitelisted);
}

#[test]
fn whitelists_timestamp_parse_errors_for_to_date_queries() {
    // Positive case: a timestamp-parse error raised for a query containing
    // `to_date(` is expected to be reported as whitelisted.
    let sql = "SELECT to_date('abc', 'fmt')";
    let message = "Query execution failed: Execution error: Error parsing timestamp from 'abc' using format 'fmt': input contains invalid characters";

    let whitelisted = is_error_whitelisted(message, Some(sql));

    assert!(whitelisted);
}
}
13 changes: 8 additions & 5 deletions src/query_generator/expr_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1092,16 +1092,19 @@ impl BaseExprWithInfo for ToUnixtimeExpr {
ExprWrapper {
expr: BaseExpr::ToUnixtime,
return_type: return_types,
inferred_child_signature: vec![vec![
TypeGroup::OneOf(vec![
inferred_child_signature: vec![
vec![TypeGroup::OneOf(vec![
FuzzerDataType::String.to_datafusion_type(),
FuzzerDataType::Date32.to_datafusion_type(),
FuzzerDataType::Timestamp.to_datafusion_type(),
FuzzerDataType::Float32.to_datafusion_type(),
FuzzerDataType::Float64.to_datafusion_type(),
]),
TypeGroup::OneOf(vec![FuzzerDataType::String.to_datafusion_type()]),
]],
])],
vec![
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a good idea to try to generate valid signatures at this level.

If we want to inject more randomness to generate invalid exprs, we can do that at the expr-generation layer.

TypeGroup::Fixed(FuzzerDataType::String.to_datafusion_type()),
TypeGroup::Fixed(FuzzerDataType::String.to_datafusion_type()),
],
],
}
}

Expand Down
Loading