Skip to content

Commit 3158ce2

Browse files
committed
cleared errors
1 parent b0bffa8 commit 3158ce2

3 files changed

Lines changed: 69 additions & 10 deletions

File tree

Cargo.lock

Lines changed: 3 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cli/error_whitelist.rs

Lines changed: 63 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@ pub enum ErrorPattern {
88
Contains(&'static str),
99
/// Regex pattern match - checks if the error message matches this regex pattern
1010
RegexMatch(&'static str),
11+
/// Combined condition: query SQL contains a substring AND error contains a substring
12+
QueryAndErrorContains {
13+
query_sub: &'static str,
14+
error_sub: &'static str,
15+
},
1116
}
1217

1318
/// Configuration for error whitelist patterns
@@ -76,9 +81,34 @@ static ERROR_PATTERNS: LazyLock<Vec<ErrorPattern>> = LazyLock::new(|| {
7681
ErrorPattern::Contains("Failed to create view"),
7782
// Null - Null
7883
ErrorPattern::Contains("Cannot get result type for null arithmetic Null - Null"),
79-
ErrorPattern::Contains("regex parse error"),
84+
// Only whitelist regex parse errors when the query uses a regexp-related function (currently matched via `regexp_replace(`)
85+
ErrorPattern::QueryAndErrorContains {
86+
query_sub: "regexp_replace(",
87+
error_sub: "regex parse error",
88+
},
8089
// Invalid JOIN ON expression like '... t1 natural join t2 on true'
8190
ErrorPattern::Contains("SQL error: ParserError(\"Expected: end of statement, found: ON\")"),
91+
// For anti joins, the fuzzer might generate join predicates that reference
92+
// eliminated columns from anti joins, example (note t0.flag is a valid column
93+
// from t0, but it's eliminated by the first RIGHT ANTI JOIN):
94+
// SELECT *
95+
// FROM t0
96+
// RIGHT ANTI JOIN t1 ON TRUE
97+
// RIGHT ANTI JOIN t2 ON t0.flag;
98+
ErrorPattern::QueryAndErrorContains {
99+
query_sub: "ANTI JOIN",
100+
error_sub: "Schema error: No field named",
101+
},
102+
ErrorPattern::QueryAndErrorContains {
103+
query_sub: "to_date(",
104+
error_sub: "Casting from",
105+
},
106+
ErrorPattern::QueryAndErrorContains {
107+
query_sub: "to_char(",
108+
error_sub: "Cannot cast",
109+
},
110+
ErrorPattern::Contains("Regular expression did not compile"),
111+
ErrorPattern::Contains("to_unixtime function unsupported data type"),
82112
// =========================
83113
// Known Issues
84114
// =========================
@@ -97,10 +127,17 @@ static ERROR_PATTERNS: LazyLock<Vec<ErrorPattern>> = LazyLock::new(|| {
97127
ErrorPattern::Contains("Invalid arithmetic operation: Null % Null"),
98128
// https://github.com/apache/datafusion/issues/17390
99129
ErrorPattern::Contains("Schema error: No field named"),
130+
// https://github.com/apache/datafusion/issues/17472
131+
ErrorPattern::Contains("to_local_time"),
100132
// =========================
101133
// Investigate Later
102134
// =========================
103135
ErrorPattern::Contains("Cast error: Format error"),
136+
ErrorPattern::Contains("to_date"),
137+
// This is a function taking an invalid regex, but it triggered a confusing optimizer
138+
// error -- I think the best thing to do is to provide a better error message
139+
ErrorPattern::Contains("Optimizer rule 'simplify_expressions' failed"),
140+
ErrorPattern::Contains("to_timestamp"),
104141
]
105142
});
106143

@@ -117,6 +154,7 @@ static COMPILED_REGEXES: LazyLock<Vec<Option<Regex>>> = LazyLock::new(|| {
117154
None
118155
}
119156
},
157+
ErrorPattern::QueryAndErrorContains { .. } => None,
120158
})
121159
.collect()
122160
});
@@ -128,6 +166,7 @@ static COMPILED_REGEXES: LazyLock<Vec<Option<Regex>>> = LazyLock::new(|| {
128166
///
129167
/// # Arguments
130168
/// * `error_msg` - The error message to check
169+
/// * `query_sql` - The SQL text for the query that produced the error, if available; when `None`, `QueryAndErrorContains` patterns never match
131170
///
132171
/// # Returns
133172
/// * `true` if the error message matches any whitelisted pattern
@@ -138,13 +177,13 @@ static COMPILED_REGEXES: LazyLock<Vec<Option<Regex>>> = LazyLock::new(|| {
138177
/// use datafusion_fuzzer::cli::error_whitelist::is_error_whitelisted;
139178
///
140179
/// // These should match if the patterns are configured
141-
/// assert!(is_error_whitelisted("Query failed: Arrow error: Divide by zero error"));
142-
/// assert!(is_error_whitelisted("Some context: Arrow error: Divide by zero error here"));
180+
/// assert!(is_error_whitelisted("Query failed: Arrow error: Divide by zero error", None));
181+
/// assert!(is_error_whitelisted("Some context: Arrow error: Divide by zero error here", None));
143182
///
144183
/// // This should not match
145-
/// assert!(!is_error_whitelisted("Unexpected segmentation fault"));
184+
/// assert!(!is_error_whitelisted("Unexpected segmentation fault", None));
146185
/// ```
147-
pub fn is_error_whitelisted(error_msg: &str) -> bool {
186+
pub fn is_error_whitelisted(error_msg: &str, query_sql: Option<&str>) -> bool {
148187
for (i, pattern) in ERROR_PATTERNS.iter().enumerate() {
149188
match pattern {
150189
ErrorPattern::Contains(exact_str) => {
@@ -159,6 +198,16 @@ pub fn is_error_whitelisted(error_msg: &str) -> bool {
159198
}
160199
}
161200
}
201+
ErrorPattern::QueryAndErrorContains {
202+
query_sub,
203+
error_sub,
204+
} => {
205+
if let Some(sql) = query_sql {
206+
if sql.contains(query_sub) && error_msg.contains(error_sub) {
207+
return true;
208+
}
209+
}
210+
}
162211
}
163212
}
164213

@@ -172,6 +221,15 @@ pub fn get_configured_patterns() -> Vec<String> {
172221
.map(|pattern| match pattern {
173222
ErrorPattern::Contains(s) => format!("Exact: {}", s),
174223
ErrorPattern::RegexMatch(s) => format!("Regex: {}", s),
224+
ErrorPattern::QueryAndErrorContains {
225+
query_sub,
226+
error_sub,
227+
} => {
228+
format!(
229+
"QueryAndError: query contains '{}' AND error contains '{}'",
230+
query_sub, error_sub
231+
)
232+
}
175233
})
176234
.collect()
177235
}

src/cli/runner.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ async fn generate_views_for_round(seed: u64, ctx: &Arc<GlobalContext>) -> Result
135135
Ok(sql) => sql,
136136
Err(e) => {
137137
let err_msg = format!("Failed to generate view SQL: {}", e);
138-
if !is_error_whitelisted(&err_msg) {
138+
if !is_error_whitelisted(&err_msg, None) {
139139
error!(err_msg);
140140
}
141141
continue; // Skip this view and try the next one
@@ -226,7 +226,7 @@ async fn execute_oracle_test(seed: u64, ctx: &Arc<GlobalContext>) -> bool {
226226
Ok(group) => group,
227227
Err(e) => {
228228
let err_msg = format!("Failed to generate query group: {}", e);
229-
if !is_error_whitelisted(&err_msg) {
229+
if !is_error_whitelisted(&err_msg, None) {
230230
error!(err_msg)
231231
}
232232
return false;
@@ -307,7 +307,7 @@ async fn execute_single_query(
307307
// Check if error is whitelisted using the dedicated error_whitelist module
308308
if let Err(ref e) = outcome.result {
309309
let error_msg = e.to_string();
310-
if !error_whitelist::is_error_whitelisted(&error_msg) {
310+
if !error_whitelist::is_error_whitelisted(&error_msg, Some(&query_context.query)) {
311311
// Log non-whitelisted errors
312312
error!("Non-whitelisted error encountered: {}", error_msg);
313313
error!("Query that caused the error: {}", query_context.query);

0 commit comments

Comments
 (0)