Skip to content

Commit f1483ab

Browse files
committed
fix(query): handle malformed UDF script metadata
1 parent 66a717a commit f1483ab

2 files changed

Lines changed: 66 additions & 13 deletions

File tree

src/query/script_udf_support/src/transform_udf_script.rs

Lines changed: 33 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -609,7 +609,7 @@ impl TransformUdfScript {
609609
imports_stage_info,
610610
..
611611
}) => {
612-
let mut dependencies = Self::extract_deps(&code_str);
612+
let mut dependencies = Self::extract_deps(&code_str)?;
613613
dependencies.extend_from_slice(packages.as_slice());
614614

615615
let stage_fingerprints = Self::collect_stage_fingerprints(imports_stage_info)?;
@@ -677,7 +677,7 @@ impl TransformUdfScript {
677677
Ok(script_runtimes)
678678
}
679679

680-
fn extract_deps(script: &str) -> Vec<String> {
680+
fn extract_deps(script: &str) -> Result<Vec<String>> {
681681
let mut ss = String::new();
682682
let mut meta_start = false;
683683
for line in script.lines() {
@@ -693,19 +693,23 @@ impl TransformUdfScript {
693693
}
694694
}
695695

696-
let parsed = ss.parse::<toml::Value>().unwrap();
696+
let parsed = ss.parse::<toml::Value>().map_err(|err| {
697+
ErrorCode::SemanticError(format!("Failed to parse UDF script metadata as TOML: {err}"))
698+
})?;
697699

698700
if parsed.get("dependencies").is_none() {
699-
return Vec::new();
701+
return Ok(Vec::new());
700702
}
701703

702-
if let Some(deps) = parsed["dependencies"].as_array() {
703-
deps.iter()
704+
let deps = if let Some(deps) = parsed["dependencies"].as_array() {
705+
deps
706+
.iter()
704707
.filter_map(|v| v.as_str().map(|s| s.to_string()))
705708
.collect()
706709
} else {
707710
Vec::new()
708-
}
711+
};
712+
Ok(deps)
709713
}
710714

711715
fn prepare_block_entries(
@@ -1178,3 +1182,25 @@ mod venv {
11781182
pub static PY_VENV_CACHE: LazyLock<RwLock<LruCache<PyVenvKeyEntry, PyVenvCacheEntry>>> =
11791183
LazyLock::new(|| RwLock::new(LruCache::with_items_capacity(64)));
11801184
}
1185+
1186+
#[cfg(test)]
1187+
mod tests {
1188+
use super::*;
1189+
1190+
#[test]
1191+
fn test_extract_deps_returns_error_for_malformed_toml() {
1192+
let err = TransformUdfScript::extract_deps(
1193+
r#"# /// script
1194+
# dependencies = [
1195+
# ///
1196+
"#,
1197+
)
1198+
.expect_err("malformed UDF script metadata should return an error");
1199+
1200+
assert_eq!(err.code(), ErrorCode::SEMANTIC_ERROR);
1201+
assert!(
1202+
err.message()
1203+
.contains("Failed to parse UDF script metadata as TOML")
1204+
);
1205+
}
1206+
}

src/query/sql/src/planner/semantic/type_check/udf.rs

Lines changed: 33 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -264,7 +264,7 @@ fn escape_python_double_quoted(value: &str) -> String {
264264
value.replace('\\', "\\\\").replace('"', "\\\"")
265265
}
266266

267-
fn extract_script_metadata_deps(script: &str) -> Vec<String> {
267+
fn extract_script_metadata_deps(script: &str) -> Result<Vec<String>> {
268268
let mut ss = String::new();
269269
let mut meta_start = false;
270270
for line in script.lines() {
@@ -280,19 +280,24 @@ fn extract_script_metadata_deps(script: &str) -> Vec<String> {
280280
}
281281
}
282282

283-
let parsed = ss.parse::<toml::Value>().unwrap();
283+
let parsed = ss.parse::<toml::Value>().map_err(|err| {
284+
ErrorCode::SemanticError(format!(
285+
"Failed to parse UDF script metadata as TOML: {err}"
286+
))
287+
})?;
284288

285289
if parsed.get("dependencies").is_none() {
286-
return Vec::new();
290+
return Ok(Vec::new());
287291
}
288292

289-
if let Some(deps) = parsed["dependencies"].as_array() {
293+
let deps = if let Some(deps) = parsed["dependencies"].as_array() {
290294
deps.iter()
291295
.filter_map(|value| value.as_str().map(|item| item.to_string()))
292296
.collect()
293297
} else {
294298
Vec::new()
295-
}
299+
};
300+
Ok(deps)
296301
}
297302

298303
fn unique_heredoc_marker(base: &str, contents: &[&str]) -> String {
@@ -607,7 +612,7 @@ impl UdfAdapter for FullTypeCheckAdapter {
607612
ErrorCode::SemanticError(format!("Failed to parse UDF code as utf-8: {err}"))
608613
})?;
609614
let import_assets = self.build_udf_cloud_imports(&imports)?;
610-
let mut merged_packages = extract_script_metadata_deps(&resolved_code);
615+
let mut merged_packages = extract_script_metadata_deps(&resolved_code)?;
611616
merged_packages.extend_from_slice(&packages);
612617
let input_types = arg_types.iter().map(udf_type_string).collect::<Vec<_>>();
613618
let result_type = udf_type_string(&return_type);
@@ -1220,3 +1225,25 @@ where A: UdfAdapter
12201225
})
12211226
}
12221227
}
1228+
1229+
#[cfg(test)]
1230+
mod tests {
1231+
use super::*;
1232+
1233+
#[test]
1234+
fn test_extract_script_metadata_deps_returns_error_for_malformed_toml() {
1235+
let err = extract_script_metadata_deps(
1236+
r#"# /// script
1237+
# dependencies = [
1238+
# ///
1239+
"#,
1240+
)
1241+
.expect_err("malformed UDF script metadata should return an error");
1242+
1243+
assert_eq!(err.code(), ErrorCode::SEMANTIC_ERROR);
1244+
assert!(
1245+
err.message()
1246+
.contains("Failed to parse UDF script metadata as TOML")
1247+
);
1248+
}
1249+
}

0 commit comments

Comments
 (0)