Skip to content

Commit 38a8e28

Browse files
claudespiceClaudeclaude
authored
Improve Databricks error message when dataset has no columns (spiceai#9928)
* Improve Databricks error message when dataset has no columns

  When a Databricks dataset returns no columns (empty `data_array` or missing `result`), the error was uninformative ("result.data_array"). This change:

  - Adds a `NoColumnsInDataset` error variant with a clear, actionable message including the dataset name
  - Improves the `UnableToRetrieveSchema` error to include the dataset name and guidance to verify the table exists
  - Passes the table reference name through `schema_from_json` for context
  - Adds tests for the no-columns error path, clustering-only metadata, and verifies error messages are actionable

  Fixes spiceai#9859

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix: format code with cargo fmt

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude <claude@spices-MacBook.localdomain>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent b53b09b commit 38a8e28

1 file changed

Lines changed: 136 additions & 25 deletions

File tree

crates/data_components/src/databricks/sql_warehouse.rs

Lines changed: 136 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -65,6 +65,11 @@ pub enum Error {
6565
#[snafu(display("Unable to retrieve schema: {reason}"))]
6666
UnableToRetrieveSchema { reason: String },
6767

68+
#[snafu(display(
69+
"The dataset '{dataset_name}' in Databricks has no columns. Verify the table exists and has at least one column."
70+
))]
71+
NoColumnsInDataset { dataset_name: String },
72+
6873
#[snafu(display(
6974
"Warehouse is not ready (state: '{state}'). Verify the warehouse state and try again later."
7075
))]
@@ -235,7 +240,7 @@ impl SqlWarehouseApi {
235240
let payload = self.create_schema_payload(table)?;
236241
let response = self.execute_sql_statement(&token, &payload).await?;
237242
let response = self.wait_for_statement_completion(&token, response).await?;
238-
schema_from_json(&response)
243+
schema_from_json(&response, &table.to_string())
239244
}
240245

241246
fn create_schema_payload(&self, table: &TableReference) -> Result<Value, Error> {
@@ -548,7 +553,7 @@ struct ExternalLink {
548553
next_chunk_internal_link: Option<String>,
549554
}
550555

551-
fn schema_from_json(json_value: &Value) -> Result<SchemaRef, Error> {
556+
fn schema_from_json(json_value: &Value, dataset_name: &str) -> Result<SchemaRef, Error> {
552557
tracing::trace!("Parsing schema definition from Databricks JSON response: {json_value}");
553558

554559
SqlWarehouseApi::verify_response_status(json_value)?;
@@ -558,7 +563,10 @@ fn schema_from_json(json_value: &Value) -> Result<SchemaRef, Error> {
558563
.and_then(|r| r.get("data_array"))
559564
.and_then(|d| d.as_array())
560565
.ok_or_else(|| Error::UnableToRetrieveSchema {
561-
reason: "result.data_array".to_string(),
566+
reason: format!(
567+
"The response for dataset '{dataset_name}' is missing 'result.data_array'. \
568+
Verify the table exists and the SQL warehouse is responding correctly."
569+
),
562570
})?;
563571

564572
let mut fields = Vec::new();
@@ -612,6 +620,12 @@ fn schema_from_json(json_value: &Value) -> Result<SchemaRef, Error> {
612620
fields.push(field);
613621
}
614622

623+
if fields.is_empty() {
624+
return Err(Error::NoColumnsInDataset {
625+
dataset_name: dataset_name.to_string(),
626+
});
627+
}
628+
615629
Ok(Arc::new(Schema::new(fields)))
616630
}
617631

@@ -807,7 +821,7 @@ mod tests {
807821
["amount", "double", "NO"]
808822
]));
809823

810-
let schema = schema_from_json(&response).expect("should parse schema");
824+
let schema = schema_from_json(&response, "test_table").expect("should parse schema");
811825
assert_eq!(schema.fields().len(), 3);
812826

813827
assert_eq!(schema.field(0).name(), "id");
@@ -836,7 +850,7 @@ mod tests {
836850
["col_decimal", "decimal(10,2)", "YES"]
837851
]));
838852

839-
let schema = schema_from_json(&response).expect("should parse schema");
853+
let schema = schema_from_json(&response, "test_table").expect("should parse schema");
840854
assert_eq!(schema.fields().len(), 8);
841855
assert_eq!(schema.field(0).data_type(), &DataType::Int64);
842856
assert_eq!(schema.field(1).data_type(), &DataType::Int16);
@@ -855,8 +869,21 @@ mod tests {
855869
fn test_schema_from_json_empty_table() {
856870
let response = make_schema_response(&json!([]));
857871

858-
let schema = schema_from_json(&response).expect("should parse empty schema");
859-
assert_eq!(schema.fields().len(), 0);
872+
let err = schema_from_json(&response, "my_catalog.my_schema.my_table")
873+
.expect_err("should fail on empty schema");
874+
assert!(
875+
matches!(&err, Error::NoColumnsInDataset { dataset_name } if dataset_name == "my_catalog.my_schema.my_table"),
876+
"unexpected error: {err}"
877+
);
878+
let msg = err.to_string();
879+
assert!(
880+
msg.contains("my_catalog.my_schema.my_table"),
881+
"error should contain dataset name: {msg}"
882+
);
883+
assert!(
884+
msg.contains("has no columns"),
885+
"error should mention no columns: {msg}"
886+
);
860887
}
861888

862889
#[test]
@@ -868,7 +895,8 @@ mod tests {
868895
["# col_name", "data_type", "comment"]
869896
]));
870897

871-
let schema = schema_from_json(&response).expect("should stop at clustering marker");
898+
let schema =
899+
schema_from_json(&response, "test_table").expect("should stop at clustering marker");
872900
assert_eq!(schema.fields().len(), 2);
873901
assert_eq!(schema.field(0).name(), "id");
874902
assert_eq!(schema.field(1).name(), "name");
@@ -880,9 +908,10 @@ mod tests {
880908
"status": { "state": "SUCCEEDED" }
881909
});
882910

883-
let err = schema_from_json(&response).expect_err("should fail without result");
911+
let err =
912+
schema_from_json(&response, "test_table").expect_err("should fail without result");
884913
assert!(
885-
matches!(&err, Error::UnableToRetrieveSchema { reason } if reason == "result.data_array"),
914+
matches!(&err, Error::UnableToRetrieveSchema { reason } if reason.contains("result.data_array") && reason.contains("test_table")),
886915
"unexpected error: {err}"
887916
);
888917
}
@@ -894,9 +923,10 @@ mod tests {
894923
"result": {}
895924
});
896925

897-
let err = schema_from_json(&response).expect_err("should fail without data_array");
926+
let err =
927+
schema_from_json(&response, "test_table").expect_err("should fail without data_array");
898928
assert!(
899-
matches!(&err, Error::UnableToRetrieveSchema { reason } if reason == "result.data_array"),
929+
matches!(&err, Error::UnableToRetrieveSchema { reason } if reason.contains("result.data_array") && reason.contains("test_table")),
900930
"unexpected error: {err}"
901931
);
902932
}
@@ -908,9 +938,10 @@ mod tests {
908938
"result": { "data_array": "not_an_array" }
909939
});
910940

911-
let err = schema_from_json(&response).expect_err("should fail when data_array is string");
941+
let err = schema_from_json(&response, "test_table")
942+
.expect_err("should fail when data_array is string");
912943
assert!(
913-
matches!(&err, Error::UnableToRetrieveSchema { reason } if reason == "result.data_array"),
944+
matches!(&err, Error::UnableToRetrieveSchema { reason } if reason.contains("result.data_array") && reason.contains("test_table")),
914945
"unexpected error: {err}"
915946
);
916947
}
@@ -919,7 +950,8 @@ mod tests {
919950
fn test_schema_from_json_row_not_array() {
920951
let response = make_schema_response(&json!(["not_an_array"]));
921952

922-
let err = schema_from_json(&response).expect_err("should fail on non-array row");
953+
let err =
954+
schema_from_json(&response, "test_table").expect_err("should fail on non-array row");
923955
assert!(
924956
matches!(&err, Error::UnableToRetrieveSchema { reason } if reason.contains("is not an array")),
925957
"unexpected error: {err}"
@@ -930,7 +962,7 @@ mod tests {
930962
fn test_schema_from_json_row_too_short() {
931963
let response = make_schema_response(&json!([["id", "int"]]));
932964

933-
let err = schema_from_json(&response).expect_err("should fail on short row");
965+
let err = schema_from_json(&response, "test_table").expect_err("should fail on short row");
934966
assert!(
935967
matches!(&err, Error::UnableToRetrieveSchema { reason } if reason.contains("lacks column_name")),
936968
"unexpected error: {err}"
@@ -941,7 +973,8 @@ mod tests {
941973
fn test_schema_from_json_column_name_not_string() {
942974
let response = make_schema_response(&json!([[123, "int", "NO"]]));
943975

944-
let err = schema_from_json(&response).expect_err("should fail on non-string col name");
976+
let err = schema_from_json(&response, "test_table")
977+
.expect_err("should fail on non-string col name");
945978
assert!(
946979
matches!(&err, Error::UnableToRetrieveSchema { reason } if reason.contains("[0] is not a string")),
947980
"unexpected error: {err}"
@@ -952,7 +985,8 @@ mod tests {
952985
fn test_schema_from_json_data_type_not_string() {
953986
let response = make_schema_response(&json!([["id", 42, "NO"]]));
954987

955-
let err = schema_from_json(&response).expect_err("should fail on non-string data type");
988+
let err = schema_from_json(&response, "test_table")
989+
.expect_err("should fail on non-string data type");
956990
assert!(
957991
matches!(&err, Error::UnableToRetrieveSchema { reason } if reason.contains("[1] is not a string")),
958992
"unexpected error: {err}"
@@ -963,7 +997,8 @@ mod tests {
963997
fn test_schema_from_json_nullable_not_string() {
964998
let response = make_schema_response(&json!([["id", "int", true]]));
965999

966-
let err = schema_from_json(&response).expect_err("should fail on non-string nullable");
1000+
let err = schema_from_json(&response, "test_table")
1001+
.expect_err("should fail on non-string nullable");
9671002
assert!(
9681003
matches!(&err, Error::UnableToRetrieveSchema { reason } if reason.contains("[2] is not a string")),
9691004
"unexpected error: {err}"
@@ -982,7 +1017,8 @@ mod tests {
9821017
["g", "int", "anything_else"]
9831018
]));
9841019

985-
let schema = schema_from_json(&response).expect("should parse nullable variations");
1020+
let schema =
1021+
schema_from_json(&response, "test_table").expect("should parse nullable variations");
9861022
assert!(schema.field(0).is_nullable());
9871023
assert!(schema.field(1).is_nullable());
9881024
assert!(schema.field(2).is_nullable());
@@ -1003,7 +1039,8 @@ mod tests {
10031039
"result": { "data_array": [] }
10041040
});
10051041

1006-
let err = schema_from_json(&response).expect_err("should fail on FAILED status");
1042+
let err =
1043+
schema_from_json(&response, "test_table").expect_err("should fail on FAILED status");
10071044
assert!(
10081045
matches!(&err, Error::QueryFailure { message } if message.contains("table not found")),
10091046
"unexpected error: {err}"
@@ -1017,7 +1054,8 @@ mod tests {
10171054
"statement_id": "test-stmt-id"
10181055
});
10191056

1020-
let err = schema_from_json(&response).expect_err("should fail on PENDING status");
1057+
let err =
1058+
schema_from_json(&response, "test_table").expect_err("should fail on PENDING status");
10211059
assert!(
10221060
matches!(&err, Error::InvalidWarehouseState { .. }),
10231061
"unexpected error: {err}"
@@ -1028,7 +1066,8 @@ mod tests {
10281066
fn test_schema_from_json_unsupported_type() {
10291067
let response = make_schema_response(&json!([["col", "TOTALLY_FAKE_TYPE", "NO"]]));
10301068

1031-
let err = schema_from_json(&response).expect_err("should fail on unsupported type");
1069+
let err =
1070+
schema_from_json(&response, "test_table").expect_err("should fail on unsupported type");
10321071
assert!(
10331072
matches!(&err, Error::ParseError { .. }),
10341073
"unexpected error: {err}"
@@ -1041,7 +1080,8 @@ mod tests {
10411080
let response =
10421081
make_schema_response(&json!([["id", "int", "NO", "extra_col", "another_extra"]]));
10431082

1044-
let schema = schema_from_json(&response).expect("should parse with extra columns");
1083+
let schema =
1084+
schema_from_json(&response, "test_table").expect("should parse with extra columns");
10451085
assert_eq!(schema.fields().len(), 1);
10461086
assert_eq!(schema.field(0).name(), "id");
10471087
}
@@ -1052,7 +1092,8 @@ mod tests {
10521092
"result": { "data_array": [["id", "int", "NO"]] }
10531093
});
10541094

1055-
let err = schema_from_json(&response).expect_err("should fail without status");
1095+
let err =
1096+
schema_from_json(&response, "test_table").expect_err("should fail without status");
10561097
assert!(
10571098
matches!(&err, Error::MissingJsonField { field } if field == "status.state"),
10581099
"unexpected error: {err}"
@@ -1357,4 +1398,74 @@ mod tests {
13571398
"unexpected error: {err}"
13581399
);
13591400
}
1401+
1402+
#[test]
1403+
fn test_schema_from_json_no_columns_error_includes_dataset_name() {
1404+
let response = make_schema_response(&json!([]));
1405+
1406+
let err = schema_from_json(&response, "my_catalog.my_schema.orders")
1407+
.expect_err("should fail when no columns");
1408+
let msg = err.to_string();
1409+
assert!(
1410+
msg.contains("my_catalog.my_schema.orders"),
1411+
"error should contain the full dataset name: {msg}"
1412+
);
1413+
assert!(
1414+
msg.contains("has no columns"),
1415+
"error should mention 'has no columns': {msg}"
1416+
);
1417+
assert!(
1418+
msg.contains("Verify the table exists"),
1419+
"error should suggest verifying table existence: {msg}"
1420+
);
1421+
}
1422+
1423+
#[test]
1424+
fn test_schema_from_json_only_clustering_metadata_returns_no_columns_error() {
1425+
// When the data_array only contains clustering metadata markers,
1426+
// no real columns are parsed and we should get a NoColumnsInDataset error.
1427+
let response = make_schema_response(&json!([
1428+
["# Clustering Information", "", ""],
1429+
["# col_name", "data_type", "comment"]
1430+
]));
1431+
1432+
let err = schema_from_json(&response, "test_table")
1433+
.expect_err("should fail when only clustering metadata present");
1434+
assert!(
1435+
matches!(&err, Error::NoColumnsInDataset { dataset_name } if dataset_name == "test_table"),
1436+
"unexpected error: {err}"
1437+
);
1438+
}
1439+
1440+
#[test]
1441+
fn test_schema_from_json_missing_result_error_is_actionable() {
1442+
let response = json!({
1443+
"status": { "state": "SUCCEEDED" }
1444+
});
1445+
1446+
let err = schema_from_json(&response, "catalog.schema.my_orders")
1447+
.expect_err("should fail without result");
1448+
let msg = err.to_string();
1449+
assert!(
1450+
msg.contains("catalog.schema.my_orders"),
1451+
"error should contain dataset name: {msg}"
1452+
);
1453+
assert!(
1454+
msg.contains("Verify the table exists"),
1455+
"error should suggest verifying table: {msg}"
1456+
);
1457+
}
1458+
1459+
#[test]
1460+
fn test_schema_from_json_happy_path_with_dataset_name() {
1461+
// Ensure the dataset_name parameter doesn't affect successful parsing.
1462+
let response =
1463+
make_schema_response(&json!([["id", "int", "NO"], ["name", "string", "YES"]]));
1464+
1465+
let schema = schema_from_json(&response, "catalog.schema.users")
1466+
.expect("should parse schema successfully");
1467+
assert_eq!(schema.fields().len(), 2);
1468+
assert_eq!(schema.field(0).name(), "id");
1469+
assert_eq!(schema.field(1).name(), "name");
1470+
}
13601471
}

0 commit comments

Comments
 (0)