Skip to content
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
- Improve error reporting of malformed column data types in configuration and introspection

- Support comments on columns. This involves a backwards compatible configuration change. New cli and connector versions will be able to read the configuration that includes comments, but older connectors and cli versions will error out.
- Improve error reporting of malformed column data types in configuration and introspection

## [1.1.0] - 2025-02-07

- ClickHouse 25.1 serializes booleans as 1/0 when using `FORMAT JSON`. We only used that in introspection. Change it to use `toJSONString` instead.
Expand Down
28 changes: 20 additions & 8 deletions crates/common/src/config.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
use crate::{
clickhouse_parser::{datatype::ClickHouseDataType, parameterized_query::ParameterizedQuery},
config_file::{
MaybeClickhouseDataType, ParameterizedQueryConfigFile, ParameterizedQueryExposedAs,
PrimaryKey, ReturnType, ServerConfigFile, TableConfigFile, CONFIG_FILE_NAME,
ColumnDefinition, MaybeClickhouseDataType, ParameterizedQueryConfigFile,
ParameterizedQueryExposedAs, PrimaryKey, ReturnType, ServerConfigFile, TableConfigFile,
CONFIG_FILE_NAME,
},
format::display_period_separated,
};
Expand All @@ -29,7 +30,13 @@ pub struct ServerConfig {
#[derive(Debug, Clone)]
pub struct TableType {
pub comment: Option<String>,
pub columns: BTreeMap<FieldName, ClickHouseDataType>,
pub columns: BTreeMap<FieldName, ColumnType>,
}

#[derive(Debug, Clone)]
pub struct ColumnType {
pub comment: Option<String>,
pub data_type: ClickHouseDataType,
}

#[derive(Debug, Default, Clone)]
Expand Down Expand Up @@ -314,7 +321,7 @@ fn validate_and_parse_return_type(
config: &ServerConfigFile,
file_path: &Path,
node_path: &[&str],
) -> Result<Option<BTreeMap<FieldName, ClickHouseDataType>>, ConfigurationError> {
) -> Result<Option<BTreeMap<FieldName, ColumnType>>, ConfigurationError> {
let get_node_path = |extra_segments: &[&str]| {
node_path
.iter()
Expand Down Expand Up @@ -369,14 +376,19 @@ fn validate_and_parse_return_type(
ReturnType::Definition { columns } => Ok(Some(
columns
.iter()
.map(|(field_alias, data_type)| {
let node_path = get_node_path(&["columns", field_alias.inner()]);
.map(|(field_alias, field_info)| {
let (data_type, comment, node_path) = match field_info {
ColumnDefinition::ShortHand(data_type) => (data_type, &None, get_node_path(&["columns", field_alias.inner()])),
ColumnDefinition::LongForm { data_type, comment } => (data_type, comment, get_node_path(&["columns", field_alias.inner(), "data_type"])),
};
// set empty comment to None
let comment = comment.as_ref().filter(|comment| !comment.is_empty());
match data_type {
MaybeClickhouseDataType::Valid(data_type) => Ok((field_alias.to_owned(), data_type.to_owned() )),
MaybeClickhouseDataType::Valid(data_type) => Ok((field_alias.to_owned(), ColumnType { data_type: data_type.to_owned(), comment: comment.cloned() })),
MaybeClickhouseDataType::Invalid(malformed_string) => Err(ConfigurationError::ValidateError { file_path: file_path.into(), node_path, message: format!("Invalid data type \"{malformed_string}\"") }),
}
})
.collect::<Result<BTreeMap<FieldName, ClickHouseDataType>, ConfigurationError>>()?,
.collect::<Result<BTreeMap<FieldName, ColumnType>, ConfigurationError>>()?,
)),
}
}
13 changes: 12 additions & 1 deletion crates/common/src/config_file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ pub enum ReturnType {
/// A custom return type definition
/// The keys are column names, the values are parsable clichouse datatypes
Definition {
columns: BTreeMap<FieldName, MaybeClickhouseDataType>,
columns: BTreeMap<FieldName, ColumnDefinition>,
},
/// the same as the return type for another table
TableReference {
Expand All @@ -81,6 +81,17 @@ pub enum ReturnType {
},
}

#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)]
#[serde(untagged, rename_all = "snake_case")]
/// Either a data type, or an object with data_type and optional comment
pub enum ColumnDefinition {
ShortHand(MaybeClickhouseDataType),
LongForm {
data_type: MaybeClickhouseDataType,
comment: Option<String>,
},
}

#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(untagged, rename_all = "snake_case")]
pub enum MaybeClickhouseDataType {
Expand Down
4 changes: 2 additions & 2 deletions crates/common/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ pub fn schema_response(configuration: &ServerConfig) -> models::SchemaResponse {
let mut fields = vec![];
for (column_alias, column_type) in &table_type.columns {
let type_definition = ClickHouseTypeDefinition::from_table_column(
column_type,
&column_type.data_type,
column_alias,
type_name,
&configuration.namespace_separator,
Expand All @@ -44,7 +44,7 @@ pub fn schema_response(configuration: &ServerConfig) -> models::SchemaResponse {
fields.push((
column_alias.to_owned(),
models::ObjectField {
description: None,
description: column_type.comment.to_owned(),
r#type: type_definition.type_identifier(),
arguments: BTreeMap::new(),
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ definitions:
columns:
type: object
additionalProperties:
type: string
$ref: "#/definitions/ColumnDefinition"
- description: the same as the return type for another table
type: object
required:
Expand All @@ -107,6 +107,20 @@ definitions:
query_name:
description: "the table alias must match a key in `tables`, and the query must return the same type as that table alternatively, the alias may reference another parameterized query which has a return type definition,"
type: string
ColumnDefinition:
description: "Either a data type, or an object with data_type and optional comment"
anyOf:
- type: string
- type: object
required:
- data_type
properties:
data_type:
type: string
comment:
type:
- string
- "null"
ParameterizedQueryConfigFile:
type: object
required:
Expand Down
1 change: 1 addition & 0 deletions crates/ndc-clickhouse-cli/src/database_introspection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ pub struct ColumnInfo {
#[allow(dead_code)]
pub is_nullable: bool,
pub is_in_primary_key: bool,
pub column_comment: String,
}

#[derive(Debug, Deserialize)]
Expand Down
5 changes: 3 additions & 2 deletions crates/ndc-clickhouse-cli/src/database_introspection.sql
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ SELECT toJSONString(
c.columns
)
),
'Array(Tuple(table_name String, table_schema String, table_catalog String, table_comment Nullable(String), primary_key Nullable(String), table_type String, view_definition String, columns Array(Tuple(column_name String, data_type String, is_nullable Bool, is_in_primary_key Bool))))'
'Array(Tuple(table_name String, table_schema String, table_catalog String, table_comment Nullable(String), primary_key Nullable(String), table_type String, view_definition String, columns Array(Tuple(column_name String, data_type String, is_nullable Bool, is_in_primary_key Bool, column_comment String))))'
)
)
FROM INFORMATION_SCHEMA.TABLES AS t
Expand All @@ -29,7 +29,8 @@ FROM INFORMATION_SCHEMA.TABLES AS t
c.column_name,
c.data_type,
toBool(c.is_nullable),
toBool(sc.is_in_primary_key)
toBool(sc.is_in_primary_key),
c.column_comment
)
) AS "columns"
FROM INFORMATION_SCHEMA.COLUMNS AS c
Expand Down
39 changes: 33 additions & 6 deletions crates/ndc-clickhouse-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ use common::{
},
config::ConnectionConfig,
config_file::{
MaybeClickhouseDataType, ParameterizedQueryConfigFile, PrimaryKey, ReturnType,
ServerConfigFile, TableConfigFile, CONFIG_FILE_NAME, CONFIG_SCHEMA_FILE_NAME,
ColumnDefinition, MaybeClickhouseDataType, ParameterizedQueryConfigFile, PrimaryKey,
ReturnType, ServerConfigFile, TableConfigFile, CONFIG_FILE_NAME, CONFIG_SCHEMA_FILE_NAME,
},
};
use database_introspection::{introspect_database, TableInfo};
Expand Down Expand Up @@ -145,14 +145,20 @@ async fn main() -> Result<(), Box<dyn Error>> {
};

let introspection = introspect_database(&connection).await?;
println!("Database introspected successfully!");
let config = update_tables_config(&context_path, &introspection).await?;
println!("Configuration updated successfully!");
validate_table_config(&context_path, &config).await?;
println!("Configuration validated successfully!");
}
Command::Validate {} => {
let file_path = context_path.join(CONFIG_FILE_NAME);
let config = read_config_file(&file_path).await?;
if let Some(config) = config {
validate_table_config(&context_path, &config).await?;
println!("Configuration validated successfully!");
} else {
println!("Configuration file not found")
}
}
Command::Watch {} => {
Expand Down Expand Up @@ -331,7 +337,14 @@ async fn validate_table_config(
}
}
ReturnType::Definition { columns } => {
for (field_name, data_type) in columns {
for (field_name, column) in columns {
let data_type = match &column {
ColumnDefinition::ShortHand(data_type)
| ColumnDefinition::LongForm {
data_type,
comment: _,
} => data_type,
};
match data_type {
MaybeClickhouseDataType::Valid(_data_type) => {
// data type string is valid
Expand Down Expand Up @@ -408,7 +421,14 @@ async fn validate_table_config(
}
}
ReturnType::Definition { columns } => {
for (field_name, data_type) in columns {
for (field_name, column) in columns {
let data_type = match &column {
ColumnDefinition::ShortHand(data_type)
| ColumnDefinition::LongForm {
data_type,
comment: _,
} => data_type,
};
match data_type {
MaybeClickhouseDataType::Valid(_data_type) => {
// data type string is valid
Expand Down Expand Up @@ -567,14 +587,21 @@ fn get_table_return_type(
/// If that happens, we want to write out the new configuration including the invalid data type,
/// and rely on the validation step to let the user know something is wrong
/// This allows user to see exactly where the invalid type comes from
fn get_return_type_columns(table: &TableInfo) -> BTreeMap<FieldName, MaybeClickhouseDataType> {
fn get_return_type_columns(table: &TableInfo) -> BTreeMap<FieldName, ColumnDefinition> {
table
.columns
.iter()
.map(|column| {
(
column.column_name.to_string().into(),
column.data_type.to_owned(),
if column.column_comment.is_empty() {
ColumnDefinition::ShortHand(column.data_type.to_owned())
} else {
ColumnDefinition::LongForm {
data_type: column.data_type.to_owned(),
comment: Some(column.column_comment.to_owned()),
}
},
)
})
.collect()
Expand Down
15 changes: 10 additions & 5 deletions crates/ndc-clickhouse-core/src/sql/query_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1967,11 +1967,16 @@ impl<'r, 'c> QueryBuilder<'r, 'c> {
.get(return_type)
.ok_or_else(|| QueryBuilderError::UnknownTableType(return_type.to_owned()))?;

let column_type = table_type.columns.get(column_alias).ok_or_else(|| {
QueryBuilderError::UnknownColumn(column_alias.to_owned(), return_type.to_owned())
})?;

Ok(column_type.to_owned())
let column_type = table_type
.columns
.get(column_alias)
.ok_or_else(|| {
QueryBuilderError::UnknownColumn(column_alias.to_owned(), return_type.to_owned())
})?
.data_type
.to_owned();

Ok(column_type)
}
fn column_accessor(
&self,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use common::{
clickhouse_parser::datatype::{ClickHouseDataType, Identifier},
config::ServerConfig,
config::{ColumnType, ServerConfig},
schema::{
single_column_aggregate_function::ClickHouseSingleColumnAggregateFunction,
type_definition::ClickHouseTypeDefinition,
Expand Down Expand Up @@ -107,7 +107,7 @@ impl AggregatesTypeString {
field_path: _,
} => {
let return_type = get_return_type(table_alias, config)?;
let column_type = get_column(column_alias, return_type, config)?;
let column_type = &get_column(column_alias, return_type, config)?.data_type;
let type_definition = ClickHouseTypeDefinition::from_table_column(
column_type,
column_alias,
Expand Down Expand Up @@ -180,7 +180,8 @@ impl RowTypeString {
arguments: _,
} => {
let return_type = get_return_type(table_alias, config)?;
let column_type = get_column(column_alias, return_type, config)?;
let column_type =
&get_column(column_alias, return_type, config)?.data_type;
let type_definition = ClickHouseTypeDefinition::from_table_column(
column_type,
column_alias,
Expand Down Expand Up @@ -394,7 +395,7 @@ fn get_column<'a>(
column_alias: &FieldName,
return_type: &ObjectTypeName,
config: &'a ServerConfig,
) -> Result<&'a ClickHouseDataType, TypeStringError> {
) -> Result<&'a ColumnType, TypeStringError> {
let table_type =
config
.table_types
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,14 @@
"return_type": {
"kind": "definition",
"columns": {
"Id": "UInt32",
"Name": "String"
"Id": {
"data_type": "UInt32",
"comment": null
},
"Name": {
"data_type": "String",
"comment": "This is a string comment. Comments can be null"
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@
"columns": {
"type": "object",
"additionalProperties": {
"type": "string"
"$ref": "#/definitions/ColumnDefinition"
}
}
}
Expand Down Expand Up @@ -160,6 +160,31 @@
}
]
},
"ColumnDefinition": {
"description": "Either a data type, or an object with data_type and optional comment",
"anyOf": [
{
"type": "string"
},
{
"type": "object",
"required": [
"data_type"
],
"properties": {
"data_type": {
"type": "string"
},
"comment": {
"type": [
"string",
"null"
]
}
}
}
]
},
"ParameterizedQueryConfigFile": {
"type": "object",
"required": [
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
source: crates/ndc-clickhouse/tests/query_builder.rs
source: crates/ndc-clickhouse-core/tests/query_builder.rs
expression: schema
---
scalar_types:
Expand Down Expand Up @@ -464,6 +464,7 @@ object_types:
type: named
name: UInt32
Name:
description: This is a string comment. Comments can be null
type:
type: named
name: String
Expand Down