-
Notifications
You must be signed in to change notification settings - Fork 103
add feature to create tables from pyarrow objects #597
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
joe-clickhouse
merged 6 commits into
ClickHouse:main
from
akkik04:feature/arrow-schema-to-column-defs
Dec 3, 2025
Merged
Changes from 4 commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
3a54ea9
working addition for creating tables from pyarrow objects
akkik04 4f0d9ce
update the changelog
akkik04 1c4cf8b
address PR review
akkik04 8f8da0b
more pr fixes
akkik04 7773276
ran linting + updated docstring in ddl function
akkik04 4da3ab0
linting again
akkik04 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
105 changes: 105 additions & 0 deletions
105
tests/integration_tests/test_pyarrow_ddl_integration.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,105 @@ | ||
| import pytest | ||
|
|
||
| from clickhouse_connect.driver import Client | ||
|
|
||
| pytest.importorskip("pyarrow") | ||
|
|
||
| import pyarrow as pa | ||
|
|
||
| from clickhouse_connect.driver.ddl import ( | ||
| arrow_schema_to_column_defs, | ||
| create_table, | ||
| create_table_from_arrow_schema, | ||
| ) | ||
|
|
||
|
|
||
def test_arrow_create_table_and_insert(test_client: Client):
    """Round-trip an Arrow table through a table created from its schema.

    Builds the CREATE TABLE statement with ``create_table_from_arrow_schema``,
    executes it, inserts a two-row Arrow table via ``insert_arrow`` and checks
    that the rows read back equal the rows written.
    """
    if not test_client.min_version("20"):
        pytest.skip(
            f"Not supported server version {test_client.server_version}"
        )

    table_name = "test_arrow_basic_integration"

    # Start from a clean slate in case an earlier run left the table behind.
    test_client.command(f"DROP TABLE IF EXISTS {table_name}")

    schema = pa.schema(
        [
            ("id", pa.int64()),
            ("name", pa.string()),
            ("score", pa.float32()),
            ("flag", pa.bool_()),
        ]
    )

    ddl = create_table_from_arrow_schema(
        table_name=table_name,
        schema=schema,
        engine="MergeTree",
        engine_params={"ORDER BY": "id"},
    )

    try:
        test_client.command(ddl)

        arrow_table = pa.table(
            {
                "id": [1, 2],
                "name": ["a", "b"],
                "score": [1.5, 2.5],
                "flag": [True, False],
            },
            schema=schema,
        )

        test_client.insert_arrow(table=table_name, arrow_table=arrow_table)

        result = test_client.query(
            f"SELECT id, name, score, flag FROM {table_name} ORDER BY id"
        )
        assert result.result_rows == [
            (1, "a", 1.5, True),
            (2, "b", 2.5, False),
        ]
    finally:
        # Drop the table even when the insert, query or assertion fails, so a
        # failing run does not leave the table behind on the server.
        test_client.command(f"DROP TABLE IF EXISTS {table_name}")
|
|
||
|
|
||
def test_arrow_schema_to_column_defs(test_client: Client):
    """Create a table from explicitly converted column defs and round-trip data.

    Converts the Arrow schema with ``arrow_schema_to_column_defs``, passes the
    result to ``create_table``, executes the DDL, inserts a two-row Arrow table
    and checks that the rows read back equal the rows written.
    """
    table_name = "test_arrow_manual_integration"

    # Start from a clean slate in case an earlier run left the table behind.
    test_client.command(f"DROP TABLE IF EXISTS {table_name}")

    schema = pa.schema(
        [
            ("id", pa.int64()),
            ("name", pa.string()),
        ]
    )

    # check using the explicit helper path.
    col_defs = arrow_schema_to_column_defs(schema)

    ddl = create_table(
        table_name=table_name,
        columns=col_defs,
        engine="MergeTree",
        engine_params={"ORDER BY": "id"},
    )

    try:
        test_client.command(ddl)

        arrow_table = pa.table(
            {
                "id": [10, 20],
                "name": ["x", "y"],
            },
            schema=schema,
        )

        test_client.insert_arrow(table=table_name, arrow_table=arrow_table)

        result = test_client.query(f"SELECT id, name FROM {table_name} ORDER BY id")
        assert result.result_rows == [
            (10, "x"),
            (20, "y"),
        ]
    finally:
        # Drop the table even when the insert, query or assertion fails, so a
        # failing run does not leave the table behind on the server.
        test_client.command(f"DROP TABLE IF EXISTS {table_name}")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,138 @@ | ||
| import pytest | ||
|
|
||
| pytest.importorskip("pyarrow") | ||
|
|
||
| import pyarrow as pa | ||
|
|
||
| from clickhouse_connect.driver.ddl import ( | ||
| arrow_schema_to_column_defs, | ||
| create_table, | ||
| create_table_from_arrow_schema, | ||
| ) | ||
|
|
||
|
|
||
def test_arrow_schema_to_column_defs_basic_mappings():
    """Check the column name and ClickHouse type name produced per Arrow type."""
    # Each row: (column name, Arrow type, expected ClickHouse type name).
    cases = [
        ("i8", pa.int8(), "Int8"),
        ("i16", pa.int16(), "Int16"),
        ("i32", pa.int32(), "Int32"),
        ("i64", pa.int64(), "Int64"),
        ("u8", pa.uint8(), "UInt8"),
        ("u16", pa.uint16(), "UInt16"),
        ("u32", pa.uint32(), "UInt32"),
        ("u64", pa.uint64(), "UInt64"),
        # float16 is expected to come back as Float32, same as float32.
        ("f16", pa.float16(), "Float32"),
        ("f32", pa.float32(), "Float32"),
        ("f64", pa.float64(), "Float64"),
        ("s", pa.string(), "String"),
        ("ls", pa.large_string(), "String"),
        ("b", pa.bool_(), "Bool"),
    ]
    schema = pa.schema([(name, arrow_type) for name, arrow_type, _ in cases])

    col_defs = arrow_schema_to_column_defs(schema)

    # Names must be preserved in schema order.
    actual_names = [col.name for col in col_defs]
    assert actual_names == [name for name, _, _ in cases]

    actual_types = [col.ch_type.name for col in col_defs]
    assert actual_types == [expected for _, _, expected in cases]
|
|
||
|
|
||
def test_arrow_schema_to_column_defs_unsupported_type_raises():
    """A schema with a millisecond timestamp column is rejected with TypeError."""
    timestamp_field = pa.field("ts", pa.timestamp("ms"))
    schema = pa.schema([timestamp_field])

    expect_unsupported = pytest.raises(TypeError, match="Unsupported Arrow type")
    with expect_unsupported:
        arrow_schema_to_column_defs(schema)
|
|
||
|
|
||
def test_arrow_schema_to_column_defs_invalid_input_type():
    """Passing a plain string instead of a schema raises TypeError."""
    not_a_schema = "not a schema"
    expect_type_error = pytest.raises(TypeError, match="Expected pyarrow.Schema")
    with expect_type_error:
        arrow_schema_to_column_defs(not_a_schema)
|
|
||
|
|
||
def test_create_table_from_arrow_schema_builds_expected_ddl():
    """The DDL generated for a four-column schema equals the exact expected text."""
    fields = [
        pa.field("id", pa.int64()),
        pa.field("name", pa.string()),
        pa.field("score", pa.float32()),
        pa.field("flag", pa.bool_()),
    ]
    expected_ddl = (
        "CREATE TABLE arrow_basic_test "
        "(id Int64, name String, score Float32, flag Bool) "
        "ENGINE MergeTree ORDER BY id"
    )

    actual_ddl = create_table_from_arrow_schema(
        table_name="arrow_basic_test",
        schema=pa.schema(fields),
        engine="MergeTree",
        engine_params={"ORDER BY": "id"},
    )

    assert actual_ddl == expected_ddl
|
|
||
|
|
||
def test_create_table_from_arrow_schema_matches_manual_create_table():
    """The wrapper yields the same DDL as converting the schema and calling create_table."""
    target_table = "arrow_compare_test"
    engine_name = "MergeTree"
    schema = pa.schema(
        [
            pa.field("id", pa.int64()),
            pa.field("name", pa.string()),
        ]
    )

    # Manual path: convert the schema first, then build the DDL from the defs.
    manual_ddl = create_table(
        table_name=target_table,
        columns=arrow_schema_to_column_defs(schema),
        engine=engine_name,
        engine_params={"ORDER BY": "id"},
    )

    # Wrapper path: hand the schema straight to the convenience function.
    # A fresh engine_params dict is passed to each call, as in the original.
    wrapper_ddl = create_table_from_arrow_schema(
        table_name=target_table,
        schema=schema,
        engine=engine_name,
        engine_params={"ORDER BY": "id"},
    )

    assert manual_ddl == wrapper_ddl
joe-clickhouse marked this conversation as resolved.
Show resolved
Hide resolved
|
||
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.