diff --git a/scripts/data_generators/connections/spark_rest/__init__.py b/scripts/data_generators/connections/spark_rest/__init__.py index d36dc6991..4fc219f3e 100644 --- a/scripts/data_generators/connections/spark_rest/__init__.py +++ b/scripts/data_generators/connections/spark_rest/__init__.py @@ -28,7 +28,7 @@ import os CONNECTION_KEY = 'spark-rest' -SPARK_RUNTIME_PATH = os.path.join(os.path.dirname(__file__), '..', '..', 'iceberg-spark-runtime-3.5_2.12-1.9.0.jar') +SPARK_RUNTIME_PATH = os.path.join(os.path.dirname(__file__), '..', '..', 'iceberg-spark-runtime-4.0_2.13-1.10.0.jar') @IcebergConnection.register(CONNECTION_KEY) @@ -39,7 +39,7 @@ def __init__(self): def get_connection(self): os.environ["PYSPARK_SUBMIT_ARGS"] = ( - "--packages org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.9.0,org.apache.iceberg:iceberg-aws-bundle:1.9.0 pyspark-shell" + "--packages org.apache.iceberg:iceberg-spark-runtime-4.0_2.13:1.10.0,org.apache.iceberg:iceberg-aws-bundle:1.10.0 pyspark-shell" ) os.environ["AWS_REGION"] = "us-east-1" os.environ["AWS_ACCESS_KEY_ID"] = "admin" diff --git a/scripts/data_generators/iceberg-spark-runtime-4.0_2.13-1.10.0.jar b/scripts/data_generators/iceberg-spark-runtime-4.0_2.13-1.10.0.jar new file mode 100644 index 000000000..3b495f028 Binary files /dev/null and b/scripts/data_generators/iceberg-spark-runtime-4.0_2.13-1.10.0.jar differ diff --git a/scripts/data_generators/tests/variant_column/__init__.py b/scripts/data_generators/tests/variant_column/__init__.py new file mode 100644 index 000000000..136535f62 --- /dev/null +++ b/scripts/data_generators/tests/variant_column/__init__.py @@ -0,0 +1,21 @@ +from scripts.data_generators.tests.base import IcebergTest +import pathlib +import tempfile +import duckdb + + +@IcebergTest.register() +class Test(IcebergTest): + def __init__(self): + path = pathlib.PurePath(__file__) + super().__init__(path.parent.name) + + # Create a temporary directory + self.tempdir = pathlib.Path(tempfile.mkdtemp()) + 
self.parquet_file = self.tempdir / "tmp.parquet" + + duckdb_con = duckdb.connect() + duckdb_con.execute(f"copy (select * from range(100) t(col)) to '{self.parquet_file}' (FORMAT PARQUET)") + + def setup(self, con): + con.con.read.parquet(self.parquet_file.as_posix()).createOrReplaceTempView('parquet_file_view') diff --git a/scripts/data_generators/tests/variant_column/q00.sql b/scripts/data_generators/tests/variant_column/q00.sql new file mode 100644 index 000000000..7e5555ffc --- /dev/null +++ b/scripts/data_generators/tests/variant_column/q00.sql @@ -0,0 +1,6 @@ +CREATE or REPLACE TABLE default.variant_column +TBLPROPERTIES ( + 'format-version'='3', + 'write.update.mode'='copy-on-write' +) +AS SELECT col::VARIANT FROM parquet_file_view t(col); \ No newline at end of file diff --git a/scripts/data_generators/tests/variant_column/q01.sql b/scripts/data_generators/tests/variant_column/q01.sql new file mode 100644 index 000000000..d68952d5b --- /dev/null +++ b/scripts/data_generators/tests/variant_column/q01.sql @@ -0,0 +1,4 @@ +INSERT INTO default.variant_column VALUES + (-123::VARIANT), + (123::VARIANT), + (256::VARIANT); diff --git a/scripts/requirements.txt b/scripts/requirements.txt index de8806b13..4343bb7be 100644 --- a/scripts/requirements.txt +++ b/scripts/requirements.txt @@ -1,4 +1,4 @@ -pyspark==3.5.0 +pyspark==4.0.1 duckdb pyiceberg pyarrow \ No newline at end of file diff --git a/src/metadata/iceberg_column_definition.cpp b/src/metadata/iceberg_column_definition.cpp index 5dcf898af..e7e042e24 100644 --- a/src/metadata/iceberg_column_definition.cpp +++ b/src/metadata/iceberg_column_definition.cpp @@ -164,6 +164,9 @@ LogicalType IcebergColumnDefinition::ParsePrimitiveTypeString(const string &type auto scale = std::stoi(digits[1]); return LogicalType::DECIMAL(width, scale); } + if (type_str == "variant") { + return LogicalType::JSON(); + } throw InvalidConfigurationException("Unrecognized primitive type: %s", type_str); } diff --git 
a/test/sql/local/irc/test_basic_variant.test b/test/sql/local/irc/test_basic_variant.test new file mode 100644 index 000000000..03d1e0cee --- /dev/null +++ b/test/sql/local/irc/test_basic_variant.test @@ -0,0 +1,57 @@ +# name: test/sql/local/irc/test_basic_variant.test +# description: test variant reading capabilities +# group: [irc] + +require-env ICEBERG_SERVER_AVAILABLE + +require avro + +require parquet + +require iceberg + +require httpfs + +require aws + +# Do not ignore 'HTTP' error messages! +set ignore_error_messages + +statement ok +CREATE SECRET local_catalog_secret ( + TYPE S3, + KEY_ID 'admin', + SECRET 'password', + ENDPOINT '127.0.0.1:9000', + URL_STYLE 'path', + USE_SSL 0 +); + +statement ok +ATTACH '' AS my_datalake ( + TYPE ICEBERG, + CLIENT_ID 'admin', + CLIENT_SECRET 'password', + ENDPOINT 'http://127.0.0.1:8181' +); + +statement ok +use my_datalake.default; + +query I +select count(*) from my_datalake.default.variant_column ; +---- +103 + +query I rowsort expected_res +select * from range(100) UNION ALL VALUES (-123), (123), (256) +---- + +query I rowsort expected_res +select * from my_datalake.default.variant_column; +---- + +query I +select count(*) from my_datalake.default.variant_column where col > 100 +---- +2