4 changes: 2 additions & 2 deletions scripts/data_generators/connections/spark_rest/__init__.py
@@ -28,7 +28,7 @@
import os

CONNECTION_KEY = 'spark-rest'
-SPARK_RUNTIME_PATH = os.path.join(os.path.dirname(__file__), '..', '..', 'iceberg-spark-runtime-3.5_2.12-1.9.0.jar')
+SPARK_RUNTIME_PATH = os.path.join(os.path.dirname(__file__), '..', '..', 'iceberg-spark-runtime-4.0_2.13-1.10.0.jar')


@IcebergConnection.register(CONNECTION_KEY)
@@ -39,7 +39,7 @@ def __init__(self):

def get_connection(self):
os.environ["PYSPARK_SUBMIT_ARGS"] = (
-    "--packages org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.9.0,org.apache.iceberg:iceberg-aws-bundle:1.9.0 pyspark-shell"
+    "--packages org.apache.iceberg:iceberg-spark-runtime-4.0_2.13:1.10.0,org.apache.iceberg:iceberg-aws-bundle:1.10.0 pyspark-shell"
)
os.environ["AWS_REGION"] = "us-east-1"
os.environ["AWS_ACCESS_KEY_ID"] = "admin"
Binary file not shown (the bundled iceberg-spark-runtime jar referenced by SPARK_RUNTIME_PATH above).
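The runtime coordinates have to agree everywhere they appear: the bundled jar above, the --packages submit arguments, and the pyspark pin in scripts/requirements.txt below (all now Spark 4.0 / Scala 2.13 / Iceberg 1.10.0). A minimal sketch of how such a session is typically wired to a REST catalog; the catalog name "rest" and the builder settings are assumptions, not part of this diff:

from pyspark.sql import SparkSession

# Assumed wiring: an Iceberg SparkCatalog pointed at the REST server the
# tests attach to on http://127.0.0.1:8181 (see test_basic_variant.test).
spark = (
    SparkSession.builder
    .config("spark.sql.catalog.rest", "org.apache.iceberg.spark.SparkCatalog")
    .config("spark.sql.catalog.rest.type", "rest")
    .config("spark.sql.catalog.rest.uri", "http://127.0.0.1:8181")
    .getOrCreate()
)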
21 changes: 21 additions & 0 deletions scripts/data_generators/tests/variant_column/__init__.py
@@ -0,0 +1,21 @@
from scripts.data_generators.tests.base import IcebergTest
import pathlib
import tempfile
import duckdb


@IcebergTest.register()
class Test(IcebergTest):
def __init__(self):
path = pathlib.PurePath(__file__)
super().__init__(path.parent.name)

# Create a temporary directory
self.tempdir = pathlib.Path(tempfile.mkdtemp())
self.parquet_file = self.tempdir / "tmp.parquet"

duckdb_con = duckdb.connect()
duckdb_con.execute(f"copy (select * from range(100) t(col)) to '{self.parquet_file}' (FORMAT PARQUET)")

def setup(self, con):
con.con.read.parquet(self.parquet_file.as_posix()).createOrReplaceTempView('parquet_file_view')
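The generator writes a 100-row Parquet file with DuckDB, and setup() registers it with the underlying SparkSession (con.con) as parquet_file_view, the view the q*.sql scripts read from. A minimal standalone replay of the same flow; the path and session wiring here are illustrative, not taken from this PR:

import duckdb
from pyspark.sql import SparkSession

# Write the same 100-row file the generator produces.
duckdb.connect().execute(
    "COPY (SELECT * FROM range(100) t(col)) TO 'tmp.parquet' (FORMAT PARQUET)"
)
spark = SparkSession.builder.getOrCreate()  # assumed: already Iceberg-enabled
spark.read.parquet("tmp.parquet").createOrReplaceTempView("parquet_file_view")
# q00.sql and q01.sql then run against this view to build the Iceberg table.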
6 changes: 6 additions & 0 deletions scripts/data_generators/tests/variant_column/q00.sql
@@ -0,0 +1,6 @@
CREATE OR REPLACE TABLE default.variant_column
TBLPROPERTIES (
'format-version'='3',
'write.update.mode'='copy-on-write'
)
AS SELECT col::VARIANT FROM parquet_file_view t(col);
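variant is new in version 3 of the Iceberg table spec, which is why the table pins 'format-version'='3'; the CTAS casts the generated BIGINT col to Spark's VARIANT so the table is created with a variant column. A quick check sketch, assuming the generator's Spark session from above:

# Hedged sketch: confirm the column landed as variant on the v3 table.
spark.sql("DESCRIBE TABLE default.variant_column").show()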
4 changes: 4 additions & 0 deletions scripts/data_generators/tests/variant_column/q01.sql
@@ -0,0 +1,4 @@
INSERT INTO default.variant_column VALUES
(-123::VARIANT),
(123::VARIANT),
(256::VARIANT);
2 changes: 1 addition & 1 deletion scripts/requirements.txt
@@ -1,4 +1,4 @@
-pyspark==3.5.0
+pyspark==4.0.1
duckdb
pyiceberg
pyarrow
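The pyspark pin has to move in lockstep with the runtime jar: iceberg-spark-runtime-4.0_2.13 targets Spark 4.0 on Scala 2.13 and will not load into a 3.5/2.12 session. A small sanity check, assuming it runs inside the generator's environment:

import pyspark

# Fail fast if the environment still has the old Spark line installed.
assert pyspark.__version__.startswith("4.0"), pyspark.__version__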
3 changes: 3 additions & 0 deletions src/metadata/iceberg_column_definition.cpp
@@ -164,6 +164,9 @@ LogicalType IcebergColumnDefinition::ParsePrimitiveTypeString(const string &type
		auto scale = std::stoi(digits[1]);
		return LogicalType::DECIMAL(width, scale);
	}
+	if (type_str == "variant") {
+		return LogicalType::JSON();
+	}
	throw InvalidConfigurationException("Unrecognized primitive type: %s", type_str);
}
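With this mapping an Iceberg variant column surfaces in DuckDB as JSON, so ordinary JSON functions and comparisons apply to it. A sketch against the test catalog; the ATTACH options are copied from test_basic_variant.test below, and it assumes the matching S3 secret has already been created:

import duckdb

con = duckdb.connect()
con.execute("""
    ATTACH '' AS my_datalake (
        TYPE ICEBERG,
        CLIENT_ID 'admin',
        CLIENT_SECRET 'password',
        ENDPOINT 'http://127.0.0.1:8181'
    )
""")
# typeof() is expected to report JSON for the mapped variant column.
print(con.execute(
    "SELECT typeof(col) FROM my_datalake.default.variant_column LIMIT 1"
).fetchone())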

56 changes: 56 additions & 0 deletions test/sql/local/irc/test_basic_variant.test
@@ -0,0 +1,56 @@
# name: test/sql/local/irc/test_basic_variant.test
# description: test variant reading capabilities
# group: [irc]

require-env ICEBERG_SERVER_AVAILABLE

require avro

require parquet

require iceberg

require httpfs

require aws

# Do not ignore 'HTTP' error messages!
set ignore_error_messages

statement ok
CREATE SECRET local_catalog_secret (
TYPE S3,
KEY_ID 'admin',
SECRET 'password',
ENDPOINT '127.0.0.1:9000',
URL_STYLE 'path',
USE_SSL 0
);

statement ok
ATTACH '' AS my_datalake (
TYPE ICEBERG,
CLIENT_ID 'admin',
CLIENT_SECRET 'password',
ENDPOINT 'http://127.0.0.1:8181'
);

statement ok
use my_datalake.default;

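# 103 = 100 rows from q00.sql (range(100)) + 3 rows inserted by q01.sql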
query I
select count(*) from my_datalake.default.variant_column;
----
103

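# the two queries labeled expected_res must return identical rowsorted results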
query I rowsort expected_res
select * from range(100) UNION ALL VALUES (-123), (123), (256)
----

query I rowsort expected_res
select * from my_datalake.default.variant_column;
----

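# only 123 and 256 exceed 100; -123 and range(100) do not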
query I
select count(*) from my_datalake.default.variant_column where col > 100
----
2