spiceai
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 0 deletions b/‎.gitignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎README.md‎
Lines changed: 16 additions & 0 deletions b/‎README.md‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎spice_jdbc/dialect.tdd‎
Lines changed: 28 additions & 2 deletions b/‎spice_jdbc/dialect.tdd‎
Lines changed: 28 additions & 2 deletions
diff --git a/‎tdvt/.gitignore‎
Lines changed: 3 additions & 0 deletions b/‎tdvt/.gitignore‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎tdvt/TestV1/Staples_utf8_headers.csv‎
Lines changed: 2 additions & 2 deletions b/‎tdvt/TestV1/Staples_utf8_headers.csv‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎tdvt/TestV1/arrow_utils.py‎
Lines changed: 69 additions & 0 deletions b/‎tdvt/TestV1/arrow_utils.py‎
Lines changed: 69 additions & 0 deletions
diff --git a/‎tdvt/TestV1/calcs.parquet‎
8.11 KB b/‎tdvt/TestV1/calcs.parquet‎
8.11 KB
diff --git a/‎tdvt/TestV1/calcs_csv_to_parquet.py‎
Lines changed: 79 additions & 0 deletions b/‎tdvt/TestV1/calcs_csv_to_parquet.py‎
Lines changed: 79 additions & 0 deletions
diff --git a/‎tdvt/TestV1/staples.parquet‎
2.66 MB b/‎tdvt/TestV1/staples.parquet‎
2.66 MB
diff --git a/‎tdvt/TestV1/staples_csv_to_parquet.py‎
Lines changed: 126 additions & 0 deletions b/‎tdvt/TestV1/staples_csv_to_parquet.py‎
Lines changed: 126 additions & 0 deletions
@@ -9,6 +9,7 @@ logs
 
 # Python virtual environment
 .venv
+__pycache__
 
 # Build artifacts
 *.taco
 
@@ -44,6 +44,22 @@ make install
 make run-tableau-allow-unsigned
 ```
 
+## Limitations
+
+### Multiplication and Division on Intervals Are Not Supported
+
+Multiplying or dividing intervals is not supported and will result in a `Cannot coerce arithmetic expression` error. For example:
+
+```sql
+SELECT "orders"."order_date" + "orders"."delivery_days" * INTERVAL '1 DAY'
+```
+
+```text
+Error during planning: Cannot coerce arithmetic expression Int64 * Interval(MonthDayNano) to valid types
+```
+
+This limitation stems from DataFusion's limited support for arithmetic operations on intervals, tracked in [apache/datafusion#13850](https://github.com/apache/arrow-datafusion/issues/13850).
+
 ## Development
 
 ### Prerequisites
 
@@ -1,4 +1,30 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <dialect name="SpiceDialect" base="PostgreSQL90Dialect" class="spice_jdbc" version="18.1">
-  
-</dialect>
+    <function-map>
+        <!-- Override the default mapping to `VARIANCE`, which is not available in DataFusion -->
+        <function group="aggregate" name="VAR" return-type="real">
+            <!-- https://datafusion.apache.org/user-guide/sql/aggregate_functions.html#var -->
+            <formula>VAR(%1)</formula>
+            <unagg-formula>NULL</unagg-formula>
+            <argument type="real" />
+        </function>
+        <function group='numeric' name='SIGN' return-type='int'>
+            <!-- https://datafusion.apache.org/user-guide/sql/scalar_functions.html#signum -->
+            <formula>CAST(SIGNUM(%1) AS SMALLINT)</formula>
+            <argument type='real' />
+        </function>
+        <!-- Override %1^2 which is not supported -->
+        <function group='numeric' name='SQUARE' return-type='real'>
+            <formula>((%1)*(%1))</formula>
+            <argument type='real' />
+        </function>
+         <function group='numeric' name='SQUARE' return-type='int'>
+            <formula>((%1)*(%1))</formula>
+            <argument type='int' />
+        </function>
+        <function group='date' name='ISDATE' return-type='bool'>
+            <formula>(TRY_CAST(%1 AS DATE) IS NOT NULL)</formula>
+            <argument type='str' />
+        </function>
+    </function-map>
+</dialect>
@@ -1,5 +1,8 @@
 /test_results_combined.csv
+/test_metadata.csv
 /tdvt_output_combined.json
 /tdvt_log_combined.txt
 /tdvt_actuals_combined.zip
 /tabquery_logs.zip
+/*.twb
+/*.twbr
@@ -1,5 +1,5 @@
-"Item Count","Ship Priority","Order Priority","Order Status","Order Quantity","Sales Total","Discount","Tax Rate","Ship Mode","Fill Time","Gross Profit","Price","Ship Handle Cost","Employee Name","Employee Dept","Manager Name","Employee Yrs Exp","Employee Salary","Customer Name","Customer State","Call Center Region","Customer Balance","Customer Segment","Prod Type1","Prod Type2","Prod Type3","Prod Type4","Product Name","Product Container","Ship Promo","Supplier Name","Supplier Balance","Supplier Region","Supplier State","Order ID","Order Year","Order Month","Order Day","Order Date","Order Quarter","Product Base Margin","Product ID","Receive Time","Received Date","Ship Date","Ship Charge","Total Cycle Time","Product In Stock","PID","Market Segment" 
-1,0,1-URGENT,O,11,766.85,0.07,0.02,DELIVERY TRUCK,1,33.61,70.9800,26.2000,Harold Pretty,1004,"Carpenter, Jan",4,56950.0000,Shui Tom,WASHINGTON,WEST,3652,HOME OFFICE,FURNITURE,BOOKCASES,METAL BOOKCASES,METAL BOOKCASES,"Safco Value Mate Series Steel Bookcases, Baked Enamel Finish on Steel, Gray",JUMBO BOX,REGULAR SHIPPING,Supplier_042,6565,EAST,DELAWARE,4097,2002,5,24,2002-05-24 00:00:00,Q2,0.57,1006,7,2002-06-01 00:00:00,2002-05-25 00:00:00,26.2000,8,YES,49239,HOME OFFICE
+"Item Count","Ship Priority","Order Priority","Order Status","Order Quantity","Sales Total","Discount","Tax Rate","Ship Mode","Fill Time","Gross Profit","Price","Ship Handle Cost","Employee Name","Employee Dept","Manager Name","Employee Yrs Exp","Employee Salary","Customer Name","Customer State","Call Center Region","Customer Balance","Customer Segment","Prod Type1","Prod Type2","Prod Type3","Prod Type4","Product Name","Product Container","Ship Promo","Supplier Name","Supplier Balance","Supplier Region","Supplier State","Order ID","Order Year","Order Month","Order Day","Order Date","Order Quarter","Product Base Margin","Product ID","Receive Time","Received Date","Ship Date","Ship Charge","Total Cycle Time","Product In Stock","PID","Market Segment"
+1,0,1-URGENT,O,11,766.85,0.07,0.02,DELIVERY TRUCK,1,33.61,70.9800,26.2000,Harold Pretty,1004,"Carpenter, Jan",4,56950.0000,Shui Tom,WASHINGTON,WEST,3652.00,HOME OFFICE,FURNITURE,BOOKCASES,METAL BOOKCASES,METAL BOOKCASES,"Safco Value Mate Series Steel Bookcases, Baked Enamel Finish on Steel, Gray",JUMBO BOX,REGULAR SHIPPING,Supplier_042,6565,EAST,DELAWARE,4097,2002,5,24,2002-05-24 00:00:00,Q2,0.57,1006,7,2002-06-01 00:00:00,2002-05-25 00:00:00,26.2000,8,YES,49239,HOME OFFICE
 1,0,1-URGENT,O,21,76.29,0,0.05,REGULAR AIR,2,-45.65,3.2800,3.9700,Harold Pretty,1004,"Carpenter, Jan",4,56950.0000,Shui Tom,WASHINGTON,WEST,3652,HOME OFFICE,OFFICE SUPPLIES,PENS & ART SUPPLIES,ART SUPPLIES,COLORED PENS,Newell 342,WRAP BAG,REGULAR SHIPPING,Supplier_071,8180,WEST,WASHINGTON,4097,2002,5,24,2002-05-24 00:00:00,Q2,0.56,342,4,2002-05-30 00:00:00,2002-05-26 00:00:00,3.9700,6,YES,49240,HOME OFFICE
 1,0,1-URGENT,O,37,758.02,0.07,0.05,REGULAR AIR,1,431.2,20.9800,1.4900,Harold Pretty,1004,"Carpenter, Jan",4,56950.0000,Shui Tom,WASHINGTON,WEST,3652,HOME OFFICE,OFFICE SUPPLIES,BINDERS AND BINDER ACCESSORIES,ROUND RING BINDERS,ROUND RING BINDERS,Avery Legal 4-Ring Binder,SMALL BOX,FREE SHIPPING,Supplier_068,5119,WEST,CALIFORNIA,4097,2002,5,24,2002-05-24 00:00:00,Q2,0.35,1587,2,2002-05-27 00:00:00,2002-05-25 00:00:00,0.0000,3,YES,49241,HOME OFFICE
 1,0,1-URGENT,O,25,407.75,0,0.03,REGULAR AIR,1,-82.84,15.4200,10.6800,Leslie Monsalve-Jones,1007,"Zingarella, Rosie",11,59850.0000,David Wiener,COLORADO,WEST,4606,HOME OFFICE,OFFICE SUPPLIES,STORAGE & ORGANIZATION,PORTABLE STORAGE,PORTABLE STORAGE,"Decoflex Hanging Personal Folder File, Blue",SMALL BOX,REGULAR SHIPPING,Supplier_080,-40,WEST,CALIFORNIA,33856,2002,5,25,2002-05-25 00:00:00,Q2,0.58,395,3,2002-05-29 00:00:00,2002-05-26 00:00:00,10.6800,4,YES,49242,HOME OFFICE
 
@@ -0,0 +1,69 @@
+import pyarrow as pa
+import pyarrow.csv as csv
+import pyarrow.parquet as pq
+
+def read_csv_with_schema(csv_path, schema, skip_header=True, delimiter=",", quote_char='"'):
+    """
+    Read a CSV file using a specified PyArrow schema.
+    
+    Args:
+        csv_path (str): Path to the CSV file
+        schema (pa.Schema): PyArrow schema to apply
+        skip_header (bool): Whether to skip the header row
+        delimiter (str): CSV delimiter character
+        quote_char (str): CSV quote character
+        
+    Returns:
+        pa.Table: PyArrow table with the specified schema
+    """
+    read_options = csv.ReadOptions(
+        skip_rows=1 if skip_header else 0,
+        column_names=schema.names
+    )
+
+    parse_options = csv.ParseOptions(
+        delimiter=delimiter,
+        quote_char=quote_char
+    )
+
+    convert_options = csv.ConvertOptions(
+        column_types={field.name: field.type for field in schema},
+        strings_can_be_null=True,
+        auto_dict_encode=True,
+        timestamp_parsers=["%Y-%m-%d", "%Y-%m-%d %H:%M:%S", "%H:%M:%S"]
+    )
+
+    return csv.read_csv(
+        csv_path,
+        read_options=read_options,
+        parse_options=parse_options,
+        convert_options=convert_options
+    )
+
+def write_table_to_parquet(table, parquet_path):
+    """
+    Write a PyArrow table to a Parquet file.
+    
+    Args:
+        table (pa.Table): PyArrow table to write
+        parquet_path (str): Output Parquet file path
+        
+    Returns:
+        str: Path to the created Parquet file
+    """
+    pq.write_table(table, parquet_path)
+    return parquet_path
+
+def print_parquet_schema(parquet_path):
+    """
+    Print the schema of a Parquet file.
+    
+    Args:
+        parquet_path (str): Path to the Parquet file
+    """
+    schema = pq.read_schema(parquet_path)
+    print(f"Schema in Parquet file: {parquet_path}")
+    for field in schema:
+        print(f"  {field.name}: {field.type}")
+    
+    return schema
@@ -0,0 +1,79 @@
+import pyarrow.csv as csv
+import pyarrow as pa
+import pyarrow.parquet as pq
+from datetime import datetime
+
+from arrow_utils import print_parquet_schema, read_csv_with_schema, write_table_to_parquet
+
+# CSV input path and Parquet output path
+csv_path = "./Calcs_headers.csv"
+parquet_path = "calcs.parquet"
+
+# Target Calcs schema: https://github.com/tableau/connector-plugin-sdk/blob/master/tests/datasets/TestV1/DDL/Calcs.sql
+arrow_schema = pa.schema([
+    ("key", pa.string()),
+    ("num0", pa.float64()),
+    ("num1", pa.float64()),
+    ("num2", pa.float64()),
+    ("num3", pa.float64()),
+    ("num4", pa.float64()),
+    ("str0", pa.string()),
+    ("str1", pa.string()),
+    ("str2", pa.string()),
+    ("str3", pa.string()),
+    ("int0", pa.int32()),
+    ("int1", pa.int32()),
+    ("int2", pa.int32()),
+    ("int3", pa.int32()),
+    ("bool0", pa.bool_()),
+    ("bool1", pa.bool_()),
+    ("bool2", pa.bool_()),
+    ("bool3", pa.bool_()),
+    ("date0", pa.date32()),
+    ("date1", pa.date32()),
+    ("date2", pa.date32()),
+    ("date3", pa.date32()),
+    ("time0", pa.timestamp("s")),
+    ("time1", pa.time64("us")),
+    ("datetime0", pa.timestamp("s")),
+    ("datetime1", pa.string()),
+    ("zzz", pa.string())
+])
+
+try:
+    table = read_csv_with_schema(csv_path, arrow_schema)
+    write_table_to_parquet(table, parquet_path)
+    print_parquet_schema(parquet_path)
+    
+except Exception as e:
+    print(f"Error during conversion: {e}")
+
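+# Schema of the generated Parquet file, kept for future reference / troubleshooting.
+# Note: columns declared as timestamp("s") are listed as timestamp[ms] because
+# Parquet has no seconds resolution, so pyarrow writes them as milliseconds.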
+# Schema in Parquet file:
+#   key: string
+#   num0: double
+#   num1: double
+#   num2: double
+#   num3: double
+#   num4: double
+#   str0: string
+#   str1: string
+#   str2: string
+#   str3: string
+#   int0: int32
+#   int1: int32
+#   int2: int32
+#   int3: int32
+#   bool0: bool
+#   bool1: bool
+#   bool2: bool
+#   bool3: bool
+#   date0: date32[day]
+#   date1: date32[day]
+#   date2: date32[day]
+#   date3: date32[day]
+#   time0: timestamp[ms]
+#   time1: time64[us]
+#   datetime0: timestamp[ms]
+#   datetime1: string
+#   zzz: string
@@ -0,0 +1,126 @@
+import pyarrow.csv as csv
+import pyarrow as pa
+import pyarrow.parquet as pq
+from datetime import datetime
+
+from arrow_utils import print_parquet_schema, read_csv_with_schema, write_table_to_parquet
+
+# CSV input path and Parquet output path
+csv_path = "./Staples_utf8_headers.csv"
+parquet_path = "staples.parquet"
+
+# Target Staples schema: https://github.com/tableau/connector-plugin-sdk/blob/master/tests/datasets/TestV1/DDL/Staples.sql
+arrow_schema = pa.schema([
+    ("Item Count", pa.int32()),
+    ("Ship Priority", pa.string()),
+    ("Order Priority", pa.string()),
+    ("Order Status", pa.string()),
+    ("Order Quantity", pa.float64()),
+    ("Sales Total", pa.float64()),
+    ("Discount", pa.float64()),
+    ("Tax Rate", pa.float64()),
+    ("Ship Mode", pa.string()),
+    ("Fill Time", pa.float64()),
+    ("Gross Profit", pa.float64()),
+    ("Price", pa.decimal128(18, 4)),
+    ("Ship Handle Cost", pa.decimal128(18, 4)),
+    ("Employee Name", pa.string()),
+    ("Employee Dept", pa.string()),
+    ("Manager Name", pa.string()),
+    ("Employee Yrs Exp", pa.float64()),
+    ("Employee Salary", pa.decimal128(18, 4)),
+    ("Customer Name", pa.string()),
+    ("Customer State", pa.string()),
+    ("Call Center Region", pa.string()),
+    ("Customer Balance", pa.float64()),
+    ("Customer Segment", pa.string()),
+    ("Prod Type1", pa.string()),
+    ("Prod Type2", pa.string()),
+    ("Prod Type3", pa.string()),
+    ("Prod Type4", pa.string()),
+    ("Product Name", pa.string()),
+    ("Product Container", pa.string()),
+    ("Ship Promo", pa.string()),
+    ("Supplier Name", pa.string()),
+    ("Supplier Balance", pa.float64()),
+    ("Supplier Region", pa.string()),
+    ("Supplier State", pa.string()),
+    ("Order ID", pa.string()),
+    ("Order Year", pa.int32()),
+    ("Order Month", pa.int32()),
+    ("Order Day", pa.int32()),
+    ("Order Date", pa.timestamp("s")),
+    ("Order Quarter", pa.string()),
+    ("Product Base Margin", pa.float64()),
+    ("Product ID", pa.string()),
+    ("Receive Time", pa.float64()),
+    ("Received Date", pa.timestamp("s")),
+    ("Ship Date", pa.timestamp("s")),
+    ("Ship Charge", pa.decimal128(18, 4)),
+    ("Total Cycle Time", pa.float64()),
+    ("Product In Stock", pa.string()),
+    ("PID", pa.int32()),
+    ("Market Segment", pa.string())
+])
+
+try:
+    table = read_csv_with_schema(csv_path, arrow_schema)
+    write_table_to_parquet(table, parquet_path)
+    print_parquet_schema(parquet_path)
+    
+except Exception as e:
+    print(f"Error during conversion: {e}")
+
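+# Schema of the generated Parquet file, kept for future reference / troubleshooting.
+# Note: columns declared as timestamp("s") are listed as timestamp[ms] because
+# Parquet has no seconds resolution, so pyarrow writes them as milliseconds.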
+# Schema in Parquet file:
+#   Item Count: int32
+#   Ship Priority: string
+#   Order Priority: string
+#   Order Status: string
+#   Order Quantity: double
+#   Sales Total: double
+#   Discount: double
+#   Tax Rate: double
+#   Ship Mode: string
+#   Fill Time: double
+#   Gross Profit: double
+#   Price: decimal128(18, 4)
+#   Ship Handle Cost: decimal128(18, 4)
+#   Employee Name: string
+#   Employee Dept: string
+#   Manager Name: string
+#   Employee Yrs Exp: double
+#   Employee Salary: decimal128(18, 4)
+#   Customer Name: string
+#   Customer State: string
+#   Call Center Region: string
+#   Customer Balance: double
+#   Customer Segment: string
+#   Prod Type1: string
+#   Prod Type2: string
+#   Prod Type3: string
+#   Prod Type4: string
+#   Product Name: string
+#   Product Container: string
+#   Ship Promo: string
+#   Supplier Name: string
+#   Supplier Balance: double
+#   Supplier Region: string
+#   Supplier State: string
+#   Order ID: string
+#   Order Year: int32
+#   Order Month: int32
+#   Order Day: int32
+#   Order Date: timestamp[ms]
+#   Order Quarter: string
+#   Product Base Margin: double
+#   Product ID: string
+#   Receive Time: double
+#   Received Date: timestamp[ms]
+#   Ship Date: timestamp[ms]
+#   Ship Charge: decimal128(18, 4)
+#   Total Cycle Time: double
+#   Product In Stock: string
+#   PID: int32
+#   Market Segment: string
+