Skip to content

Commit 3765b43

Browse files
committed
improve supported dialects and typeclasses
1 parent 255708f commit 3765b43

File tree

1 file changed

+110
-132
lines changed

1 file changed

+110
-132
lines changed
Lines changed: 110 additions & 132 deletions
Original file line number | Diff line number | Diff line change
@@ -1,134 +1,112 @@
1-
# ============================================================================
2-
# TEMPORAL TYPE TRANSFORMATIONS
3-
# ============================================================================
4-
# This file defines all transformations for date, time, and timestamp types
5-
# across supported database dialects.
6-
# ============================================================================
7-
81
version: "1.0.0"
9-
category_name: "temporal"
10-
description: >
11-
Temporal types represent points in time or durations. This category
12-
covers DATE (calendar dates), TIME (time of day), TIMESTAMP (date+time),
13-
TIMESTAMPTZ (with timezone), and INTERVAL (duration) types.
14-
15-
type_classes:
16-
- "DATE"
17-
- "TIME"
18-
- "TIMESTAMP"
19-
- "TIMESTAMPTZ"
20-
- "TIMESTAMPNTZ"
21-
- "DATETIME"
22-
- "INTERVAL"
23-
- "YEAR"
24-
25-
supported_dialects:
26-
- "databricks"
27-
- "sqlserver"
28-
- "oracle"
29-
- "snowflake"
30-
31-
# ----------------------------------------------------------------------------
32-
# TRANSFORMATIONS
33-
# ----------------------------------------------------------------------------
34-
transformations:
35-
36-
date_to_iso8601:
37-
description: >
38-
Normalize DATE types to ISO 8601 string format (YYYY-MM-DD).
39-
This is the most portable and human-readable date format.
40-
41-
dialects:
42-
all:
43-
types: ["DATE"]
44-
sql: "TO_CHAR({}, 'YYYY-MM-DD')"
45-
46-
sqlserver:
47-
types: ["DATE"]
48-
sql: "CONVERT(VARCHAR(10), {}, 23)"
49-
notes: "Style 23 = ISO 8601 format (YYYY-MM-DD)"
50-
51-
test_cases:
52-
- name: "standard_date"
53-
input: "2025-10-24"
54-
expected_output: "2025-10-24"
55-
56-
- name: "leap_year_date"
57-
input: "2024-02-29"
58-
expected_output: "2024-02-29"
59-
60-
- name: "null_handling"
61-
input: null
62-
expected_output: null
63-
64-
timestamp_to_iso8601:
65-
description: >
66-
Normalize TIMESTAMP types (without timezone) to ISO 8601 string format
67-
with microsecond precision (YYYY-MM-DD HH:MI:SS.ffffff).
68-
69-
dialects:
70-
snowflake:
71-
types: ["TIMESTAMP_NTZ", "TIMESTAMP_TZ"]
72-
sql: "TO_VARCHAR({}, 'YYYY-MM-DD HH24:MI:SS.FF6')"
73-
notes: "FF6 = fractional seconds with 6 digits"
74-
75-
oracle:
76-
types: ["TIMESTAMP"]
77-
sql: "TO_CHAR({}, 'YYYY-MM-DD HH24:MI:SS.FF6')"
78-
79-
sqlserver:
80-
types: ["DATETIME2", "DATETIME", "SMALLDATETIME"]
81-
sql: "CONVERT(VARCHAR(27), {}, 121)"
82-
notes: "Style 121 = ODBC canonical format with milliseconds"
83-
84-
databricks:
85-
types: ["TIMESTAMP"]
86-
sql: "DATE_FORMAT({}, 'yyyy-MM-dd HH:mm:ss.SSSSSS')"
87-
notes: "Databricks uses Java SimpleDateFormat patterns"
88-
89-
test_cases:
90-
- name: "standard_timestamp"
91-
input: "2025-10-24 13:45:30.123456"
92-
expected_output: "2025-10-24 13:45:30.123456"
93-
94-
- name: "midnight_timestamp"
95-
input: "2025-01-01 00:00:00.000000"
96-
expected_output: "2025-01-01 00:00:00.000000"
97-
98-
- name: "end_of_day"
99-
input: "2025-12-31 23:59:59.999999"
100-
expected_output: "2025-12-31 23:59:59.999999"
101-
102-
limitations:
103-
- "SQL Server DATETIME limited to 3.33ms precision, use DATETIME2 for microseconds"
104-
- "Precision varies by dialect version"
105-
106-
timestamp_to_utc:
107-
description: >
108-
Normalize timezone-aware timestamps to UTC ISO 8601 format.
109-
Ensures all timestamps are comparable regardless of source timezone.
110-
111-
dialects:
112-
snowflake:
113-
types: ["TIMESTAMP_TZ"]
114-
sql: "TO_VARCHAR(CONVERT_TIMEZONE('UTC', {}), 'YYYY-MM-DD HH24:MI:SS.FF6')"
115-
116-
oracle:
117-
types: ["TIMESTAMP WITH TIME ZONE", "TIMESTAMP WITH LOCAL TIME ZONE"]
118-
sql: "TO_CHAR(SYS_EXTRACT_UTC({}), 'YYYY-MM-DD HH24:MI:SS.FF6')"
119-
120-
sqlserver:
121-
types: ["DATETIMEOFFSET"]
122-
sql: "CONVERT(VARCHAR(33), SWITCHOFFSET({}, '+00:00'), 127)"
123-
notes: "SWITCHOFFSET converts to UTC, style 127 = ISO 8601"
124-
125-
test_cases:
126-
- name: "utc_timestamp"
127-
dialect: "snowflake"
128-
input: "2025-10-24 13:45:30.123456+00:00"
129-
expected_output: "2025-10-24 13:45:30.123456"
1302

131-
- name: "est_to_utc"
132-
dialect: "snowflake"
133-
input: "2025-10-24 09:45:30.123456-04:00"
134-
expected_output: "2025-10-24 13:45:30.123456"
3+
lakebridge:
4+
description: >
5+
Unified configuration for type normalization rules.
6+
Each category defines transformations that standardize data types
7+
across dialects before comparison.
8+
9+
categories:
10+
# ------------------------------------------------------------------------
11+
# TEMPORAL CATEGORY
12+
# ------------------------------------------------------------------------
13+
temporal:
14+
description: >
15+
Temporal types across dialects.
16+
17+
# Per-category dialect type mapping -> canonical classes
18+
type_map:
# Per dialect, lists the native type names (values) that belong to each
# canonical class (key). The class names are the ones the transformations'
# applies_to lists refer to. An empty list means no native type is listed
# for that class in that dialect.
19+
canonical_classes: ["DATE", "TIME", "TIMESTAMP", "TIMESTAMP_TZ", "INTERVAL"]
20+
tsql:
21+
DATE: ["DATE"]
22+
TIME: ["TIME"]
23+
TIMESTAMP: ["DATETIME2", "DATETIME", "SMALLDATETIME"]
24+
TIMESTAMP_TZ: ["DATETIMEOFFSET"]
25+
INTERVAL: []
26+
oracle:
27+
DATE: ["DATE"]  # Oracle DATE includes a time component; it is treated as DATE here for normalization.
28+
TIME: []
29+
TIMESTAMP: ["TIMESTAMP"]
30+
TIMESTAMP_TZ: ["TIMESTAMP WITH TIME ZONE", "TIMESTAMP WITH LOCAL TIME ZONE"]
31+
INTERVAL: ["INTERVAL YEAR TO MONTH", "INTERVAL DAY TO SECOND"]
32+
snowflake:
33+
DATE: ["DATE"]
34+
TIME: ["TIME"]
35+
TIMESTAMP: ["DATETIME", "TIMESTAMP_NTZ"]
36+
TIMESTAMP_TZ: ["TIMESTAMP_TZ", "TIMESTAMP_LTZ"]
37+
INTERVAL: []
38+
databricks:
39+
DATE: ["DATE"]
40+
TIME: []
41+
# Here the plain TIMESTAMP name is listed under TIMESTAMP_TZ, and
# TIMESTAMP_NTZ under TIMESTAMP.
TIMESTAMP: ["TIMESTAMP_NTZ"]
42+
TIMESTAMP_TZ: ["TIMESTAMP"]
43+
INTERVAL: ["INTERVAL"]
44+
45+
transformations:
46+
47+
date_to_iso8601:
# Renders values of the canonical DATE class as 'YYYY-MM-DD' text, with one
# SQL template per dialect.
# NOTE(review): `{}` looks like the placeholder for the column/expression
# being normalized - confirm against the code that loads this file.
48+
description: "Normalize DATE to ISO 8601 (YYYY-MM-DD)."
49+
applies_to: ["DATE"]
50+
dialects:
51+
tsql:
52+
sql: "CONVERT(VARCHAR(10), {}, 23)"
53+
snowflake:
54+
sql: "TO_VARCHAR({}, 'YYYY-MM-DD')"
55+
databricks:
56+
sql: "DATE_FORMAT({}, 'yyyy-MM-dd')"
57+
oracle:
58+
sql: "TO_CHAR({}, 'YYYY-MM-DD')"
59+
test_cases:
60+
- name: "standard_date"
61+
input: "2025-10-24"
62+
expected_output: "2025-10-24"
63+
# A null input is expected to stay null.
- name: "null_handling"
64+
input: null
65+
expected_output: null
66+
67+
timestamp_to_iso8601:
# Renders values of the canonical TIMESTAMP class (no time zone) as
# 'YYYY-MM-DD HH:MM:SS.ffffff' text. Every dialect template must produce the
# same layout (space separator, exactly six fractional digits) so the
# rendered values compare equal across dialects.
68+
description: "Normalize TIMESTAMP (no tz) to ISO 8601 with microseconds."
69+
applies_to: ["TIMESTAMP"]
70+
dialects:
71+
snowflake:
72+
sql: "TO_VARCHAR({}, 'YYYY-MM-DD HH24:MI:SS.FF6')"
73+
oracle:
74+
sql: "TO_CHAR({}, 'YYYY-MM-DD HH24:MI:SS.FF6')"
75+
tsql:
76+
# Style 121 gives 'yyyy-mm-dd hh:mi:ss.fraction' with a space separator;
# style 126 would put a 'T' between date and time. The cast to DATETIME2(6)
# pins the fraction to six digits for DATETIME2, DATETIME and SMALLDATETIME
# alike, so the result is always 26 characters.
# NOTE(review): widening DATETIME to DATETIME2 can expose its 1/300 s
# storage (e.g. .003 -> .003333) on newer compatibility levels - confirm
# this is acceptable for DATETIME columns.
sql: "CONVERT(VARCHAR(26), CAST({} AS DATETIME2(6)), 121)"
77+
databricks:
78+
sql: "DATE_FORMAT({}, 'yyyy-MM-dd HH:mm:ss.SSSSSS')"
79+
test_cases:
80+
- name: "standard_timestamp"
81+
input: "2025-10-24 13:45:30.123456"
82+
expected_output: "2025-10-24 13:45:30.123456"
83+
84+
timestamptz_to_iso8601:
# Renders values of the canonical TIMESTAMP_TZ class as
# 'YYYY-MM-DD HH:MM:SS.ffffff +HH:MM' text. Every dialect template must keep
# the UTC offset and print it in the same numeric form so the rendered
# values compare equal across dialects.
85+
description: "Normalize TIMESTAMP with time zone to ISO 8601 with microseconds."
86+
applies_to: ["TIMESTAMP_TZ"]
87+
dialects:
88+
snowflake:
89+
sql: "TO_VARCHAR({}, 'YYYY-MM-DD HH24:MI:SS.FF6 TZH:TZM')"
90+
oracle:
91+
sql: "TO_CHAR({}, 'YYYY-MM-DD HH24:MI:SS.FF6 TZH:TZM')"
92+
tsql:
93+
# Cast to DATETIMEOFFSET(6), not DATETIME2(6): DATETIME2 has no offset, so
# the ' +HH:MM' suffix expected below would be lost. Style 121 prints
# 'yyyy-mm-dd hh:mi:ss.ffffff +hh:mm' (33 characters at scale 6).
sql: "CONVERT(VARCHAR(33), CAST({} AS DATETIMEOFFSET(6)), 121)"
94+
databricks:
95+
# 'xxx' always prints a numeric offset such as '+02:00', and '+00:00' for
# UTC. 'ZZZZZ' prints 'Z' for a zero offset, which would not match the
# TZH:TZM output of the other dialects.
sql: "DATE_FORMAT({}, 'yyyy-MM-dd HH:mm:ss.SSSSSS xxx')"
96+
test_cases:
97+
- name: "with_offset"
98+
input: "2025-10-24 13:45:30.123456 +02:00"
99+
expected_output: "2025-10-24 13:45:30.123456 +02:00"
100+
101+
time_to_hhmmss:
# Renders values of the canonical TIME class as 'HH:MM:SS.ffffff' text.
# Both templates below always emit six fractional digits, so the rendered
# values compare equal across dialects.
102+
description: "Normalize TIME to HH:MM:SS[.ffffff]."
103+
applies_to: ["TIME"]
104+
dialects:
105+
snowflake:
106+
sql: "TO_VARCHAR({}, 'HH24:MI:SS.FF6')"
107+
tsql:
108+
# FORMAT() treats TIME as a .NET TimeSpan, where 'HH' and unescaped ':' are
# not valid, so FORMAT({}, 'HH:mm:ss.ffffff') returns NULL. Converting a
# TIME(6) to text uses the default 'hh:mm:ss.ffffff' layout (15 characters).
sql: "CONVERT(VARCHAR(15), CAST({} AS TIME(6)))"
109+
test_cases:
110+
- name: "standard_time"
111+
input: "13:45:30"
112+
# Six fractional digits are always emitted by the templates above.
expected_output: "13:45:30.000000"

0 commit comments

Comments
 (0)