|
1 | | -# ============================================================================ |
2 | | -# TEMPORAL TYPE TRANSFORMATIONS |
3 | | -# ============================================================================ |
4 | | -# This file defines all transformations for date, time, and timestamp types |
5 | | -# across supported database dialects. |
6 | | -# ============================================================================ |
7 | | - |
8 | 1 | version: "1.0.0" |
9 | | -category_name: "temporal" |
10 | | -description: > |
11 | | - Temporal types represent points in time or durations. This category |
12 | | - covers DATE (calendar dates), TIME (time of day), TIMESTAMP (date+time), |
13 | | - TIMESTAMPTZ (with timezone), and INTERVAL (duration) types. |
14 | | -
|
15 | | -type_classes: |
16 | | - - "DATE" |
17 | | - - "TIME" |
18 | | - - "TIMESTAMP" |
19 | | - - "TIMESTAMPTZ" |
20 | | - - "TIMESTAMPNTZ" |
21 | | - - "DATETIME" |
22 | | - - "INTERVAL" |
23 | | - - "YEAR" |
24 | | - |
25 | | -supported_dialects: |
26 | | - - "databricks" |
27 | | - - "sqlserver" |
28 | | - - "oracle" |
29 | | - - "snowflake" |
30 | | - |
31 | | -# ---------------------------------------------------------------------------- |
32 | | -# TRANSFORMATIONS |
33 | | -# ---------------------------------------------------------------------------- |
34 | | -transformations: |
35 | | - |
36 | | - date_to_iso8601: |
37 | | - description: > |
38 | | - Normalize DATE types to ISO 8601 string format (YYYY-MM-DD). |
39 | | - This is the most portable and human-readable date format. |
40 | | -
|
41 | | - dialects: |
42 | | - all: |
43 | | - types: ["DATE"] |
44 | | - sql: "TO_CHAR({}, 'YYYY-MM-DD')" |
45 | | - |
46 | | - sqlserver: |
47 | | - types: ["DATE"] |
48 | | - sql: "CONVERT(VARCHAR(10), {}, 23)" |
49 | | - notes: "Style 23 = ISO 8601 format (YYYY-MM-DD)" |
50 | | - |
51 | | - test_cases: |
52 | | - - name: "standard_date" |
53 | | - input: "2025-10-24" |
54 | | - expected_output: "2025-10-24" |
55 | | - |
56 | | - - name: "leap_year_date" |
57 | | - input: "2024-02-29" |
58 | | - expected_output: "2024-02-29" |
59 | | - |
60 | | - - name: "null_handling" |
61 | | - input: null |
62 | | - expected_output: null |
63 | | - |
64 | | - timestamp_to_iso8601: |
65 | | - description: > |
66 | | - Normalize TIMESTAMP types (without timezone) to ISO 8601 string format |
67 | | - with microsecond precision (YYYY-MM-DD HH:MI:SS.ffffff). |
68 | | -
|
69 | | - dialects: |
70 | | - snowflake: |
71 | | - types: ["TIMESTAMP_NTZ", "TIMESTAMP_TZ"] |
72 | | - sql: "TO_VARCHAR({}, 'YYYY-MM-DD HH24:MI:SS.FF6')" |
73 | | - notes: "FF6 = fractional seconds with 6 digits" |
74 | | - |
75 | | - oracle: |
76 | | - types: ["TIMESTAMP"] |
77 | | - sql: "TO_CHAR({}, 'YYYY-MM-DD HH24:MI:SS.FF6')" |
78 | | - |
79 | | - sqlserver: |
80 | | - types: ["DATETIME2", "DATETIME", "SMALLDATETIME"] |
81 | | - sql: "CONVERT(VARCHAR(27), {}, 121)" |
82 | | - notes: "Style 121 = ODBC canonical format with milliseconds" |
83 | | - |
84 | | - databricks: |
85 | | - types: ["TIMESTAMP"] |
86 | | - sql: "DATE_FORMAT({}, 'yyyy-MM-dd HH:mm:ss.SSSSSS')" |
87 | | - notes: "Databricks uses Java SimpleDateFormat patterns" |
88 | | - |
89 | | - test_cases: |
90 | | - - name: "standard_timestamp" |
91 | | - input: "2025-10-24 13:45:30.123456" |
92 | | - expected_output: "2025-10-24 13:45:30.123456" |
93 | | - |
94 | | - - name: "midnight_timestamp" |
95 | | - input: "2025-01-01 00:00:00.000000" |
96 | | - expected_output: "2025-01-01 00:00:00.000000" |
97 | | - |
98 | | - - name: "end_of_day" |
99 | | - input: "2025-12-31 23:59:59.999999" |
100 | | - expected_output: "2025-12-31 23:59:59.999999" |
101 | | - |
102 | | - limitations: |
103 | | - - "SQL Server DATETIME limited to 3.33ms precision, use DATETIME2 for microseconds" |
104 | | - - "Precision varies by dialect version" |
105 | | - |
106 | | - timestamp_to_utc: |
107 | | - description: > |
108 | | - Normalize timezone-aware timestamps to UTC ISO 8601 format. |
109 | | - Ensures all timestamps are comparable regardless of source timezone. |
110 | | -
|
111 | | - dialects: |
112 | | - snowflake: |
113 | | - types: ["TIMESTAMP_TZ"] |
114 | | - sql: "TO_VARCHAR(CONVERT_TIMEZONE('UTC', {}), 'YYYY-MM-DD HH24:MI:SS.FF6')" |
115 | | - |
116 | | - oracle: |
117 | | - types: ["TIMESTAMP WITH TIME ZONE", "TIMESTAMP WITH LOCAL TIME ZONE"] |
118 | | - sql: "TO_CHAR(SYS_EXTRACT_UTC({}), 'YYYY-MM-DD HH24:MI:SS.FF6')" |
119 | | - |
120 | | - sqlserver: |
121 | | - types: ["DATETIMEOFFSET"] |
122 | | - sql: "CONVERT(VARCHAR(33), SWITCHOFFSET({}, '+00:00'), 127)" |
123 | | - notes: "SWITCHOFFSET converts to UTC, style 127 = ISO 8601" |
124 | | - |
125 | | - test_cases: |
126 | | - - name: "utc_timestamp" |
127 | | - dialect: "snowflake" |
128 | | - input: "2025-10-24 13:45:30.123456+00:00" |
129 | | - expected_output: "2025-10-24 13:45:30.123456" |
130 | 2 |
|
131 | | - - name: "est_to_utc" |
132 | | - dialect: "snowflake" |
133 | | - input: "2025-10-24 09:45:30.123456-04:00" |
134 | | - expected_output: "2025-10-24 13:45:30.123456" |
| 3 | +lakebridge: |
| 4 | + description: > |
| 5 | + Unified configuration for type normalization rules. |
| 6 | + Each category defines transformations that standardize data types |
| 7 | + across dialects before comparison. |
| 8 | +
|
| 9 | + categories: |
| 10 | + # ------------------------------------------------------------------------ |
| 11 | + # TEMPORAL CATEGORY |
| 12 | + # ------------------------------------------------------------------------ |
| 13 | + temporal: |
| 14 | + description: > |
| 15 | + Temporal types across dialects. |
| 16 | +
|
| 17 | + # Per-category dialect type mapping -> canonical classes. Under each dialect key, every canonical class lists the native type names assigned to it; an empty list means no native type is listed for that class. |
| 18 | + type_map: |
| 19 | + canonical_classes: ["DATE", "TIME", "TIMESTAMP", "TIMESTAMP_TZ", "INTERVAL"] # the class names used as keys under every dialect below; presumably also what each transformation's applies_to refers to - confirm |
| 20 | + tsql: # NOTE(review): presumably SQL Server (T-SQL), judging by the DATETIME2 / DATETIMEOFFSET type names - confirm |
| 21 | + DATE: ["DATE"] |
| 22 | + TIME: ["TIME"] |
| 23 | + TIMESTAMP: ["DATETIME2", "DATETIME", "SMALLDATETIME"] |
| 24 | + TIMESTAMP_TZ: ["DATETIMEOFFSET"] |
| 25 | + INTERVAL: [] # no tsql type is listed for INTERVAL |
| 26 | + oracle: |
| 27 | + DATE: ["DATE"] # Oracle DATE includes a time component; it is treated as DATE here for normalization. |
| 28 | + TIME: [] # no oracle type is listed for TIME |
| 29 | + TIMESTAMP: ["TIMESTAMP"] |
| 30 | + TIMESTAMP_TZ: ["TIMESTAMP WITH TIME ZONE", "TIMESTAMP WITH LOCAL TIME ZONE"] |
| 31 | + INTERVAL: ["INTERVAL YEAR TO MONTH", "INTERVAL DAY TO SECOND"] |
| 32 | + snowflake: |
| 33 | + DATE: ["DATE"] |
| 34 | + TIME: ["TIME"] |
| 35 | + TIMESTAMP: ["DATETIME", "TIMESTAMP_NTZ"] |
| 36 | + TIMESTAMP_TZ: ["TIMESTAMP_TZ", "TIMESTAMP_LTZ"] |
| 37 | + INTERVAL: [] # no snowflake type is listed for INTERVAL |
| 38 | + databricks: |
| 39 | + DATE: ["DATE"] |
| 40 | + TIME: [] # no databricks type is listed for TIME |
| 41 | + TIMESTAMP: ["TIMESTAMP_NTZ"] |
| 42 | + TIMESTAMP_TZ: ["TIMESTAMP"] # NOTE(review): plain TIMESTAMP is classed as time-zone-aware here while TIMESTAMP_NTZ is not - presumably intentional, confirm |
| 43 | + INTERVAL: ["INTERVAL"] |
| 44 | + |
| 45 | + transformations: |
| 46 | + |
| 47 | + date_to_iso8601: # Renders a DATE-class column as a 'YYYY-MM-DD' string, with one SQL template per dialect. |
| 48 | + description: "Normalize DATE to ISO 8601 (YYYY-MM-DD)." |
| 49 | + applies_to: ["DATE"] |
| 50 | + dialects: # '{}' in each template is presumably replaced by the column expression - confirm against the loader |
| 51 | + tsql: |
| 52 | + sql: "CONVERT(VARCHAR(10), {}, 23)" # CONVERT style 23 = yyyy-mm-dd; VARCHAR(10) is exactly that width |
| 53 | + snowflake: |
| 54 | + sql: "TO_VARCHAR({}, 'YYYY-MM-DD')" |
| 55 | + databricks: |
| 56 | + sql: "DATE_FORMAT({}, 'yyyy-MM-dd')" # pattern letters are case-sensitive: MM = month, mm would be minutes |
| 57 | + oracle: |
| 58 | + sql: "TO_CHAR({}, 'YYYY-MM-DD')" |
| 59 | + test_cases: |
| 60 | + - name: "standard_date" |
| 61 | + input: "2025-10-24" |
| 62 | + expected_output: "2025-10-24" |
| 63 | + - name: "null_handling" # a null input is expected to stay null rather than become a string |
| 64 | + input: null |
| 65 | + expected_output: null |
| 66 | + |
| 67 | + timestamp_to_iso8601: # Every dialect must emit 'YYYY-MM-DD HH:MI:SS.ffffff' (space separator, always six fractional digits) so the strings compare equal. |
| 68 | + description: "Normalize TIMESTAMP (no tz) to ISO 8601 with microseconds." |
| 69 | + applies_to: ["TIMESTAMP"] |
| 70 | + dialects: |
| 71 | + snowflake: |
| 72 | + sql: "TO_VARCHAR({}, 'YYYY-MM-DD HH24:MI:SS.FF6')" # FF6 = six fractional-second digits |
| 73 | + oracle: |
| 74 | + sql: "TO_CHAR({}, 'YYYY-MM-DD HH24:MI:SS.FF6')" |
| 75 | + tsql: |
| 76 | + sql: "CONVERT(VARCHAR(26), CAST({} AS DATETIME2(6)), 121)" # style 121 uses a space separator (126 emits 'T'); the cast to DATETIME2(6) makes DATETIME and SMALLDATETIME render six fractional digits too |
| 77 | + databricks: |
| 78 | + sql: "DATE_FORMAT({}, 'yyyy-MM-dd HH:mm:ss.SSSSSS')" |
| 79 | + test_cases: |
| 80 | + - name: "standard_timestamp" |
| 81 | + input: "2025-10-24 13:45:30.123456" |
| 82 | + expected_output: "2025-10-24 13:45:30.123456" |
| 83 | + |
| 84 | + timestamptz_to_iso8601: # Every dialect must emit 'YYYY-MM-DD HH:MI:SS.ffffff +HH:MM'; the offset is rendered, not converted to UTC. |
| 85 | + description: "Normalize TIMESTAMP with time zone to ISO 8601 with microseconds." |
| 86 | + applies_to: ["TIMESTAMP_TZ"] |
| 87 | + dialects: |
| 88 | + snowflake: |
| 89 | + sql: "TO_VARCHAR({}, 'YYYY-MM-DD HH24:MI:SS.FF6 TZH:TZM')" |
| 90 | + oracle: |
| 91 | + sql: "TO_CHAR({}, 'YYYY-MM-DD HH24:MI:SS.FF6 TZH:TZM')" |
| 92 | + tsql: |
| 93 | + sql: "CONVERT(VARCHAR(33), CAST({} AS DATETIMEOFFSET(6)), 121)" # stays DATETIMEOFFSET so the ' +HH:MM' suffix survives (a cast to DATETIME2 drops it); style 121 already uses a space separator |
| 94 | + databricks: |
| 95 | + sql: "DATE_FORMAT({}, 'yyyy-MM-dd HH:mm:ss.SSSSSS xxx')" # 'xxx' always prints +HH:MM, including +00:00; 'ZZZZZ' prints 'Z' for a zero offset |
| 96 | + test_cases: |
| 97 | + - name: "with_offset" |
| 98 | + input: "2025-10-24 13:45:30.123456 +02:00" |
| 99 | + expected_output: "2025-10-24 13:45:30.123456 +02:00" |
| 100 | + |
| 101 | + time_to_hhmmss: # Every dialect emits a fixed six-digit fraction so the resulting strings compare equal. |
| 102 | + description: "Normalize TIME to HH:MM:SS.ffffff." |
| 103 | + applies_to: ["TIME"] |
| 104 | + dialects: |
| 105 | + snowflake: |
| 106 | + sql: "TO_VARCHAR({}, 'HH24:MI:SS.FF6')" |
| 107 | + tsql: |
| 108 | + sql: "CONVERT(VARCHAR(15), CAST({} AS TIME(6)))" # not FORMAT(): on TIME values it follows TimeSpan rules and returns NULL for 'HH:mm:ss.ffffff'; TIME(6) converts to 'hh:mm:ss.nnnnnn' |
| 109 | + test_cases: |
| 110 | + - name: "standard_time" |
| 111 | + input: "13:45:30" |
| 112 | + expected_output: "13:45:30.000000" # FF6 and TIME(6) always render six fractional digits |
0 commit comments