|
---
# Reconcile normalization rules: each entry under `transformations` pairs a
# Databricks-side SQL expression with one or more source-side rules so both
# sides render a value as the same text before comparison.
# `{}` stands for the column expression being normalized.
version: "1.0.0"

lakebridge:
  description: >-
    Defines transformations that standardize snowflake data types
    before comparison to databricks.
  transformations:
    # ------------------------------------------------------------------------
    # TEMPORAL CATEGORY
    # ------------------------------------------------------------------------
    date_to_iso8601:  # Use any name here
      description: "Normalize DATE to ISO 8601 (YYYY-MM-DD)."
      databricks_type: ["DATE"]
      databricks_transformation: "DATE_FORMAT({}, 'yyyy-MM-dd')"
      rules:
        date:  # Use any name here
          source_types: ["DATE"]
          # can also override databricks transformation if needed
          source_transformation: "TO_VARCHAR({}, 'YYYY-MM-DD')"
        string:
          source_types: ["CHAR", "VARCHAR", "STRING", "TEXT"]
          # identity, no transformation needed for strings
          source_transformation: "{}"

    timestamp_ntz_to_iso8601:
      description: "Normalize TIMESTAMP (no tz) to ISO 8601 with milliseconds."
      databricks_type: ["TIMESTAMP_NTZ", "TIMESTAMP WITHOUT TIME ZONE"]
      databricks_transformation: "DATE_FORMAT({}, 'yyyy-MM-dd HH:mm:ss.SSS')"
      rules:
        timestamp_ntz:
          source_types: []  # applies to all types if empty
          # The format is spelled out so the result does not depend on the
          # session's TIMESTAMP_NTZ_OUTPUT_FORMAT and always mirrors the
          # Databricks pattern above.
          source_transformation: "TO_VARCHAR(TO_TIMESTAMP_NTZ({}), 'YYYY-MM-DD HH24:MI:SS.FF3')"

    timestamp_to_iso8601:
      description: "Normalize TIMESTAMP with time zone to ISO 8601 with milliseconds."
      databricks_type: ["TIMESTAMP"]
      # `xxx` always renders the offset as +hh:mm (including +00:00), which is
      # what `TZH:TZM` produces on the source side. `ZZZZZ` would render a zero
      # offset as a bare `Z` and never match.
      # NOTE(review): Databricks renders TIMESTAMP in the session time zone, so
      # offsets only line up when that matches the stored source offset; confirm.
      databricks_transformation: "DATE_FORMAT({}, 'yyyy-MM-dd HH:mm:ss.SSS xxx')"
      rules:
        timestamp_tz:
          source_types: []  # applies to all types if empty
          # Explicit format: independent of TIMESTAMP_TZ_OUTPUT_FORMAT.
          source_transformation: "TO_VARCHAR(TO_TIMESTAMP_TZ({}), 'YYYY-MM-DD HH24:MI:SS.FF3 TZH:TZM')"

    # ------------------------------------------------------------------------
    # SEMI-STRUCTURED AND STRUCTURED CATEGORY
    # ------------------------------------------------------------------------
    arrays:
      description: "Normalize Arrays: filter NULLs then sort then join"
      databricks_type: ["ARRAY"]
      # ARRAY_JOIN (unlike CONCAT_WS) yields NULL for a NULL array, so the
      # 'null_recon' sentinel is produced for NULL arrays on both sides and an
      # empty array stays distinguishable as ''.
      databricks_transformation: >-
        COALESCE(ARRAY_JOIN(ARRAY_SORT(FILTER({}, x -> x IS NOT NULL)), ','), 'null_recon')
      rules:
        array:
          source_types: ["ARRAY"]
          source_transformation: >-
            COALESCE(ARRAY_TO_STRING(ARRAY_SORT(ARRAY_COMPACT({})), ','), 'null_recon')

    structs:
      description: "Serialize or flatten nested structures to JSON."
      databricks_type: ["STRUCT", "MAP", "VARIANT", "OBJECT"]
      databricks_transformation: "TO_JSON({})"
      rules:
        struct:
          # VARIANT is included so a VARIANT target column has a matching
          # source rule, mirroring the databricks_type list above.
          source_types: ["OBJECT", "MAP", "VARIANT"]
          source_transformation: "TO_JSON({})"

    # ------------------------------------------------------------------------
    # NUMERIC CATEGORY
    # ------------------------------------------------------------------------
    # (no numeric transformations defined yet)

    # ------------------------------------------------------------------------
    # OTHER CATEGORY
    # ------------------------------------------------------------------------
    booleans:
      description: "Normalize boolean and boolean-like values to TRUE/FALSE."
      databricks_type: ["BOOLEAN"]
      databricks_transformation: "{}"
      rules:
        boolean:
          source_types: ["BOOLEAN"]
          source_transformation: "{}"
        integer_as_boolean:
          source_types: ["INT", "INTEGER", "SMALLINT", "TINYINT", "BYTEINT", "NUMBER"]
          # NULL stays NULL (the Databricks side is the identity, so a NULL
          # must not be turned into FALSE); 1 is TRUE, anything else FALSE.
          source_transformation: >-
            CASE WHEN {} IS NULL THEN NULL
            WHEN {} = 1 THEN TRUE
            ELSE FALSE END
        string_as_boolean:
          source_types: ["CHAR", "VARCHAR", "TEXT", "STRING"]
          # NULL stays NULL; the listed truthy spellings (case-insensitive)
          # map to TRUE, every other non-NULL string to FALSE.
          source_transformation: >-
            CASE WHEN {} IS NULL THEN NULL
            WHEN UPPER({}) IN ('Y','YES','TRUE','1') THEN TRUE
            ELSE FALSE END
0 commit comments