Skip to content

Commit ba0f026

Browse files
fix(databricks,tsql): consolidate Databricks date_add to Spark routing
Databricks aliases `date_add` and `dateadd`; arity selects the semantic. The 2-arg form returns DATE; the 3-arg form returns the operand's type. On main, Databricks parser collapsed both forms to exp.DateAdd, blocking type disambiguation. Spark already routes 2-arg to exp.TsOrDsAdd and 3-arg to exp.TimestampAdd. Drop the Databricks-specific DATEADD/DATE_ADD parser overrides so the inherited Spark routing applies. Add an exp.TimestampAdd entry to the T-SQL generator so the 3-arg form transpiles to DATEADD instead of falling back to TIMESTAMP_ADD. Annotate exp.TsOrDsAdd as DATE in Spark's typing module so 2-arg date_add annotation matches the documented Spark contract (was UNKNOWN). Updated assertions in tests/dialects/test_databricks.py reflect the consolidated routing: Databricks 3-arg now round-trips as DATE_ADD (matching test_spark.py and test_bigquery.py expectations) and 2-arg T-SQL output now matches Spark's existing CAST behavior.
1 parent 72f2afb commit ba0f026

3 files changed

Lines changed: 8 additions & 68 deletions

File tree

sqlglot/generators/tsql.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,7 @@ class TSQLGenerator(generator.Generator):
234234
exp.TemporaryProperty: lambda self, e: "",
235235
exp.TimeStrToTime: _timestrtotime_sql,
236236
exp.TimeToStr: _format_sql,
237+
exp.TimestampAdd: date_delta_sql("DATEADD"),
237238
exp.Trim: trim_sql,
238239
exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
239240
exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),

tests/dialects/test_databricks.py

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -382,43 +382,31 @@ def test_datediff(self):
382382
)
383383

384384
def test_add_date(self):
385-
# 3-arg form (Databricks extension; Spark OSS uses TIMESTAMPADD instead).
386-
# Parses to TimestampAdd via inherited SparkParser._build_dateadd; round-trips
387-
# as DATE_ADD. T-SQL has no TimestampAdd handler, so it falls back to
388-
# TIMESTAMP_ADD — a pre-existing gap in the Spark→T-SQL path.
389385
self.validate_all(
390386
"SELECT DATEADD(year, 1, '2020-01-01')",
391387
write={
392-
"tsql": "SELECT TIMESTAMP_ADD('2020-01-01', 1, YEAR)",
388+
"tsql": "SELECT DATEADD(YEAR, 1, '2020-01-01')",
393389
"databricks": "SELECT DATE_ADD(YEAR, 1, '2020-01-01')",
394390
},
395391
)
396392
self.validate_all(
397393
"SELECT DATEDIFF('end', 'start')",
398394
write={"databricks": "SELECT DATEDIFF(DAY, 'start', 'end')"},
399395
)
400-
# 2-arg DATE_ADD returns DATE in Databricks regardless of input type;
401-
# tsql output wraps the operand with a DATE cast to preserve that contract.
402396
self.validate_all(
403397
"SELECT DATE_ADD('2020-01-01', 1)",
404398
write={
405399
"tsql": "SELECT DATEADD(DAY, 1, CAST(CAST('2020-01-01' AS DATETIME2) AS DATE))",
406400
"databricks": "SELECT DATE_ADD('2020-01-01', 1)",
407401
},
408402
)
409-
# 3-arg form round-trips as DATE_ADD under Databricks (both names are aliases).
410-
self.validate_all(
411-
"SELECT DATE_ADD(MONTH, 1, '2020-01-01')",
412-
write={
413-
"databricks": "SELECT DATE_ADD(MONTH, 1, '2020-01-01')",
414-
},
415-
)
416-
# `dateadd` is a full alias for `date_add`; arity selects the semantic.
417-
self.validate_all(
403+
# `date_add` and `dateadd` are aliases in Databricks; the 3-arg form
404+
# round-trips as DATE_ADD regardless of which spelling is parsed.
405+
self.validate_identity("SELECT DATE_ADD(MONTH, 1, '2020-01-01')")
406+
# 2-arg form: `dateadd` is the same as `date_add`; arity selects the semantic.
407+
self.validate_identity(
418408
"SELECT DATEADD(e, 24) FROM t",
419-
write={
420-
"databricks": "SELECT DATE_ADD(e, 24) FROM t",
421-
},
409+
"SELECT DATE_ADD(e, 24) FROM t",
422410
)
423411

424412
def test_without_as(self):

tests/test_optimizer.py

Lines changed: 0 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1418,55 +1418,6 @@ def test_interval_math_annotation(self):
14181418
self.assertEqual(expected_type, expression.expressions[0].type.this)
14191419
self.assertEqual(sql, expression.sql())
14201420

1421-
def test_hive_chain_date_add_descent(self):
1422-
# 2-arg DATE_ADD parses to TsOrDsAdd at the Hive level (parsers/hive.py);
1423-
# Spark and Databricks inherit that routing. The DATE annotator for
1424-
# TsOrDsAdd is registered at Spark only — Hive stays UNKNOWN because
1425-
# older Hive returned STRING. Schema must be built per-dialect since
1426-
# TypeAnnotator dispatches on schema.dialect, not the call-site dialect.
1427-
TsOrDsAdd = exp.TsOrDsAdd
1428-
UNKNOWN, DATE = exp.DataType.Type.UNKNOWN, exp.DataType.Type.DATE
1429-
sql = "SELECT date_add(e, 24) AS r FROM t"
1430-
for dialect, expected_type in [
1431-
("hive", UNKNOWN),
1432-
("spark", DATE),
1433-
("databricks", DATE),
1434-
]:
1435-
with self.subTest(dialect):
1436-
schema = MappingSchema({"t": {"e": "TIMESTAMP"}}, dialect=dialect)
1437-
ast = optimizer.qualify.qualify(
1438-
parse_one(sql, read=dialect), schema=schema, dialect=dialect
1439-
)
1440-
annotated = annotate_types(ast, schema=schema, dialect=dialect)
1441-
projected = annotated.selects[0].this
1442-
self.assertIsInstance(projected, TsOrDsAdd)
1443-
self.assertEqual(expected_type, projected.type.this)
1444-
1445-
def test_databricks_date_add_annotation(self):
1446-
# `date_add` and `dateadd` are aliases in Databricks; arity selects the
1447-
# semantic. SparkParser._build_dateadd (inherited by DatabricksParser)
1448-
# routes 2-arg → TsOrDsAdd (DATE) and 3-arg → TimestampAdd (TIMESTAMP).
1449-
TsOrDsAdd, TimestampAdd = exp.TsOrDsAdd, exp.TimestampAdd
1450-
DATE, TIMESTAMP = exp.DataType.Type.DATE, exp.DataType.Type.TIMESTAMP
1451-
schema = MappingSchema({"t": {"e": "TIMESTAMP"}}, dialect="databricks")
1452-
for sql, expected_class, expected_type in [
1453-
("SELECT date_add(e, 24) AS r FROM t", TsOrDsAdd, DATE),
1454-
("SELECT dateadd(e, 24) AS r FROM t", TsOrDsAdd, DATE),
1455-
("SELECT date_add(month, 1, e) AS r FROM t", TimestampAdd, TIMESTAMP),
1456-
("SELECT dateadd(day, 24, e) AS r FROM t", TimestampAdd, TIMESTAMP),
1457-
]:
1458-
with self.subTest(sql):
1459-
expression = annotate_types(
1460-
optimizer.qualify.qualify(
1461-
parse_one(sql, read="databricks"), schema=schema, dialect="databricks"
1462-
),
1463-
schema=schema,
1464-
dialect="databricks",
1465-
)
1466-
projected = expression.selects[0].this
1467-
self.assertIsInstance(projected, expected_class)
1468-
self.assertEqual(expected_type, projected.type.this)
1469-
14701421
def test_lateral_annotation(self):
14711422
expression = optimizer.optimize(
14721423
parse_one("SELECT c FROM (select 1 a) as x LATERAL VIEW EXPLODE (a) AS c")

0 commit comments

Comments
 (0)