Skip to content

Commit f0cb821

Browse files
shiplet and stejin authored
[BUGFIX] Fix Databricks identifier quoting in batch query compilation. (#11671)
Co-authored-by: @stejin <steffen@stejin.org>
1 parent 3bf7087 commit f0cb821

File tree

4 files changed, with 49 additions and 9 deletions.

great_expectations/expectations/metrics/query_metric_provider.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,9 @@ def _get_substituted_batch_subquery_from_query_and_batch_selectable(
115115
): # specifying a row_condition returns the active batch as a Select
116116
# specifying an unexpected_rows_query returns the active batch as a Subquery or Alias
117117
# this requires compilation & aliasing when formatting the parameterized query
118-
batch = batch_selectable.compile(compile_kwargs={"literal_binds": True})
118+
batch = batch_selectable.compile(
119+
dialect=execution_engine.engine.dialect, compile_kwargs={"literal_binds": True}
120+
)
119121
# all join queries require the user to have taken care of aliasing themselves
120122
if "JOIN" in query.upper():
121123
query = query.format(batch=f"({batch})", **parameters)

great_expectations/expectations/metrics/query_metrics/query_template_values.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,10 +73,13 @@ def _sqlalchemy(
7373
selectable, sa.sql.Select
7474
): # Specifying a row_condition returns the active batch as a Select object, requiring compilation & # noqa: E501 # FIXME CoP
7575
# aliasing when formatting the parameterized query
76+
compiled_selectable = selectable.compile(
77+
dialect=execution_engine.engine.dialect, compile_kwargs={"literal_binds": True}
78+
)
7679
query = cls.get_query(
7780
query,
7881
template_dict,
79-
f"({selectable.compile(compile_kwargs={'literal_binds': True})}) AS subselect",
82+
f"({compiled_selectable}) AS subselect",
8083
)
8184

8285
else:

tests/expectations/metrics/conftest.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,8 @@
1111
from great_expectations.execution_engine.sqlalchemy_batch_data import SqlAlchemyBatchData
1212

1313

14-
class Dialect:
15-
def __init__(self, dialect: str):
16-
self.name = dialect
17-
18-
1914
class MockSaEngine:
20-
def __init__(self, dialect: Dialect):
15+
def __init__(self, dialect: sa.engine.interfaces.Dialect):
2116
self.dialect = dialect
2217

2318
def connect(self) -> None:
@@ -44,7 +39,7 @@ def batch_selectable() -> sa.Table:
4439

4540
class MockSqlAlchemyExecutionEngine(SqlAlchemyExecutionEngine):
4641
def __init__(self, create_temp_table: bool = True, *args, **kwargs):
47-
self.engine = MockSaEngine(dialect=Dialect("sqlite")) # type: ignore[assignment] # FIXME CoP
42+
self.engine = MockSaEngine(dialect=sa.dialects.sqlite.dialect()) # type: ignore[assignment] # FIXME CoP
4843
self._create_temp_table = create_temp_table
4944
self._connection = MockConnection()
5045

tests/expectations/metrics/query_metrics/test_query_metrics.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from unittest import mock
33

44
import pytest
5+
from sqlalchemy.dialects import mysql
56

67
from great_expectations.compatibility.sqlalchemy import (
78
sqlalchemy as sa,
@@ -219,3 +220,42 @@ def test_sqlalchemy_query_row_count(
219220
batch_selectable=batch_selectable,
220221
execution_engine=mock_sqlalchemy_execution_engine,
221222
)
223+
224+
225+
@pytest.mark.unit
226+
def test_get_substituted_batch_subquery_uses_dialect_for_compilation(
227+
mock_sqlalchemy_execution_engine: MockSqlAlchemyExecutionEngine,
228+
monkeypatch: pytest.MonkeyPatch,
229+
):
230+
"""Test that batch selectable compilation uses the execution engine's dialect.
231+
232+
This test verifies the fix for the Databricks identifier quoting issue where
233+
column names were being quoted with double quotes (") instead of backticks (`),
234+
causing Databricks to interpret them as string literals rather than column identifiers.
235+
"""
236+
# Create a Select statement with a column that will be compiled
237+
metadata = sa.MetaData()
238+
test_table = sa.Table("test_table", metadata, sa.Column("ReportingDate", sa.TIMESTAMP))
239+
batch_selectable = sa.select(test_table).where(
240+
sa.extract("year", test_table.c.ReportingDate) == 2025
241+
)
242+
243+
query = "SELECT * FROM {batch}"
244+
245+
# Use MySQL dialect to simulate Databricks (both use backticks for identifiers)
246+
mysql_dialect = mysql.dialect()
247+
monkeypatch.setattr(mock_sqlalchemy_execution_engine.engine, "dialect", mysql_dialect)
248+
249+
# Call the method
250+
result = QueryMetricProvider._get_substituted_batch_subquery_from_query_and_batch_selectable(
251+
query=query,
252+
batch_selectable=batch_selectable,
253+
execution_engine=mock_sqlalchemy_execution_engine,
254+
)
255+
256+
# Verify the result is a string containing the compiled SQL with proper table/column references
257+
assert isinstance(result, str)
258+
assert "SELECT" in result.upper()
259+
# Verify that the batch selectable was actually compiled
260+
# (should contain table and column references)
261+
assert "test_table" in result.lower() or "reportingdate" in result.lower()

0 commit comments

Comments
 (0)