[BUGFIX] Fix Databricks identifier quoting in batch query compilation. (#11671)

shiplet · stejin · web-flow · commit f0cb821e4ebe · 2026-02-18T21:18:25.000Z
Co-authored-by: @stejin <steffen@stejin.org>
diff --git a/great_expectations/expectations/metrics/query_metric_provider.py b/great_expectations/expectations/metrics/query_metric_provider.py
@@ -115,7 +115,9 @@ def _get_substituted_batch_subquery_from_query_and_batch_selectable(
         ):  # specifying a row_condition returns the active batch as a Select
             # specifying an unexpected_rows_query returns the active batch as a Subquery or Alias
             # this requires compilation & aliasing when formatting the parameterized query
-            batch = batch_selectable.compile(compile_kwargs={"literal_binds": True})
+            batch = batch_selectable.compile(
+                dialect=execution_engine.engine.dialect, compile_kwargs={"literal_binds": True}
+            )
             # all join queries require the user to have taken care of aliasing themselves
             if "JOIN" in query.upper():
                 query = query.format(batch=f"({batch})", **parameters)
diff --git a/great_expectations/expectations/metrics/query_metrics/query_template_values.py b/great_expectations/expectations/metrics/query_metrics/query_template_values.py
@@ -73,10 +73,13 @@ def _sqlalchemy(
             selectable, sa.sql.Select
         ):  # Specifying a row_condition returns the active batch as a Select object, requiring compilation &  # noqa: E501 # FIXME CoP
             # aliasing when formatting the parameterized query
+            compiled_selectable = selectable.compile(
+                dialect=execution_engine.engine.dialect, compile_kwargs={"literal_binds": True}
+            )
             query = cls.get_query(
                 query,
                 template_dict,
-                f"({selectable.compile(compile_kwargs={'literal_binds': True})}) AS subselect",
+                f"({compiled_selectable}) AS subselect",
             )
 
         else:
diff --git a/tests/expectations/metrics/conftest.py b/tests/expectations/metrics/conftest.py
@@ -11,13 +11,8 @@
 from great_expectations.execution_engine.sqlalchemy_batch_data import SqlAlchemyBatchData
 
 
-class Dialect:
-    def __init__(self, dialect: str):
-        self.name = dialect
-
-
 class MockSaEngine:
-    def __init__(self, dialect: Dialect):
+    def __init__(self, dialect: sa.engine.interfaces.Dialect):
         self.dialect = dialect
 
     def connect(self) -> None:
@@ -44,7 +39,7 @@ def batch_selectable() -> sa.Table:
 
 class MockSqlAlchemyExecutionEngine(SqlAlchemyExecutionEngine):
     def __init__(self, create_temp_table: bool = True, *args, **kwargs):
-        self.engine = MockSaEngine(dialect=Dialect("sqlite"))  # type: ignore[assignment] # FIXME CoP
+        self.engine = MockSaEngine(dialect=sa.dialects.sqlite.dialect())  # type: ignore[assignment] # FIXME CoP
         self._create_temp_table = create_temp_table
         self._connection = MockConnection()
 
diff --git a/tests/expectations/metrics/query_metrics/test_query_metrics.py b/tests/expectations/metrics/query_metrics/test_query_metrics.py
@@ -2,6 +2,7 @@
 from unittest import mock
 
 import pytest
+from sqlalchemy.dialects import mysql
 
 from great_expectations.compatibility.sqlalchemy import (
     sqlalchemy as sa,
@@ -219,3 +220,42 @@ def test_sqlalchemy_query_row_count(
         batch_selectable=batch_selectable,
         execution_engine=mock_sqlalchemy_execution_engine,
     )
+
+
+@pytest.mark.unit
+def test_get_substituted_batch_subquery_uses_dialect_for_compilation(
+    mock_sqlalchemy_execution_engine: MockSqlAlchemyExecutionEngine,
+    monkeypatch: pytest.MonkeyPatch,
+):
+    """Test that batch selectable compilation uses the execution engine's dialect.
+
+    This test verifies the fix for the Databricks identifier quoting issue where
+    column names were being quoted with double quotes (") instead of backticks (`),
+    causing Databricks to interpret them as string literals rather than column identifiers.
+    """
+    # Mixed-case column name, so the compiler has to quote the identifier
+    metadata = sa.MetaData()
+    test_table = sa.Table("test_table", metadata, sa.Column("ReportingDate", sa.TIMESTAMP))
+    batch_selectable = sa.select(test_table).where(
+        sa.extract("year", test_table.c.ReportingDate) == 2025
+    )
+
+    query = "SELECT * FROM {batch}"
+
+    # Use MySQL dialect to simulate Databricks (both use backticks for identifiers)
+    mysql_dialect = mysql.dialect()
+    monkeypatch.setattr(mock_sqlalchemy_execution_engine.engine, "dialect", mysql_dialect)
+
+    # Call the method
+    result = QueryMetricProvider._get_substituted_batch_subquery_from_query_and_batch_selectable(
+        query=query,
+        batch_selectable=batch_selectable,
+        execution_engine=mock_sqlalchemy_execution_engine,
+    )
+
+    # The column must carry the engine dialect's backtick quoting; double quotes would
+    # mean the default compiler was used and the engine dialect was ignored.
+    assert isinstance(result, str)
+    assert "`ReportingDate`" in result
+    assert '"ReportingDate"' not in result
+    assert "test_table" in result