[BUGFIX] ensure unexpected_rows are included if requested (#11358)

joshua-stauffer · web-flow · commit c8c8d82e7c6e · 2025-09-04T17:33:01.000Z
diff --git a/great_expectations/expectations/core/expect_column_values_to_be_null.py b/great_expectations/expectations/core/expect_column_values_to_be_null.py
@@ -367,8 +367,16 @@ def _validate(
 
         nonnull_count = None
 
+        # Only fetch the unexpected_rows metric when result_format sets include_unexpected_rows
+        parsed_result_format = parse_result_format(result_format)
+        unexpected_rows = None
+        if parsed_result_format.get("include_unexpected_rows", False):
+            unexpected_rows = metrics.get(
+                f"{self.map_metric}.{SummarizationMetricNameSuffixes.UNEXPECTED_ROWS.value}"
+            )
+
         return _format_map_output(
-            result_format=parse_result_format(result_format),
+            result_format=parsed_result_format,
             success=success,
             element_count=metrics.get("table.row_count"),
             nonnull_count=nonnull_count,
@@ -384,4 +392,5 @@ def _validate(
             unexpected_index_query=metrics.get(
                 f"{self.map_metric}.{SummarizationMetricNameSuffixes.UNEXPECTED_INDEX_QUERY.value}"
             ),
+            unexpected_rows=unexpected_rows,
         )
diff --git a/great_expectations/expectations/core/expect_column_values_to_not_be_null.py b/great_expectations/expectations/core/expect_column_values_to_not_be_null.py
@@ -406,8 +406,16 @@ def _validate(
 
         nonnull_count = None
 
+        # Only fetch the unexpected_rows metric when result_format sets include_unexpected_rows
+        parsed_result_format = parse_result_format(result_format)
+        unexpected_rows = None
+        if parsed_result_format.get("include_unexpected_rows", False):
+            unexpected_rows = metrics.get(
+                f"{self.map_metric}.{SummarizationMetricNameSuffixes.UNEXPECTED_ROWS.value}"
+            )
+
         return _format_map_output(
-            result_format=parse_result_format(result_format),
+            result_format=parsed_result_format,
             success=success,
             element_count=metrics.get("table.row_count"),
             nonnull_count=nonnull_count,
@@ -423,4 +431,5 @@ def _validate(
             unexpected_index_query=metrics.get(
                 f"{self.map_metric}.{SummarizationMetricNameSuffixes.UNEXPECTED_INDEX_QUERY.value}"
             ),
+            unexpected_rows=unexpected_rows,
         )
diff --git a/great_expectations/expectations/expectation.py b/great_expectations/expectations/expectation.py
@@ -2335,9 +2335,14 @@ def _validate(
             self._get_result_format(runtime_configuration=runtime_configuration)
         )
 
+        include_unexpected_rows: bool
         unexpected_index_column_names = None
         if isinstance(result_format, dict):
+            include_unexpected_rows = bool(result_format.get("include_unexpected_rows", False))
             unexpected_index_column_names = result_format.get("unexpected_index_column_names", None)
+        else:
+            include_unexpected_rows = False
+
         total_count: Optional[int] = metrics.get("table.row_count")
         unexpected_count: Optional[int] = metrics.get(
             f"{self.map_metric}.{SummarizationMetricNameSuffixes.UNEXPECTED_COUNT.value}"
@@ -2355,6 +2360,12 @@ def _validate(
             f"{self.map_metric}.{SummarizationMetricNameSuffixes.FILTERED_ROW_COUNT.value}"
         )
 
+        unexpected_rows = None
+        if include_unexpected_rows:
+            unexpected_rows = metrics.get(
+                f"{self.map_metric}.{SummarizationMetricNameSuffixes.UNEXPECTED_ROWS.value}"
+            )
+
         if (
             total_count is None
             or unexpected_count is None
@@ -2381,6 +2392,7 @@ def _validate(
             unexpected_index_list=unexpected_index_list,
             unexpected_index_query=unexpected_index_query,
             unexpected_index_column_names=unexpected_index_column_names,
+            unexpected_rows=unexpected_rows,
         )
 
 
@@ -2540,9 +2552,6 @@ def get_validation_dependencies(
             ),
         )
 
-        if result_format_str == ResultFormat.BASIC:
-            return validation_dependencies
-
         if include_unexpected_rows:
             metric_kwargs = get_metric_kwargs(
                 metric_name=f"{self.map_metric}.{SummarizationMetricNameSuffixes.UNEXPECTED_ROWS.value}",
@@ -2558,6 +2567,9 @@ def get_validation_dependencies(
                 ),
             )
 
+        if result_format_str == ResultFormat.BASIC:
+            return validation_dependencies
+
         from great_expectations.execution_engine import (
             SqlAlchemyExecutionEngine,
         )
@@ -2605,9 +2617,14 @@ def _validate(
         execution_engine: Optional[ExecutionEngine] = None,
     ):
         result_format = self._get_result_format(runtime_configuration=runtime_configuration)
+
+        include_unexpected_rows: bool
         unexpected_index_column_names = None
         if isinstance(result_format, dict):
+            include_unexpected_rows = bool(result_format.get("include_unexpected_rows", False))
             unexpected_index_column_names = result_format.get("unexpected_index_column_names", None)
+        else:
+            include_unexpected_rows = False
 
         total_count: Optional[int] = metrics.get("table.row_count")
         unexpected_count: Optional[int] = metrics.get(
@@ -2626,6 +2643,12 @@ def _validate(
             f"{self.map_metric}.{SummarizationMetricNameSuffixes.UNEXPECTED_INDEX_QUERY.value}"
         )
 
+        unexpected_rows = None
+        if include_unexpected_rows:
+            unexpected_rows = metrics.get(
+                f"{self.map_metric}.{SummarizationMetricNameSuffixes.UNEXPECTED_ROWS.value}"
+            )
+
         if (
             total_count is None
             or unexpected_count is None
@@ -2652,6 +2675,7 @@ def _validate(
             unexpected_index_list=unexpected_index_list,
             unexpected_index_query=unexpected_index_query,
             unexpected_index_column_names=unexpected_index_column_names,
+            unexpected_rows=unexpected_rows,
         )
 
 
diff --git a/tests/integration/data_sources_and_expectations/expectations/test_expect_column_pair_values_to_be_equal.py b/tests/integration/data_sources_and_expectations/expectations/test_expect_column_pair_values_to_be_equal.py
@@ -1,4 +1,5 @@
 from datetime import datetime
+from typing import Any, Dict, cast
 from unittest.mock import ANY
 
 import pandas as pd
@@ -220,3 +221,32 @@ def test_success_with_suite_param_ignore_row_if_(
         expectation, expectation_parameters={suite_param_key: suite_param_value}
     )
     assert result.success == expected_result
+
+
+@parameterize_batch_for_data_sources(data_source_configs=JUST_PANDAS_DATA_SOURCES, data=DATA)
+def test_include_unexpected_rows(batch_for_datasource: Batch) -> None:
+    """Test that include_unexpected_rows works correctly for ExpectColumnPairValuesToBeEqual."""
+    expectation = gxe.ExpectColumnPairValuesToBeEqual(
+        column_A=EQUAL_STRINGS_A, column_B=UNEQUAL_STRINGS
+    )
+    result = batch_for_datasource.validate(
+        expectation, result_format={"result_format": "BASIC", "include_unexpected_rows": True}
+    )
+
+    assert not result.success
+    result_dict = cast("Dict[str, Any]", result.to_json_dict()["result"])
+
+    # Verify that unexpected_rows is present and contains the expected data
+    assert "unexpected_rows" in result_dict
+    assert result_dict["unexpected_rows"] is not None
+
+    # Convert to DataFrame for easier comparison
+    unexpected_rows_data = result_dict["unexpected_rows"]
+    assert isinstance(unexpected_rows_data, list)
+    unexpected_rows_df = pd.DataFrame(unexpected_rows_data)
+
+    # Should contain 1 row where column_A != column_B
+    assert len(unexpected_rows_df) == 1
+
+    # The unexpected row should have different values in column_A and column_B
+    assert unexpected_rows_df.loc[0, EQUAL_STRINGS_A] != unexpected_rows_df.loc[0, UNEQUAL_STRINGS]
diff --git a/tests/integration/data_sources_and_expectations/expectations/test_expect_column_values_to_be_null.py b/tests/integration/data_sources_and_expectations/expectations/test_expect_column_values_to_be_null.py
@@ -1,3 +1,4 @@
+from typing import Any, Dict, cast
 from unittest.mock import ANY
 
 import pandas as pd
@@ -112,3 +113,34 @@ def test_failure(
 ) -> None:
     result = batch_for_datasource.validate(expectation)
     assert not result.success
+
+
+@parameterize_batch_for_data_sources(data_source_configs=JUST_PANDAS_DATA_SOURCES, data=DATA)
+def test_include_unexpected_rows_pandas(batch_for_datasource: Batch) -> None:
+    """Test that include_unexpected_rows works correctly for ExpectColumnValuesToBeNull."""
+    expectation = gxe.ExpectColumnValuesToBeNull(column=MOSTLY_NULL_COLUMN)
+    result = batch_for_datasource.validate(
+        expectation, result_format={"result_format": "BASIC", "include_unexpected_rows": True}
+    )
+
+    assert not result.success
+    result_dict = cast("Dict[str, Any]", result.to_json_dict()["result"])
+
+    # Verify that unexpected_rows is present and contains the expected data
+    assert "unexpected_rows" in result_dict
+    assert result_dict["unexpected_rows"] is not None
+
+    # Convert to DataFrame for easier comparison
+    unexpected_rows_data = result_dict["unexpected_rows"]
+    assert isinstance(unexpected_rows_data, list)
+    unexpected_rows_df = pd.DataFrame(unexpected_rows_data)
+
+    # Should contain 1 row where MOSTLY_NULL_COLUMN is not null (index 0 with value 1)
+    assert len(unexpected_rows_df) == 1
+    assert list(unexpected_rows_df.index) == [0]
+
+    # The unexpected row should have value 1 in MOSTLY_NULL_COLUMN
+    assert unexpected_rows_df.loc[0, MOSTLY_NULL_COLUMN] == 1
+
+    # ALL_NULL_COLUMN should be null in the unexpected row
+    assert pd.isna(unexpected_rows_df.loc[0, ALL_NULL_COLUMN])
diff --git a/tests/integration/data_sources_and_expectations/expectations/test_expect_column_values_to_not_be_null.py b/tests/integration/data_sources_and_expectations/expectations/test_expect_column_values_to_not_be_null.py
@@ -1,3 +1,4 @@
+from typing import Any, Dict, cast
 from unittest.mock import ANY
 
 import pandas as pd
@@ -174,3 +175,35 @@ def test_failure(
 ) -> None:
     result = batch_for_datasource.validate(expectation)
     assert not result.success
+
+
+@parameterize_batch_for_data_sources(data_source_configs=JUST_PANDAS_DATA_SOURCES, data=DATA)
+def test_include_unexpected_rows_pandas(batch_for_datasource: Batch) -> None:
+    """Test that include_unexpected_rows works correctly for ExpectColumnValuesToNotBeNull."""
+    expectation = gxe.ExpectColumnValuesToNotBeNull(column=MOSTLY_NULL_COLUMN)
+    result = batch_for_datasource.validate(
+        expectation, result_format={"result_format": "BASIC", "include_unexpected_rows": True}
+    )
+
+    assert not result.success
+    result_dict = cast("Dict[str, Any]", result.to_json_dict()["result"])
+
+    # Verify that unexpected_rows is present and contains the expected data
+    assert "unexpected_rows" in result_dict
+    assert result_dict["unexpected_rows"] is not None
+
+    # Convert to DataFrame for easier comparison
+    unexpected_rows_data = result_dict["unexpected_rows"]
+    assert isinstance(unexpected_rows_data, list)
+    unexpected_rows_df = pd.DataFrame(unexpected_rows_data)
+
+    # Should contain 4 rows where MOSTLY_NULL_COLUMN is null
+    assert len(unexpected_rows_df) == 4
+
+    # All values in the MOSTLY_NULL_COLUMN should be null in the unexpected rows
+    assert unexpected_rows_df[MOSTLY_NULL_COLUMN].isnull().all()
+
+    # Other columns should have their original values (rows with indices 1,2,3,4)
+    # In the unexpected_rows result, these get re-indexed starting from 0
+    assert list(unexpected_rows_df[NON_NULL_COLUMN]) == [2, 3, 4, 5]
+    assert list(unexpected_rows_df[ALL_NULL_COLUMN].isnull()) == [True, True, True, True]
diff --git a/tests/integration/data_sources_and_expectations/expectations/test_expect_multicolumn_sum_to_equal.py b/tests/integration/data_sources_and_expectations/expectations/test_expect_multicolumn_sum_to_equal.py
@@ -1,3 +1,5 @@
+from typing import Any, Dict, cast
+
 import pandas as pd
 import pytest
 
@@ -114,3 +116,31 @@ def test_success_with_suite_param_ignore_row_if_(
         expectation, expectation_parameters={suite_param_key: suite_param_value}
     )
     assert result.success == expected_result
+
+
+@parameterize_batch_for_data_sources(data_source_configs=JUST_PANDAS_DATA_SOURCES, data=DATA)
+def test_include_unexpected_rows(batch_for_datasource: Batch) -> None:
+    """Test that include_unexpected_rows works correctly for ExpectMulticolumnSumToEqual."""
+    expectation = gxe.ExpectMulticolumnSumToEqual(column_list=[COL_A, COL_B], sum_total=10)
+    result = batch_for_datasource.validate(
+        expectation, result_format={"result_format": "BASIC", "include_unexpected_rows": True}
+    )
+
+    assert not result.success
+    result_dict = cast("Dict[str, Any]", result.to_json_dict()["result"])
+
+    # Verify that unexpected_rows is present and contains the expected data
+    assert "unexpected_rows" in result_dict
+    assert result_dict["unexpected_rows"] is not None
+
+    # Convert to DataFrame for easier comparison
+    unexpected_rows_data = result_dict["unexpected_rows"]
+    assert isinstance(unexpected_rows_data, list)
+    unexpected_rows_df = pd.DataFrame(unexpected_rows_data)
+
+    # Should contain at least one row where sum != 10
+    assert len(unexpected_rows_df) > 0
+
+    # Check that the rows contain the expected columns
+    assert COL_A in unexpected_rows_df.columns
+    assert COL_B in unexpected_rows_df.columns
diff --git a/tests/integration/data_sources_and_expectations/expectations/test_expect_select_column_values_to_be_unique_within_record.py b/tests/integration/data_sources_and_expectations/expectations/test_expect_select_column_values_to_be_unique_within_record.py
@@ -1,3 +1,5 @@
+from typing import Any, Dict, cast
+
 import pandas as pd
 import pytest
 
@@ -112,3 +114,34 @@ def test_success_with_suite_param_ignore_row_if_(
         expectation, expectation_parameters={suite_param_key: suite_param_value}
     )
     assert result.success == expected_result
+
+
+@parameterize_batch_for_data_sources(data_source_configs=JUST_PANDAS_DATA_SOURCES, data=DATA)
+def test_include_unexpected_rows(batch_for_datasource: Batch) -> None:
+    """Test include_unexpected_rows for ExpectSelectColumnValuesToBeUniqueWithinRecord."""
+    expectation = gxe.ExpectSelectColumnValuesToBeUniqueWithinRecord(
+        column_list=[INT_COL_A, INT_COL_B, INT_COL_C]
+    )
+    result = batch_for_datasource.validate(
+        expectation, result_format={"result_format": "BASIC", "include_unexpected_rows": True}
+    )
+
+    assert not result.success
+    result_dict = cast("Dict[str, Any]", result.to_json_dict()["result"])
+
+    # Verify that unexpected_rows is present and contains the expected data
+    assert "unexpected_rows" in result_dict
+    assert result_dict["unexpected_rows"] is not None
+
+    # Convert to DataFrame for easier comparison
+    unexpected_rows_data = result_dict["unexpected_rows"]
+    assert isinstance(unexpected_rows_data, list)
+    unexpected_rows_df = pd.DataFrame(unexpected_rows_data)
+
+    # Should contain rows that have non-unique values in the selected columns
+    assert len(unexpected_rows_df) > 0
+
+    # Check that the rows contain the expected columns
+    assert INT_COL_A in unexpected_rows_df.columns
+    assert INT_COL_B in unexpected_rows_df.columns
+    assert INT_COL_C in unexpected_rows_df.columns