From 0a276f43ff7cfac64c6d50275ef63a2be2cb829c Mon Sep 17 00:00:00 2001 From: creazyfrog Date: Sun, 17 May 2026 00:57:24 -0700 Subject: [PATCH 1/2] [BUGFIX] Fix misleading result format docs for ExpectColumnValuesToBeOfType (#11076) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The docstring Code Examples for ExpectColumnValuesToBeOfType showed the full Column Map result format (element_count, unexpected_count, partial_unexpected_list, etc.) for all backends. In practice this format is only returned when Pandas is used with a column whose dtype is 'object' (row-level inspection). For all other backends — SQL (including Databricks, Snowflake, SQL Server, PostgreSQL, Trino), Spark, and Pandas with non-object dtypes — the expectation validates the column's schema-level data type and returns only {"observed_value": "<column type>"}. Users relying on the documented format for Databricks or Spark were silently getting a different structure and had no way to know which format to expect. Changes: - Added a "Result Format" section to the class docstring that documents both shapes and explains when each applies, and updated the misleading Code Examples to show the 'observed_value' result returned by SQL / Spark. - Added a unit test that asserts 'observed_value' is present (and 'element_count' is absent) when running against a Pandas non-object column, preventing future regressions where the aggregate path accidentally switches to the map format (or vice-versa). 
Fixes #11076 --- .../expect_column_values_to_be_of_type.py | 62 ++++++++++++------- ...test_expect_column_values_to_be_of_type.py | 58 +++++++++++++++++ 2 files changed, 97 insertions(+), 23 deletions(-) diff --git a/great_expectations/expectations/core/expect_column_values_to_be_of_type.py b/great_expectations/expectations/core/expect_column_values_to_be_of_type.py index d75ca6263d95..6da8fc9ca554 100644 --- a/great_expectations/expectations/core/expect_column_values_to_be_of_type.py +++ b/great_expectations/expectations/core/expect_column_values_to_be_of_type.py @@ -179,12 +179,46 @@ class ExpectColumnValuesToBeOfType(ColumnMapExpectation): 1 "abcde" 2 2 "1b3d5" 3 + Result Format: + The shape of ``result`` depends on the backend and the column dtype: + + **SQL, Spark, and Pandas (non-object dtype)** — the expectation validates the + column's *schema-level* data type rather than individual row values, so the + result contains only ``observed_value`` (the actual column type string): + + .. code-block:: json + + {{ + "result": {{ + "observed_value": "int64" + }} + }} + + **Pandas with object dtype** (row-level check) — when the column dtype is + ``object`` and ``type_`` is not ``"object"``/``"O"``, each row's Python type is + inspected individually and the full Column Map result format is returned: + + .. 
code-block:: json + + {{ + "result": {{ + "element_count": 3, + "unexpected_count": 0, + "unexpected_percent": 0.0, + "partial_unexpected_list": [], + "missing_count": 0, + "missing_percent": 0.0, + "unexpected_percent_total": 0.0, + "unexpected_percent_nonmissing": 0.0 + }} + }} + Code Examples: - Passing Case: + Passing Case (SQL / Spark — observed_value result): Input: ExpectColumnValuesToBeOfType( column="test2", - type_="NUMBER" + type_="INTEGER" ) Output: @@ -195,20 +229,13 @@ class ExpectColumnValuesToBeOfType(ColumnMapExpectation): "exception_message": null }}, "result": {{ - "element_count": 3, - "unexpected_count": 0, - "unexpected_percent": 0.0, - "partial_unexpected_list": [], - "missing_count": 0, - "missing_percent": 0.0, - "unexpected_percent_total": 0.0, - "unexpected_percent_nonmissing": 0.0 + "observed_value": "INTEGER" }}, "meta": {{}}, "success": true }} - Failing Case: + Failing Case (SQL / Spark — observed_value result): Input: ExpectColumnValuesToBeOfType( column="test", @@ -223,18 +250,7 @@ class ExpectColumnValuesToBeOfType(ColumnMapExpectation): "exception_message": null }}, "result": {{ - "element_count": 3, - "unexpected_count": 3, - "unexpected_percent": 100.0, - "partial_unexpected_list": [ - "12345", - "abcde", - "1b3d5" - ], - "missing_count": 0, - "missing_percent": 0.0, - "unexpected_percent_total": 100.0, - "unexpected_percent_nonmissing": 100.0 + "observed_value": "VARCHAR" }}, "meta": {{}}, "success": false diff --git a/tests/expectations/core/test_expect_column_values_to_be_of_type.py b/tests/expectations/core/test_expect_column_values_to_be_of_type.py index c51aa1b820b7..8af9799f7d28 100644 --- a/tests/expectations/core/test_expect_column_values_to_be_of_type.py +++ b/tests/expectations/core/test_expect_column_values_to_be_of_type.py @@ -193,3 +193,61 @@ def test_expect_column_values_to_be_of_type_case_insensitivity(sa, dialect_name) f"but got success={result.success}. 
" f"Observed value: {result.result.get('observed_value')}" ) + + +@pytest.mark.unit +def test_expect_column_values_to_be_of_type_result_contains_observed_value_for_pandas(): + """For Pandas with a non-object dtype column, _validate_pandas performs a schema-level + type check and must return a result dict with 'observed_value', NOT the full Column Map + format (element_count / unexpected_count / etc.). Regression test for issue #11076.""" + import pandas as pd + + from great_expectations.core.expectation_validation_result import ( + ExpectationValidationResult, + ) + from great_expectations.execution_engine import PandasExecutionEngine + from great_expectations.validator.validator import Validator + + df = pd.DataFrame({"amount": pd.array([1, 2, 3], dtype="int64")}) + + context = None + try: + from great_expectations.data_context import get_context + + context = get_context(mode="ephemeral") + except Exception: + pass + + engine = PandasExecutionEngine() + + # Simulate what build_pandas_engine_with_data would do. + from great_expectations.core.batch import BatchDefinition, BatchMarkers + from great_expectations.core.id_dict import BatchSpec + + batch_spec = BatchSpec(path="test") + batch_definition = BatchDefinition( + datasource_name="test", + data_connector_name="test", + data_asset_name="test", + batch_identifiers={}, + ) + import pandas as pd + + engine.load_batch_data( + batch_id="test_batch", + batch_data=pd.DataFrame({"amount": pd.array([1, 2, 3], dtype="int64")}), + ) + + validator = Validator(execution_engine=engine) + result: ExpectationValidationResult = validator.expect_column_values_to_be_of_type( + "amount", type_="int64" + ) + + # The result must contain 'observed_value', not 'element_count' / 'unexpected_count'. 
+ assert "observed_value" in result.result, ( + f"Expected 'observed_value' key in result, got: {result.result}" + ) + assert "element_count" not in result.result, ( + "'element_count' should not appear in aggregate-mode result (issue #11076)" + ) + assert result.success is True From e450dd482f28569a281f9db1e978c1dd3b843bf1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 17 May 2026 07:58:13 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../core/test_expect_column_values_to_be_of_type.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/expectations/core/test_expect_column_values_to_be_of_type.py b/tests/expectations/core/test_expect_column_values_to_be_of_type.py index 8af9799f7d28..6aff6cd55864 100644 --- a/tests/expectations/core/test_expect_column_values_to_be_of_type.py +++ b/tests/expectations/core/test_expect_column_values_to_be_of_type.py @@ -221,7 +221,7 @@ def test_expect_column_values_to_be_of_type_result_contains_observed_value_for_p engine = PandasExecutionEngine() # Simulate what build_pandas_engine_with_data would do. - from great_expectations.core.batch import BatchDefinition, BatchMarkers + from great_expectations.core.batch import BatchDefinition from great_expectations.core.id_dict import BatchSpec batch_spec = BatchSpec(path="test")