Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -179,12 +179,46 @@ class ExpectColumnValuesToBeOfType(ColumnMapExpectation):
1 "abcde" 2
2 "1b3d5" 3

Result Format:
The shape of ``result`` depends on the backend and the column dtype:

**SQL, Spark, and Pandas (non-object dtype)** — the expectation validates the
column's *schema-level* data type rather than individual row values, so the
result contains only ``observed_value`` (the actual column type string):

.. code-block:: json

{{
"result": {{
"observed_value": "int64"
}}
}}

**Pandas with object dtype** (row-level check) — when the column dtype is
``object`` and ``type_`` is not ``"object"``/``"O"``, each row's Python type is
inspected individually and the full Column Map result format is returned:

.. code-block:: json

{{
"result": {{
"element_count": 3,
"unexpected_count": 0,
"unexpected_percent": 0.0,
"partial_unexpected_list": [],
"missing_count": 0,
"missing_percent": 0.0,
"unexpected_percent_total": 0.0,
"unexpected_percent_nonmissing": 0.0
}}
}}

Code Examples:
Passing Case:
Passing Case (SQL / Spark — observed_value result):
Input:
ExpectColumnValuesToBeOfType(
column="test2",
type_="NUMBER"
type_="INTEGER"
)

Output:
Expand All @@ -195,20 +229,13 @@ class ExpectColumnValuesToBeOfType(ColumnMapExpectation):
"exception_message": null
}},
"result": {{
"element_count": 3,
"unexpected_count": 0,
"unexpected_percent": 0.0,
"partial_unexpected_list": [],
"missing_count": 0,
"missing_percent": 0.0,
"unexpected_percent_total": 0.0,
"unexpected_percent_nonmissing": 0.0
"observed_value": "INTEGER"
}},
"meta": {{}},
"success": true
}}

Failing Case:
Failing Case (SQL / Spark — observed_value result):
Input:
ExpectColumnValuesToBeOfType(
column="test",
Expand All @@ -223,18 +250,7 @@ class ExpectColumnValuesToBeOfType(ColumnMapExpectation):
"exception_message": null
}},
"result": {{
"element_count": 3,
"unexpected_count": 3,
"unexpected_percent": 100.0,
"partial_unexpected_list": [
"12345",
"abcde",
"1b3d5"
],
"missing_count": 0,
"missing_percent": 0.0,
"unexpected_percent_total": 100.0,
"unexpected_percent_nonmissing": 100.0
"observed_value": "VARCHAR"
}},
"meta": {{}},
"success": false
Expand Down
58 changes: 58 additions & 0 deletions tests/expectations/core/test_expect_column_values_to_be_of_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,3 +193,61 @@ def test_expect_column_values_to_be_of_type_case_insensitivity(sa, dialect_name)
f"but got success={result.success}. "
f"Observed value: {result.result.get('observed_value')}"
)


@pytest.mark.unit
def test_expect_column_values_to_be_of_type_result_contains_observed_value_for_pandas():
    """Check the result shape for a Pandas column with a non-object dtype.

    For Pandas with a non-object dtype column, _validate_pandas performs a
    schema-level type check and must return a result dict with
    'observed_value', NOT the full Column Map format
    (element_count / unexpected_count / etc.).

    Regression test for issue #11076.
    """
    import pandas as pd

    from great_expectations.core.expectation_validation_result import (
        ExpectationValidationResult,
    )
    from great_expectations.execution_engine import PandasExecutionEngine
    from great_expectations.validator.validator import Validator

    # Best-effort bootstrap of an ephemeral data context. The context object is
    # never read by this test, and a failure to create it is deliberately
    # ignored so the test can still exercise the engine/validator path.
    # NOTE(review): presumably get_context() initialises global project state
    # that the Validator relies on -- confirm; if not, this block can be deleted.
    try:
        from great_expectations.data_context import get_context

        _context = get_context(mode="ephemeral")
    except Exception:
        _context = None

    # A single int64 column: a non-object dtype, so the schema-level branch of
    # the expectation is the one under test.
    df = pd.DataFrame({"amount": pd.array([1, 2, 3], dtype="int64")})

    # Simulate what build_pandas_engine_with_data would do.
    engine = PandasExecutionEngine()
    engine.load_batch_data(batch_id="test_batch", batch_data=df)

    validator = Validator(execution_engine=engine)
    result: ExpectationValidationResult = validator.expect_column_values_to_be_of_type(
        "amount", type_="int64"
    )

    # The result must contain 'observed_value', not 'element_count' / 'unexpected_count'.
    assert "observed_value" in result.result, (
        f"Expected 'observed_value' key in result, got: {result.result}"
    )
    assert "element_count" not in result.result, (
        "'element_count' should not appear in aggregate-mode result (issue #11076)"
    )
    assert result.success is True
Loading