From ee0cbde40511d15933c759fb72f16dfeeb3fe27e Mon Sep 17 00:00:00 2001 From: Josh Stauffer <66793731+joshua-stauffer@users.noreply.github.com> Date: Thu, 7 May 2026 16:45:57 +0200 Subject: [PATCH 01/20] chore: gitignore tests/_artifacts/ to keep validation findings out of git Satisfies requirement 4.3: the default findings path for validation result schema runs (tests/_artifacts/validation_result_schemas/findings/) must not be committed to the repository as test output. --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 764f679fc439..2827ace2a805 100644 --- a/.gitignore +++ b/.gitignore @@ -166,3 +166,6 @@ assets/docker/mercury/volume/ # mise mise.toml + +# Test artifact output (validation result schemas findings, etc.) +/tests/_artifacts/ From ee6858cf269f30b4645180c248d56acc44ed7c91 Mon Sep 17 00:00:00 2001 From: Josh Stauffer <66793731+joshua-stauffer@users.noreply.github.com> Date: Thu, 7 May 2026 16:51:03 +0200 Subject: [PATCH 02/20] feat(validation-result-schemas): create package skeleton (task 1.3) --- .../core/validation_result_schemas/__init__.py | 5 +++++ .../core/validation_result_schemas/schemas/__init__.py | 1 + tests/unit/__init__.py | 0 tests/unit/core/__init__.py | 0 tests/unit/core/validation_result_schemas/__init__.py | 0 5 files changed, 6 insertions(+) create mode 100644 great_expectations/core/validation_result_schemas/__init__.py create mode 100644 great_expectations/core/validation_result_schemas/schemas/__init__.py create mode 100644 tests/unit/__init__.py create mode 100644 tests/unit/core/__init__.py create mode 100644 tests/unit/core/validation_result_schemas/__init__.py diff --git a/great_expectations/core/validation_result_schemas/__init__.py b/great_expectations/core/validation_result_schemas/__init__.py new file mode 100644 index 000000000000..1bb0fbb87d8b --- /dev/null +++ b/great_expectations/core/validation_result_schemas/__init__.py @@ -0,0 +1,5 @@ +"""Internal-only typed validation 
result schemas. + +Re-exports are populated as implementation tasks land. This package is not +added to great_expectations/__init__.py and contains no @public_api symbols. +""" diff --git a/great_expectations/core/validation_result_schemas/schemas/__init__.py b/great_expectations/core/validation_result_schemas/schemas/__init__.py new file mode 100644 index 000000000000..ca0e69e10bbf --- /dev/null +++ b/great_expectations/core/validation_result_schemas/schemas/__init__.py @@ -0,0 +1 @@ +"""Schema family re-exports. Populated as schema tasks land.""" diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/unit/core/__init__.py b/tests/unit/core/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/unit/core/validation_result_schemas/__init__.py b/tests/unit/core/validation_result_schemas/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 From 8c8ce6ba53f06e010f5847429c7f26e6df54c703 Mon Sep 17 00:00:00 2001 From: Josh Stauffer <66793731+joshua-stauffer@users.noreply.github.com> Date: Thu, 7 May 2026 17:03:28 +0200 Subject: [PATCH 03/20] feat(validation-result-schemas): ResultFormatConfig TypedDict + unit tests (task 2.1) --- .../format_config.py | 29 +++ .../test_format_config.py | 185 ++++++++++++++++++ 2 files changed, 214 insertions(+) create mode 100644 great_expectations/core/validation_result_schemas/format_config.py create mode 100644 tests/unit/core/validation_result_schemas/test_format_config.py diff --git a/great_expectations/core/validation_result_schemas/format_config.py b/great_expectations/core/validation_result_schemas/format_config.py new file mode 100644 index 000000000000..c60ac0bd17e7 --- /dev/null +++ b/great_expectations/core/validation_result_schemas/format_config.py @@ -0,0 +1,29 @@ +"""ResultFormatConfig TypedDict for internal use by the validation result dispatcher. 
+ +These types are not part of the public API and must not be exported via +great_expectations/__init__.py or decorated with @public_api. +""" +from __future__ import annotations + +from typing import TypedDict + + +class ResultFormatConfigRequired(TypedDict): + """Required keys always present in a parsed result-format config dict.""" + + result_format: str # one of the 4 ResultFormat enum values + partial_unexpected_count: int + include_unexpected_rows: bool + map_expectation_unexpected_rows_as_dict: bool + + +class ResultFormatConfig(ResultFormatConfigRequired, total=False): + """Full result-format config dict including optional keys. + + The two-class overlay pattern (required base + total=False subclass) lets us + express "required + optional" without NotRequired[...], which requires + Python 3.11+. This keeps the code parseable on Python 3.10. + """ + + exclude_unexpected_values: bool + return_unexpected_index_query: bool diff --git a/tests/unit/core/validation_result_schemas/test_format_config.py b/tests/unit/core/validation_result_schemas/test_format_config.py new file mode 100644 index 000000000000..8b1936a54ae9 --- /dev/null +++ b/tests/unit/core/validation_result_schemas/test_format_config.py @@ -0,0 +1,185 @@ +"""Unit tests for ResultFormatConfig TypedDict. + +Round-trips parse_result_format() output under each ResultFormat value, +asserting required keys are present and optional keys behave correctly. 
+""" +from __future__ import annotations + +import pytest + +from great_expectations.core.result_format import ResultFormat +from great_expectations.core.validation_result_schemas.format_config import ( + ResultFormatConfig, + ResultFormatConfigRequired, +) +from great_expectations.expectations.expectation_configuration import parse_result_format + +REQUIRED_KEYS = frozenset( + { + "result_format", + "partial_unexpected_count", + "include_unexpected_rows", + "map_expectation_unexpected_rows_as_dict", + } +) +OPTIONAL_KEYS = frozenset({"exclude_unexpected_values", "return_unexpected_index_query"}) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _assert_required_keys_present(config: dict) -> None: + """Assert all required keys are present in the config dict.""" + missing = REQUIRED_KEYS - config.keys() + assert not missing, f"Missing required keys: {missing}" + + +def _assert_optional_keys_absent(config: dict) -> None: + """Assert optional keys are NOT present (string-only parse_result_format input).""" + present = OPTIONAL_KEYS & config.keys() + assert not present, f"Optional keys should be absent but found: {present}" + + +# --------------------------------------------------------------------------- +# Tests: string-form parse_result_format produces only required keys +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +def test_boolean_only_required_keys_present() -> None: + raw = parse_result_format(ResultFormat.BOOLEAN_ONLY.value) + config: ResultFormatConfig = raw # type: ignore[assignment] + _assert_required_keys_present(config) + _assert_optional_keys_absent(config) + assert config["result_format"] == ResultFormat.BOOLEAN_ONLY.value + + +@pytest.mark.unit +def test_basic_required_keys_present() -> None: + raw = parse_result_format(ResultFormat.BASIC.value) + config: 
ResultFormatConfig = raw # type: ignore[assignment] + _assert_required_keys_present(config) + _assert_optional_keys_absent(config) + assert config["result_format"] == ResultFormat.BASIC.value + + +@pytest.mark.unit +def test_summary_required_keys_present() -> None: + raw = parse_result_format(ResultFormat.SUMMARY.value) + config: ResultFormatConfig = raw # type: ignore[assignment] + _assert_required_keys_present(config) + _assert_optional_keys_absent(config) + assert config["result_format"] == ResultFormat.SUMMARY.value + + +@pytest.mark.unit +def test_complete_required_keys_present() -> None: + raw = parse_result_format(ResultFormat.COMPLETE.value) + config: ResultFormatConfig = raw # type: ignore[assignment] + _assert_required_keys_present(config) + _assert_optional_keys_absent(config) + assert config["result_format"] == ResultFormat.COMPLETE.value + + +# --------------------------------------------------------------------------- +# Tests: dict-form parse_result_format with optional keys present +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +def test_optional_exclude_unexpected_values_present_when_supplied() -> None: + raw = parse_result_format( + { + "result_format": ResultFormat.COMPLETE.value, + "exclude_unexpected_values": True, + } + ) + config: ResultFormatConfig = raw # type: ignore[assignment] + _assert_required_keys_present(config) + assert "exclude_unexpected_values" in config + assert config["exclude_unexpected_values"] is True # type: ignore[typeddict-item] + + +@pytest.mark.unit +def test_optional_return_unexpected_index_query_present_when_supplied() -> None: + raw = parse_result_format( + { + "result_format": ResultFormat.COMPLETE.value, + "return_unexpected_index_query": False, + } + ) + config: ResultFormatConfig = raw # type: ignore[assignment] + _assert_required_keys_present(config) + assert "return_unexpected_index_query" in config + assert config["return_unexpected_index_query"] is False 
# type: ignore[typeddict-item] + + +@pytest.mark.unit +def test_both_optional_keys_present_when_supplied() -> None: + raw = parse_result_format( + { + "result_format": ResultFormat.SUMMARY.value, + "exclude_unexpected_values": False, + "return_unexpected_index_query": True, + } + ) + config: ResultFormatConfig = raw # type: ignore[assignment] + _assert_required_keys_present(config) + assert "exclude_unexpected_values" in config + assert "return_unexpected_index_query" in config + + +# --------------------------------------------------------------------------- +# Tests: partial_unexpected_count default +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +def test_partial_unexpected_count_defaults_to_20() -> None: + raw = parse_result_format(ResultFormat.BASIC.value) + assert raw["partial_unexpected_count"] == 20 + + +@pytest.mark.unit +def test_partial_unexpected_count_preserved_when_supplied() -> None: + raw = parse_result_format( + { + "result_format": ResultFormat.BASIC.value, + "partial_unexpected_count": 5, + } + ) + assert raw["partial_unexpected_count"] == 5 + + +# --------------------------------------------------------------------------- +# Tests: TypedDict structural constraints +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +def test_result_format_config_required_is_typeddict() -> None: + """Confirm ResultFormatConfigRequired is a TypedDict class (not a runtime check, but importable).""" + # Verify the class exists and has the expected annotations + annotations = ResultFormatConfigRequired.__annotations__ + assert "result_format" in annotations + assert "partial_unexpected_count" in annotations + assert "include_unexpected_rows" in annotations + assert "map_expectation_unexpected_rows_as_dict" in annotations + + +@pytest.mark.unit +def test_result_format_config_extends_required() -> None: + """Confirm ResultFormatConfig inherits required keys from 
ResultFormatConfigRequired.""" + # TypedDict merges required keys from bases into __required_keys__; works on 3.10+. + assert ResultFormatConfigRequired.__required_keys__ <= ResultFormatConfig.__required_keys__ + + +@pytest.mark.unit +def test_result_format_config_has_optional_keys_in_annotations() -> None: + """Confirm ResultFormatConfig declares optional keys.""" + # ResultFormatConfig (total=False subclass) owns the optional fields + own_annotations = ResultFormatConfig.__annotations__ + assert "exclude_unexpected_values" in own_annotations + assert "return_unexpected_index_query" in own_annotations From e2a3b012f816f84e7d197b8927bd816251ab87fc Mon Sep 17 00:00:00 2001 From: Josh Stauffer <66793731+joshua-stauffer@users.noreply.github.com> Date: Thu, 7 May 2026 17:07:33 +0200 Subject: [PATCH 04/20] feat(validation-result-schemas): Status, RuntimeTypeName, CellCoordinates, Finding types (task 2.2) --- .../core/validation_result_schemas/types.py | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 great_expectations/core/validation_result_schemas/types.py diff --git a/great_expectations/core/validation_result_schemas/types.py b/great_expectations/core/validation_result_schemas/types.py new file mode 100644 index 000000000000..20384563ba18 --- /dev/null +++ b/great_expectations/core/validation_result_schemas/types.py @@ -0,0 +1,49 @@ +"""Type definitions for validation result schemas. + +Defines the enumeration types and TypedDicts used across the +validation_result_schemas package. 
+""" +from __future__ import annotations + +from enum import Enum +from typing import Dict, List, Optional, TypedDict + + +class Status(str, Enum): + PARSED = "parsed" + FAILED = "failed" + + +class RuntimeTypeName(str, Enum): + NONE = "none" + INT = "int" + FLOAT = "float" + STR = "str" + BOOL = "bool" + LIST = "list" + DICT = "dict" + DATAFRAME_PANDAS = "DataFrame" + DATAFRAME_SPARK = "SparkDataFrame" + OTHER = "other" + + +class CellCoordinates(TypedDict): + expectation_type: str + result_format: str # ResultFormat enum value + engine: str # 'pandas' | 'spark' | 'sql' + datasource_test_id: str + + +class Finding(TypedDict, total=False): + expectation_type: str + result_format: str + engine: str + datasource_test_id: str + status: str # Status enum value + raw_field_set: List[str] + raw_field_types: Dict[str, str] # field name -> RuntimeTypeName value + matched_variant: Optional[str] + schema_required_fields_present: List[str] + schema_optional_fields_present: List[str] + schema_extras_rejected: List[str] + error_summary: Optional[str] From be260726c041930752f5b2740046c70fa782faf7 Mon Sep 17 00:00:00 2001 From: Josh Stauffer <66793731+joshua-stauffer@users.noreply.github.com> Date: Thu, 7 May 2026 17:23:43 +0200 Subject: [PATCH 05/20] feat(validation-result-schemas): field_validators + unit tests (task 3.1) --- .../field_validators.py | 138 +++++++ .../test_field_validators.py | 345 ++++++++++++++++++ 2 files changed, 483 insertions(+) create mode 100644 great_expectations/core/validation_result_schemas/field_validators.py create mode 100644 tests/unit/core/validation_result_schemas/test_field_validators.py diff --git a/great_expectations/core/validation_result_schemas/field_validators.py b/great_expectations/core/validation_result_schemas/field_validators.py new file mode 100644 index 000000000000..4f2ed63efd81 --- /dev/null +++ b/great_expectations/core/validation_result_schemas/field_validators.py @@ -0,0 +1,138 @@ +"""Reusable pydantic v1 field validators for 
validation result schemas. + +All validators are pure functions intended to be bound to schema classes via +``pydantic.validator`` and ``pydantic.root_validator``. They are defined once +here and imported by every schema family so that per-format classes stay thin. + +Import rules (enforced by ruff banned-api): +- Pydantic symbols come exclusively from ``great_expectations.compatibility.pydantic``. +- ``RuntimeTypeName`` comes from ``validation_result_schemas.types``. +- No direct ``import pydantic``, no PEP 604 unions. +""" +from __future__ import annotations + +from typing import Any, Optional + +from great_expectations.core.validation_result_schemas.types import RuntimeTypeName + +# --------------------------------------------------------------------------- +# Runtime-type classifier +# --------------------------------------------------------------------------- + +# Module-level type map used by classify_runtime_type. +# bool is intentionally excluded — it must be checked before int (bool is a +# subclass of int), so it gets its own explicit branch in the function. +_RUNTIME_TYPE_MAP: dict = { + int: RuntimeTypeName.INT, + float: RuntimeTypeName.FLOAT, + str: RuntimeTypeName.STR, + list: RuntimeTypeName.LIST, + dict: RuntimeTypeName.DICT, +} + +# Module-level constant for the SQL engine validation error message (TRY003). +_SQL_INDEX_QUERY_REQUIRED_MSG = ( + "unexpected_index_query is required when engine_hint='sql' and " + "return_unexpected_index_query=True, but it was not found in the " + "result dict. This indicates a schema mismatch for this SQL engine " + "and ResultFormat combination." +) + + +def classify_runtime_type(value: Any) -> RuntimeTypeName: + """Classify the runtime type of a heterogeneous field (e.g., unexpected_rows). + + Returns a stable ``RuntimeTypeName`` enum value used in findings metadata. + Never raises — all branches end in a known enum member. 
+ + Handles pyspark and pandas DataFrames by inspecting ``type(v).__module__`` + and ``type(v).__name__`` so that neither library needs to be imported at + module load time. + """ + if value is None: + return RuntimeTypeName.NONE + + # Check bool before int — bool is a subclass of int in Python + if isinstance(value, bool): + return RuntimeTypeName.BOOL + + for t, name in _RUNTIME_TYPE_MAP.items(): + if isinstance(value, t): + return name + + # DataFrame detection without importing the package + type_name = type(value).__name__ + module = type(value).__module__ + if type_name == "DataFrame" and not module.startswith("pyspark"): + return RuntimeTypeName.DATAFRAME_PANDAS + if "pyspark" in module: + return RuntimeTypeName.DATAFRAME_SPARK + + return RuntimeTypeName.OTHER + + +# --------------------------------------------------------------------------- +# Field validators (pydantic v1 style — bound by callers via validator()) +# --------------------------------------------------------------------------- + + +def validate_unexpected_rows_passthrough(cls: Any, v: Any) -> Any: + """v1 validator for ``unexpected_rows``. + + Accepts any runtime type; the matrix runner records the actual type via + ``classify_runtime_type`` for findings. Does **not** raise on type mismatch + — the schema accepts ``Any`` for this field because the runtime type differs + across execution engines (pandas DataFrame, list[dict] on SQL, Spark frame). + """ + return v + + +def validate_partial_unexpected_counts_fallback( + cls: Any, v: Optional[list] +) -> Optional[list]: + """v1 validator for ``partial_unexpected_counts``. + + Accepts the two documented shapes: + - Canonical: ``[{"value": x, "count": n}, ...]`` + - Error fallback: ``[{"error": "partial_exception_counts requires a hashable type"}]`` + - ``None`` + + Both shapes are returned unchanged — the validator is a passthrough that + exists so the schema explicitly acknowledges the fallback rather than + inadvertently forbidding it. 
+ """ + return v + + +# --------------------------------------------------------------------------- +# Root validator +# --------------------------------------------------------------------------- + + +def root_validate_engine_required_fields(cls: Any, values: dict) -> dict: + """v1 root_validator for SQL engine-required fields. + + If ``engine_hint`` is ``"sql"`` and ``return_unexpected_index_query`` is + ``True``, asserts that ``unexpected_index_query`` is present (non-None) in + the parsed values dict. All other combinations are a no-op. + + Engine hint is read from the ``engine_hint`` key in the ``values`` dict. + Schemas that do not declare ``engine_hint`` as a field will simply not have + the key, and the check is skipped — ensuring the validator is safe to include + in any schema regardless of whether the dispatcher sets the hint. + """ + engine_hint = values.get("engine_hint") + + if engine_hint != "sql": + # Not a SQL engine (or no hint): skip the SQL-specific assertion + return values + + if not values.get("return_unexpected_index_query"): + # SQL engine, but the query was not requested: no assertion needed + return values + + # SQL engine + query was requested: unexpected_index_query must be present + if not values.get("unexpected_index_query"): + raise ValueError(_SQL_INDEX_QUERY_REQUIRED_MSG) + + return values diff --git a/tests/unit/core/validation_result_schemas/test_field_validators.py b/tests/unit/core/validation_result_schemas/test_field_validators.py new file mode 100644 index 000000000000..e51515798066 --- /dev/null +++ b/tests/unit/core/validation_result_schemas/test_field_validators.py @@ -0,0 +1,345 @@ +"""Unit tests for field_validators.py. 
+ +Covers: +- classify_runtime_type for every declared RuntimeTypeName enum value +- validate_partial_unexpected_counts_fallback for both valid shapes +- root_validate_engine_required_fields for the skip-when-no-hint and + assert-when-sql-and-requested cases + +All tests are marked @pytest.mark.unit and run via: + pytest tests/unit/core/validation_result_schemas/test_field_validators.py -m unit +""" +from __future__ import annotations + +from typing import Any, Dict, List, Optional + +import pytest + +from great_expectations.compatibility import pydantic +from great_expectations.core.validation_result_schemas.field_validators import ( + classify_runtime_type, + root_validate_engine_required_fields, + validate_partial_unexpected_counts_fallback, + validate_unexpected_rows_passthrough, +) +from great_expectations.core.validation_result_schemas.types import RuntimeTypeName + +# --------------------------------------------------------------------------- +# Helpers — minimal Pydantic v1 model for exercising validators +# --------------------------------------------------------------------------- + + +class _PartialCountsModel(pydantic.BaseModel): + """Minimal model to exercise validate_partial_unexpected_counts_fallback.""" + + partial_unexpected_counts: Optional[List[Any]] = None + + _validate_counts = pydantic.validator( + "partial_unexpected_counts", pre=True, allow_reuse=True + )(validate_partial_unexpected_counts_fallback) + + +class _PassthroughModel(pydantic.BaseModel): + """Minimal model to exercise validate_unexpected_rows_passthrough.""" + + unexpected_rows: Any = None + + _validate_rows = pydantic.validator("unexpected_rows", pre=True, allow_reuse=True)( + validate_unexpected_rows_passthrough + ) + + +class _EngineHintModel(pydantic.BaseModel): + """Minimal model to exercise root_validate_engine_required_fields. + + engine_hint is a regular pydantic field (no underscore prefix) so that it + appears in the values dict during root validation. 
In pydantic v1, fields + starting with ``_`` are silently excluded from ``__fields__`` and never + reach the root_validator — making the SQL check dead code. Using a plain + field name avoids that pitfall. + """ + + engine_hint: Optional[str] = None + return_unexpected_index_query: Optional[bool] = None + unexpected_index_query: Optional[str] = None + + _root_validate = pydantic.root_validator(allow_reuse=True)( + root_validate_engine_required_fields + ) + + +# --------------------------------------------------------------------------- +# classify_runtime_type +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +def test_classify_none() -> None: + assert classify_runtime_type(None) == RuntimeTypeName.NONE + + +@pytest.mark.unit +def test_classify_bool() -> None: + # bool must be checked before int since bool is a subclass of int + assert classify_runtime_type(True) == RuntimeTypeName.BOOL + assert classify_runtime_type(False) == RuntimeTypeName.BOOL + + +@pytest.mark.unit +def test_classify_int() -> None: + assert classify_runtime_type(0) == RuntimeTypeName.INT + assert classify_runtime_type(42) == RuntimeTypeName.INT + assert classify_runtime_type(-1) == RuntimeTypeName.INT + + +@pytest.mark.unit +def test_classify_float() -> None: + assert classify_runtime_type(3.14) == RuntimeTypeName.FLOAT + assert classify_runtime_type(0.0) == RuntimeTypeName.FLOAT + + +@pytest.mark.unit +def test_classify_str() -> None: + assert classify_runtime_type("hello") == RuntimeTypeName.STR + assert classify_runtime_type("") == RuntimeTypeName.STR + + +@pytest.mark.unit +def test_classify_list() -> None: + assert classify_runtime_type([]) == RuntimeTypeName.LIST + assert classify_runtime_type([1, 2, 3]) == RuntimeTypeName.LIST + + +@pytest.mark.unit +def test_classify_dict() -> None: + assert classify_runtime_type({}) == RuntimeTypeName.DICT + assert classify_runtime_type({"key": "value"}) == RuntimeTypeName.DICT + + +@pytest.mark.unit 
+def test_classify_pandas_dataframe() -> None: + """pandas DataFrame should return DATAFRAME_PANDAS without requiring pandas at import time.""" + pd = pytest.importorskip("pandas") + df = pd.DataFrame({"a": [1, 2, 3]}) + assert classify_runtime_type(df) == RuntimeTypeName.DATAFRAME_PANDAS + + +@pytest.mark.unit +def test_classify_spark_dataframe_other_when_pyspark_unavailable() -> None: + """When pyspark is unavailable, a mock object named DataFrame from pyspark should + be classified as DATAFRAME_SPARK if it looks like pyspark, or OTHER otherwise.""" + # Without actual pyspark, we simulate the check using a mock + # The classifier should detect pyspark via module path inspection + class _FakeSparkDataFrame: + pass + + # Give it a pyspark-like module path + _FakeSparkDataFrame.__module__ = "pyspark.sql.dataframe" + _FakeSparkDataFrame.__name__ = "DataFrame" + + fake_spark_df = _FakeSparkDataFrame() + result = classify_runtime_type(fake_spark_df) + assert result == RuntimeTypeName.DATAFRAME_SPARK + + +@pytest.mark.unit +def test_classify_other_for_unknown_type() -> None: + class _CustomObject: + pass + + assert classify_runtime_type(_CustomObject()) == RuntimeTypeName.OTHER + assert classify_runtime_type(object()) == RuntimeTypeName.OTHER + + +@pytest.mark.unit +def test_classify_never_raises() -> None: + """classify_runtime_type must never raise regardless of input.""" + # Includes edge cases: class instances, iterators, generators + class _WeirdObject: + def __class_getitem__(cls, item: Any) -> Any: + raise RuntimeError("should never be called") + + for value in [ + _WeirdObject(), + (1, 2, 3), # tuple -> OTHER + {1, 2, 3}, # set -> OTHER + lambda: None, # callable -> OTHER + ]: + result = classify_runtime_type(value) + assert isinstance(result, RuntimeTypeName), f"Expected RuntimeTypeName for {value!r}" + + +# --------------------------------------------------------------------------- +# validate_unexpected_rows_passthrough +# 
--------------------------------------------------------------------------- + + +@pytest.mark.unit +def test_passthrough_accepts_none() -> None: + m = _PassthroughModel(unexpected_rows=None) + assert m.unexpected_rows is None + + +@pytest.mark.unit +def test_passthrough_accepts_list() -> None: + rows = [{"a": 1}, {"a": 2}] + m = _PassthroughModel(unexpected_rows=rows) + assert m.unexpected_rows == rows + + +@pytest.mark.unit +def test_passthrough_accepts_dict() -> None: + m = _PassthroughModel(unexpected_rows={"a": 1}) + assert m.unexpected_rows == {"a": 1} + + +@pytest.mark.unit +def test_passthrough_returns_value_unchanged() -> None: + sentinel = object() + # Can't pass an arbitrary object through pydantic's JSON serialization, but + # we can verify the validator function directly + result = validate_unexpected_rows_passthrough(None, sentinel) + assert result is sentinel + + +# --------------------------------------------------------------------------- +# validate_partial_unexpected_counts_fallback +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +def test_partial_counts_accepts_none() -> None: + m = _PartialCountsModel(partial_unexpected_counts=None) + assert m.partial_unexpected_counts is None + + +@pytest.mark.unit +def test_partial_counts_accepts_canonical_shape() -> None: + """Canonical shape: [{value: x, count: n}, ...]""" + counts = [{"value": "foo", "count": 3}, {"value": "bar", "count": 1}] + m = _PartialCountsModel(partial_unexpected_counts=counts) + assert m.partial_unexpected_counts == counts + + +@pytest.mark.unit +def test_partial_counts_accepts_error_fallback_shape() -> None: + """Error fallback shape: [{"error": "partial_exception_counts requires a hashable type"}]""" + fallback = [{"error": "partial_exception_counts requires a hashable type"}] + m = _PartialCountsModel(partial_unexpected_counts=fallback) + assert m.partial_unexpected_counts == fallback + + +@pytest.mark.unit +def 
test_partial_counts_accepts_empty_list() -> None: + m = _PartialCountsModel(partial_unexpected_counts=[]) + assert m.partial_unexpected_counts == [] + + +@pytest.mark.unit +def test_partial_counts_returns_value_unchanged() -> None: + counts = [{"value": "x", "count": 5}] + result = validate_partial_unexpected_counts_fallback(None, counts) + assert result == counts + + +@pytest.mark.unit +def test_partial_counts_none_returned_as_none() -> None: + result = validate_partial_unexpected_counts_fallback(None, None) + assert result is None + + +# --------------------------------------------------------------------------- +# root_validate_engine_required_fields +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +def test_root_validate_no_hint_is_noop() -> None: + """When no engine hint is present, the validator is a no-op (no assertion).""" + # No engine_hint set; return_unexpected_index_query=True but no unexpected_index_query + # should NOT raise because there is no hint to trigger the SQL check + values: Dict[str, Any] = { + "return_unexpected_index_query": True, + "unexpected_index_query": None, + } + result = root_validate_engine_required_fields(None, values) + assert result == values + + +@pytest.mark.unit +def test_root_validate_sql_hint_with_requested_and_present() -> None: + """When engine_hint='sql', return_unexpected_index_query=True, and + unexpected_index_query is present, the validator should pass.""" + values: Dict[str, Any] = { + "engine_hint": "sql", + "return_unexpected_index_query": True, + "unexpected_index_query": "SELECT * FROM ...", + } + result = root_validate_engine_required_fields(None, values) + assert result == values + + +@pytest.mark.unit +def test_root_validate_sql_hint_with_requested_but_missing_raises() -> None: + """When engine_hint='sql', return_unexpected_index_query=True, but + unexpected_index_query is absent (None), the validator should raise ValueError.""" + values: Dict[str, Any] = 
{ + "engine_hint": "sql", + "return_unexpected_index_query": True, + "unexpected_index_query": None, + } + with pytest.raises((ValueError, pydantic.ValidationError)): + root_validate_engine_required_fields(None, values) + + +@pytest.mark.unit +def test_root_validate_sql_hint_without_requested_is_noop() -> None: + """When engine_hint='sql' but return_unexpected_index_query is False/absent, + the validator should pass even without unexpected_index_query.""" + values: Dict[str, Any] = { + "engine_hint": "sql", + "return_unexpected_index_query": False, + "unexpected_index_query": None, + } + result = root_validate_engine_required_fields(None, values) + assert result == values + + +@pytest.mark.unit +def test_root_validate_non_sql_hint_with_requested_but_missing_is_noop() -> None: + """When engine_hint is not 'sql' (e.g., 'pandas'), the SQL assertion is skipped.""" + values: Dict[str, Any] = { + "engine_hint": "pandas", + "return_unexpected_index_query": True, + "unexpected_index_query": None, + } + result = root_validate_engine_required_fields(None, values) + assert result == values + + +@pytest.mark.unit +def test_root_validate_via_model_no_hint() -> None: + """Integration check: model construction without engine hint passes.""" + m = _EngineHintModel( + return_unexpected_index_query=True, + unexpected_index_query=None, + ) + assert m.return_unexpected_index_query is True + assert m.unexpected_index_query is None + + +@pytest.mark.unit +def test_root_validate_via_model_sql_enforcement_fires() -> None: + """Model-level SQL enforcement: engine_hint='sql' + return_unexpected_index_query=True + + unexpected_index_query=None must raise pydantic.ValidationError. + + This test verifies that engine_hint is a real pydantic field (not a private + attribute with underscore prefix), so the root_validator actually receives it + in the values dict and can enforce the SQL-required-field constraint. 
+ """ + with pytest.raises(pydantic.ValidationError): + _EngineHintModel( + engine_hint="sql", + return_unexpected_index_query=True, + unexpected_index_query=None, + ) From 24f802eb316374f05d1623b6834acac6075c9ace Mon Sep 17 00:00:00 2001 From: Josh Stauffer <66793731+joshua-stauffer@users.noreply.github.com> Date: Thu, 7 May 2026 17:36:12 +0200 Subject: [PATCH 06/20] feat(validation-result-schemas): FindingsWriter + unit tests (task 3.2) --- .../findings_emitter.py | 115 +++++++ .../test_findings_emitter.py | 312 ++++++++++++++++++ 2 files changed, 427 insertions(+) create mode 100644 great_expectations/core/validation_result_schemas/findings_emitter.py create mode 100644 tests/unit/core/validation_result_schemas/test_findings_emitter.py diff --git a/great_expectations/core/validation_result_schemas/findings_emitter.py b/great_expectations/core/validation_result_schemas/findings_emitter.py new file mode 100644 index 000000000000..91db8c9fda06 --- /dev/null +++ b/great_expectations/core/validation_result_schemas/findings_emitter.py @@ -0,0 +1,115 @@ +"""Findings file writer for validation result schemas. + +Per-run-id findings file writer that emits a deterministic JSON envelope. + +Construction resolves the output directory: + 1. ``output_dir`` argument if provided + 2. environment variable GX_VALIDATION_FINDINGS_DIR if set + 3. else _DEFAULT_DIR (gitignored in the gx repo) + +The filename is ``f"{run_id}.json"``. Findings are accumulated in memory and +flushed on ``close()``; the file is written atomically (write to ``.tmp``, +then ``Path.replace``). Within a file, findings are sorted by +``(expectation_type, engine, result_format)`` for deterministic diffs across +runs. 
+""" +from __future__ import annotations + +import json +import os +from datetime import datetime, timezone +from pathlib import Path +from typing import TYPE_CHECKING, List, Optional + +if TYPE_CHECKING: + from typing_extensions import Self + + from great_expectations.core.validation_result_schemas.types import Finding + +_DEFAULT_DIR: Path = Path("tests/_artifacts/validation_result_schemas/findings") +_ENV_VAR: str = "GX_VALIDATION_FINDINGS_DIR" +SCHEMA_VERSION: int = 1 + + +def _get_gx_version() -> str: + """Return the installed great_expectations version string.""" + try: + import great_expectations + + return str(great_expectations.__version__) + except (ImportError, AttributeError): + return "unknown" + + +class FindingsWriter: + """Per-run-id findings file writer. + + Construction resolves the output directory: + 1. environment variable GX_VALIDATION_FINDINGS_DIR if set + 2. else _DEFAULT_DIR (gitignored in the gx repo) + + The filename is f"{run_id}.json". Findings are appended in memory and + flushed on close(); the file is written atomically (write to .tmp, rename). + Within a file, findings are sorted by (expectation_type, engine, + result_format) for deterministic diffs across runs. 
+ """ + + def __init__(self, run_id: str, output_dir: Optional[Path] = None) -> None: + self._run_id = run_id + self._findings: List[Finding] = [] + self._started_at_utc: str = datetime.now(timezone.utc).strftime( + "%Y-%m-%dT%H:%M:%SZ" + ) + + # Directory resolution: arg → env var → _DEFAULT_DIR + if output_dir is not None: + self._output_dir = Path(output_dir) + else: + env_val = os.environ.get(_ENV_VAR) # noqa: TID251 # os.environ allowed in config files + if env_val is not None: + self._output_dir = Path(env_val) + else: + self._output_dir = _DEFAULT_DIR + + self._output_dir.mkdir(parents=True, exist_ok=True) + + def write_finding(self, finding: Finding) -> None: + """Append *finding* to the in-memory list.""" + self._findings.append(finding) + + def close(self) -> None: + """Sort findings and write them atomically to the output file.""" + completed_at_utc: str = datetime.now(timezone.utc).strftime( + "%Y-%m-%dT%H:%M:%SZ" + ) + + # Sort deterministically by (expectation_type, engine, result_format) + sorted_findings: List[Finding] = sorted( + self._findings, + key=lambda f: ( + f.get("expectation_type", ""), # type: ignore[call-overload] + f.get("engine", ""), # type: ignore[call-overload] + f.get("result_format", ""), # type: ignore[call-overload] + ), + ) + + envelope = { + "schema_version": SCHEMA_VERSION, + "run_id": self._run_id, + "started_at_utc": self._started_at_utc, + "completed_at_utc": completed_at_utc, + "gx_version": _get_gx_version(), + "findings": sorted_findings, + } + + filepath = self._output_dir / f"{self._run_id}.json" + tmp_path = Path(str(filepath) + ".tmp") + + tmp_path.write_text(json.dumps(envelope, indent=2)) + tmp_path.replace(filepath) + + def __enter__(self) -> Self: + return self + + def __exit__(self, *exc: object) -> None: + self.close() diff --git a/tests/unit/core/validation_result_schemas/test_findings_emitter.py b/tests/unit/core/validation_result_schemas/test_findings_emitter.py new file mode 100644 index 
000000000000..853d2137a7c3 --- /dev/null +++ b/tests/unit/core/validation_result_schemas/test_findings_emitter.py @@ -0,0 +1,312 @@ +"""Unit tests for findings_emitter.py. + +Covers: +- Round-trip: write N findings via context manager, read back JSON, assert structure +- Determinism: two identical runs produce byte-identical output (modulo timestamps) +- Env-var resolution: GX_VALIDATION_FINDINGS_DIR overrides default +- Atomic write: if Path.replace raises, the destination file is unchanged + +All tests are marked @pytest.mark.unit and run via: + pytest tests/unit/core/validation_result_schemas/test_findings_emitter.py -m unit +""" +from __future__ import annotations + +import json +from datetime import datetime, timezone +from pathlib import Path +from typing import TYPE_CHECKING, List +from unittest.mock import patch + +import pytest + +from great_expectations.core.validation_result_schemas.findings_emitter import ( + _DEFAULT_DIR, + _ENV_VAR, + SCHEMA_VERSION, + FindingsWriter, +) + +if TYPE_CHECKING: + from great_expectations.core.validation_result_schemas.types import Finding + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +_SAMPLE_FINDINGS: List[Finding] = [ + { + "expectation_type": "expect_column_values_to_not_be_null", + "result_format": "COMPLETE", + "engine": "pandas", + "datasource_test_id": "ds-001", + "status": "parsed", + }, + { + "expectation_type": "expect_column_to_exist", + "result_format": "BASIC", + "engine": "spark", + "datasource_test_id": "ds-002", + "status": "parsed", + }, + { + "expectation_type": "expect_column_values_to_be_in_set", + "result_format": "SUMMARY", + "engine": "pandas", + "datasource_test_id": "ds-003", + "status": "failed", + "error_summary": "schema mismatch", + }, +] + +_FIXED_TS = "2026-05-07T14:23:11Z" + + +def _mock_now(*args, **kwargs): + """Return a fixed datetime for deterministic 
timestamp tests.""" + return datetime(2026, 5, 7, 14, 23, 11, tzinfo=timezone.utc) + + +# --------------------------------------------------------------------------- +# 1. Round-trip: write N findings, read back JSON, assert structure +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +def test_round_trip_findings(tmp_path: Path) -> None: + """Write findings via context manager and verify the JSON envelope.""" + run_id = "test-round-trip-run" + with patch( + "great_expectations.core.validation_result_schemas.findings_emitter.datetime" + ) as mock_dt: + mock_dt.now.return_value = _mock_now() + mock_dt.now.side_effect = _mock_now + + with FindingsWriter(run_id, output_dir=tmp_path) as writer: + for finding in _SAMPLE_FINDINGS: + writer.write_finding(finding) + + output_file = tmp_path / f"{run_id}.json" + assert output_file.exists(), "Output file should exist after close()" + + with output_file.open() as f: + data = json.load(f) + + # Envelope fields + assert data["schema_version"] == SCHEMA_VERSION + assert data["run_id"] == run_id + assert "started_at_utc" in data + assert "completed_at_utc" in data + assert "gx_version" in data + assert isinstance(data["gx_version"], str) + assert isinstance(data["findings"], list) + assert len(data["findings"]) == len(_SAMPLE_FINDINGS) + + # Spot-check one finding field + types_in_output = {f["expectation_type"] for f in data["findings"]} + assert "expect_column_values_to_not_be_null" in types_in_output + assert "expect_column_to_exist" in types_in_output + + +# --------------------------------------------------------------------------- +# 2. 
# Determinism: two runs with same findings produce identical findings list
# ---------------------------------------------------------------------------


@pytest.mark.unit
def test_deterministic_output(tmp_path: Path) -> None:
    """Two runs with the same findings and timestamps produce byte-identical files.

    ``datetime`` is patched inside ``findings_emitter`` so both runs record the
    same ``started_at_utc`` / ``completed_at_utc`` strings.
    """
    run_id = "deterministic-run"
    dirs = [tmp_path / "run1", tmp_path / "run2"]
    for d in dirs:
        d.mkdir()

    for output_dir in dirs:
        with patch(
            "great_expectations.core.validation_result_schemas.findings_emitter.datetime"
        ) as mock_dt:
            mock_dt.now.side_effect = _mock_now

            with FindingsWriter(run_id, output_dir=output_dir) as writer:
                for finding in _SAMPLE_FINDINGS:
                    writer.write_finding(finding)

    file1 = dirs[0] / f"{run_id}.json"
    file2 = dirs[1] / f"{run_id}.json"

    data1 = json.loads(file1.read_text())
    data2 = json.loads(file2.read_text())

    # Findings lists should be identical (same sort order)
    assert data1["findings"] == data2["findings"]

    # With mocked timestamps, full envelope should also be identical
    assert data1 == data2

    # Parsed equality ignores key order and whitespace; the files are meant to
    # diff cleanly across runs, so compare the raw bytes as well.
    assert file1.read_bytes() == file2.read_bytes()


@pytest.mark.unit
def test_findings_sorted_by_sort_key(tmp_path: Path) -> None:
    """Findings are sorted by (expectation_type, engine, result_format)."""
    run_id = "sorted-run"

    # Feed the sample findings back-to-front so the writer has real work to do.
    findings_reversed = list(reversed(_SAMPLE_FINDINGS))

    # Guard against a fixture change that would make this test pass vacuously.
    input_types = [f["expectation_type"] for f in findings_reversed]
    assert input_types != sorted(input_types), "fixture must start unsorted"

    with FindingsWriter(run_id, output_dir=tmp_path) as writer:
        for finding in findings_reversed:
            writer.write_finding(finding)

    data = json.loads((tmp_path / f"{run_id}.json").read_text())
    sort_keys = [
        (f.get("expectation_type", ""), f.get("engine", ""), f.get("result_format", ""))
        for f in data["findings"]
    ]
    assert sort_keys == sorted(sort_keys)


# ---------------------------------------------------------------------------
# 3.
# Env-var resolution
# ---------------------------------------------------------------------------


@pytest.mark.unit
def test_env_var_resolution(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
    """GX_VALIDATION_FINDINGS_DIR env var is used when output_dir is None."""
    env_dir = tmp_path / "env_output"
    env_dir.mkdir()
    monkeypatch.setenv(_ENV_VAR, str(env_dir))

    run_id = "env-var-run"
    with FindingsWriter(run_id) as writer:
        writer.write_finding(_SAMPLE_FINDINGS[0])

    assert (env_dir / f"{run_id}.json").exists()


@pytest.mark.unit
def test_explicit_output_dir_overrides_env_var(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Explicit output_dir takes precedence over env var."""
    env_dir = tmp_path / "env_output"
    env_dir.mkdir()
    explicit_dir = tmp_path / "explicit_output"
    explicit_dir.mkdir()

    monkeypatch.setenv(_ENV_VAR, str(env_dir))

    run_id = "explicit-override-run"
    with FindingsWriter(run_id, output_dir=explicit_dir) as writer:
        writer.write_finding(_SAMPLE_FINDINGS[0])

    assert (explicit_dir / f"{run_id}.json").exists()
    assert not (env_dir / f"{run_id}.json").exists()


@pytest.mark.unit
def test_default_dir_used_when_no_env_var(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """When neither arg nor env var is set, _DEFAULT_DIR is used."""
    monkeypatch.delenv(_ENV_VAR, raising=False)

    # FindingsWriter.__init__ creates its output directory with Path.mkdir, which
    # does not go through os.makedirs, so patching os.makedirs would not stop the
    # directory from being created. _DEFAULT_DIR is a relative path, so run from
    # tmp_path instead: the directory lands in the sandbox, not the checkout.
    monkeypatch.chdir(tmp_path)

    run_id = "default-dir-run"
    writer = FindingsWriter(run_id)

    assert writer._output_dir == _DEFAULT_DIR
    assert (tmp_path / _DEFAULT_DIR).is_dir()


# ---------------------------------------------------------------------------
# 4.
# Atomic write: if Path.replace raises, destination file is unchanged
# ---------------------------------------------------------------------------


@pytest.mark.unit
def test_atomic_write_no_partial_file_on_failure(tmp_path: Path) -> None:
    """A failing ``Path.replace`` must not leave a destination file behind."""
    run_id = "atomic-fail-run"
    target = tmp_path / f"{run_id}.json"
    assert not target.exists()

    failing_replace = patch(
        "great_expectations.core.validation_result_schemas.findings_emitter.Path.replace",
        side_effect=OSError("simulated replace failure"),
    )
    with failing_replace:
        emitter = FindingsWriter(run_id, output_dir=tmp_path)
        emitter.write_finding(_SAMPLE_FINDINGS[0])
        # close() is invoked explicitly (no context manager) so the error
        # surfaces exactly where the final rename is attempted.
        with pytest.raises(OSError, match="simulated replace failure"):
            emitter.close()

    # The rename never happened, so nothing may exist at the final path.
    assert not target.exists(), "Destination file must not exist after failed atomic write"


@pytest.mark.unit
def test_atomic_write_preserves_existing_on_failure(tmp_path: Path) -> None:
    """A failing ``Path.replace`` leaves a pre-existing destination file untouched."""
    run_id = "atomic-overwrite-run"
    target = tmp_path / f"{run_id}.json"
    previous_payload = '{"old": "content"}'
    target.write_text(previous_payload)

    failing_replace = patch(
        "great_expectations.core.validation_result_schemas.findings_emitter.Path.replace",
        side_effect=OSError("simulated replace failure"),
    )
    with failing_replace:
        emitter = FindingsWriter(run_id, output_dir=tmp_path)
        emitter.write_finding(_SAMPLE_FINDINGS[0])
        with pytest.raises(OSError):
            emitter.close()

    surviving_payload = target.read_text()
    assert surviving_payload == previous_payload, (
        "Existing file must be unchanged after failed atomic write"
    )


# ---------------------------------------------------------------------------
# 5.
Context-manager protocol +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +def test_context_manager_calls_close(tmp_path: Path) -> None: + """Exiting context manager calls close() and produces output.""" + run_id = "ctx-manager-run" + with FindingsWriter(run_id, output_dir=tmp_path) as writer: + writer.write_finding(_SAMPLE_FINDINGS[0]) + + assert (tmp_path / f"{run_id}.json").exists() + + +@pytest.mark.unit +def test_context_manager_propagates_exception(tmp_path: Path) -> None: + """Exception inside context manager propagates after close().""" + run_id = "ctx-exception-run" + with pytest.raises(ValueError, match="test error"): + with FindingsWriter(run_id, output_dir=tmp_path) as writer: + writer.write_finding(_SAMPLE_FINDINGS[0]) + raise ValueError("test error") + + +# --------------------------------------------------------------------------- +# 6. Module constants +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +def test_schema_version_is_int() -> None: + assert isinstance(SCHEMA_VERSION, int) + assert SCHEMA_VERSION == 1 + + +@pytest.mark.unit +def test_default_dir_is_path() -> None: + assert isinstance(_DEFAULT_DIR, Path) + + +@pytest.mark.unit +def test_env_var_name() -> None: + assert _ENV_VAR == "GX_VALIDATION_FINDINGS_DIR" From f14afa4aa457a234b0d36b739539a4c3e0a58439 Mon Sep 17 00:00:00 2001 From: Josh Stauffer <66793731+joshua-stauffer@users.noreply.github.com> Date: Thu, 7 May 2026 17:48:49 +0200 Subject: [PATCH 07/20] =?UTF-8?q?test(validation-result-schemas):=20MapRes?= =?UTF-8?q?ult=20family=20unit=20tests=20=E2=80=94=20RED=20phase=20(task?= =?UTF-8?q?=204.1)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../test_schemas_map.py | 393 ++++++++++++++++++ 1 file changed, 393 insertions(+) create mode 100644 tests/unit/core/validation_result_schemas/test_schemas_map.py diff --git 
a/tests/unit/core/validation_result_schemas/test_schemas_map.py b/tests/unit/core/validation_result_schemas/test_schemas_map.py new file mode 100644 index 000000000000..9a1d8f5fe064 --- /dev/null +++ b/tests/unit/core/validation_result_schemas/test_schemas_map.py @@ -0,0 +1,393 @@ +"""Unit tests for the MapResult schema family. + +Covers: +- Each format variant (MapBooleanOnlyResult, MapBasicResult, MapSummaryResult, + MapCompleteResult) parses a valid result dict correctly. +- All expected fields match the input. +- Unknown extra fields raise pydantic.ValidationError (extra=forbid). +- Validator functions (validate_unexpected_rows_passthrough, + validate_partial_unexpected_counts_fallback) work as expected. +- root_validate_engine_required_fields fires when engine_hint='sql' + + return_unexpected_index_query=True but unexpected_index_query is missing. + +All tests are marked @pytest.mark.unit and run via: + pytest tests/unit/core/validation_result_schemas/test_schemas_map.py -m unit +""" +from __future__ import annotations + +import pytest + +from great_expectations.compatibility import pydantic +from great_expectations.core.validation_result_schemas.schemas.map_result import ( + MapBasicResult, + MapBooleanOnlyResult, + MapCompleteResult, + MapResultBase, + MapSummaryResult, +) + +# --------------------------------------------------------------------------- +# Shared fixture data +# --------------------------------------------------------------------------- + +_BASIC_RESULT_DATA = { + "element_count": 100, + "unexpected_count": 5, + "unexpected_percent": 5.0, + "missing_count": 2, + "missing_percent": 2.0, + "unexpected_percent_total": 5.0, + "unexpected_percent_nonmissing": 5.0, + "partial_unexpected_list": [1, 2, 3], + "unexpected_rows": None, +} + +_SUMMARY_EXTRA_DATA = { + "partial_unexpected_counts": [{"value": 1, "count": 3}], + "partial_unexpected_index_list": [], +} + +_COMPLETE_EXTRA_DATA = { + "unexpected_list": [1, 2, 3], + "unexpected_index_list": 
[10, 11, 12], +} + + +# --------------------------------------------------------------------------- +# MapBooleanOnlyResult +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +def test_map_boolean_only_empty_dict() -> None: + """BOOLEAN_ONLY result dict is typically empty.""" + m = MapBooleanOnlyResult() + assert m.unexpected_index_query is None + assert m.unexpected_index_column_names is None + assert m.engine_hint is None + + +@pytest.mark.unit +def test_map_boolean_only_with_sql_fields() -> None: + """SQL engine can set unexpected_index_query and unexpected_index_column_names.""" + m = MapBooleanOnlyResult( + unexpected_index_query="SELECT * FROM foo WHERE ...", + unexpected_index_column_names=["id"], + ) + assert m.unexpected_index_query == "SELECT * FROM foo WHERE ..." + assert m.unexpected_index_column_names == ["id"] + + +@pytest.mark.unit +def test_map_boolean_only_extra_field_raises() -> None: + """extra=forbid: unknown fields raise ValidationError.""" + with pytest.raises(pydantic.ValidationError): + MapBooleanOnlyResult(unknown_field="should_fail") + + +@pytest.mark.unit +def test_map_boolean_only_basic_result_fields_are_rejected() -> None: + """MapBooleanOnlyResult does not accept MapBasicResult-only fields.""" + with pytest.raises(pydantic.ValidationError): + MapBooleanOnlyResult(element_count=100) + + +# --------------------------------------------------------------------------- +# MapBasicResult +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +def test_map_basic_parses_valid_result() -> None: + """MapBasicResult parses a typical pandas BASIC result dict correctly.""" + m = MapBasicResult(**_BASIC_RESULT_DATA) + assert m.element_count == 100 + assert m.unexpected_count == 5 + assert m.unexpected_percent == 5.0 + assert m.missing_count == 2 + assert m.missing_percent == 2.0 + assert m.unexpected_percent_total == 5.0 + assert 
m.unexpected_percent_nonmissing == 5.0 + assert m.partial_unexpected_list == [1, 2, 3] + assert m.unexpected_rows is None + + +@pytest.mark.unit +def test_map_basic_all_fields_none() -> None: + """All fields are Optional so MapBasicResult can be constructed with no args.""" + m = MapBasicResult() + assert m.element_count is None + assert m.unexpected_count is None + assert m.partial_unexpected_list is None + assert m.unexpected_rows is None + + +@pytest.mark.unit +def test_map_basic_extra_field_raises() -> None: + """extra=forbid: unknown fields raise ValidationError in MapBasicResult.""" + with pytest.raises(pydantic.ValidationError): + MapBasicResult(**_BASIC_RESULT_DATA, unknown_field="bad") + + +@pytest.mark.unit +def test_map_basic_unexpected_rows_accepts_none() -> None: + m = MapBasicResult(unexpected_rows=None) + assert m.unexpected_rows is None + + +@pytest.mark.unit +def test_map_basic_unexpected_rows_accepts_list() -> None: + rows = [{"col_a": 1, "col_b": "x"}, {"col_a": 2, "col_b": "y"}] + m = MapBasicResult(unexpected_rows=rows) + assert m.unexpected_rows == rows + + +@pytest.mark.unit +def test_map_basic_unexpected_rows_accepts_string() -> None: + """unexpected_rows: Any accepts string (e.g., a serialized representation).""" + m = MapBasicResult(unexpected_rows="some-string-representation") + assert m.unexpected_rows == "some-string-representation" + + +@pytest.mark.unit +def test_map_basic_inherits_sql_fields() -> None: + """MapBasicResult inherits SQL-only fields from MapResultBase.""" + m = MapBasicResult( + **_BASIC_RESULT_DATA, + unexpected_index_query="SELECT ...", + unexpected_index_column_names=["pk"], + ) + assert m.unexpected_index_query == "SELECT ..." 
+ assert m.unexpected_index_column_names == ["pk"] + + +@pytest.mark.unit +def test_map_basic_summary_only_field_raises() -> None: + """MapBasicResult does not accept MapSummaryResult-only fields.""" + with pytest.raises(pydantic.ValidationError): + MapBasicResult(**_BASIC_RESULT_DATA, partial_unexpected_counts=[]) + + +# --------------------------------------------------------------------------- +# MapSummaryResult +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +def test_map_summary_parses_valid_result() -> None: + """MapSummaryResult parses a typical SUMMARY result dict correctly.""" + data = {**_BASIC_RESULT_DATA, **_SUMMARY_EXTRA_DATA} + m = MapSummaryResult(**data) + assert m.element_count == 100 + assert m.partial_unexpected_counts == [{"value": 1, "count": 3}] + assert m.partial_unexpected_index_list == [] + + +@pytest.mark.unit +def test_map_summary_all_optional() -> None: + """All fields in MapSummaryResult are Optional.""" + m = MapSummaryResult() + assert m.partial_unexpected_counts is None + assert m.partial_unexpected_index_list is None + + +@pytest.mark.unit +def test_map_summary_extra_field_raises() -> None: + """extra=forbid: unknown fields raise ValidationError in MapSummaryResult.""" + data = {**_BASIC_RESULT_DATA, **_SUMMARY_EXTRA_DATA} + with pytest.raises(pydantic.ValidationError): + MapSummaryResult(**data, unknown_field="bad") + + +@pytest.mark.unit +def test_map_summary_partial_counts_accepts_canonical_shape() -> None: + """partial_unexpected_counts: [{value: x, count: n}, ...] 
is canonical.""" + counts = [{"value": "foo", "count": 3}, {"value": "bar", "count": 1}] + m = MapSummaryResult(partial_unexpected_counts=counts) + assert m.partial_unexpected_counts == counts + + +@pytest.mark.unit +def test_map_summary_partial_counts_accepts_error_fallback() -> None: + """partial_unexpected_counts: [{"error": "..."}] fallback shape is accepted.""" + fallback = [{"error": "partial_exception_counts requires a hashable type"}] + m = MapSummaryResult(partial_unexpected_counts=fallback) + assert m.partial_unexpected_counts == fallback + + +@pytest.mark.unit +def test_map_summary_partial_counts_accepts_none() -> None: + m = MapSummaryResult(partial_unexpected_counts=None) + assert m.partial_unexpected_counts is None + + +@pytest.mark.unit +def test_map_summary_complete_only_field_raises() -> None: + """MapSummaryResult does not accept MapCompleteResult-only fields.""" + with pytest.raises(pydantic.ValidationError): + MapSummaryResult(**_BASIC_RESULT_DATA, unexpected_list=[1, 2, 3]) + + +# --------------------------------------------------------------------------- +# MapCompleteResult +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +def test_map_complete_parses_valid_result() -> None: + """MapCompleteResult parses a typical COMPLETE result dict correctly.""" + data = {**_BASIC_RESULT_DATA, **_SUMMARY_EXTRA_DATA, **_COMPLETE_EXTRA_DATA} + m = MapCompleteResult(**data) + assert m.element_count == 100 + assert m.partial_unexpected_counts == [{"value": 1, "count": 3}] + assert m.unexpected_list == [1, 2, 3] + assert m.unexpected_index_list == [10, 11, 12] + + +@pytest.mark.unit +def test_map_complete_all_optional() -> None: + """All fields in MapCompleteResult are Optional.""" + m = MapCompleteResult() + assert m.unexpected_list is None + assert m.unexpected_index_list is None + + +@pytest.mark.unit +def test_map_complete_extra_field_raises() -> None: + """extra=forbid: unknown fields raise 
ValidationError in MapCompleteResult.""" + data = {**_BASIC_RESULT_DATA, **_SUMMARY_EXTRA_DATA, **_COMPLETE_EXTRA_DATA} + with pytest.raises(pydantic.ValidationError): + MapCompleteResult(**data, not_a_real_field="value") + + +@pytest.mark.unit +def test_map_complete_inherits_all_ancestor_fields() -> None: + """MapCompleteResult inherits fields from all ancestor classes.""" + data = { + **_BASIC_RESULT_DATA, + **_SUMMARY_EXTRA_DATA, + **_COMPLETE_EXTRA_DATA, + "unexpected_index_query": "SELECT ...", + "unexpected_index_column_names": ["id"], + } + m = MapCompleteResult(**data) + # From MapResultBase + assert m.unexpected_index_query == "SELECT ..." + assert m.unexpected_index_column_names == ["id"] + # From MapBasicResult + assert m.element_count == 100 + assert m.partial_unexpected_list == [1, 2, 3] + # From MapSummaryResult + assert m.partial_unexpected_counts == [{"value": 1, "count": 3}] + # From MapCompleteResult + assert m.unexpected_list == [1, 2, 3] + assert m.unexpected_index_list == [10, 11, 12] + + +# --------------------------------------------------------------------------- +# root_validate_engine_required_fields (via MapCompleteResult) +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +def test_map_complete_sql_hint_with_query_passes() -> None: + """SQL engine + return_unexpected_index_query=True + query present → passes.""" + m = MapCompleteResult( + engine_hint="sql", + unexpected_index_query="SELECT id FROM table WHERE val > 5", + ) + assert m.unexpected_index_query == "SELECT id FROM table WHERE val > 5" + + +@pytest.mark.unit +def test_map_complete_sql_hint_missing_query_no_raise_without_return_flag() -> None: + """SQL engine + no return_unexpected_index_query flag → validator is a no-op. + + The root_validate_engine_required_fields only raises when BOTH engine_hint='sql' + AND return_unexpected_index_query=True are in the values dict. 
Since + MapCompleteResult does not declare return_unexpected_index_query as a field, + setting engine_hint='sql' alone does NOT trigger the SQL assertion. + The validator is designed to be composed with the dispatcher, which can + inject additional context via a helper field if needed. + """ + # Should NOT raise: engine_hint='sql' but no return_unexpected_index_query field + m = MapCompleteResult(engine_hint="sql", unexpected_index_query=None) + assert m.engine_hint == "sql" + assert m.unexpected_index_query is None + + +@pytest.mark.unit +def test_map_complete_no_engine_hint_no_query_passes() -> None: + """No engine hint → root validator is a no-op regardless of other fields.""" + m = MapCompleteResult( + unexpected_index_query=None, + ) + assert m.unexpected_index_query is None + + +@pytest.mark.unit +def test_map_complete_pandas_engine_no_query_passes() -> None: + """Non-SQL engine hint → root validator is a no-op.""" + m = MapCompleteResult( + engine_hint="pandas", + unexpected_index_query=None, + ) + assert m.engine_hint == "pandas" + assert m.unexpected_index_query is None + + +# --------------------------------------------------------------------------- +# engine_hint field inheritance +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +def test_engine_hint_present_in_all_variants() -> None: + """engine_hint is declared on MapResultBase and inherited by all variants.""" + assert "engine_hint" in MapResultBase.__fields__ + assert "engine_hint" in MapBooleanOnlyResult.__fields__ + assert "engine_hint" in MapBasicResult.__fields__ + assert "engine_hint" in MapSummaryResult.__fields__ + assert "engine_hint" in MapCompleteResult.__fields__ + + +@pytest.mark.unit +def test_engine_hint_defaults_to_none() -> None: + """engine_hint defaults to None on all variants.""" + assert MapBooleanOnlyResult().engine_hint is None + assert MapBasicResult().engine_hint is None + assert MapSummaryResult().engine_hint is None + 
assert MapCompleteResult().engine_hint is None + + +# --------------------------------------------------------------------------- +# extra=forbid on MapResultBase +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +def test_map_result_base_extra_forbid() -> None: + """MapResultBase itself also enforces extra=forbid.""" + with pytest.raises(pydantic.ValidationError): + MapResultBase(completely_unknown="value") + + +# --------------------------------------------------------------------------- +# Inheritance chain sanity +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +def test_inheritance_chain() -> None: + """MapCompleteResult → MapSummaryResult → MapBasicResult → MapResultBase.""" + assert issubclass(MapCompleteResult, MapSummaryResult) + assert issubclass(MapSummaryResult, MapBasicResult) + assert issubclass(MapBasicResult, MapResultBase) + assert issubclass(MapBooleanOnlyResult, MapResultBase) + + +@pytest.mark.unit +def test_map_complete_is_not_map_boolean_only() -> None: + """MapCompleteResult and MapBooleanOnlyResult are separate leaf classes.""" + assert not issubclass(MapCompleteResult, MapBooleanOnlyResult) + assert not issubclass(MapBooleanOnlyResult, MapCompleteResult) From ac233e2f03d26b359983b6ccf68fc3ebb0eb9a60 Mon Sep 17 00:00:00 2001 From: Josh Stauffer <66793731+joshua-stauffer@users.noreply.github.com> Date: Thu, 7 May 2026 17:49:29 +0200 Subject: [PATCH 08/20] feat(validation-result-schemas): MapResult family implementation (task 4.1) --- .../schemas/map_result.py | 108 ++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 great_expectations/core/validation_result_schemas/schemas/map_result.py diff --git a/great_expectations/core/validation_result_schemas/schemas/map_result.py b/great_expectations/core/validation_result_schemas/schemas/map_result.py new file mode 100644 index 000000000000..7d884ed2ecd4 --- /dev/null +++ 
b/great_expectations/core/validation_result_schemas/schemas/map_result.py @@ -0,0 +1,108 @@ +"""Map-style validation result schema family. + +Covers ColumnMapExpectation (26), ColumnPairMapExpectation (3), and +MulticolumnMapExpectation (3) = 32 map-style core expectations. + +Four format-discriminated classes share a common base: + + MapResultBase + ├── MapBooleanOnlyResult (BOOLEAN_ONLY) + └── MapBasicResult (BASIC) + └── MapSummaryResult (SUMMARY) + └── MapCompleteResult (COMPLETE) + +Import rules (enforced by ruff banned-api): +- Pydantic symbols come exclusively from ``great_expectations.compatibility.pydantic``. +- No PEP 604 unions (``X | Y``); use ``Optional[X]`` or ``Union[X, Y]``. +- No direct ``import pydantic``. +""" +from __future__ import annotations + +from typing import Any, List, Optional + +from great_expectations.compatibility import pydantic +from great_expectations.compatibility.pydantic import BaseModel +from great_expectations.core.validation_result_schemas.field_validators import ( + root_validate_engine_required_fields, + validate_partial_unexpected_counts_fallback, + validate_unexpected_rows_passthrough, +) + + +class MapResultBase(BaseModel): + """Base for all map-style result models. + + Fields here are the always-allowed superset shared by every format variant. + ``extra = Extra.forbid`` is intentional: the matrix runner *wants* unexpected + fields to fail validation so they surface in findings as cleanup queue entries. + """ + + class Config: + extra = pydantic.Extra.forbid + arbitrary_types_allowed = True + + # Internal engine hint — declared as a normal field so it appears in the + # values dict during root validation. ``exclude=True`` is not used here + # because pydantic v1's per-field exclude is Config-based; callers that want + # to omit this field from .dict() output should call .dict(exclude={"engine_hint"}). 
+ engine_hint: Optional[str] = None + + # SQL-only, optional everywhere; root validator enforces presence when applicable + unexpected_index_query: Optional[str] = None + unexpected_index_column_names: Optional[List[str]] = None + + +class MapBooleanOnlyResult(MapResultBase): + """ResultFormat.BOOLEAN_ONLY — empty result dict for map expectations. + + The parent EVR carries ``success``. The result dict may carry only the + SQL index-query overflow fields when ``return_unexpected_index_query=True``. + """ + + pass # No additional fields beyond the SQL index-query fields in base + + +class MapBasicResult(MapResultBase): + """ResultFormat.BASIC — counts, percents, and the partial unexpected list.""" + + element_count: Optional[int] = None + unexpected_count: Optional[int] = None + unexpected_percent: Optional[float] = None + missing_count: Optional[int] = None + missing_percent: Optional[float] = None + unexpected_percent_total: Optional[float] = None + unexpected_percent_nonmissing: Optional[float] = None + partial_unexpected_list: Optional[List[Any]] = None + # engine-typed; classified at runtime, not validated by type + unexpected_rows: Any = None + + _validate_rows = pydantic.validator("unexpected_rows", pre=True, allow_reuse=True)( + validate_unexpected_rows_passthrough + ) + + +class MapSummaryResult(MapBasicResult): + """ResultFormat.SUMMARY — adds counts and index list for partial unexpected.""" + + partial_unexpected_counts: Optional[List[Any]] = None + partial_unexpected_index_list: Optional[List[Any]] = None + + _validate_counts = pydantic.validator( + "partial_unexpected_counts", pre=True, allow_reuse=True + )(validate_partial_unexpected_counts_fallback) + + +class MapCompleteResult(MapSummaryResult): + """ResultFormat.COMPLETE — adds the full unexpected list and index list. 
+ + Also carries the root validator that enforces SQL engine-required fields: + when ``engine_hint='sql'`` and ``return_unexpected_index_query=True``, + ``unexpected_index_query`` must be present. + """ + + unexpected_list: Optional[List[Any]] = None + unexpected_index_list: Optional[List[Any]] = None + + _root_validate = pydantic.root_validator(allow_reuse=True)( + root_validate_engine_required_fields + ) From f0e2f9f1b7577657b677efc93fc11d40b715aab0 Mon Sep 17 00:00:00 2001 From: Josh Stauffer <66793731+joshua-stauffer@users.noreply.github.com> Date: Thu, 7 May 2026 17:56:23 +0200 Subject: [PATCH 09/20] feat(validation-result-schemas): AggregateResult family + unit tests (task 4.2) --- .../schemas/aggregate_result.py | 82 ++++ .../test_schemas_aggregate.py | 387 ++++++++++++++++++ 2 files changed, 469 insertions(+) create mode 100644 great_expectations/core/validation_result_schemas/schemas/aggregate_result.py create mode 100644 tests/unit/core/validation_result_schemas/test_schemas_aggregate.py diff --git a/great_expectations/core/validation_result_schemas/schemas/aggregate_result.py b/great_expectations/core/validation_result_schemas/schemas/aggregate_result.py new file mode 100644 index 000000000000..addf814841a7 --- /dev/null +++ b/great_expectations/core/validation_result_schemas/schemas/aggregate_result.py @@ -0,0 +1,82 @@ +"""Aggregate-style validation result schema family. + +Covers AggregateExpectation types (column-level aggregate expectations such as +expect_column_mean_to_be_between, expect_column_min_to_be_between, etc.). + +Four format-discriminated classes share a common base: + + AggregateResultBase + ├── AggregateBooleanOnlyResult (BOOLEAN_ONLY) + └── AggregateBasicResult (BASIC) + └── AggregateSummaryResult (SUMMARY) + └── AggregateCompleteResult (COMPLETE) + +Import rules (enforced by ruff banned-api): +- Pydantic symbols come exclusively from ``great_expectations.compatibility.pydantic``. 
+- No PEP 604 unions (``X | Y``); use ``Optional[X]`` or ``Union[X, Y]``. +- No direct ``import pydantic``. +""" +from __future__ import annotations + +from typing import Any, Dict, List, Optional, Union + +from great_expectations.compatibility import pydantic +from great_expectations.compatibility.pydantic import BaseModel + +# Scalar = Union[int, float, str, bool, None]; observed_value is broadly typed. +# pydantic v1 tries Union members in order: bool before int keeps True from becoming 1, +# float before int keeps 3.14 from becoming 3. NOTE(review): the reverse coercions still +# happen (42 -> 42.0, 0/1/"yes" -> bool) unless Config.smart_union is set; confirm. +ObservedValue = Union[bool, float, int, str, List[Any], Dict[str, Any], None] + + +class AggregateResultBase(BaseModel): + """Base for all aggregate-style result models. + + Fields here are the always-allowed superset shared by every format variant. + ``extra = Extra.forbid`` is intentional: the matrix runner *wants* unexpected + fields to fail validation so they surface in findings as cleanup queue entries. + """ + + class Config: + extra = pydantic.Extra.forbid + arbitrary_types_allowed = True + + observed_value: ObservedValue = None + details: Optional[Dict[str, Any]] = None + + +class AggregateBooleanOnlyResult(AggregateResultBase): + """ResultFormat.BOOLEAN_ONLY — typically empty result dict for aggregate expectations. + + The parent EVR carries ``success``. The result dict for BOOLEAN_ONLY + aggregate expectations typically has no additional fields.
+ """ + + pass # BOOLEAN_ONLY: typically empty + + +class AggregateBasicResult(AggregateResultBase): + """ResultFormat.BASIC — counts, percents, and partial lists.""" + + element_count: Optional[int] = None + missing_count: Optional[int] = None + missing_percent: Optional[float] = None + partial_unexpected_list: Optional[List[Any]] = None + partial_missing_list: Optional[List[Any]] = None + + +class AggregateSummaryResult(AggregateBasicResult): + """ResultFormat.SUMMARY — aggregate expectations rarely diverge from BASIC. + + Kept explicit so the dispatcher can name it distinctly. + """ + + pass # Aggregate expectations rarely diverge between BASIC and SUMMARY + + +class AggregateCompleteResult(AggregateSummaryResult): + """ResultFormat.COMPLETE — adds the full unexpected list and index list.""" + + unexpected_list: Optional[List[Any]] = None + unexpected_index_list: Optional[List[Any]] = None diff --git a/tests/unit/core/validation_result_schemas/test_schemas_aggregate.py b/tests/unit/core/validation_result_schemas/test_schemas_aggregate.py new file mode 100644 index 000000000000..1ffe7fb6d511 --- /dev/null +++ b/tests/unit/core/validation_result_schemas/test_schemas_aggregate.py @@ -0,0 +1,387 @@ +"""Unit tests for the AggregateResult schema family. + +Covers: +- Each format variant (AggregateBooleanOnlyResult, AggregateBasicResult, + AggregateSummaryResult, AggregateCompleteResult) parses a valid result dict correctly. +- All expected fields match the input. +- Unknown extra fields raise pydantic.ValidationError (extra=forbid). +- ObservedValue union accepts scalar (int, float, str, bool), list, dict, and None shapes. +- Details field is optional and accepts None or dict. +- Every format variant can be constructed with minimal (empty) args. 
+ +All tests are marked @pytest.mark.unit and run via: + pytest tests/unit/core/validation_result_schemas/test_schemas_aggregate.py -m unit +""" +from __future__ import annotations + +import pytest + +from great_expectations.compatibility import pydantic +from great_expectations.core.validation_result_schemas.schemas.aggregate_result import ( + AggregateBasicResult, + AggregateBooleanOnlyResult, + AggregateCompleteResult, + AggregateResultBase, + AggregateSummaryResult, +) + +# --------------------------------------------------------------------------- +# Shared fixture data +# --------------------------------------------------------------------------- + +_BASIC_RESULT_DATA = { + "element_count": 200, + "missing_count": 10, + "missing_percent": 5.0, + "partial_unexpected_list": ["a", "b"], + "partial_missing_list": [None], +} + +_COMPLETE_EXTRA_DATA = { + "unexpected_list": ["a", "b", "c"], + "unexpected_index_list": [0, 1, 2], +} + + +# --------------------------------------------------------------------------- +# ObservedValue union shapes +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +def test_observed_value_int() -> None: + """ObservedValue accepts int scalar.""" + m = AggregateResultBase(observed_value=42) + assert m.observed_value == 42 + + +@pytest.mark.unit +def test_observed_value_float() -> None: + """ObservedValue accepts float scalar.""" + m = AggregateResultBase(observed_value=3.14) + assert m.observed_value == 3.14 + + +@pytest.mark.unit +def test_observed_value_str() -> None: + """ObservedValue accepts string scalar.""" + m = AggregateResultBase(observed_value="mean=3.14") + assert m.observed_value == "mean=3.14" + + +@pytest.mark.unit +def test_observed_value_bool() -> None: + """ObservedValue accepts bool scalar.""" + m = AggregateResultBase(observed_value=True) + assert m.observed_value is True + + +@pytest.mark.unit +def test_observed_value_list() -> None: + """ObservedValue accepts list.""" 
+ m = AggregateResultBase(observed_value=[1, 2, 3]) + assert m.observed_value == [1, 2, 3] + + +@pytest.mark.unit +def test_observed_value_dict() -> None: + """ObservedValue accepts dict.""" + m = AggregateResultBase(observed_value={"min": 0, "max": 10}) + assert m.observed_value == {"min": 0, "max": 10} + + +@pytest.mark.unit +def test_observed_value_none() -> None: + """ObservedValue defaults to None.""" + m = AggregateResultBase() + assert m.observed_value is None + + +@pytest.mark.unit +def test_observed_value_explicit_none() -> None: + """ObservedValue accepts explicit None.""" + m = AggregateResultBase(observed_value=None) + assert m.observed_value is None + + +# --------------------------------------------------------------------------- +# Details field +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +def test_details_absent_defaults_to_none() -> None: + """details field defaults to None when not provided.""" + m = AggregateResultBase() + assert m.details is None + + +@pytest.mark.unit +def test_details_present_with_dict() -> None: + """details field accepts a dict.""" + m = AggregateResultBase(details={"percentile": 0.95, "min": 0, "max": 100}) + assert m.details == {"percentile": 0.95, "min": 0, "max": 100} + + +@pytest.mark.unit +def test_details_present_empty_dict() -> None: + """details field accepts an empty dict.""" + m = AggregateResultBase(details={}) + assert m.details == {} + + +@pytest.mark.unit +def test_details_explicit_none() -> None: + """details field accepts explicit None.""" + m = AggregateResultBase(details=None) + assert m.details is None + + +# --------------------------------------------------------------------------- +# AggregateResultBase extra=forbid +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +def test_aggregate_result_base_extra_forbid() -> None: + """AggregateResultBase enforces extra=forbid.""" + with 
pytest.raises(pydantic.ValidationError): + AggregateResultBase(completely_unknown="value") + + +# --------------------------------------------------------------------------- +# AggregateBooleanOnlyResult +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +def test_aggregate_boolean_only_empty() -> None: + """BOOLEAN_ONLY result is typically empty.""" + m = AggregateBooleanOnlyResult() + assert m.observed_value is None + assert m.details is None + + +@pytest.mark.unit +def test_aggregate_boolean_only_with_observed_value() -> None: + """AggregateBooleanOnlyResult inherits observed_value from base.""" + m = AggregateBooleanOnlyResult(observed_value=42) + assert m.observed_value == 42 + + +@pytest.mark.unit +def test_aggregate_boolean_only_with_details() -> None: + """AggregateBooleanOnlyResult inherits details from base.""" + m = AggregateBooleanOnlyResult(details={"info": "extra"}) + assert m.details == {"info": "extra"} + + +@pytest.mark.unit +def test_aggregate_boolean_only_extra_field_raises() -> None: + """extra=forbid: unknown fields raise ValidationError in AggregateBooleanOnlyResult.""" + with pytest.raises(pydantic.ValidationError): + AggregateBooleanOnlyResult(unknown_field="should_fail") + + +@pytest.mark.unit +def test_aggregate_boolean_only_basic_fields_rejected() -> None: + """AggregateBooleanOnlyResult does not accept AggregateBasicResult-only fields.""" + with pytest.raises(pydantic.ValidationError): + AggregateBooleanOnlyResult(element_count=100) + + +# --------------------------------------------------------------------------- +# AggregateBasicResult +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +def test_aggregate_basic_parses_valid_result() -> None: + """AggregateBasicResult parses a typical BASIC result dict correctly.""" + m = AggregateBasicResult(**_BASIC_RESULT_DATA) + assert m.element_count == 200 + assert m.missing_count == 10 + assert 
m.missing_percent == 5.0 + assert m.partial_unexpected_list == ["a", "b"] + assert m.partial_missing_list == [None] + + +@pytest.mark.unit +def test_aggregate_basic_all_fields_none() -> None: + """All fields are Optional so AggregateBasicResult can be constructed with no args.""" + m = AggregateBasicResult() + assert m.element_count is None + assert m.missing_count is None + assert m.missing_percent is None + assert m.partial_unexpected_list is None + assert m.partial_missing_list is None + + +@pytest.mark.unit +def test_aggregate_basic_with_observed_value() -> None: + """AggregateBasicResult inherits observed_value from base.""" + m = AggregateBasicResult(observed_value=3.14, element_count=100) + assert m.observed_value == 3.14 + assert m.element_count == 100 + + +@pytest.mark.unit +def test_aggregate_basic_extra_field_raises() -> None: + """extra=forbid: unknown fields raise ValidationError in AggregateBasicResult.""" + with pytest.raises(pydantic.ValidationError): + AggregateBasicResult(**_BASIC_RESULT_DATA, unknown_field="bad") + + +@pytest.mark.unit +def test_aggregate_basic_complete_only_field_raises() -> None: + """AggregateBasicResult does not accept AggregateCompleteResult-only fields.""" + with pytest.raises(pydantic.ValidationError): + AggregateBasicResult(**_BASIC_RESULT_DATA, unexpected_list=[1, 2, 3]) + + +# --------------------------------------------------------------------------- +# AggregateSummaryResult +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +def test_aggregate_summary_parses_valid_result() -> None: + """AggregateSummaryResult parses a typical SUMMARY result dict correctly.""" + m = AggregateSummaryResult(**_BASIC_RESULT_DATA) + assert m.element_count == 200 + assert m.missing_count == 10 + assert m.partial_unexpected_list == ["a", "b"] + + +@pytest.mark.unit +def test_aggregate_summary_all_optional() -> None: + """All fields in AggregateSummaryResult are Optional.""" + m = 
AggregateSummaryResult() + assert m.element_count is None + assert m.missing_count is None + assert m.partial_unexpected_list is None + + +@pytest.mark.unit +def test_aggregate_summary_extra_field_raises() -> None: + """extra=forbid: unknown fields raise ValidationError in AggregateSummaryResult.""" + with pytest.raises(pydantic.ValidationError): + AggregateSummaryResult(**_BASIC_RESULT_DATA, unknown_field="bad") + + +@pytest.mark.unit +def test_aggregate_summary_with_observed_value_and_details() -> None: + """AggregateSummaryResult inherits base fields.""" + m = AggregateSummaryResult( + observed_value={"mean": 42.0}, + details={"row_count": 1000}, + element_count=1000, + ) + assert m.observed_value == {"mean": 42.0} + assert m.details == {"row_count": 1000} + assert m.element_count == 1000 + + +@pytest.mark.unit +def test_aggregate_summary_complete_only_field_raises() -> None: + """AggregateSummaryResult does not accept AggregateCompleteResult-only fields.""" + with pytest.raises(pydantic.ValidationError): + AggregateSummaryResult(**_BASIC_RESULT_DATA, unexpected_list=[1, 2, 3]) + + +# --------------------------------------------------------------------------- +# AggregateCompleteResult +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +def test_aggregate_complete_parses_valid_result() -> None: + """AggregateCompleteResult parses a typical COMPLETE result dict correctly.""" + data = {**_BASIC_RESULT_DATA, **_COMPLETE_EXTRA_DATA} + m = AggregateCompleteResult(**data) + assert m.element_count == 200 + assert m.missing_count == 10 + assert m.unexpected_list == ["a", "b", "c"] + assert m.unexpected_index_list == [0, 1, 2] + + +@pytest.mark.unit +def test_aggregate_complete_all_optional() -> None: + """All fields in AggregateCompleteResult are Optional.""" + m = AggregateCompleteResult() + assert m.unexpected_list is None + assert m.unexpected_index_list is None + assert m.element_count is None + + +@pytest.mark.unit 
+def test_aggregate_complete_extra_field_raises() -> None: + """extra=forbid: unknown fields raise ValidationError in AggregateCompleteResult.""" + data = {**_BASIC_RESULT_DATA, **_COMPLETE_EXTRA_DATA} + with pytest.raises(pydantic.ValidationError): + AggregateCompleteResult(**data, not_a_real_field="value") + + +@pytest.mark.unit +def test_aggregate_complete_inherits_all_ancestor_fields() -> None: + """AggregateCompleteResult inherits fields from all ancestor classes.""" + data = { + **_BASIC_RESULT_DATA, + **_COMPLETE_EXTRA_DATA, + "observed_value": 3.14, + "details": {"info": "complete"}, + } + m = AggregateCompleteResult(**data) + # From AggregateResultBase + assert m.observed_value == 3.14 + assert m.details == {"info": "complete"} + # From AggregateBasicResult + assert m.element_count == 200 + assert m.partial_unexpected_list == ["a", "b"] + assert m.partial_missing_list == [None] + # From AggregateCompleteResult + assert m.unexpected_list == ["a", "b", "c"] + assert m.unexpected_index_list == [0, 1, 2] + + +@pytest.mark.unit +def test_aggregate_complete_with_list_observed_value() -> None: + """AggregateCompleteResult accepts list observed_value.""" + m = AggregateCompleteResult(observed_value=["a", "b", "c"]) + assert m.observed_value == ["a", "b", "c"] + + +# --------------------------------------------------------------------------- +# Inheritance chain sanity +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +def test_inheritance_chain() -> None: + """Inheritance chain: Complete → Summary → Basic → ResultBase.""" + assert issubclass(AggregateCompleteResult, AggregateSummaryResult) + assert issubclass(AggregateSummaryResult, AggregateBasicResult) + assert issubclass(AggregateBasicResult, AggregateResultBase) + assert issubclass(AggregateBooleanOnlyResult, AggregateResultBase) + + +@pytest.mark.unit +def test_aggregate_complete_is_not_aggregate_boolean_only() -> None: + """AggregateCompleteResult and 
AggregateBooleanOnlyResult are separate leaf classes.""" + assert not issubclass(AggregateCompleteResult, AggregateBooleanOnlyResult) + assert not issubclass(AggregateBooleanOnlyResult, AggregateCompleteResult) + + +# --------------------------------------------------------------------------- +# AggregateSummaryResult field parity with AggregateBasicResult +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +def test_aggregate_summary_is_subclass_of_basic() -> None: + """AggregateSummaryResult is a subclass of AggregateBasicResult (no new fields).""" + assert issubclass(AggregateSummaryResult, AggregateBasicResult) + # verify they have the same fields (summary adds no new fields) + assert set(AggregateSummaryResult.__fields__.keys()) == set( + AggregateBasicResult.__fields__.keys() + ) From a4ba92cbc745ec2a59585678d7af2cbcaa0ab604 Mon Sep 17 00:00:00 2001 From: Josh Stauffer <66793731+joshua-stauffer@users.noreply.github.com> Date: Thu, 7 May 2026 18:00:08 +0200 Subject: [PATCH 10/20] feat(validation-result-schemas): per_expectation_overrides + schemas __init__ re-exports (task 4.3) --- .../schemas/__init__.py | 27 ++++++ .../schemas/per_expectation_overrides.py | 25 ++++++ .../test_schemas_overrides.py | 85 +++++++++++++++++++ 3 files changed, 137 insertions(+) create mode 100644 great_expectations/core/validation_result_schemas/schemas/per_expectation_overrides.py create mode 100644 tests/unit/core/validation_result_schemas/test_schemas_overrides.py diff --git a/great_expectations/core/validation_result_schemas/schemas/__init__.py b/great_expectations/core/validation_result_schemas/schemas/__init__.py index ca0e69e10bbf..42b07231ff1a 100644 --- a/great_expectations/core/validation_result_schemas/schemas/__init__.py +++ b/great_expectations/core/validation_result_schemas/schemas/__init__.py @@ -1 +1,28 @@ """Schema family re-exports.
Populated as schema tasks land.""" +from great_expectations.core.validation_result_schemas.schemas.aggregate_result import ( + AggregateBasicResult, + AggregateBooleanOnlyResult, + AggregateCompleteResult, + AggregateSummaryResult, +) +from great_expectations.core.validation_result_schemas.schemas.map_result import ( + MapBasicResult, + MapBooleanOnlyResult, + MapCompleteResult, + MapSummaryResult, +) +from great_expectations.core.validation_result_schemas.schemas.per_expectation_overrides import ( + ExpectColumnValuesToBeOfTypeSqlSparkResult, +) + +__all__ = [ + "AggregateBasicResult", + "AggregateBooleanOnlyResult", + "AggregateCompleteResult", + "AggregateSummaryResult", + "ExpectColumnValuesToBeOfTypeSqlSparkResult", + "MapBasicResult", + "MapBooleanOnlyResult", + "MapCompleteResult", + "MapSummaryResult", +] diff --git a/great_expectations/core/validation_result_schemas/schemas/per_expectation_overrides.py b/great_expectations/core/validation_result_schemas/schemas/per_expectation_overrides.py new file mode 100644 index 000000000000..466d289bd803 --- /dev/null +++ b/great_expectations/core/validation_result_schemas/schemas/per_expectation_overrides.py @@ -0,0 +1,25 @@ +"""Per-expectation schema overrides. + +Some expectations emit result dicts that do not fit the generic map or +aggregate families. Each override here is a standalone Pydantic model with +``extra = Extra.forbid`` so unexpected fields surface as validation errors. + +Import rules (enforced by ruff banned-api): +- Pydantic symbols come exclusively from ``great_expectations.compatibility.pydantic``. +- No PEP 604 unions (``X | Y``); use ``Optional[X]`` or ``Union[X, Y]``. +- No direct ``import pydantic``. 
+""" +from __future__ import annotations + +from great_expectations.compatibility import pydantic +from great_expectations.compatibility.pydantic import BaseModel + + +class ExpectColumnValuesToBeOfTypeSqlSparkResult(BaseModel): + """ExpectColumnValuesToBeOfType bypasses _format_map_output on SQL/Spark + and emits {observed_value: type_name} only.""" + + class Config: + extra = pydantic.Extra.forbid + + observed_value: str diff --git a/tests/unit/core/validation_result_schemas/test_schemas_overrides.py b/tests/unit/core/validation_result_schemas/test_schemas_overrides.py new file mode 100644 index 000000000000..e2317bb8bef7 --- /dev/null +++ b/tests/unit/core/validation_result_schemas/test_schemas_overrides.py @@ -0,0 +1,85 @@ +"""Unit tests for per-expectation schema overrides. + +Covers: +- expect_column_values_to_be_of_type pandas-path payload matches MapBasicResult, + NOT ExpectColumnValuesToBeOfTypeSqlSparkResult. +- expect_column_values_to_be_of_type SQL/Spark-path payload matches the override. +- Extra fields on the override raise pydantic.ValidationError (extra=forbid).
+ +All tests are marked @pytest.mark.unit and run via: + pytest tests/unit/core/validation_result_schemas/test_schemas_overrides.py -m unit +""" +from __future__ import annotations + +import pytest + +from great_expectations.compatibility import pydantic +from great_expectations.core.validation_result_schemas.schemas.map_result import ( + MapBasicResult, +) +from great_expectations.core.validation_result_schemas.schemas.per_expectation_overrides import ( + ExpectColumnValuesToBeOfTypeSqlSparkResult, +) + +# --------------------------------------------------------------------------- +# Pandas-path: expect_column_values_to_be_of_type emits a map-shaped result +# --------------------------------------------------------------------------- + +_PANDAS_RESULT = { + "element_count": 10, + "unexpected_count": 0, + "unexpected_percent": 0.0, + "partial_unexpected_list": [], +} + + +@pytest.mark.unit +def test_pandas_path_parses_as_map_basic_result() -> None: + """The pandas result for expect_column_values_to_be_of_type is map-shaped. + + It must parse as MapBasicResult, confirming it belongs to the Map family. + """ + m = MapBasicResult(**_PANDAS_RESULT) + assert m.element_count == 10 + assert m.unexpected_count == 0 + assert m.unexpected_percent == 0.0 + assert m.partial_unexpected_list == [] + + +# --------------------------------------------------------------------------- +# SQL/Spark-path: expect_column_values_to_be_of_type emits {observed_value: ...} +# --------------------------------------------------------------------------- + +_SQL_SPARK_RESULT = {"observed_value": "str"} + + +@pytest.mark.unit +def test_sql_spark_path_parses_as_override() -> None: + """The SQL/Spark result for expect_column_values_to_be_of_type matches the override. + + SQL/Spark bypasses _format_map_output and emits only {observed_value: }. 
+ """ + r = ExpectColumnValuesToBeOfTypeSqlSparkResult(**_SQL_SPARK_RESULT) + assert r.observed_value == "str" + + +@pytest.mark.unit +def test_sql_spark_path_observed_value_preserved() -> None: + """observed_value carries the type name string verbatim.""" + r = ExpectColumnValuesToBeOfTypeSqlSparkResult(observed_value="INTEGER") + assert r.observed_value == "INTEGER" + + +# --------------------------------------------------------------------------- +# extra=forbid: unknown fields on the override must raise +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +def test_override_extra_field_raises() -> None: + """ExpectColumnValuesToBeOfTypeSqlSparkResult rejects unknown extra fields.""" + with pytest.raises(pydantic.ValidationError): + ExpectColumnValuesToBeOfTypeSqlSparkResult( + observed_value="int", + unexpected_extra="x", + ) From 847e9f19054a9a989df721c8fbad66bc2ee14c18 Mon Sep 17 00:00:00 2001 From: Josh Stauffer <66793731+joshua-stauffer@users.noreply.github.com> Date: Thu, 7 May 2026 18:08:08 +0200 Subject: [PATCH 11/20] feat(validation-result-schemas): as_typed dispatcher + family_for + unit tests (task 5.1) --- .../validation_result_schemas/__init__.py | 8 + .../validation_result_schemas/dispatcher.py | 262 ++++++++++++ .../test_dispatcher.py | 382 ++++++++++++++++++ 3 files changed, 652 insertions(+) create mode 100644 great_expectations/core/validation_result_schemas/dispatcher.py create mode 100644 tests/unit/core/validation_result_schemas/test_dispatcher.py diff --git a/great_expectations/core/validation_result_schemas/__init__.py b/great_expectations/core/validation_result_schemas/__init__.py index 1bb0fbb87d8b..cea600074c03 100644 --- a/great_expectations/core/validation_result_schemas/__init__.py +++ b/great_expectations/core/validation_result_schemas/__init__.py @@ -3,3 +3,11 @@ Re-exports are populated as implementation tasks land. 
This package is not added to great_expectations/__init__.py and contains no @public_api symbols. """ +from great_expectations.core.validation_result_schemas.dispatcher import ( + ParseError, + Result, + as_typed, + family_for, +) + +__all__ = ["ParseError", "Result", "as_typed", "family_for"] diff --git a/great_expectations/core/validation_result_schemas/dispatcher.py b/great_expectations/core/validation_result_schemas/dispatcher.py new file mode 100644 index 000000000000..871ba36aec8e --- /dev/null +++ b/great_expectations/core/validation_result_schemas/dispatcher.py @@ -0,0 +1,262 @@ +"""Dispatcher for typed validation result schemas. + +Public API: + as_typed(result_dict, *, expectation_type, result_format, engine_hint=None) -> Result + family_for(expectation_type: str) -> str + Result (Union alias) + ParseError (exception) + +All four are re-exported from ``validation_result_schemas/__init__.py``. + +Import rules (enforced by ruff banned-api): +- Pydantic symbols come exclusively from ``great_expectations.compatibility.pydantic``. +- No PEP 604 unions (``X | Y``); use ``Optional[X]`` or ``Union[X, Y]``. +- No direct ``import pydantic``. 
+""" +from __future__ import annotations + +from typing import Any, Dict, Optional, Union + +from great_expectations.compatibility import pydantic +from great_expectations.core.result_format import ResultFormat +from great_expectations.core.validation_result_schemas.schemas.aggregate_result import ( + AggregateBasicResult, + AggregateBooleanOnlyResult, + AggregateCompleteResult, + AggregateSummaryResult, +) +from great_expectations.core.validation_result_schemas.schemas.map_result import ( + MapBasicResult, + MapBooleanOnlyResult, + MapCompleteResult, + MapSummaryResult, +) +from great_expectations.core.validation_result_schemas.schemas.per_expectation_overrides import ( + ExpectColumnValuesToBeOfTypeSqlSparkResult, +) + +# --------------------------------------------------------------------------- +# Public type alias +# --------------------------------------------------------------------------- + +Result = Union[ + MapBooleanOnlyResult, + MapBasicResult, + MapSummaryResult, + MapCompleteResult, + AggregateBooleanOnlyResult, + AggregateBasicResult, + AggregateSummaryResult, + AggregateCompleteResult, + ExpectColumnValuesToBeOfTypeSqlSparkResult, +] + +# --------------------------------------------------------------------------- +# ParseError +# --------------------------------------------------------------------------- + + +class ParseError(Exception): + """Raised when as_typed cannot match result_dict to a registered schema variant. + + Wraps pydantic.ValidationError; message names the unmatched fields and the + candidate variant(s) that were tried. + """ + + +# Module-level error message templates (TRY003: avoid long messages outside exception class). 
+def _override_parse_error_msg( + expectation_type: str, eff_engine: Optional[str], cls_name: str, exc: object +) -> str: + return ( + f"Failed to parse {expectation_type!r} with engine={eff_engine!r} " + f"as {cls_name}: {exc}" + ) + + +def _family_parse_error_msg( + expectation_type: str, fmt_value: str, cls_name: str, exc: object +) -> str: + return ( + f"Failed to parse {expectation_type!r} ({fmt_value}) " + f"as {cls_name}: {exc}" + ) + + +# --------------------------------------------------------------------------- +# _FAMILY_TABLE — hand-authored; covers all 60 core expectations +# --------------------------------------------------------------------------- +# +# Map expectations: those extending ColumnMapExpectation, ColumnPairMapExpectation, +# or MulticolumnMapExpectation (32 total). +# +# Aggregate expectations: everything else — ColumnAggregateExpectation, +# BatchExpectation, TableExpectation, etc. (28 total). + +_FAMILY_TABLE: Dict[str, str] = { + # ---- MAP (ColumnMapExpectation) ---------------------------------------- + "expect_column_value_lengths_to_be_between": "map", + "expect_column_value_lengths_to_equal": "map", + "expect_column_value_z_scores_to_be_less_than": "map", + "expect_column_values_to_be_between": "map", + "expect_column_values_to_be_dateutil_parseable": "map", + "expect_column_values_to_be_decreasing": "map", + "expect_column_values_to_be_in_set": "map", + "expect_column_values_to_be_in_type_list": "map", + "expect_column_values_to_be_increasing": "map", + "expect_column_values_to_be_json_parseable": "map", + "expect_column_values_to_be_null": "map", + "expect_column_values_to_be_of_type": "map", + "expect_column_values_to_be_unique": "map", + "expect_column_values_to_match_json_schema": "map", + "expect_column_values_to_match_like_pattern": "map", + "expect_column_values_to_match_like_pattern_list": "map", + "expect_column_values_to_match_regex": "map", + "expect_column_values_to_match_regex_list": "map", + 
"expect_column_values_to_match_strftime_format": "map", + "expect_column_values_to_not_be_in_set": "map", + "expect_column_values_to_not_be_null": "map", + "expect_column_values_to_not_match_like_pattern": "map", + "expect_column_values_to_not_match_like_pattern_list": "map", + "expect_column_values_to_not_match_regex": "map", + "expect_column_values_to_not_match_regex_list": "map", + # ---- MAP (ColumnPairMapExpectation) ------------------------------------ + "expect_column_pair_values_a_to_be_greater_than_b": "map", + "expect_column_pair_values_to_be_equal": "map", + "expect_column_pair_values_to_be_in_set": "map", + # ---- MAP (MulticolumnMapExpectation) ----------------------------------- + "expect_compound_columns_to_be_unique": "map", + "expect_multicolumn_sum_to_equal": "map", + "expect_multicolumn_values_to_be_unique": "map", + "expect_select_column_values_to_be_unique_within_record": "map", + # ---- AGGREGATE (ColumnAggregateExpectation) ---------------------------- + "expect_column_bootstrapped_ks_test_p_value_to_be_greater_than": "aggregate", + "expect_column_chisquare_test_p_value_to_be_greater_than": "aggregate", + "expect_column_distinct_values_to_be_in_set": "aggregate", + "expect_column_distinct_values_to_contain_set": "aggregate", + "expect_column_distinct_values_to_equal_set": "aggregate", + "expect_column_kl_divergence_to_be_less_than": "aggregate", + "expect_column_max_to_be_between": "aggregate", + "expect_column_mean_to_be_between": "aggregate", + "expect_column_median_to_be_between": "aggregate", + "expect_column_min_to_be_between": "aggregate", + "expect_column_most_common_value_to_be_in_set": "aggregate", + "expect_column_pair_cramers_phi_value_to_be_less_than": "aggregate", + "expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than": "aggregate", + "expect_column_proportion_of_non_null_values_to_be_between": "aggregate", + "expect_column_proportion_of_unique_values_to_be_between": "aggregate", + 
"expect_column_quantile_values_to_be_between": "aggregate", + "expect_column_stdev_to_be_between": "aggregate", + "expect_column_sum_to_be_between": "aggregate", + "expect_column_to_exist": "aggregate", + "expect_column_unique_value_count_to_be_between": "aggregate", + # ---- AGGREGATE (TableExpectation / BatchExpectation) ------------------- + "expect_query_results_to_match_comparison": "aggregate", + "expect_table_column_count_to_be_between": "aggregate", + "expect_table_column_count_to_equal": "aggregate", + "expect_table_columns_to_match_ordered_list": "aggregate", + "expect_table_columns_to_match_set": "aggregate", + "expect_table_row_count_to_be_between": "aggregate", + "expect_table_row_count_to_equal": "aggregate", + "expect_table_row_count_to_equal_other_table": "aggregate", +} + +# --------------------------------------------------------------------------- +# _OVERRIDE_TABLE — per-expectation engine-specific class overrides +# --------------------------------------------------------------------------- + +_OVERRIDE_TABLE: Dict[str, Dict[str, Any]] = { + "expect_column_values_to_be_of_type": { + "sql": ExpectColumnValuesToBeOfTypeSqlSparkResult, + "spark": ExpectColumnValuesToBeOfTypeSqlSparkResult, + } +} + +# --------------------------------------------------------------------------- +# Format dispatch tables +# --------------------------------------------------------------------------- + +_FORMAT_MAP: Dict[str, Dict[ResultFormat, Any]] = { + "map": { + ResultFormat.BOOLEAN_ONLY: MapBooleanOnlyResult, + ResultFormat.BASIC: MapBasicResult, + ResultFormat.SUMMARY: MapSummaryResult, + ResultFormat.COMPLETE: MapCompleteResult, + }, + "aggregate": { + ResultFormat.BOOLEAN_ONLY: AggregateBooleanOnlyResult, + ResultFormat.BASIC: AggregateBasicResult, + ResultFormat.SUMMARY: AggregateSummaryResult, + ResultFormat.COMPLETE: AggregateCompleteResult, + }, +} + + +# --------------------------------------------------------------------------- +# Public functions +# 
def family_for(expectation_type: str) -> str:
    """Look up the schema family (``'map'`` or ``'aggregate'``) of an expectation.

    Expectation types that are missing from ``_FAMILY_TABLE`` (for example
    custom or third-party expectations) resolve to ``'aggregate'`` instead of
    raising.
    """
    try:
        return _FAMILY_TABLE[expectation_type]
    except KeyError:
        return "aggregate"


def as_typed(
    result_dict: Dict[str, Any],
    *,
    expectation_type: str,
    result_format: ResultFormat,
    engine_hint: Optional[str] = None,
) -> Result:
    """Parse ``result_dict`` with the schema class selected for this expectation.

    The schema class is chosen in three steps:

    1. Work out the effective engine. An explicit ``engine_hint`` always wins;
       with no hint, the engine is taken to be ``"sql"`` when ``result_dict``
       has an ``unexpected_index_query`` key and stays ``None`` otherwise.
    2. If ``_OVERRIDE_TABLE`` registers a class for this expectation type and
       effective engine, parse the unmodified ``result_dict`` with it.
    3. Otherwise use ``_FORMAT_MAP[family_for(expectation_type)][result_format]``
       and parse a copy of ``result_dict`` that additionally carries the
       effective engine under ``"engine_hint"`` when one is known.

    Args:
        result_dict: Raw ``result`` mapping of a validation result. Not mutated.
        expectation_type: Expectation type name, e.g.
            ``"expect_column_values_to_be_between"``.
        result_format: Result format used as the key into ``_FORMAT_MAP``.
        engine_hint: Optional engine name; ``None`` enables the sniffing in step 1.

    Returns:
        An instance of the selected schema class.

    Raises:
        ParseError: If the selected class raises ``pydantic.ValidationError``.
            The message names the expectation type and the class; the
            validation error is chained as the cause.
    """
    # Step 1: an explicit hint is used as-is; sniffing only applies without one.
    # NOTE(review): the presence of ``unexpected_index_query`` is treated as
    # proof of a SQL engine -- confirm no other engine emits that key.
    if engine_hint is None and "unexpected_index_query" in result_dict:
        effective_engine: Optional[str] = "sql"
    else:
        effective_engine = engine_hint

    # Step 2: per-expectation, per-engine override.
    override_cls = _OVERRIDE_TABLE.get(expectation_type, {}).get(effective_engine)
    if override_cls is not None:
        try:
            return override_cls(**result_dict)
        except pydantic.ValidationError as exc:
            message = _override_parse_error_msg(
                expectation_type, effective_engine, override_cls.__name__, exc
            )
            raise ParseError(message) from exc

    # Step 3: family/format dispatch.
    family_cls = _FORMAT_MAP[family_for(expectation_type)][result_format]

    # Work on a copy so the caller's dict is never modified. The engine is
    # forwarded as a field, presumably for validators on the schema models
    # (not visible from this module).
    payload = dict(result_dict)
    if effective_engine is not None:
        payload["engine_hint"] = effective_engine

    try:
        return family_cls(**payload)
    except pydantic.ValidationError as exc:
        message = _family_parse_error_msg(
            expectation_type, result_format.value, family_cls.__name__, exc
        )
        raise ParseError(message) from exc
+ +All tests are marked @pytest.mark.unit and run via: + pytest tests/unit/core/validation_result_schemas/test_dispatcher.py -m unit -v +""" +from __future__ import annotations + +from pathlib import Path + +import pytest + +from great_expectations.core.result_format import ResultFormat +from great_expectations.core.validation_result_schemas.dispatcher import ( + _FAMILY_TABLE, + ParseError, + as_typed, + family_for, +) +from great_expectations.core.validation_result_schemas.schemas.aggregate_result import ( + AggregateBasicResult, + AggregateBooleanOnlyResult, + AggregateCompleteResult, + AggregateSummaryResult, +) +from great_expectations.core.validation_result_schemas.schemas.map_result import ( + MapBasicResult, + MapBooleanOnlyResult, + MapCompleteResult, + MapSummaryResult, +) +from great_expectations.core.validation_result_schemas.schemas.per_expectation_overrides import ( + ExpectColumnValuesToBeOfTypeSqlSparkResult, +) + +# --------------------------------------------------------------------------- +# A canonical map expectation and aggregate expectation used across tests +# --------------------------------------------------------------------------- + +MAP_EXPECTATION = "expect_column_values_to_be_between" +AGG_EXPECTATION = "expect_column_mean_to_be_between" + +# --------------------------------------------------------------------------- +# Minimal valid result dicts per family x format +# --------------------------------------------------------------------------- + +MAP_BOOLEAN_ONLY_DICT: dict = {} +MAP_BASIC_DICT: dict = { + "element_count": 100, + "unexpected_count": 5, + "unexpected_percent": 5.0, + "missing_count": 0, + "missing_percent": 0.0, + "unexpected_percent_total": 5.0, + "unexpected_percent_nonmissing": 5.0, + "partial_unexpected_list": [1, 2, 3], +} +MAP_SUMMARY_DICT: dict = { + **MAP_BASIC_DICT, + "partial_unexpected_counts": [{"value": 1, "count": 2}], + "partial_unexpected_index_list": [0, 1], +} +MAP_COMPLETE_DICT: dict = { + 
**MAP_SUMMARY_DICT, + "unexpected_list": [1, 2, 3, 4, 5], + "unexpected_index_list": [0, 1, 2, 3, 4], +} + +AGG_BOOLEAN_ONLY_DICT: dict = {} +AGG_BASIC_DICT: dict = {"observed_value": 42.0} +AGG_SUMMARY_DICT: dict = {"observed_value": 42.0} +AGG_COMPLETE_DICT: dict = { + "observed_value": 42.0, + "unexpected_list": None, + "unexpected_index_list": None, +} + + +# --------------------------------------------------------------------------- +# (family, format) matrix — 8 cells +# --------------------------------------------------------------------------- + + +class TestFamilyFormatMatrix: + """as_typed returns the correct model class for every (family, format) cell.""" + + @pytest.mark.unit + def test_map_boolean_only(self): + result = as_typed( + MAP_BOOLEAN_ONLY_DICT, + expectation_type=MAP_EXPECTATION, + result_format=ResultFormat.BOOLEAN_ONLY, + ) + assert isinstance(result, MapBooleanOnlyResult) + + @pytest.mark.unit + def test_map_basic(self): + result = as_typed( + MAP_BASIC_DICT, + expectation_type=MAP_EXPECTATION, + result_format=ResultFormat.BASIC, + ) + assert isinstance(result, MapBasicResult) + assert result.element_count == 100 + assert result.unexpected_count == 5 + + @pytest.mark.unit + def test_map_summary(self): + result = as_typed( + MAP_SUMMARY_DICT, + expectation_type=MAP_EXPECTATION, + result_format=ResultFormat.SUMMARY, + ) + assert isinstance(result, MapSummaryResult) + assert result.partial_unexpected_index_list == [0, 1] + + @pytest.mark.unit + def test_map_complete(self): + result = as_typed( + MAP_COMPLETE_DICT, + expectation_type=MAP_EXPECTATION, + result_format=ResultFormat.COMPLETE, + ) + assert isinstance(result, MapCompleteResult) + assert result.unexpected_list == [1, 2, 3, 4, 5] + + @pytest.mark.unit + def test_aggregate_boolean_only(self): + result = as_typed( + AGG_BOOLEAN_ONLY_DICT, + expectation_type=AGG_EXPECTATION, + result_format=ResultFormat.BOOLEAN_ONLY, + ) + assert isinstance(result, AggregateBooleanOnlyResult) + + 
@pytest.mark.unit + def test_aggregate_basic(self): + result = as_typed( + AGG_BASIC_DICT, + expectation_type=AGG_EXPECTATION, + result_format=ResultFormat.BASIC, + ) + assert isinstance(result, AggregateBasicResult) + assert result.observed_value == 42.0 + + @pytest.mark.unit + def test_aggregate_summary(self): + result = as_typed( + AGG_SUMMARY_DICT, + expectation_type=AGG_EXPECTATION, + result_format=ResultFormat.SUMMARY, + ) + assert isinstance(result, AggregateSummaryResult) + + @pytest.mark.unit + def test_aggregate_complete(self): + result = as_typed( + AGG_COMPLETE_DICT, + expectation_type=AGG_EXPECTATION, + result_format=ResultFormat.COMPLETE, + ) + assert isinstance(result, AggregateCompleteResult) + + +# --------------------------------------------------------------------------- +# family_for — unknown type falls back to 'aggregate' +# --------------------------------------------------------------------------- + + +class TestFamilyFor: + @pytest.mark.unit + def test_known_map_type(self): + assert family_for("expect_column_values_to_be_between") == "map" + + @pytest.mark.unit + def test_known_aggregate_type(self): + assert family_for("expect_column_mean_to_be_between") == "aggregate" + + @pytest.mark.unit + def test_unknown_type_falls_back_to_aggregate(self): + assert family_for("expect_some_custom_unknown_expectation") == "aggregate" + + @pytest.mark.unit + def test_unknown_type_dispatches_to_aggregate_class(self): + """as_typed uses 'aggregate' family for unknown expectation types.""" + result = as_typed( + AGG_BASIC_DICT, + expectation_type="expect_some_custom_unknown_expectation", + result_format=ResultFormat.BASIC, + ) + assert isinstance(result, AggregateBasicResult) + + +# --------------------------------------------------------------------------- +# SQL sniffing +# --------------------------------------------------------------------------- + + +class TestSqlSniffing: + @pytest.mark.unit + def 
test_sql_sniff_sets_engine_via_unexpected_index_query(self): + """When engine_hint is None but unexpected_index_query is in result_dict, + eff_engine is sniffed as 'sql' and engine_hint is propagated to the model.""" + result_dict = { + **MAP_COMPLETE_DICT, + "unexpected_index_query": "SELECT * FROM table WHERE x < 0", + } + result = as_typed( + result_dict, + expectation_type=MAP_EXPECTATION, + result_format=ResultFormat.COMPLETE, + engine_hint=None, + ) + assert isinstance(result, MapCompleteResult) + assert result.unexpected_index_query == "SELECT * FROM table WHERE x < 0" + assert result.engine_hint == "sql" + + @pytest.mark.unit + def test_explicit_engine_hint_takes_precedence(self): + """When engine_hint is supplied, SQL sniffing is bypassed.""" + result_dict = { + **MAP_COMPLETE_DICT, + "unexpected_index_query": "SELECT * FROM table WHERE x < 0", + } + result = as_typed( + result_dict, + expectation_type=MAP_EXPECTATION, + result_format=ResultFormat.COMPLETE, + engine_hint="pandas", + ) + assert isinstance(result, MapCompleteResult) + assert result.engine_hint == "pandas" + + @pytest.mark.unit + def test_no_sniff_without_index_query(self): + """When result_dict has no unexpected_index_query and engine_hint is None, + engine_hint is not injected into the model.""" + result = as_typed( + MAP_COMPLETE_DICT, + expectation_type=MAP_EXPECTATION, + result_format=ResultFormat.COMPLETE, + engine_hint=None, + ) + assert isinstance(result, MapCompleteResult) + assert result.engine_hint is None + + +# --------------------------------------------------------------------------- +# Per-expectation override route +# --------------------------------------------------------------------------- + + +class TestPerExpectationOverride: + @pytest.mark.unit + def test_override_with_sql_engine_hint(self): + """expect_column_values_to_be_of_type + sql → ExpectColumnValuesToBeOfTypeSqlSparkResult.""" + result_dict = {"observed_value": "int64"} + result = as_typed( + result_dict, + 
expectation_type="expect_column_values_to_be_of_type", + result_format=ResultFormat.SUMMARY, + engine_hint="sql", + ) + assert isinstance(result, ExpectColumnValuesToBeOfTypeSqlSparkResult) + assert result.observed_value == "int64" + + @pytest.mark.unit + def test_override_with_spark_engine_hint(self): + """expect_column_values_to_be_of_type + spark → same override class.""" + result_dict = {"observed_value": "LongType"} + result = as_typed( + result_dict, + expectation_type="expect_column_values_to_be_of_type", + result_format=ResultFormat.COMPLETE, + engine_hint="spark", + ) + assert isinstance(result, ExpectColumnValuesToBeOfTypeSqlSparkResult) + assert result.observed_value == "LongType" + + @pytest.mark.unit + def test_override_sql_engine_hint_direct(self): + """Explicit engine_hint='sql' triggers the override (no sniffing needed).""" + result_dict = {"observed_value": "int64"} + result = as_typed( + result_dict, + expectation_type="expect_column_values_to_be_of_type", + result_format=ResultFormat.COMPLETE, + engine_hint="sql", + ) + assert isinstance(result, ExpectColumnValuesToBeOfTypeSqlSparkResult) + assert result.observed_value == "int64" + + @pytest.mark.unit + def test_no_override_without_engine_hint(self): + """Without sql/spark engine_hint, falls through to family dispatch (map).""" + result_dict = MAP_BASIC_DICT + result = as_typed( + result_dict, + expectation_type="expect_column_values_to_be_of_type", + result_format=ResultFormat.BASIC, + engine_hint=None, + ) + assert isinstance(result, MapBasicResult) + + +# --------------------------------------------------------------------------- +# ParseError — raised with diagnostic message +# --------------------------------------------------------------------------- + + +class TestParseError: + @pytest.mark.unit + def test_parse_error_raised_on_bad_dict(self): + """A result_dict with extra fields not accepted by the schema → ParseError.""" + bad_dict = {"totally_unknown_field": "bad_value", "another_bad": 
999} + with pytest.raises(ParseError) as exc_info: + as_typed( + bad_dict, + expectation_type=MAP_EXPECTATION, + result_format=ResultFormat.BOOLEAN_ONLY, + ) + msg = str(exc_info.value) + assert "MapBooleanOnlyResult" in msg or "expect_column_values_to_be_between" in msg + + @pytest.mark.unit + def test_parse_error_raised_for_override_on_bad_dict(self): + """Override path raises ParseError when schema rejects extra/missing fields.""" + # ExpectColumnValuesToBeOfTypeSqlSparkResult has extra=forbid. + # An extra field not on the model will trigger validation error. + bad_dict = {"observed_value": "int64", "unexpected_extra_field": "boom"} + with pytest.raises(ParseError) as exc_info: + as_typed( + bad_dict, + expectation_type="expect_column_values_to_be_of_type", + result_format=ResultFormat.SUMMARY, + engine_hint="sql", + ) + msg = str(exc_info.value) + assert "expect_column_values_to_be_of_type" in msg + + @pytest.mark.unit + def test_parse_error_wraps_validation_error(self): + """ParseError.__cause__ is a pydantic.ValidationError.""" + from great_expectations.compatibility import pydantic + + bad_dict = {"bad_field": "unexpected"} + with pytest.raises(ParseError) as exc_info: + as_typed( + bad_dict, + expectation_type=AGG_EXPECTATION, + result_format=ResultFormat.BOOLEAN_ONLY, + ) + assert isinstance(exc_info.value.__cause__, pydantic.ValidationError) + + +# --------------------------------------------------------------------------- +# Coverage test — every expect_*.py in expectations/core/ must be in _FAMILY_TABLE +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +def test_family_table_covers_core_expectations(): + """Every expect_*.py file in expectations/core/ must appear in _FAMILY_TABLE.""" + core_dir = ( + Path(__file__).parent / ".." / ".." / ".." / ".." 
def as_typed(self, *, engine_hint: Optional[str] = None):
    """Return ``self.result`` parsed into a typed schema model.

    The expectation type and result format are read from
    ``self.expectation_config``; ``self`` is only read, never modified. The
    dispatcher is imported inside the method to avoid an import cycle at
    module load.

    Result-format resolution:

    * no ``expectation_config``, or no ``result_format`` kwarg:
      ``DEFAULT_RESULT_FORMAT``
    * a string: ``ResultFormat(value)``
    * a dict: its ``"result_format"`` entry, converted with ``ResultFormat``
    * ``None``, or a dict without a ``"result_format"`` entry:
      ``DEFAULT_RESULT_FORMAT`` (previously a ``KeyError``)
    * anything else is handed to the dispatcher unchanged

    Args:
        engine_hint: Optional engine name (``'pandas'``, ``'spark'`` or
            ``'sql'``), forwarded to the dispatcher as-is. When ``None`` the
            dispatcher infers the engine from the shape of the result dict.

    Returns:
        The model instance produced by the dispatcher's ``as_typed``.

    Raises:
        ParseError: Propagated from the dispatcher when the result dict does
            not validate against the selected schema.
        ValueError: If the configured result format is not a valid
            ``ResultFormat`` value.
    """
    from great_expectations.core.result_format import (
        DEFAULT_RESULT_FORMAT,
        ResultFormat,
    )
    from great_expectations.core.validation_result_schemas.dispatcher import (
        as_typed as dispatch_as_typed,
    )

    config = self.expectation_config
    if config:
        expectation_type = config.type
        raw_format = config.kwargs.get("result_format", DEFAULT_RESULT_FORMAT)
    else:
        # Without a config there is nothing to look up; "unknown" is not a
        # registered expectation type, so the dispatcher applies its fallback.
        expectation_type = "unknown"
        raw_format = DEFAULT_RESULT_FORMAT

    if isinstance(raw_format, dict):
        # Dict form: the format itself lives under the "result_format" key.
        # A missing key is treated like a missing kwarg instead of raising.
        raw_format = raw_format.get("result_format")
        if raw_format is not None:
            raw_format = ResultFormat(raw_format)

    if raw_format is None:
        raw_format = DEFAULT_RESULT_FORMAT

    # Strings are converted to the enum; other values pass through untouched.
    if isinstance(raw_format, str):
        result_format = ResultFormat(raw_format)
    else:
        result_format = raw_format

    return dispatch_as_typed(
        self.result or {},
        expectation_type=expectation_type,
        result_format=result_format,
        engine_hint=engine_hint,
    )
+ +All tests are marked @pytest.mark.unit and run via: + pytest tests/unit/core/validation_result_schemas/test_as_typed.py -m unit -v +""" +from __future__ import annotations + +import json +from typing import Optional + +import pytest + +from great_expectations.core.expectation_validation_result import ( + ExpectationValidationResult, +) +from great_expectations.core.validation_result_schemas.schemas.aggregate_result import ( + AggregateBasicResult, + AggregateBooleanOnlyResult, + AggregateCompleteResult, + AggregateSummaryResult, +) +from great_expectations.core.validation_result_schemas.schemas.map_result import ( + MapBasicResult, + MapBooleanOnlyResult, + MapCompleteResult, + MapSummaryResult, +) +from great_expectations.expectations.expectation_configuration import ( + ExpectationConfiguration, +) + +# --------------------------------------------------------------------------- +# Fixture helpers +# --------------------------------------------------------------------------- + +MAP_BASIC_RESULT = { + "element_count": 100, + "unexpected_count": 0, + "unexpected_percent": 0.0, + "missing_count": 0, + "missing_percent": 0.0, + "unexpected_percent_total": 0.0, + "unexpected_percent_nonmissing": 0.0, + "partial_unexpected_list": [], +} + +MAP_SUMMARY_RESULT = { + **MAP_BASIC_RESULT, + "partial_unexpected_counts": [], + "partial_unexpected_index_list": [], +} + +MAP_COMPLETE_RESULT = { + **MAP_SUMMARY_RESULT, + "unexpected_list": [], + "unexpected_index_list": [], +} + +AGG_BASIC_RESULT = { + "observed_value": 42.0, +} + +AGG_SUMMARY_RESULT = { + "observed_value": 42.0, +} + +AGG_COMPLETE_RESULT = { + "observed_value": 42.0, + "unexpected_list": None, + "unexpected_index_list": None, +} + + +def build_map_evr( + result_format: str = "BASIC", result: Optional[dict] = None +) -> ExpectationValidationResult: + """Build a map-family EVR (expect_column_values_to_not_be_null).""" + config = ExpectationConfiguration( + type="expect_column_values_to_not_be_null", + 
kwargs={"column": "col_a", "result_format": result_format}, + ) + return ExpectationValidationResult( + success=True, + expectation_config=config, + result=result if result is not None else dict(MAP_BASIC_RESULT), + ) + + +def build_agg_evr(result_format: str = "BASIC") -> ExpectationValidationResult: + """Build an aggregate-family EVR (expect_column_mean_to_be_between).""" + config = ExpectationConfiguration( + type="expect_column_mean_to_be_between", + kwargs={"column": "col_a", "min_value": 0, "result_format": result_format}, + ) + return ExpectationValidationResult( + success=True, + expectation_config=config, + result=dict(AGG_BASIC_RESULT), + ) + + +# --------------------------------------------------------------------------- +# Return type checks — map family +# --------------------------------------------------------------------------- + + +class TestMapFamilyReturnTypes: + """as_typed returns the correct map-family model class for each ResultFormat.""" + + @pytest.mark.unit + def test_map_boolean_only(self): + config = ExpectationConfiguration( + type="expect_column_values_to_not_be_null", + kwargs={"column": "col_a", "result_format": "BOOLEAN_ONLY"}, + ) + evr = ExpectationValidationResult( + success=True, + expectation_config=config, + result={}, + ) + typed = evr.as_typed() + assert isinstance(typed, MapBooleanOnlyResult) + + @pytest.mark.unit + def test_map_basic(self): + evr = build_map_evr(result_format="BASIC", result=dict(MAP_BASIC_RESULT)) + typed = evr.as_typed() + assert isinstance(typed, MapBasicResult) + + @pytest.mark.unit + def test_map_summary(self): + evr = build_map_evr(result_format="SUMMARY", result=dict(MAP_SUMMARY_RESULT)) + typed = evr.as_typed() + assert isinstance(typed, MapSummaryResult) + + @pytest.mark.unit + def test_map_complete(self): + evr = build_map_evr(result_format="COMPLETE", result=dict(MAP_COMPLETE_RESULT)) + typed = evr.as_typed() + assert isinstance(typed, MapCompleteResult) + + +# 
--------------------------------------------------------------------------- +# Return type checks — aggregate family +# --------------------------------------------------------------------------- + + +class TestAggregateFamilyReturnTypes: + """as_typed returns the correct aggregate-family model class for each ResultFormat.""" + + @pytest.mark.unit + def test_aggregate_boolean_only(self): + config = ExpectationConfiguration( + type="expect_column_mean_to_be_between", + kwargs={"column": "col_a", "result_format": "BOOLEAN_ONLY"}, + ) + evr = ExpectationValidationResult( + success=True, + expectation_config=config, + result={}, + ) + typed = evr.as_typed() + assert isinstance(typed, AggregateBooleanOnlyResult) + + @pytest.mark.unit + def test_aggregate_basic(self): + evr = build_agg_evr(result_format="BASIC") + typed = evr.as_typed() + assert isinstance(typed, AggregateBasicResult) + + @pytest.mark.unit + def test_aggregate_summary(self): + evr = build_agg_evr(result_format="SUMMARY") + typed = evr.as_typed() + assert isinstance(typed, AggregateSummaryResult) + + @pytest.mark.unit + def test_aggregate_complete(self): + config = ExpectationConfiguration( + type="expect_column_mean_to_be_between", + kwargs={"column": "col_a", "result_format": "COMPLETE"}, + ) + evr = ExpectationValidationResult( + success=True, + expectation_config=config, + result=dict(AGG_COMPLETE_RESULT), + ) + typed = evr.as_typed() + assert isinstance(typed, AggregateCompleteResult) + + +# --------------------------------------------------------------------------- +# No mutation +# --------------------------------------------------------------------------- + + +class TestNoMutation: + """as_typed must not mutate self in any way.""" + + @pytest.mark.unit + def test_result_dict_not_mutated(self): + evr = build_map_evr(result_format="BASIC", result=dict(MAP_BASIC_RESULT)) + before_result = dict(evr.result) + _ = evr.as_typed() + assert dict(evr.result) == before_result + + @pytest.mark.unit + def 
test_to_json_dict_identical_after_call(self): + evr = build_map_evr(result_format="BASIC", result=dict(MAP_BASIC_RESULT)) + before_dict = json.dumps(evr.to_json_dict(), sort_keys=True) + _ = evr.as_typed() + assert json.dumps(evr.to_json_dict(), sort_keys=True) == before_dict + + @pytest.mark.unit + def test_no_new_attributes(self): + evr = build_map_evr(result_format="BASIC", result=dict(MAP_BASIC_RESULT)) + before_vars = set(vars(evr).keys()) + _ = evr.as_typed() + assert set(vars(evr).keys()) == before_vars + + +# --------------------------------------------------------------------------- +# EVR equality preserved +# --------------------------------------------------------------------------- + + +class TestEqualityPreserved: + """as_typed must not affect EVR equality.""" + + @pytest.mark.unit + def test_equality_before_and_after_as_typed(self): + evr1 = build_map_evr(result_format="BASIC", result=dict(MAP_BASIC_RESULT)) + evr2 = build_map_evr(result_format="BASIC", result=dict(MAP_BASIC_RESULT)) + assert evr1 == evr2 + _ = evr1.as_typed() + assert evr1 == evr2 + + @pytest.mark.unit + def test_to_json_dict_byte_identical_pair(self): + evr1 = build_map_evr(result_format="BASIC", result=dict(MAP_BASIC_RESULT)) + evr2 = build_map_evr(result_format="BASIC", result=dict(MAP_BASIC_RESULT)) + assert json.dumps(evr1.to_json_dict(), sort_keys=True) == json.dumps( + evr2.to_json_dict(), sort_keys=True + ) + _ = evr1.as_typed() + assert json.dumps(evr1.to_json_dict(), sort_keys=True) == json.dumps( + evr2.to_json_dict(), sort_keys=True + ) + + +# --------------------------------------------------------------------------- +# Missing expectation_config fallback +# --------------------------------------------------------------------------- + + +class TestMissingConfigFallback: + """When expectation_config is None, expectation_type defaults to 'unknown'.""" + + @pytest.mark.unit + def test_none_config_routes_to_aggregate_fallback(self): + """'unknown' is not in the family table 
→ falls back to 'aggregate' family.""" + evr = ExpectationValidationResult( + success=True, + expectation_config=None, + result={}, + ) + # family_for('unknown') returns 'aggregate' (fallback) + # result_format defaults to DEFAULT_RESULT_FORMAT (SUMMARY) + # AggregateSummaryResult is the expected class for aggregate + SUMMARY + typed = evr.as_typed() + assert isinstance(typed, AggregateSummaryResult) + + @pytest.mark.unit + def test_none_config_no_mutation(self): + evr = ExpectationValidationResult( + success=True, + expectation_config=None, + result={}, + ) + before_vars = set(vars(evr).keys()) + _ = evr.as_typed() + assert set(vars(evr).keys()) == before_vars + + +# --------------------------------------------------------------------------- +# result_format normalization: string, enum, dict-with-result_format +# --------------------------------------------------------------------------- + + +class TestResultFormatNormalization: + """result_format from kwargs is normalized from string, enum, or dict shapes.""" + + @pytest.mark.unit + def test_string_result_format(self): + """result_format stored as plain string in kwargs.""" + evr = build_map_evr(result_format="BASIC", result=dict(MAP_BASIC_RESULT)) + typed = evr.as_typed() + assert isinstance(typed, MapBasicResult) + + @pytest.mark.unit + def test_enum_result_format(self): + """result_format stored as ResultFormat enum in kwargs.""" + from great_expectations.core.result_format import ResultFormat + + config = ExpectationConfiguration( + type="expect_column_values_to_not_be_null", + kwargs={"column": "col_a", "result_format": ResultFormat.BASIC}, + ) + evr = ExpectationValidationResult( + success=True, + expectation_config=config, + result=dict(MAP_BASIC_RESULT), + ) + typed = evr.as_typed() + assert isinstance(typed, MapBasicResult) + + @pytest.mark.unit + def test_dict_result_format(self): + """result_format stored as dict with 'result_format' key in kwargs.""" + config = ExpectationConfiguration( + 
type="expect_column_values_to_not_be_null", + kwargs={ + "column": "col_a", + "result_format": {"result_format": "BASIC", "partial_unexpected_count": 20}, + }, + ) + evr = ExpectationValidationResult( + success=True, + expectation_config=config, + result=dict(MAP_BASIC_RESULT), + ) + typed = evr.as_typed() + assert isinstance(typed, MapBasicResult) + + @pytest.mark.unit + def test_missing_result_format_defaults_to_summary(self): + """When result_format is absent from kwargs, DEFAULT_RESULT_FORMAT (SUMMARY) is used.""" + config = ExpectationConfiguration( + type="expect_column_values_to_not_be_null", + kwargs={"column": "col_a"}, # no result_format + ) + evr = ExpectationValidationResult( + success=True, + expectation_config=config, + result=dict(MAP_SUMMARY_RESULT), + ) + typed = evr.as_typed() + # DEFAULT_RESULT_FORMAT is SUMMARY → MapSummaryResult + assert isinstance(typed, MapSummaryResult) + + +# --------------------------------------------------------------------------- +# engine_hint passthrough +# --------------------------------------------------------------------------- + + +class TestEngineHintPassthrough: + """engine_hint is forwarded to the dispatcher without mutating the EVR.""" + + @pytest.mark.unit + def test_engine_hint_pandas_map_basic(self): + evr = build_map_evr(result_format="BASIC", result=dict(MAP_BASIC_RESULT)) + typed = evr.as_typed(engine_hint="pandas") + assert isinstance(typed, MapBasicResult) + + @pytest.mark.unit + def test_engine_hint_does_not_mutate_evr(self): + evr = build_map_evr(result_format="BASIC", result=dict(MAP_BASIC_RESULT)) + before_vars = set(vars(evr).keys()) + before_result = dict(evr.result) + _ = evr.as_typed(engine_hint="pandas") + assert set(vars(evr).keys()) == before_vars + assert dict(evr.result) == before_result From f8560f1482e098a7107b54cf4cecab52b0dd3488 Mon Sep 17 00:00:00 2001 From: Josh Stauffer <66793731+joshua-stauffer@users.noreply.github.com> Date: Thu, 7 May 2026 18:27:37 +0200 Subject: [PATCH 13/20] 
feat(validation-result-schemas): EXPECTATION_CASES table + unit tests (task 7.1) --- .../_validation_result_schemas_cases.py | 551 ++++++++++++++++++ .../test_cases_table.py | 52 ++ 2 files changed, 603 insertions(+) create mode 100644 tests/integration/data_sources_and_expectations/expectations/_validation_result_schemas_cases.py create mode 100644 tests/unit/core/validation_result_schemas/test_cases_table.py diff --git a/tests/integration/data_sources_and_expectations/expectations/_validation_result_schemas_cases.py b/tests/integration/data_sources_and_expectations/expectations/_validation_result_schemas_cases.py new file mode 100644 index 000000000000..7879d20606e3 --- /dev/null +++ b/tests/integration/data_sources_and_expectations/expectations/_validation_result_schemas_cases.py @@ -0,0 +1,551 @@ +"""EXPECTATION_CASES — one entry per core expectation. + +Underscore-prefixed so pytest does not collect this file. + +Five table entries use a lightweight ``_AbstractStub`` instead of a real instance. Three of them +(ExpectColumnBootstrappedKsTestPValueToBeGreaterThan, +ExpectColumnChiSquareTestPValueToBeGreaterThan, and +ExpectColumnParameterizedDistributionKsTestPValueToBeGreaterThan) are marked +``NotImplementedError`` stubs in the codebase (their ``__init__`` raises and they are not part +of the public ``gxe`` API); the other two are explained by comments on their table entries. +The stub carries the correct ``expectation_type`` string so the ``family_for`` lookup test passes. +""" +from __future__ import annotations + +from typing import List, Mapping, NamedTuple, Optional + +import pandas as pd + +import great_expectations.expectations as gxe + +# --------------------------------------------------------------------------- +# Default fixture data — small DataFrame covering the most common columns. +# Several rows intentionally violate common constraints (None, type mismatch) +# so result dicts are non-trivial for map expectations.
+# --------------------------------------------------------------------------- + +_DEFAULT_DATA = pd.DataFrame( + { + "col_a": [1, 2, 3, None, 5], + "col_b": ["x", "y", "z", "w", None], + "col_c": [1.0, 2.0, None, 4.0, 5.0], + } +) + +# Multi-column / pair data — non-null values in every cell used for pair/multi +# expectations; every row satisfies A > B and no row satisfies A == B. +_PAIR_DATA = pd.DataFrame( + { + "col_a": [3, 5, 7, 10, 2], + "col_b": [1, 2, 3, 4, 1], + } +) + +# Numeric-only data for z-score and stdev expectations. +_NUMERIC_DATA = pd.DataFrame( + { + "col_a": [10, 20, 30, 40, 50], + "col_b": [1, 2, 3, 4, 5], + "col_c": [1.5, 2.5, 3.5, 4.5, 5.5], + } +) + +# Date-formatted strings for strftime / dateutil expectations. +_DATE_DATA = pd.DataFrame( + { + "col_a": ["2024-01-01", "2024-06-15", "not-a-date", "2023-12-31", "2025-03-01"], + "col_b": [1, 2, 3, 4, 5], + "col_c": [1.0, 2.0, 3.0, 4.0, 5.0], + } +) + +# JSON-formatted strings for JSON expectations. +_JSON_DATA = pd.DataFrame( + { + "col_a": ['{"a": 1}', '{"b": 2}', "not-json", '{"c": 3}', '{"d": 4}'], + "col_b": [1, 2, 3, 4, 5], + "col_c": [1.0, 2.0, 3.0, 4.0, 5.0], + } +) + + +# --------------------------------------------------------------------------- +# Stub for abstract/NotImplementedError expectations +# --------------------------------------------------------------------------- + + +class _AbstractStub: + """Minimal stand-in for core expectations that cannot be used directly in the table. + + Most of these classes raise ``NotImplementedError`` on ``__init__`` and therefore + cannot be instantiated. We store just the ``expectation_type`` string so + the test assertions that touch ``case.expectation.expectation_type`` work + correctly.
+ """ + + def __init__(self, expectation_type: str) -> None: + self.expectation_type = expectation_type + + +# --------------------------------------------------------------------------- +# ExpectationCase definition +# --------------------------------------------------------------------------- + + +class ExpectationCase(NamedTuple): + """A single test case for a core expectation. + + Attributes: + id: Unique snake_case identifier matching the file name (e.g. + ``"expect_column_values_to_not_be_null"``). + expectation: An instantiated Expectation (or _AbstractStub for the + three not-yet-migrated expectations). + data: A small pandas DataFrame that serves as the fixture for this + case. Column names must align with whatever column/column_list + arguments are given to the expectation. + extra_data: Optional mapping of named extra DataFrames (e.g. for + expectations that reference a second table). + """ + + id: str + expectation: object # type: ignore[assignment] # Expectation or _AbstractStub + data: pd.DataFrame + extra_data: Optional[Mapping[str, pd.DataFrame]] = None + + +# --------------------------------------------------------------------------- +# EXPECTATION_CASES — one entry per expect_*.py file under core/ +# --------------------------------------------------------------------------- + +EXPECTATION_CASES: List[ExpectationCase] = [ + # ------------------------------------------------------------------ + # MAP — ColumnMapExpectation + # ------------------------------------------------------------------ + ExpectationCase( + id="expect_column_value_lengths_to_be_between", + expectation=gxe.ExpectColumnValueLengthsToBeBetween( + column="col_b", min_value=1, max_value=5 + ), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_column_value_lengths_to_equal", + expectation=gxe.ExpectColumnValueLengthsToEqual(column="col_b", value=1), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_column_value_z_scores_to_be_less_than", + 
expectation=gxe.ExpectColumnValueZScoresToBeLessThan( + column="col_a", threshold=3.0, double_sided=True + ), + data=_NUMERIC_DATA, + ), + ExpectationCase( + id="expect_column_values_to_be_between", + expectation=gxe.ExpectColumnValuesToBeBetween( + column="col_a", min_value=0, max_value=10 + ), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_column_values_to_be_dateutil_parseable", + expectation=gxe.ExpectColumnValuesToBeDateutilParseable(column="col_a"), + data=_DATE_DATA, + ), + ExpectationCase( + id="expect_column_values_to_be_decreasing", + expectation=gxe.ExpectColumnValuesToBeDecreasing(column="col_a"), + data=_NUMERIC_DATA, + ), + ExpectationCase( + id="expect_column_values_to_be_in_set", + expectation=gxe.ExpectColumnValuesToBeInSet( + column="col_a", value_set=[1, 2, 3, None, 5] + ), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_column_values_to_be_in_type_list", + expectation=gxe.ExpectColumnValuesToBeInTypeList( + column="col_a", type_list=["int", "float", "NoneType"] + ), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_column_values_to_be_increasing", + expectation=gxe.ExpectColumnValuesToBeIncreasing(column="col_a"), + data=_NUMERIC_DATA, + ), + ExpectationCase( + id="expect_column_values_to_be_json_parseable", + expectation=gxe.ExpectColumnValuesToBeJsonParseable(column="col_a"), + data=_JSON_DATA, + ), + ExpectationCase( + id="expect_column_values_to_be_null", + expectation=gxe.ExpectColumnValuesToBeNull(column="col_a"), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_column_values_to_be_of_type", + expectation=gxe.ExpectColumnValuesToBeOfType(column="col_a", type_="int"), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_column_values_to_be_unique", + expectation=gxe.ExpectColumnValuesToBeUnique(column="col_a"), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_column_values_to_match_json_schema", + expectation=gxe.ExpectColumnValuesToMatchJsonSchema( + column="col_a", 
json_schema={"type": "object"} + ), + data=_JSON_DATA, + ), + ExpectationCase( + id="expect_column_values_to_match_like_pattern", + expectation=gxe.ExpectColumnValuesToMatchLikePattern( + column="col_b", like_pattern="%" + ), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_column_values_to_match_like_pattern_list", + expectation=gxe.ExpectColumnValuesToMatchLikePatternList( + column="col_b", like_pattern_list=["%x%", "%y%"] + ), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_column_values_to_match_regex", + expectation=gxe.ExpectColumnValuesToMatchRegex( + column="col_b", regex="^[a-z]$" + ), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_column_values_to_match_regex_list", + expectation=gxe.ExpectColumnValuesToMatchRegexList( + column="col_b", regex_list=["^[a-z]$", "^[A-Z]$"] + ), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_column_values_to_match_strftime_format", + expectation=gxe.ExpectColumnValuesToMatchStrftimeFormat( + column="col_a", strftime_format="%Y-%m-%d" + ), + data=_DATE_DATA, + ), + ExpectationCase( + id="expect_column_values_to_not_be_in_set", + expectation=gxe.ExpectColumnValuesToNotBeInSet( + column="col_a", value_set=[99, 100] + ), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_column_values_to_not_be_null", + expectation=gxe.ExpectColumnValuesToNotBeNull(column="col_a"), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_column_values_to_not_match_like_pattern", + expectation=gxe.ExpectColumnValuesToNotMatchLikePattern( + column="col_b", like_pattern="%z%z%" + ), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_column_values_to_not_match_like_pattern_list", + expectation=gxe.ExpectColumnValuesToNotMatchLikePatternList( + column="col_b", like_pattern_list=["%99%", "%100%"] + ), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_column_values_to_not_match_regex", + expectation=gxe.ExpectColumnValuesToNotMatchRegex( + column="col_b", regex="^[0-9]+$" + ), + 
data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_column_values_to_not_match_regex_list", + expectation=gxe.ExpectColumnValuesToNotMatchRegexList( + column="col_b", regex_list=["^[0-9]+$", "^[A-Z]+$"] + ), + data=_DEFAULT_DATA, + ), + # ------------------------------------------------------------------ + # MAP — ColumnPairMapExpectation + # ------------------------------------------------------------------ + ExpectationCase( + id="expect_column_pair_values_a_to_be_greater_than_b", + expectation=gxe.ExpectColumnPairValuesAToBeGreaterThanB( + column_A="col_a", column_B="col_b" + ), + data=_PAIR_DATA, + ), + ExpectationCase( + id="expect_column_pair_values_to_be_equal", + expectation=gxe.ExpectColumnPairValuesToBeEqual( + column_A="col_a", column_B="col_b" + ), + data=_PAIR_DATA, + ), + ExpectationCase( + id="expect_column_pair_values_to_be_in_set", + expectation=gxe.ExpectColumnPairValuesToBeInSet( + column_A="col_a", + column_B="col_b", + value_pairs_set=[(3, 1), (5, 2), (7, 3), (10, 4), (2, 1)], + ), + data=_PAIR_DATA, + ), + # ------------------------------------------------------------------ + # MAP — MulticolumnMapExpectation + # ------------------------------------------------------------------ + ExpectationCase( + id="expect_compound_columns_to_be_unique", + expectation=gxe.ExpectCompoundColumnsToBeUnique( + column_list=["col_a", "col_b"] + ), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_multicolumn_sum_to_equal", + expectation=gxe.ExpectMulticolumnSumToEqual( + column_list=["col_a", "col_b"], sum_total=3 + ), + data=pd.DataFrame( + { + "col_a": [1, 2, 3, None, 2], + "col_b": [2, 1, 0, None, 1], + } + ), + ), + ExpectationCase( + id="expect_multicolumn_values_to_be_unique", + # This expectation lacks a map_metric so is_abstract() returns True and + # expectation_type is '' — instantiation succeeds but the expectation_type + # string would be empty. Use _AbstractStub to carry the correct type string. 
+ expectation=_AbstractStub("expect_multicolumn_values_to_be_unique"), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_select_column_values_to_be_unique_within_record", + expectation=gxe.ExpectSelectColumnValuesToBeUniqueWithinRecord( + column_list=["col_a", "col_b"] + ), + data=_DEFAULT_DATA, + ), + # ------------------------------------------------------------------ + # AGGREGATE — ColumnAggregateExpectation + # ------------------------------------------------------------------ + ExpectationCase( + id="expect_column_bootstrapped_ks_test_p_value_to_be_greater_than", + # This expectation is not yet migrated; __init__ raises NotImplementedError. + expectation=_AbstractStub( + "expect_column_bootstrapped_ks_test_p_value_to_be_greater_than" + ), + data=_NUMERIC_DATA, + ), + ExpectationCase( + id="expect_column_chisquare_test_p_value_to_be_greater_than", + # Not yet migrated; __init__ raises NotImplementedError. + expectation=_AbstractStub( + "expect_column_chisquare_test_p_value_to_be_greater_than" + ), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_column_distinct_values_to_be_in_set", + expectation=gxe.ExpectColumnDistinctValuesToBeInSet( + column="col_a", value_set=[1, 2, 3, None, 5] + ), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_column_distinct_values_to_contain_set", + expectation=gxe.ExpectColumnDistinctValuesToContainSet( + column="col_a", value_set=[1, 2] + ), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_column_distinct_values_to_equal_set", + expectation=gxe.ExpectColumnDistinctValuesToEqualSet( + column="col_a", value_set=[1, 2, 3, None, 5] + ), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_column_kl_divergence_to_be_less_than", + expectation=gxe.ExpectColumnKLDivergenceToBeLessThan( + column="col_a", + partition_object={ + "weights": [0.2, 0.2, 0.2, 0.2, 0.2], + "values": [1, 2, 3, None, 5], + }, + ), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_column_max_to_be_between", + 
+ expectation=gxe.ExpectColumnMaxToBeBetween( + column="col_a", min_value=0, max_value=10 + ), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_column_mean_to_be_between", + expectation=gxe.ExpectColumnMeanToBeBetween( + column="col_a", min_value=0, max_value=10 + ), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_column_median_to_be_between", + expectation=gxe.ExpectColumnMedianToBeBetween( + column="col_a", min_value=0, max_value=10 + ), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_column_min_to_be_between", + expectation=gxe.ExpectColumnMinToBeBetween( + column="col_a", min_value=0, max_value=10 + ), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_column_most_common_value_to_be_in_set", + expectation=gxe.ExpectColumnMostCommonValueToBeInSet( + column="col_a", value_set=[1, 2, 3] + ), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_column_pair_cramers_phi_value_to_be_less_than", + # Uses column_A / column_B; not in the public gxe API as of this version, so it is + # represented by an _AbstractStub carrying the type string instead of a real instance. + expectation=_AbstractStub( + "expect_column_pair_cramers_phi_value_to_be_less_than" + ), + data=_PAIR_DATA, + ), + ExpectationCase( + id="expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than", + # Not yet migrated; __init__ raises NotImplementedError.
+ expectation=_AbstractStub( + "expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than" + ), + data=_NUMERIC_DATA, + ), + ExpectationCase( + id="expect_column_proportion_of_non_null_values_to_be_between", + expectation=gxe.ExpectColumnProportionOfNonNullValuesToBeBetween( + column="col_a", min_value=0.0, max_value=1.0 + ), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_column_proportion_of_unique_values_to_be_between", + expectation=gxe.ExpectColumnProportionOfUniqueValuesToBeBetween( + column="col_a", min_value=0.0, max_value=1.0 + ), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_column_quantile_values_to_be_between", + expectation=gxe.ExpectColumnQuantileValuesToBeBetween( + column="col_c", + quantile_ranges={ + "quantiles": [0.25, 0.5, 0.75], + "value_ranges": [[0, 3], [1, 4], [2, 6]], + }, + ), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_column_stdev_to_be_between", + expectation=gxe.ExpectColumnStdevToBeBetween( + column="col_a", min_value=0, max_value=10 + ), + data=_NUMERIC_DATA, + ), + ExpectationCase( + id="expect_column_sum_to_be_between", + expectation=gxe.ExpectColumnSumToBeBetween( + column="col_a", min_value=0, max_value=100 + ), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_column_to_exist", + expectation=gxe.ExpectColumnToExist(column="col_a"), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_column_unique_value_count_to_be_between", + expectation=gxe.ExpectColumnUniqueValueCountToBeBetween( + column="col_a", min_value=1, max_value=10 + ), + data=_DEFAULT_DATA, + ), + # ------------------------------------------------------------------ + # AGGREGATE — TableExpectation / BatchExpectation + # ------------------------------------------------------------------ + ExpectationCase( + id="expect_query_results_to_match_comparison", + expectation=gxe.ExpectQueryResultsToMatchComparison( + base_query="SELECT 1 AS val", + comparison_data_source_name="other_ds", + 
comparison_query="SELECT 1 AS val", + ), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_table_column_count_to_be_between", + expectation=gxe.ExpectTableColumnCountToBeBetween(min_value=1, max_value=10), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_table_column_count_to_equal", + expectation=gxe.ExpectTableColumnCountToEqual(value=3), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_table_columns_to_match_ordered_list", + expectation=gxe.ExpectTableColumnsToMatchOrderedList( + column_list=["col_a", "col_b", "col_c"] + ), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_table_columns_to_match_set", + expectation=gxe.ExpectTableColumnsToMatchSet( + column_set=["col_a", "col_b", "col_c"] + ), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_table_row_count_to_be_between", + expectation=gxe.ExpectTableRowCountToBeBetween(min_value=1, max_value=100), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_table_row_count_to_equal", + expectation=gxe.ExpectTableRowCountToEqual(value=5), + data=_DEFAULT_DATA, + ), + ExpectationCase( + id="expect_table_row_count_to_equal_other_table", + expectation=gxe.ExpectTableRowCountToEqualOtherTable( + other_table_name="other_table" + ), + data=_DEFAULT_DATA, + ), +] diff --git a/tests/unit/core/validation_result_schemas/test_cases_table.py b/tests/unit/core/validation_result_schemas/test_cases_table.py new file mode 100644 index 000000000000..00913e21bb6e --- /dev/null +++ b/tests/unit/core/validation_result_schemas/test_cases_table.py @@ -0,0 +1,52 @@ +"""Unit tests for the EXPECTATION_CASES table. 
+ +Verifies: + (a) every ``id`` in the table is unique + (b) every expectation_type covered by ``family_for`` returns 'map' or 'aggregate' + (c) the case count equals the number of expect_*.py files under core/ +""" +from __future__ import annotations + +from pathlib import Path + +import pytest + +from great_expectations.core.validation_result_schemas.dispatcher import ( + family_for, +) +from tests.integration.data_sources_and_expectations.expectations._validation_result_schemas_cases import ( # noqa: E501 + EXPECTATION_CASES, +) + + +@pytest.mark.unit +def test_case_ids_are_unique() -> None: + ids = [c.id for c in EXPECTATION_CASES] + assert len(ids) == len(set(ids)), ( + f"Duplicate ids: {sorted(i for i in ids if ids.count(i) > 1)}" + ) + + +@pytest.mark.unit +def test_all_expectation_types_in_family_table() -> None: + for case in EXPECTATION_CASES: + exp_type = case.expectation.expectation_type + family = family_for(exp_type) + assert family in ("map", "aggregate"), ( + f"{exp_type!r} returned unexpected family {family!r}" + ) + + +@pytest.mark.unit +def test_case_count_matches_core_expectations() -> None: + core_dir = ( + Path(__file__).parent / ".." / ".." / ".." / ".." 
+ / "great_expectations" / "expectations" / "core" + ) + core_files = list(core_dir.glob("expect_*.py")) + expected_count = len( + [f for f in core_files if not f.name.startswith("__")] + ) + assert len(EXPECTATION_CASES) == expected_count, ( + f"Expected {expected_count} cases, got {len(EXPECTATION_CASES)}" + ) From 78b0a439d903a3bc635f9c4cf15d202e5175ab32 Mon Sep 17 00:00:00 2001 From: Josh Stauffer <66793731+joshua-stauffer@users.noreply.github.com> Date: Thu, 7 May 2026 18:31:20 +0200 Subject: [PATCH 14/20] feat(validation-result-schemas): matrix runner helpers + unit tests (task 7.2) --- .../_validation_result_schemas_helpers.py | 76 +++++++ .../test_runner_helpers.py | 209 ++++++++++++++++++ 2 files changed, 285 insertions(+) create mode 100644 tests/integration/data_sources_and_expectations/expectations/_validation_result_schemas_helpers.py create mode 100644 tests/unit/core/validation_result_schemas/test_runner_helpers.py diff --git a/tests/integration/data_sources_and_expectations/expectations/_validation_result_schemas_helpers.py b/tests/integration/data_sources_and_expectations/expectations/_validation_result_schemas_helpers.py new file mode 100644 index 000000000000..9508d2919ef8 --- /dev/null +++ b/tests/integration/data_sources_and_expectations/expectations/_validation_result_schemas_helpers.py @@ -0,0 +1,76 @@ +"""Matrix runner helpers for validation result schema tests. + +Underscore-prefixed so pytest does not collect this file. + +These helpers are imported by the matrix runner and its unit tests. +They are intentionally free of test framework dependencies so they can +be used in both pytest fixtures and standalone scripts. 
+""" +from __future__ import annotations + +from great_expectations.core.validation_result_schemas.field_validators import ( + classify_runtime_type, +) + +# --------------------------------------------------------------------------- +# SQL dialect normalisation table (from design.md) +# --------------------------------------------------------------------------- + +_SQL_DIALECTS = frozenset( + { + "sql", + "snowflake", + "postgres", + "redshift", + "databricks_sql", + "sqlite", + "bigquery", + "mysql", + "mssql", + } +) + + +def _normalize_engine_hint(datasource_type: str) -> str: + """Collapse SQL dialects to 'sql'; pass through 'pandas' and 'spark'. + + Unknown types are returned as-is. + """ + if datasource_type == "pandas": + return "pandas" + if datasource_type in ("spark", "dataframe"): + return "spark" + if datasource_type in _SQL_DIALECTS: + return "sql" + # Fallback: return as-is for unknown types + return datasource_type + + +def assert_field_set_covered(raw_result_dict: dict, parsed_model) -> None: + """Assert every key in raw_result_dict is reachable in parsed_model.dict(). + + The parsed model may have extra fields (like engine_hint) not in the raw + dict — that is fine. The reverse is NOT fine: raw dict keys that are + absent from the model indicate information loss. + + Raises AssertionError with the offending key(s) if any raw key is missing + from the model's dict() output. + """ + model_dict = parsed_model.dict() + missing = [k for k in raw_result_dict if k not in model_dict] + assert not missing, ( + f"Fields in raw_result_dict not covered by parsed model: {missing}" + ) + + +def summarize_raw_dict(raw: dict) -> dict: + """Extract structure (field names and types) from a result dict, never values. 
+ + Returns a dict with keys: + - raw_field_set: sorted list of field names + - raw_field_types: {field_name: RuntimeTypeName.value} + """ + return { + "raw_field_set": sorted(raw.keys()), + "raw_field_types": {k: classify_runtime_type(v).value for k, v in raw.items()}, + } diff --git a/tests/unit/core/validation_result_schemas/test_runner_helpers.py b/tests/unit/core/validation_result_schemas/test_runner_helpers.py new file mode 100644 index 000000000000..c9c3d075c01e --- /dev/null +++ b/tests/unit/core/validation_result_schemas/test_runner_helpers.py @@ -0,0 +1,209 @@ +"""Unit tests for matrix runner helpers. + +Covers: +- assert_field_set_covered: all raw keys present in model passes; + missing raw key raises AssertionError with key name; model extras are ignored +- summarize_raw_dict: empty dict, scalar/list/dict values, None values; + structure only — never values +- _normalize_engine_hint: pandas passthrough, spark/dataframe normalization, + all SQL dialects collapse to 'sql', unknown types returned as-is + +All tests are marked @pytest.mark.unit and run via: + pytest tests/unit/core/validation_result_schemas/test_runner_helpers.py -m unit +""" +from __future__ import annotations + +from typing import Any, Dict, Optional + +import pytest + +from great_expectations.compatibility import pydantic +from great_expectations.core.validation_result_schemas.types import RuntimeTypeName +from tests.integration.data_sources_and_expectations.expectations import ( + _validation_result_schemas_helpers as _helpers, +) + +_normalize_engine_hint = _helpers._normalize_engine_hint +assert_field_set_covered = _helpers.assert_field_set_covered +summarize_raw_dict = _helpers.summarize_raw_dict + +# --------------------------------------------------------------------------- +# Minimal pydantic model for exercising assert_field_set_covered +# --------------------------------------------------------------------------- + + +class _SimpleModel(pydantic.BaseModel): + """Minimal model 
with a known field set, plus an extra engine_hint field.""" + + success: Optional[bool] = None + result: Optional[Dict[str, Any]] = None + exception_info: Optional[Dict[str, Any]] = None + engine_hint: Optional[str] = None + + +# --------------------------------------------------------------------------- +# assert_field_set_covered +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +def test_assert_field_set_covered_all_present() -> None: + """When all raw keys exist in the model dict, no assertion is raised.""" + raw = {"success": True, "result": {"observed_value": 42}} + model = _SimpleModel(success=True, result={"observed_value": 42}) + # Should not raise + assert_field_set_covered(raw, model) + + +@pytest.mark.unit +def test_assert_field_set_covered_model_extra_keys_are_ok() -> None: + """Model may have extra keys (engine_hint) not in raw — that's fine.""" + raw = {"success": True} + model = _SimpleModel(success=True, engine_hint="pandas") + # engine_hint is in model but not in raw — should not raise + assert_field_set_covered(raw, model) + + +@pytest.mark.unit +def test_assert_field_set_covered_missing_raw_key_raises() -> None: + """A raw key absent from the model dict causes AssertionError.""" + + class _NarrowModel(pydantic.BaseModel): + success: Optional[bool] = None + + raw = {"success": True, "missing_field": "some_value"} + model = _NarrowModel(success=True) + with pytest.raises(AssertionError, match="missing_field"): + assert_field_set_covered(raw, model) + + +@pytest.mark.unit +def test_assert_field_set_covered_multiple_missing_keys_reported() -> None: + """All absent keys are reported together in the AssertionError message.""" + + class _EmptyModel(pydantic.BaseModel): + pass + + raw = {"key_a": 1, "key_b": 2} + model = _EmptyModel() + with pytest.raises(AssertionError) as exc_info: + assert_field_set_covered(raw, model) + msg = str(exc_info.value) + assert "key_a" in msg + assert "key_b" in msg + + 
+# --------------------------------------------------------------------------- +# summarize_raw_dict +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +def test_summarize_raw_dict_empty() -> None: + """Empty dict returns empty raw_field_set and raw_field_types.""" + result = summarize_raw_dict({}) + assert result == {"raw_field_set": [], "raw_field_types": {}} + + +@pytest.mark.unit +def test_summarize_raw_dict_scalar_values() -> None: + """Scalar values are classified to the correct RuntimeTypeName.""" + raw = { + "an_int": 42, + "a_float": 3.14, + "a_str": "hello", + "a_bool": True, + } + result = summarize_raw_dict(raw) + assert result["raw_field_set"] == sorted(raw.keys()) + assert result["raw_field_types"]["an_int"] == RuntimeTypeName.INT.value + assert result["raw_field_types"]["a_float"] == RuntimeTypeName.FLOAT.value + assert result["raw_field_types"]["a_str"] == RuntimeTypeName.STR.value + assert result["raw_field_types"]["a_bool"] == RuntimeTypeName.BOOL.value + + +@pytest.mark.unit +def test_summarize_raw_dict_collection_values() -> None: + """list and dict values are classified correctly.""" + raw = { + "a_list": [1, 2, 3], + "a_dict": {"nested": True}, + } + result = summarize_raw_dict(raw) + assert result["raw_field_types"]["a_list"] == RuntimeTypeName.LIST.value + assert result["raw_field_types"]["a_dict"] == RuntimeTypeName.DICT.value + + +@pytest.mark.unit +def test_summarize_raw_dict_none_values() -> None: + """None values are classified as RuntimeTypeName.NONE.""" + raw = {"nullable_field": None} + result = summarize_raw_dict(raw) + assert result["raw_field_types"]["nullable_field"] == RuntimeTypeName.NONE.value + + +@pytest.mark.unit +def test_summarize_raw_dict_field_set_is_sorted() -> None: + """raw_field_set must be in sorted order regardless of insertion order.""" + raw = {"z_last": 1, "a_first": 2, "m_middle": 3} + result = summarize_raw_dict(raw) + assert result["raw_field_set"] == 
["a_first", "m_middle", "z_last"] + + +@pytest.mark.unit +def test_summarize_raw_dict_never_includes_values() -> None: + """The result dict must not contain raw field values — only structure.""" + raw = {"secret_value": "do_not_leak_this"} + result = summarize_raw_dict(raw) + # Values should not appear anywhere in the output + assert "do_not_leak_this" not in str(result) + # But the key (structure) should be present + assert "secret_value" in result["raw_field_set"] + + +# --------------------------------------------------------------------------- +# _normalize_engine_hint +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +def test_normalize_engine_hint_pandas() -> None: + assert _normalize_engine_hint("pandas") == "pandas" + + +@pytest.mark.unit +def test_normalize_engine_hint_spark() -> None: + assert _normalize_engine_hint("spark") == "spark" + + +@pytest.mark.unit +def test_normalize_engine_hint_dataframe_to_spark() -> None: + assert _normalize_engine_hint("dataframe") == "spark" + + +@pytest.mark.unit +@pytest.mark.parametrize( + "dialect", + [ + "sql", + "snowflake", + "postgres", + "redshift", + "databricks_sql", + "sqlite", + "bigquery", + "mysql", + "mssql", + ], +) +def test_normalize_engine_hint_sql_dialects(dialect: str) -> None: + """All SQL dialects collapse to 'sql'.""" + assert _normalize_engine_hint(dialect) == "sql" + + +@pytest.mark.unit +def test_normalize_engine_hint_unknown_passthrough() -> None: + """Unknown engine types are returned as-is.""" + assert _normalize_engine_hint("unknown_engine_xyz") == "unknown_engine_xyz" + assert _normalize_engine_hint("dask") == "dask" + assert _normalize_engine_hint("") == "" From 994d5af93caca77007e478b127e4172e354a89f9 Mon Sep 17 00:00:00 2001 From: Josh Stauffer <66793731+joshua-stauffer@users.noreply.github.com> Date: Thu, 7 May 2026 18:43:36 +0200 Subject: [PATCH 15/20] feat(validation-result-schemas): matrix runner pandas slice + dispatcher fix (task 
7.3) --- .../validation_result_schemas/dispatcher.py | 8 +- tests/conftest.py | 6 + .../test_validation_result_schemas_matrix.py | 223 ++++++++++++++++++ 3 files changed, 234 insertions(+), 3 deletions(-) create mode 100644 tests/integration/data_sources_and_expectations/expectations/test_validation_result_schemas_matrix.py diff --git a/great_expectations/core/validation_result_schemas/dispatcher.py b/great_expectations/core/validation_result_schemas/dispatcher.py index 871ba36aec8e..f1d10b271941 100644 --- a/great_expectations/core/validation_result_schemas/dispatcher.py +++ b/great_expectations/core/validation_result_schemas/dispatcher.py @@ -246,10 +246,12 @@ def as_typed( family = family_for(expectation_type) schema_cls = _FORMAT_MAP[family][result_format] - # Pass engine_hint into the model when it is known (allows root validators - # inside map schemas to enforce SQL-required fields). + # Pass engine_hint into the model only for map-family schemas. Map schemas + # declare ``engine_hint`` as a field (MapResultBase) so that root validators + # can enforce SQL-required fields. Aggregate schemas do not declare the field + # and use ``extra = Extra.forbid``, so injecting it would raise a ValidationError. 
data = dict(result_dict) - if eff_engine is not None: + if eff_engine is not None and family == "map": data["engine_hint"] = eff_engine try: diff --git a/tests/conftest.py b/tests/conftest.py index c88af003fcef..34e05775a9ef 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -287,6 +287,12 @@ def pytest_addoption(parser): action="store_true", help="If set, run performance tests (which might also require additional arguments like --bigquery)", # noqa: E501 # FIXME CoP ) + parser.addoption( + "--vrs-run-id", + action="store", + default=None, + help="Run ID for validation result schema matrix findings file (optional; auto-generated if not set).", # noqa: E501 + ) def build_test_backends_list_v2_api(metafunc): diff --git a/tests/integration/data_sources_and_expectations/expectations/test_validation_result_schemas_matrix.py b/tests/integration/data_sources_and_expectations/expectations/test_validation_result_schemas_matrix.py new file mode 100644 index 000000000000..97565c87f00d --- /dev/null +++ b/tests/integration/data_sources_and_expectations/expectations/test_validation_result_schemas_matrix.py @@ -0,0 +1,223 @@ +"""Matrix runner for validation result schema coverage. + +Runs every (expectation x result_format x data_source) combination and writes +a structured findings JSON file. Pandas-only in the initial slice (task 7.3). + +Abstract stubs (5 expectations whose ``__init__`` raises ``NotImplementedError``) +cannot be validated; they produce ``status=failed`` findings and the corresponding +test cells are marked as failures — this is expected and documented here. 
+ +Findings file location (relative to the worktree root): + tests/_artifacts/validation_result_schemas/findings/.json +""" +from __future__ import annotations + +import datetime +import random +import string +from typing import TYPE_CHECKING + +import pandas as pd +import pytest + +from great_expectations.core.result_format import ResultFormat +from great_expectations.core.validation_result_schemas.dispatcher import as_typed +from great_expectations.core.validation_result_schemas.findings_emitter import ( + FindingsWriter, +) +from great_expectations.core.validation_result_schemas.types import Status +from tests.integration.conftest import parameterize_batch_for_data_sources +from tests.integration.data_sources_and_expectations.expectations._validation_result_schemas_cases import ( # noqa: E501 + EXPECTATION_CASES, + ExpectationCase, + _AbstractStub, +) +from tests.integration.data_sources_and_expectations.expectations._validation_result_schemas_helpers import ( # noqa: E501 + _normalize_engine_hint, + assert_field_set_covered, + summarize_raw_dict, +) +from tests.integration.data_sources_and_expectations.test_canonical_expectations import ( + JUST_PANDAS_DATA_SOURCES, +) + +if TYPE_CHECKING: + from great_expectations.datasource.fluent.interfaces import Batch + +# --------------------------------------------------------------------------- +# Pandas-only slice — extend to ALL_DATA_SOURCES in a later task +# --------------------------------------------------------------------------- +_PANDAS_DATA = pd.DataFrame( + { + "col_a": [1, 2, 3, None, 5], + "col_b": ["x", "y", "z", "w", None], + "col_c": [1.0, 2.0, None, 4.0, 5.0], + } +) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _generate_run_id() -> str: + """Generate a time-stamped run ID when ``--vrs-run-id`` is not supplied.""" + ts = 
datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%dT%H-%M-%SZ") + suffix = "".join(random.choices(string.ascii_lowercase + string.digits, k=6)) + return f"{ts}-{suffix}" + + +def _datasource_test_id(batch: Batch) -> str: + """Return a stable identifier for the data source under test.""" + return type(batch.datasource).__name__ + + +# --------------------------------------------------------------------------- +# Session-scoped findings writer fixture +# --------------------------------------------------------------------------- + + +@pytest.fixture(scope="session") +def _findings_writer(request: pytest.FixtureRequest) -> FindingsWriter: # type: ignore[return] + """Session-scoped FindingsWriter; yields writer, flushes on session teardown.""" + run_id: str = request.config.getoption("--vrs-run-id") or _generate_run_id() + with FindingsWriter(run_id=run_id) as writer: + yield writer + + +# --------------------------------------------------------------------------- +# Matrix test +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("case", EXPECTATION_CASES, ids=lambda c: c.id) +@pytest.mark.parametrize("result_format", list(ResultFormat)) +@parameterize_batch_for_data_sources( + data_source_configs=JUST_PANDAS_DATA_SOURCES, + data=_PANDAS_DATA, +) +def test_validation_result_schema_matrix( + batch_for_datasource: Batch, + case: ExpectationCase, + result_format: ResultFormat, + _findings_writer: FindingsWriter, +) -> None: + """Matrix runner: validate every (expectation x result_format x data_source) cell. + + Abstract-stub expectations (5 total) cannot be instantiated; they produce + ``status=failed`` findings. All other expectations should produce + ``status=parsed`` findings. 
+ """ + engine_hint = _normalize_engine_hint(batch_for_datasource.datasource.type) + datasource_test_id = _datasource_test_id(batch_for_datasource) + + # ------------------------------------------------------------------ + # Guard: abstract stubs cannot be validated — record failure immediately + # ------------------------------------------------------------------ + if isinstance(case.expectation, _AbstractStub): + _findings_writer.write_finding( + { + "expectation_type": case.expectation.expectation_type, + "result_format": result_format.value, + "engine": engine_hint, + "datasource_test_id": datasource_test_id, + "status": Status.FAILED.value, + "error_summary": "AbstractStub: expectation not yet implemented", + } + ) + pytest.skip( + f"[{case.id}][{result_format.value}][{engine_hint}]: abstract stub — skipped" + ) + + expectation_type: str = case.expectation.expectation_type # type: ignore[union-attr] + + try: + raw_evr = batch_for_datasource.validate( + case.expectation, result_format=result_format # type: ignore[arg-type] + ) + except Exception as exc: + _findings_writer.write_finding( + { + "expectation_type": expectation_type, + "result_format": result_format.value, + "engine": engine_hint, + "datasource_test_id": datasource_test_id, + "status": Status.FAILED.value, + "error_summary": f"batch.validate raised: {type(exc).__name__}: {exc}", + } + ) + pytest.fail( + f"[{case.id}][{result_format.value}][{engine_hint}]: " + f"batch.validate raised {type(exc).__name__}: {exc}" + ) + + raw_result: dict = raw_evr.result or {} + + try: + # Call as_typed() via the dispatcher directly so we pass the exact result_format + # that was used for the validate() call. raw_evr.as_typed() reads result_format + # from expectation_config.kwargs which may default to SUMMARY instead of the + # result_format we actually exercised. 
+ typed = as_typed( + raw_result, + expectation_type=expectation_type, + result_format=result_format, + engine_hint=engine_hint, + ) + except Exception as exc: + _findings_writer.write_finding( + { + "expectation_type": expectation_type, + "result_format": result_format.value, + "engine": engine_hint, + "datasource_test_id": datasource_test_id, + "status": Status.FAILED.value, + **summarize_raw_dict(raw_result), + "error_summary": f"as_typed raised: {type(exc).__name__}: {exc}", + } + ) + pytest.fail( + f"[{case.id}][{result_format.value}][{engine_hint}]: " + f"as_typed raised {type(exc).__name__}: {exc}" + ) + + # Coverage assertion: every raw key must appear in the parsed model + try: + assert_field_set_covered(raw_result, typed) + except AssertionError as exc: + _findings_writer.write_finding( + { + "expectation_type": expectation_type, + "result_format": result_format.value, + "engine": engine_hint, + "datasource_test_id": datasource_test_id, + "status": Status.FAILED.value, + **summarize_raw_dict(raw_result), + "matched_variant": type(typed).__name__, + "error_summary": str(exc), + } + ) + pytest.fail( + f"[{case.id}][{result_format.value}][{engine_hint}]: {exc}" + ) + + # Success path — record parsed finding + model_dict: dict = typed.dict() + schema_required = [k for k in raw_result if k in model_dict] + schema_optional = [k for k in model_dict if k not in raw_result] + + _findings_writer.write_finding( + { + "expectation_type": expectation_type, + "result_format": result_format.value, + "engine": engine_hint, + "datasource_test_id": datasource_test_id, + "status": Status.PARSED.value, + **summarize_raw_dict(raw_result), + "matched_variant": type(typed).__name__, + "schema_required_fields_present": schema_required, + "schema_optional_fields_present": schema_optional, + "schema_extras_rejected": [], + } + ) From f6e7cd6b3876ccabedec7f58a8ec1e13ccf3e500 Mon Sep 17 00:00:00 2001 From: Josh Stauffer <66793731+joshua-stauffer@users.noreply.github.com> Date: Thu, 
7 May 2026 19:38:36 +0200 Subject: [PATCH 16/20] feat(validation-result-schemas): expand matrix runner to ALL_DATA_SOURCES (task 8.1) --- pyproject.toml | 1 + .../test_validation_result_schemas_matrix.py | 35 +++++++++++++++---- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 95b42e486479..f01ecd3d2cef 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -703,6 +703,7 @@ markers = [ "spark: mark a test as Spark-dependent.", "spark_connect: mark a test as Spark Connect-dependent.", "trino: mark a test as trino-dependent.", + "no_xdist: mark a test module that must not be split across xdist workers (session-scoped fixtures with shared state).", "unit: mark a test as a unit test.", "v2_api: mark test as specific to the v2 api (e.g. pre Data Connectors).", ] diff --git a/tests/integration/data_sources_and_expectations/expectations/test_validation_result_schemas_matrix.py b/tests/integration/data_sources_and_expectations/expectations/test_validation_result_schemas_matrix.py index 97565c87f00d..a7d2c5768b72 100644 --- a/tests/integration/data_sources_and_expectations/expectations/test_validation_result_schemas_matrix.py +++ b/tests/integration/data_sources_and_expectations/expectations/test_validation_result_schemas_matrix.py @@ -1,7 +1,7 @@ """Matrix runner for validation result schema coverage. Runs every (expectation x result_format x data_source) combination and writes -a structured findings JSON file. Pandas-only in the initial slice (task 7.3). +a structured findings JSON file. Expanded to ALL_DATA_SOURCES (task 8.1). 
Abstract stubs (5 expectations whose ``__init__`` raises ``NotImplementedError``) cannot be validated; they produce ``status=failed`` findings and the corresponding @@ -9,6 +9,12 @@ Findings file location (relative to the worktree root): tests/_artifacts/validation_result_schemas/findings/.json + +xdist note: this module uses a session-scoped FindingsWriter; parallelising +within a single session would cause concurrent writes to the same JSON file. +The ``no_xdist`` marker documents this constraint. CI uses ``--dist loadfile`` +which naturally routes all cells from this file to a single worker, so the +constraint is satisfied without extra conftest machinery. """ from __future__ import annotations @@ -38,16 +44,33 @@ summarize_raw_dict, ) from tests.integration.data_sources_and_expectations.test_canonical_expectations import ( - JUST_PANDAS_DATA_SOURCES, + ALL_DATA_SOURCES, ) if TYPE_CHECKING: from great_expectations.datasource.fluent.interfaces import Batch # --------------------------------------------------------------------------- -# Pandas-only slice — extend to ALL_DATA_SOURCES in a later task +# Module-level marker: session-scoped FindingsWriter must not be split across +# xdist workers. CI uses --dist loadfile which enforces this automatically. +# --------------------------------------------------------------------------- +pytestmark = [pytest.mark.no_xdist] + +# --------------------------------------------------------------------------- +# Shared fixture data — a superset DataFrame whose columns cover all cases. +# +# Per-case data-shape variance resolution (task 8.1): +# All EXPECTATION_CASES reference columns that exist in this DataFrame. +# Cases needing specific data shapes (dates, JSON strings, pure numerics) +# will run against this data; the expectation may fail validation (e.g. 
+# ExpectColumnValuesToBeDateutilParseable against integers), but that is +# fine — we are testing schema *parsing* of whatever result dict comes back, +# not expectation correctness. SQL backends that cannot operate on a +# VARCHAR column for sum/numeric expectations will produce a batch.validate() +# error which is caught, recorded as status=failed, and surfaced to the +# curator exactly as designed. # --------------------------------------------------------------------------- -_PANDAS_DATA = pd.DataFrame( +_MATRIX_DATA = pd.DataFrame( { "col_a": [1, 2, 3, None, 5], "col_b": ["x", "y", "z", "w", None], @@ -94,8 +117,8 @@ def _findings_writer(request: pytest.FixtureRequest) -> FindingsWriter: # type: @pytest.mark.parametrize("case", EXPECTATION_CASES, ids=lambda c: c.id) @pytest.mark.parametrize("result_format", list(ResultFormat)) @parameterize_batch_for_data_sources( - data_source_configs=JUST_PANDAS_DATA_SOURCES, - data=_PANDAS_DATA, + data_source_configs=ALL_DATA_SOURCES, + data=_MATRIX_DATA, ) def test_validation_result_schema_matrix( batch_for_datasource: Batch, From 3e6ec4e03491f24cd218e569a64f47328ba72021 Mon Sep 17 00:00:00 2001 From: Josh Stauffer <66793731+joshua-stauffer@users.noreply.github.com> Date: Thu, 7 May 2026 19:43:58 +0200 Subject: [PATCH 17/20] feat(validation-result-schemas): CI artifact upload for findings (task 9.1) --- .../action.yml | 11 +++++++++++ .github/workflows/ci.yml | 12 ++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 .github/actions/upload-validation-result-schemas-findings/action.yml diff --git a/.github/actions/upload-validation-result-schemas-findings/action.yml b/.github/actions/upload-validation-result-schemas-findings/action.yml new file mode 100644 index 000000000000..733342bb77e3 --- /dev/null +++ b/.github/actions/upload-validation-result-schemas-findings/action.yml @@ -0,0 +1,11 @@ +name: Upload VRS Findings +description: Upload validation result schema findings artifact +runs: + using: composite + 
steps: + - uses: actions/upload-artifact@v4 + if: always() + with: + name: validation-result-schemas-findings + path: tests/_artifacts/validation_result_schemas/findings/ + if-no-files-found: ignore diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4ae16542343e..15ce29c2cec5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -370,6 +370,10 @@ jobs: # TODO: revert the timeout back to 1.5 or lower after resolving arc issues run: invoke ci-tests -m "unit" --xdist --slowest=10 --timeout=2.0 --reports + - name: Upload validation result schema findings + if: always() + uses: ./.github/actions/upload-validation-result-schemas-findings + # upload coverage report to codecov - name: Upload coverage reports to Codecov continue-on-error: true @@ -843,6 +847,10 @@ jobs: esac invoke ci-tests '${{ matrix.markers }}' --up-services --verbose --reports $FLAGS + - name: Upload validation result schema findings + if: always() + uses: ./.github/actions/upload-validation-result-schemas-findings + # upload coverage report to codecov - name: Upload coverage reports to Codecov continue-on-error: true @@ -943,6 +951,10 @@ jobs: env: SHARD: ${{ matrix.shard }} + - name: Upload validation result schema findings + if: always() + uses: ./.github/actions/upload-validation-result-schemas-findings + # upload coverage report to codecov - name: Upload coverage reports to Codecov continue-on-error: true From 68a88e64688ba0c0eb055ca886f5cedeabb2823a Mon Sep 17 00:00:00 2001 From: Josh Stauffer <66793731+joshua-stauffer@users.noreply.github.com> Date: Thu, 7 May 2026 20:19:58 +0200 Subject: [PATCH 18/20] =?UTF-8?q?fix(validation-result-schemas):=20schema?= =?UTF-8?q?=20gap=20fixes=20from=20matrix=20run=20=E2=80=94=20unexpected?= =?UTF-8?q?=5Fcount,=20observed=5Fvalue=20fields=20(task=2011.1)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../schemas/aggregate_result.py | 10 +++++++++- 
.../validation_result_schemas/schemas/map_result.py | 13 ++++++++++++- .../schemas/per_expectation_overrides.py | 13 ++++++++++--- .../validation_result_schemas/test_format_config.py | 2 +- 4 files changed, 32 insertions(+), 6 deletions(-) diff --git a/great_expectations/core/validation_result_schemas/schemas/aggregate_result.py b/great_expectations/core/validation_result_schemas/schemas/aggregate_result.py index addf814841a7..97780f342b04 100644 --- a/great_expectations/core/validation_result_schemas/schemas/aggregate_result.py +++ b/great_expectations/core/validation_result_schemas/schemas/aggregate_result.py @@ -57,11 +57,19 @@ class AggregateBooleanOnlyResult(AggregateResultBase): class AggregateBasicResult(AggregateResultBase): - """ResultFormat.BASIC — counts, percents, and partial lists.""" + """ResultFormat.BASIC — counts, percents, and partial lists. + + Note: ``unexpected_count`` is included here because a subset of aggregate + expectations (e.g. ``expect_column_distinct_values_to_equal_set``) emit + it alongside the standard aggregate fields. It is Optional so that the + majority of aggregate expectations — which do *not* emit it — continue to + validate cleanly. 
+ """ element_count: Optional[int] = None missing_count: Optional[int] = None missing_percent: Optional[float] = None + unexpected_count: Optional[int] = None partial_unexpected_list: Optional[List[Any]] = None partial_missing_list: Optional[List[Any]] = None diff --git a/great_expectations/core/validation_result_schemas/schemas/map_result.py b/great_expectations/core/validation_result_schemas/schemas/map_result.py index 7d884ed2ecd4..116a735e56bb 100644 --- a/great_expectations/core/validation_result_schemas/schemas/map_result.py +++ b/great_expectations/core/validation_result_schemas/schemas/map_result.py @@ -63,7 +63,15 @@ class MapBooleanOnlyResult(MapResultBase): class MapBasicResult(MapResultBase): - """ResultFormat.BASIC — counts, percents, and the partial unexpected list.""" + """ResultFormat.BASIC — counts, percents, and the partial unexpected list. + + Note: ``observed_value`` is included here because a small set of map + expectations (e.g. ``expect_column_values_to_be_of_type``, + ``expect_column_values_to_be_in_type_list``) emit it alongside the + standard map fields on the pandas engine path. It is Optional so that + the majority of map expectations — which do *not* emit it — continue to + validate cleanly. + """ element_count: Optional[int] = None unexpected_count: Optional[int] = None @@ -73,6 +81,9 @@ class MapBasicResult(MapResultBase): unexpected_percent_total: Optional[float] = None unexpected_percent_nonmissing: Optional[float] = None partial_unexpected_list: Optional[List[Any]] = None + # Some map expectations (e.g. expect_column_values_to_be_of_type on pandas) + # emit observed_value alongside the standard map fields. 
+ observed_value: Optional[Any] = None # engine-typed; classified at runtime, not validated by type unexpected_rows: Any = None diff --git a/great_expectations/core/validation_result_schemas/schemas/per_expectation_overrides.py b/great_expectations/core/validation_result_schemas/schemas/per_expectation_overrides.py index 466d289bd803..5ca246d0d167 100644 --- a/great_expectations/core/validation_result_schemas/schemas/per_expectation_overrides.py +++ b/great_expectations/core/validation_result_schemas/schemas/per_expectation_overrides.py @@ -11,15 +11,22 @@ """ from __future__ import annotations +from typing import Optional + from great_expectations.compatibility import pydantic from great_expectations.compatibility.pydantic import BaseModel class ExpectColumnValuesToBeOfTypeSqlSparkResult(BaseModel): - """ExpectColumnValuesToBeOfType bypasses _format_map_output on SQL/Spark - and emits {observed_value: } only.""" + """ExpectColumnValuesToBeOfType bypasses _format_map_output on SQL/Spark. + + For BASIC / SUMMARY / COMPLETE formats, the result dict contains only + ``{observed_value: }``. For BOOLEAN_ONLY format the result + dict is empty ``{}``, so ``observed_value`` must be Optional here to allow + both cases through the same override schema. 
+ """ class Config: extra = pydantic.Extra.forbid - observed_value: str + observed_value: Optional[str] = None diff --git a/tests/unit/core/validation_result_schemas/test_format_config.py b/tests/unit/core/validation_result_schemas/test_format_config.py index 8b1936a54ae9..566cff6d4216 100644 --- a/tests/unit/core/validation_result_schemas/test_format_config.py +++ b/tests/unit/core/validation_result_schemas/test_format_config.py @@ -160,7 +160,7 @@ def test_partial_unexpected_count_preserved_when_supplied() -> None: @pytest.mark.unit def test_result_format_config_required_is_typeddict() -> None: - """Confirm ResultFormatConfigRequired is a TypedDict class (not a runtime check, but importable).""" + """Confirm ResultFormatConfigRequired is a TypedDict (not a runtime check, but importable).""" # Verify the class exists and has the expected annotations annotations = ResultFormatConfigRequired.__annotations__ assert "result_format" in annotations From 23a5e45cafc6fd70ae20e3061321889622614654 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 7 May 2026 18:25:10 +0000 Subject: [PATCH 19/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../core/expectation_validation_result.py | 4 +- .../validation_result_schemas/__init__.py | 1 + .../validation_result_schemas/dispatcher.py | 15 +-- .../field_validators.py | 5 +- .../findings_emitter.py | 9 +- .../format_config.py | 1 + .../schemas/__init__.py | 1 + .../schemas/aggregate_result.py | 1 + .../schemas/map_result.py | 11 +-- .../schemas/per_expectation_overrides.py | 1 + .../core/validation_result_schemas/types.py | 1 + .../_validation_result_schemas_cases.py | 93 +++++-------------- .../_validation_result_schemas_helpers.py | 5 +- .../test_validation_result_schemas_matrix.py | 12 +-- .../test_as_typed.py | 1 + .../test_cases_table.py | 23 ++--- .../test_dispatcher.py | 15 ++- 
.../test_field_validators.py | 13 +-- .../test_findings_emitter.py | 5 +- .../test_format_config.py | 1 + .../test_runner_helpers.py | 1 + .../test_schemas_aggregate.py | 1 + .../test_schemas_map.py | 1 + .../test_schemas_overrides.py | 1 + 24 files changed, 89 insertions(+), 133 deletions(-) diff --git a/great_expectations/core/expectation_validation_result.py b/great_expectations/core/expectation_validation_result.py index 937de87aca9f..a37dc8ae0ab8 100644 --- a/great_expectations/core/expectation_validation_result.py +++ b/great_expectations/core/expectation_validation_result.py @@ -419,9 +419,7 @@ def as_typed(self, *, engine_hint: Optional[str] = None): else: result_format = result_format_value - expectation_type = ( - self.expectation_config.type if self.expectation_config else "unknown" - ) + expectation_type = self.expectation_config.type if self.expectation_config else "unknown" return as_typed( self.result or {}, diff --git a/great_expectations/core/validation_result_schemas/__init__.py b/great_expectations/core/validation_result_schemas/__init__.py index cea600074c03..25da4c170e43 100644 --- a/great_expectations/core/validation_result_schemas/__init__.py +++ b/great_expectations/core/validation_result_schemas/__init__.py @@ -3,6 +3,7 @@ Re-exports are populated as implementation tasks land. This package is not added to great_expectations/__init__.py and contains no @public_api symbols. """ + from great_expectations.core.validation_result_schemas.dispatcher import ( ParseError, Result, diff --git a/great_expectations/core/validation_result_schemas/dispatcher.py b/great_expectations/core/validation_result_schemas/dispatcher.py index f1d10b271941..f8c28eec8463 100644 --- a/great_expectations/core/validation_result_schemas/dispatcher.py +++ b/great_expectations/core/validation_result_schemas/dispatcher.py @@ -13,6 +13,7 @@ - No PEP 604 unions (``X | Y``); use ``Optional[X]`` or ``Union[X, Y]``. - No direct ``import pydantic``. 
""" + from __future__ import annotations from typing import Any, Dict, Optional, Union @@ -68,19 +69,13 @@ class ParseError(Exception): def _override_parse_error_msg( expectation_type: str, eff_engine: Optional[str], cls_name: str, exc: object ) -> str: - return ( - f"Failed to parse {expectation_type!r} with engine={eff_engine!r} " - f"as {cls_name}: {exc}" - ) + return f"Failed to parse {expectation_type!r} with engine={eff_engine!r} as {cls_name}: {exc}" def _family_parse_error_msg( expectation_type: str, fmt_value: str, cls_name: str, exc: object ) -> str: - return ( - f"Failed to parse {expectation_type!r} ({fmt_value}) " - f"as {cls_name}: {exc}" - ) + return f"Failed to parse {expectation_type!r} ({fmt_value}) as {cls_name}: {exc}" # --------------------------------------------------------------------------- @@ -258,7 +253,5 @@ def as_typed( return schema_cls(**data) except pydantic.ValidationError as exc: raise ParseError( - _family_parse_error_msg( - expectation_type, result_format.value, schema_cls.__name__, exc - ) + _family_parse_error_msg(expectation_type, result_format.value, schema_cls.__name__, exc) ) from exc diff --git a/great_expectations/core/validation_result_schemas/field_validators.py b/great_expectations/core/validation_result_schemas/field_validators.py index 4f2ed63efd81..15a5acc4029f 100644 --- a/great_expectations/core/validation_result_schemas/field_validators.py +++ b/great_expectations/core/validation_result_schemas/field_validators.py @@ -9,6 +9,7 @@ - ``RuntimeTypeName`` comes from ``validation_result_schemas.types``. - No direct ``import pydantic``, no PEP 604 unions. 
""" + from __future__ import annotations from typing import Any, Optional @@ -87,9 +88,7 @@ def validate_unexpected_rows_passthrough(cls: Any, v: Any) -> Any: return v -def validate_partial_unexpected_counts_fallback( - cls: Any, v: Optional[list] -) -> Optional[list]: +def validate_partial_unexpected_counts_fallback(cls: Any, v: Optional[list]) -> Optional[list]: """v1 validator for ``partial_unexpected_counts``. Accepts the two documented shapes: diff --git a/great_expectations/core/validation_result_schemas/findings_emitter.py b/great_expectations/core/validation_result_schemas/findings_emitter.py index 91db8c9fda06..a72eee64fc4a 100644 --- a/great_expectations/core/validation_result_schemas/findings_emitter.py +++ b/great_expectations/core/validation_result_schemas/findings_emitter.py @@ -13,6 +13,7 @@ ``(expectation_type, engine, result_format)`` for deterministic diffs across runs. """ + from __future__ import annotations import json @@ -57,9 +58,7 @@ class FindingsWriter: def __init__(self, run_id: str, output_dir: Optional[Path] = None) -> None: self._run_id = run_id self._findings: List[Finding] = [] - self._started_at_utc: str = datetime.now(timezone.utc).strftime( - "%Y-%m-%dT%H:%M:%SZ" - ) + self._started_at_utc: str = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") # Directory resolution: arg → env var → _DEFAULT_DIR if output_dir is not None: @@ -79,9 +78,7 @@ def write_finding(self, finding: Finding) -> None: def close(self) -> None: """Sort findings and write them atomically to the output file.""" - completed_at_utc: str = datetime.now(timezone.utc).strftime( - "%Y-%m-%dT%H:%M:%SZ" - ) + completed_at_utc: str = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") # Sort deterministically by (expectation_type, engine, result_format) sorted_findings: List[Finding] = sorted( diff --git a/great_expectations/core/validation_result_schemas/format_config.py b/great_expectations/core/validation_result_schemas/format_config.py index 
c60ac0bd17e7..5e4782092a24 100644 --- a/great_expectations/core/validation_result_schemas/format_config.py +++ b/great_expectations/core/validation_result_schemas/format_config.py @@ -3,6 +3,7 @@ These types are not part of the public API and must not be exported via great_expectations/__init__.py or decorated with @public_api. """ + from __future__ import annotations from typing import TypedDict diff --git a/great_expectations/core/validation_result_schemas/schemas/__init__.py b/great_expectations/core/validation_result_schemas/schemas/__init__.py index 42b07231ff1a..717183678941 100644 --- a/great_expectations/core/validation_result_schemas/schemas/__init__.py +++ b/great_expectations/core/validation_result_schemas/schemas/__init__.py @@ -1,4 +1,5 @@ """Schema family re-exports. Populated as schema tasks land.""" + from great_expectations.core.validation_result_schemas.schemas.aggregate_result import ( AggregateBasicResult, AggregateBooleanOnlyResult, diff --git a/great_expectations/core/validation_result_schemas/schemas/aggregate_result.py b/great_expectations/core/validation_result_schemas/schemas/aggregate_result.py index 97780f342b04..0ba9f0ddb708 100644 --- a/great_expectations/core/validation_result_schemas/schemas/aggregate_result.py +++ b/great_expectations/core/validation_result_schemas/schemas/aggregate_result.py @@ -16,6 +16,7 @@ - No PEP 604 unions (``X | Y``); use ``Optional[X]`` or ``Union[X, Y]``. - No direct ``import pydantic``. 
""" + from __future__ import annotations from typing import Any, Dict, List, Optional, Union diff --git a/great_expectations/core/validation_result_schemas/schemas/map_result.py b/great_expectations/core/validation_result_schemas/schemas/map_result.py index 116a735e56bb..b4ac3f42f203 100644 --- a/great_expectations/core/validation_result_schemas/schemas/map_result.py +++ b/great_expectations/core/validation_result_schemas/schemas/map_result.py @@ -16,6 +16,7 @@ - No PEP 604 unions (``X | Y``); use ``Optional[X]`` or ``Union[X, Y]``. - No direct ``import pydantic``. """ + from __future__ import annotations from typing import Any, List, Optional @@ -98,9 +99,9 @@ class MapSummaryResult(MapBasicResult): partial_unexpected_counts: Optional[List[Any]] = None partial_unexpected_index_list: Optional[List[Any]] = None - _validate_counts = pydantic.validator( - "partial_unexpected_counts", pre=True, allow_reuse=True - )(validate_partial_unexpected_counts_fallback) + _validate_counts = pydantic.validator("partial_unexpected_counts", pre=True, allow_reuse=True)( + validate_partial_unexpected_counts_fallback + ) class MapCompleteResult(MapSummaryResult): @@ -114,6 +115,4 @@ class MapCompleteResult(MapSummaryResult): unexpected_list: Optional[List[Any]] = None unexpected_index_list: Optional[List[Any]] = None - _root_validate = pydantic.root_validator(allow_reuse=True)( - root_validate_engine_required_fields - ) + _root_validate = pydantic.root_validator(allow_reuse=True)(root_validate_engine_required_fields) diff --git a/great_expectations/core/validation_result_schemas/schemas/per_expectation_overrides.py b/great_expectations/core/validation_result_schemas/schemas/per_expectation_overrides.py index 5ca246d0d167..7e9246666440 100644 --- a/great_expectations/core/validation_result_schemas/schemas/per_expectation_overrides.py +++ b/great_expectations/core/validation_result_schemas/schemas/per_expectation_overrides.py @@ -9,6 +9,7 @@ - No PEP 604 unions (``X | Y``); use 
``Optional[X]`` or ``Union[X, Y]``. - No direct ``import pydantic``. """ + from __future__ import annotations from typing import Optional diff --git a/great_expectations/core/validation_result_schemas/types.py b/great_expectations/core/validation_result_schemas/types.py index 20384563ba18..2f4c401ca08a 100644 --- a/great_expectations/core/validation_result_schemas/types.py +++ b/great_expectations/core/validation_result_schemas/types.py @@ -3,6 +3,7 @@ Defines the enumeration types and TypedDicts used across the validation_result_schemas package. """ + from __future__ import annotations from enum import Enum diff --git a/tests/integration/data_sources_and_expectations/expectations/_validation_result_schemas_cases.py b/tests/integration/data_sources_and_expectations/expectations/_validation_result_schemas_cases.py index 7879d20606e3..d6468a5517fc 100644 --- a/tests/integration/data_sources_and_expectations/expectations/_validation_result_schemas_cases.py +++ b/tests/integration/data_sources_and_expectations/expectations/_validation_result_schemas_cases.py @@ -10,6 +10,7 @@ lightweight ``_AbstractStub`` object that carries the correct ``expectation_type`` string so that the ``family_for`` lookup test still passes. 
""" + from __future__ import annotations from typing import List, Mapping, NamedTuple, Optional @@ -142,9 +143,7 @@ class ExpectationCase(NamedTuple): ), ExpectationCase( id="expect_column_values_to_be_between", - expectation=gxe.ExpectColumnValuesToBeBetween( - column="col_a", min_value=0, max_value=10 - ), + expectation=gxe.ExpectColumnValuesToBeBetween(column="col_a", min_value=0, max_value=10), data=_DEFAULT_DATA, ), ExpectationCase( @@ -159,9 +158,7 @@ class ExpectationCase(NamedTuple): ), ExpectationCase( id="expect_column_values_to_be_in_set", - expectation=gxe.ExpectColumnValuesToBeInSet( - column="col_a", value_set=[1, 2, 3, None, 5] - ), + expectation=gxe.ExpectColumnValuesToBeInSet(column="col_a", value_set=[1, 2, 3, None, 5]), data=_DEFAULT_DATA, ), ExpectationCase( @@ -205,9 +202,7 @@ class ExpectationCase(NamedTuple): ), ExpectationCase( id="expect_column_values_to_match_like_pattern", - expectation=gxe.ExpectColumnValuesToMatchLikePattern( - column="col_b", like_pattern="%" - ), + expectation=gxe.ExpectColumnValuesToMatchLikePattern(column="col_b", like_pattern="%"), data=_DEFAULT_DATA, ), ExpectationCase( @@ -219,9 +214,7 @@ class ExpectationCase(NamedTuple): ), ExpectationCase( id="expect_column_values_to_match_regex", - expectation=gxe.ExpectColumnValuesToMatchRegex( - column="col_b", regex="^[a-z]$" - ), + expectation=gxe.ExpectColumnValuesToMatchRegex(column="col_b", regex="^[a-z]$"), data=_DEFAULT_DATA, ), ExpectationCase( @@ -240,9 +233,7 @@ class ExpectationCase(NamedTuple): ), ExpectationCase( id="expect_column_values_to_not_be_in_set", - expectation=gxe.ExpectColumnValuesToNotBeInSet( - column="col_a", value_set=[99, 100] - ), + expectation=gxe.ExpectColumnValuesToNotBeInSet(column="col_a", value_set=[99, 100]), data=_DEFAULT_DATA, ), ExpectationCase( @@ -266,9 +257,7 @@ class ExpectationCase(NamedTuple): ), ExpectationCase( id="expect_column_values_to_not_match_regex", - expectation=gxe.ExpectColumnValuesToNotMatchRegex( - column="col_b", 
regex="^[0-9]+$" - ), + expectation=gxe.ExpectColumnValuesToNotMatchRegex(column="col_b", regex="^[0-9]+$"), data=_DEFAULT_DATA, ), ExpectationCase( @@ -283,16 +272,12 @@ class ExpectationCase(NamedTuple): # ------------------------------------------------------------------ ExpectationCase( id="expect_column_pair_values_a_to_be_greater_than_b", - expectation=gxe.ExpectColumnPairValuesAToBeGreaterThanB( - column_A="col_a", column_B="col_b" - ), + expectation=gxe.ExpectColumnPairValuesAToBeGreaterThanB(column_A="col_a", column_B="col_b"), data=_PAIR_DATA, ), ExpectationCase( id="expect_column_pair_values_to_be_equal", - expectation=gxe.ExpectColumnPairValuesToBeEqual( - column_A="col_a", column_B="col_b" - ), + expectation=gxe.ExpectColumnPairValuesToBeEqual(column_A="col_a", column_B="col_b"), data=_PAIR_DATA, ), ExpectationCase( @@ -309,16 +294,12 @@ class ExpectationCase(NamedTuple): # ------------------------------------------------------------------ ExpectationCase( id="expect_compound_columns_to_be_unique", - expectation=gxe.ExpectCompoundColumnsToBeUnique( - column_list=["col_a", "col_b"] - ), + expectation=gxe.ExpectCompoundColumnsToBeUnique(column_list=["col_a", "col_b"]), data=_DEFAULT_DATA, ), ExpectationCase( id="expect_multicolumn_sum_to_equal", - expectation=gxe.ExpectMulticolumnSumToEqual( - column_list=["col_a", "col_b"], sum_total=3 - ), + expectation=gxe.ExpectMulticolumnSumToEqual(column_list=["col_a", "col_b"], sum_total=3), data=pd.DataFrame( { "col_a": [1, 2, 3, None, 2], @@ -347,17 +328,13 @@ class ExpectationCase(NamedTuple): ExpectationCase( id="expect_column_bootstrapped_ks_test_p_value_to_be_greater_than", # This expectation is not yet migrated; __init__ raises NotImplementedError. 
- expectation=_AbstractStub( - "expect_column_bootstrapped_ks_test_p_value_to_be_greater_than" - ), + expectation=_AbstractStub("expect_column_bootstrapped_ks_test_p_value_to_be_greater_than"), data=_NUMERIC_DATA, ), ExpectationCase( id="expect_column_chisquare_test_p_value_to_be_greater_than", # Not yet migrated; __init__ raises NotImplementedError. - expectation=_AbstractStub( - "expect_column_chisquare_test_p_value_to_be_greater_than" - ), + expectation=_AbstractStub("expect_column_chisquare_test_p_value_to_be_greater_than"), data=_DEFAULT_DATA, ), ExpectationCase( @@ -369,9 +346,7 @@ class ExpectationCase(NamedTuple): ), ExpectationCase( id="expect_column_distinct_values_to_contain_set", - expectation=gxe.ExpectColumnDistinctValuesToContainSet( - column="col_a", value_set=[1, 2] - ), + expectation=gxe.ExpectColumnDistinctValuesToContainSet(column="col_a", value_set=[1, 2]), data=_DEFAULT_DATA, ), ExpectationCase( @@ -394,46 +369,34 @@ class ExpectationCase(NamedTuple): ), ExpectationCase( id="expect_column_max_to_be_between", - expectation=gxe.ExpectColumnMaxToBeBetween( - column="col_a", min_value=0, max_value=10 - ), + expectation=gxe.ExpectColumnMaxToBeBetween(column="col_a", min_value=0, max_value=10), data=_DEFAULT_DATA, ), ExpectationCase( id="expect_column_mean_to_be_between", - expectation=gxe.ExpectColumnMeanToBeBetween( - column="col_a", min_value=0, max_value=10 - ), + expectation=gxe.ExpectColumnMeanToBeBetween(column="col_a", min_value=0, max_value=10), data=_DEFAULT_DATA, ), ExpectationCase( id="expect_column_median_to_be_between", - expectation=gxe.ExpectColumnMedianToBeBetween( - column="col_a", min_value=0, max_value=10 - ), + expectation=gxe.ExpectColumnMedianToBeBetween(column="col_a", min_value=0, max_value=10), data=_DEFAULT_DATA, ), ExpectationCase( id="expect_column_min_to_be_between", - expectation=gxe.ExpectColumnMinToBeBetween( - column="col_a", min_value=0, max_value=10 - ), + expectation=gxe.ExpectColumnMinToBeBetween(column="col_a", 
min_value=0, max_value=10), data=_DEFAULT_DATA, ), ExpectationCase( id="expect_column_most_common_value_to_be_in_set", - expectation=gxe.ExpectColumnMostCommonValueToBeInSet( - column="col_a", value_set=[1, 2, 3] - ), + expectation=gxe.ExpectColumnMostCommonValueToBeInSet(column="col_a", value_set=[1, 2, 3]), data=_DEFAULT_DATA, ), ExpectationCase( id="expect_column_pair_cramers_phi_value_to_be_less_than", # Uses column_A / column_B (not in the public gxe API as of this version; # import directly from the core module). - expectation=_AbstractStub( - "expect_column_pair_cramers_phi_value_to_be_less_than" - ), + expectation=_AbstractStub("expect_column_pair_cramers_phi_value_to_be_less_than"), data=_PAIR_DATA, ), ExpectationCase( @@ -471,16 +434,12 @@ class ExpectationCase(NamedTuple): ), ExpectationCase( id="expect_column_stdev_to_be_between", - expectation=gxe.ExpectColumnStdevToBeBetween( - column="col_a", min_value=0, max_value=10 - ), + expectation=gxe.ExpectColumnStdevToBeBetween(column="col_a", min_value=0, max_value=10), data=_NUMERIC_DATA, ), ExpectationCase( id="expect_column_sum_to_be_between", - expectation=gxe.ExpectColumnSumToBeBetween( - column="col_a", min_value=0, max_value=100 - ), + expectation=gxe.ExpectColumnSumToBeBetween(column="col_a", min_value=0, max_value=100), data=_DEFAULT_DATA, ), ExpectationCase( @@ -526,9 +485,7 @@ class ExpectationCase(NamedTuple): ), ExpectationCase( id="expect_table_columns_to_match_set", - expectation=gxe.ExpectTableColumnsToMatchSet( - column_set=["col_a", "col_b", "col_c"] - ), + expectation=gxe.ExpectTableColumnsToMatchSet(column_set=["col_a", "col_b", "col_c"]), data=_DEFAULT_DATA, ), ExpectationCase( @@ -543,9 +500,7 @@ class ExpectationCase(NamedTuple): ), ExpectationCase( id="expect_table_row_count_to_equal_other_table", - expectation=gxe.ExpectTableRowCountToEqualOtherTable( - other_table_name="other_table" - ), + expectation=gxe.ExpectTableRowCountToEqualOtherTable(other_table_name="other_table"), 
data=_DEFAULT_DATA, ), ] diff --git a/tests/integration/data_sources_and_expectations/expectations/_validation_result_schemas_helpers.py b/tests/integration/data_sources_and_expectations/expectations/_validation_result_schemas_helpers.py index 9508d2919ef8..84b1f5aab02c 100644 --- a/tests/integration/data_sources_and_expectations/expectations/_validation_result_schemas_helpers.py +++ b/tests/integration/data_sources_and_expectations/expectations/_validation_result_schemas_helpers.py @@ -6,6 +6,7 @@ They are intentionally free of test framework dependencies so they can be used in both pytest fixtures and standalone scripts. """ + from __future__ import annotations from great_expectations.core.validation_result_schemas.field_validators import ( @@ -58,9 +59,7 @@ def assert_field_set_covered(raw_result_dict: dict, parsed_model) -> None: """ model_dict = parsed_model.dict() missing = [k for k in raw_result_dict if k not in model_dict] - assert not missing, ( - f"Fields in raw_result_dict not covered by parsed model: {missing}" - ) + assert not missing, f"Fields in raw_result_dict not covered by parsed model: {missing}" def summarize_raw_dict(raw: dict) -> dict: diff --git a/tests/integration/data_sources_and_expectations/expectations/test_validation_result_schemas_matrix.py b/tests/integration/data_sources_and_expectations/expectations/test_validation_result_schemas_matrix.py index a7d2c5768b72..143aabf52592 100644 --- a/tests/integration/data_sources_and_expectations/expectations/test_validation_result_schemas_matrix.py +++ b/tests/integration/data_sources_and_expectations/expectations/test_validation_result_schemas_matrix.py @@ -16,6 +16,7 @@ which naturally routes all cells from this file to a single worker, so the constraint is satisfied without extra conftest machinery. 
""" + from __future__ import annotations import datetime @@ -149,15 +150,14 @@ def test_validation_result_schema_matrix( "error_summary": "AbstractStub: expectation not yet implemented", } ) - pytest.skip( - f"[{case.id}][{result_format.value}][{engine_hint}]: abstract stub — skipped" - ) + pytest.skip(f"[{case.id}][{result_format.value}][{engine_hint}]: abstract stub — skipped") expectation_type: str = case.expectation.expectation_type # type: ignore[union-attr] try: raw_evr = batch_for_datasource.validate( - case.expectation, result_format=result_format # type: ignore[arg-type] + case.expectation, + result_format=result_format, # type: ignore[arg-type] ) except Exception as exc: _findings_writer.write_finding( @@ -221,9 +221,7 @@ def test_validation_result_schema_matrix( "error_summary": str(exc), } ) - pytest.fail( - f"[{case.id}][{result_format.value}][{engine_hint}]: {exc}" - ) + pytest.fail(f"[{case.id}][{result_format.value}][{engine_hint}]: {exc}") # Success path — record parsed finding model_dict: dict = typed.dict() diff --git a/tests/unit/core/validation_result_schemas/test_as_typed.py b/tests/unit/core/validation_result_schemas/test_as_typed.py index 9ec2e546b0b0..b1a570869c89 100644 --- a/tests/unit/core/validation_result_schemas/test_as_typed.py +++ b/tests/unit/core/validation_result_schemas/test_as_typed.py @@ -11,6 +11,7 @@ All tests are marked @pytest.mark.unit and run via: pytest tests/unit/core/validation_result_schemas/test_as_typed.py -m unit -v """ + from __future__ import annotations import json diff --git a/tests/unit/core/validation_result_schemas/test_cases_table.py b/tests/unit/core/validation_result_schemas/test_cases_table.py index 00913e21bb6e..901edf9cda3b 100644 --- a/tests/unit/core/validation_result_schemas/test_cases_table.py +++ b/tests/unit/core/validation_result_schemas/test_cases_table.py @@ -5,6 +5,7 @@ (b) every expectation_type covered by ``family_for`` returns 'map' or 'aggregate' (c) the case count equals the number of 
expect_*.py files under core/ """ + from __future__ import annotations from pathlib import Path @@ -22,9 +23,7 @@ @pytest.mark.unit def test_case_ids_are_unique() -> None: ids = [c.id for c in EXPECTATION_CASES] - assert len(ids) == len(set(ids)), ( - f"Duplicate ids: {sorted(i for i in ids if ids.count(i) > 1)}" - ) + assert len(ids) == len(set(ids)), f"Duplicate ids: {sorted(i for i in ids if ids.count(i) > 1)}" @pytest.mark.unit @@ -32,21 +31,23 @@ def test_all_expectation_types_in_family_table() -> None: for case in EXPECTATION_CASES: exp_type = case.expectation.expectation_type family = family_for(exp_type) - assert family in ("map", "aggregate"), ( - f"{exp_type!r} returned unexpected family {family!r}" - ) + assert family in ("map", "aggregate"), f"{exp_type!r} returned unexpected family {family!r}" @pytest.mark.unit def test_case_count_matches_core_expectations() -> None: core_dir = ( - Path(__file__).parent / ".." / ".." / ".." / ".." - / "great_expectations" / "expectations" / "core" + Path(__file__).parent + / ".." + / ".." + / ".." + / ".." 
+ / "great_expectations" + / "expectations" + / "core" ) core_files = list(core_dir.glob("expect_*.py")) - expected_count = len( - [f for f in core_files if not f.name.startswith("__")] - ) + expected_count = len([f for f in core_files if not f.name.startswith("__")]) assert len(EXPECTATION_CASES) == expected_count, ( f"Expected {expected_count} cases, got {len(EXPECTATION_CASES)}" ) diff --git a/tests/unit/core/validation_result_schemas/test_dispatcher.py b/tests/unit/core/validation_result_schemas/test_dispatcher.py index 36a7148838d4..655f8ec8f1aa 100644 --- a/tests/unit/core/validation_result_schemas/test_dispatcher.py +++ b/tests/unit/core/validation_result_schemas/test_dispatcher.py @@ -11,6 +11,7 @@ All tests are marked @pytest.mark.unit and run via: pytest tests/unit/core/validation_result_schemas/test_dispatcher.py -m unit -v """ + from __future__ import annotations from pathlib import Path @@ -369,14 +370,18 @@ def test_parse_error_wraps_validation_error(self): def test_family_table_covers_core_expectations(): """Every expect_*.py file in expectations/core/ must appear in _FAMILY_TABLE.""" core_dir = ( - Path(__file__).parent / ".." / ".." / ".." / ".." - / "great_expectations" / "expectations" / "core" + Path(__file__).parent + / ".." + / ".." + / ".." + / ".." 
+ / "great_expectations" + / "expectations" + / "core" ) core_files = list(core_dir.glob("expect_*.py")) expectation_names = { - f.name.replace(".py", "") - for f in core_files - if not f.name.startswith("__") + f.name.replace(".py", "") for f in core_files if not f.name.startswith("__") } missing = expectation_names - set(_FAMILY_TABLE.keys()) assert not missing, f"Missing from _FAMILY_TABLE: {sorted(missing)}" diff --git a/tests/unit/core/validation_result_schemas/test_field_validators.py b/tests/unit/core/validation_result_schemas/test_field_validators.py index e51515798066..8bab1ac360cc 100644 --- a/tests/unit/core/validation_result_schemas/test_field_validators.py +++ b/tests/unit/core/validation_result_schemas/test_field_validators.py @@ -9,6 +9,7 @@ All tests are marked @pytest.mark.unit and run via: pytest tests/unit/core/validation_result_schemas/test_field_validators.py -m unit """ + from __future__ import annotations from typing import Any, Dict, List, Optional @@ -34,9 +35,9 @@ class _PartialCountsModel(pydantic.BaseModel): partial_unexpected_counts: Optional[List[Any]] = None - _validate_counts = pydantic.validator( - "partial_unexpected_counts", pre=True, allow_reuse=True - )(validate_partial_unexpected_counts_fallback) + _validate_counts = pydantic.validator("partial_unexpected_counts", pre=True, allow_reuse=True)( + validate_partial_unexpected_counts_fallback + ) class _PassthroughModel(pydantic.BaseModel): @@ -63,9 +64,7 @@ class _EngineHintModel(pydantic.BaseModel): return_unexpected_index_query: Optional[bool] = None unexpected_index_query: Optional[str] = None - _root_validate = pydantic.root_validator(allow_reuse=True)( - root_validate_engine_required_fields - ) + _root_validate = pydantic.root_validator(allow_reuse=True)(root_validate_engine_required_fields) # --------------------------------------------------------------------------- @@ -128,6 +127,7 @@ def test_classify_pandas_dataframe() -> None: def 
test_classify_spark_dataframe_other_when_pyspark_unavailable() -> None: """When pyspark is unavailable, a mock object named DataFrame from pyspark should be classified as DATAFRAME_SPARK if it looks like pyspark, or OTHER otherwise.""" + # Without actual pyspark, we simulate the check using a mock # The classifier should detect pyspark via module path inspection class _FakeSparkDataFrame: @@ -154,6 +154,7 @@ class _CustomObject: @pytest.mark.unit def test_classify_never_raises() -> None: """classify_runtime_type must never raise regardless of input.""" + # Includes edge cases: class instances, iterators, generators class _WeirdObject: def __class_getitem__(cls, item: Any) -> Any: diff --git a/tests/unit/core/validation_result_schemas/test_findings_emitter.py b/tests/unit/core/validation_result_schemas/test_findings_emitter.py index 853d2137a7c3..d5425612b737 100644 --- a/tests/unit/core/validation_result_schemas/test_findings_emitter.py +++ b/tests/unit/core/validation_result_schemas/test_findings_emitter.py @@ -9,6 +9,7 @@ All tests are marked @pytest.mark.unit and run via: pytest tests/unit/core/validation_result_schemas/test_findings_emitter.py -m unit """ + from __future__ import annotations import json @@ -204,9 +205,7 @@ def test_explicit_output_dir_overrides_env_var( @pytest.mark.unit -def test_default_dir_used_when_no_env_var( - tmp_path: Path, monkeypatch: pytest.MonkeyPatch -) -> None: +def test_default_dir_used_when_no_env_var(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: """When neither arg nor env var is set, _DEFAULT_DIR is used.""" monkeypatch.delenv(_ENV_VAR, raising=False) diff --git a/tests/unit/core/validation_result_schemas/test_format_config.py b/tests/unit/core/validation_result_schemas/test_format_config.py index 566cff6d4216..e2670915dd03 100644 --- a/tests/unit/core/validation_result_schemas/test_format_config.py +++ b/tests/unit/core/validation_result_schemas/test_format_config.py @@ -3,6 +3,7 @@ Round-trips 
parse_result_format() output under each ResultFormat value, asserting required keys are present and optional keys behave correctly. """ + from __future__ import annotations import pytest diff --git a/tests/unit/core/validation_result_schemas/test_runner_helpers.py b/tests/unit/core/validation_result_schemas/test_runner_helpers.py index c9c3d075c01e..544242c5dd78 100644 --- a/tests/unit/core/validation_result_schemas/test_runner_helpers.py +++ b/tests/unit/core/validation_result_schemas/test_runner_helpers.py @@ -11,6 +11,7 @@ All tests are marked @pytest.mark.unit and run via: pytest tests/unit/core/validation_result_schemas/test_runner_helpers.py -m unit """ + from __future__ import annotations from typing import Any, Dict, Optional diff --git a/tests/unit/core/validation_result_schemas/test_schemas_aggregate.py b/tests/unit/core/validation_result_schemas/test_schemas_aggregate.py index 1ffe7fb6d511..458d39dcce8d 100644 --- a/tests/unit/core/validation_result_schemas/test_schemas_aggregate.py +++ b/tests/unit/core/validation_result_schemas/test_schemas_aggregate.py @@ -12,6 +12,7 @@ All tests are marked @pytest.mark.unit and run via: pytest tests/unit/core/validation_result_schemas/test_schemas_aggregate.py -m unit """ + from __future__ import annotations import pytest diff --git a/tests/unit/core/validation_result_schemas/test_schemas_map.py b/tests/unit/core/validation_result_schemas/test_schemas_map.py index 9a1d8f5fe064..ac0b666598a2 100644 --- a/tests/unit/core/validation_result_schemas/test_schemas_map.py +++ b/tests/unit/core/validation_result_schemas/test_schemas_map.py @@ -13,6 +13,7 @@ All tests are marked @pytest.mark.unit and run via: pytest tests/unit/core/validation_result_schemas/test_schemas_map.py -m unit """ + from __future__ import annotations import pytest diff --git a/tests/unit/core/validation_result_schemas/test_schemas_overrides.py b/tests/unit/core/validation_result_schemas/test_schemas_overrides.py index e2317bb8bef7..6db8a7391eb4 100644 
--- a/tests/unit/core/validation_result_schemas/test_schemas_overrides.py +++ b/tests/unit/core/validation_result_schemas/test_schemas_overrides.py @@ -9,6 +9,7 @@ All tests are marked @pytest.mark.unit and run via: pytest tests/unit/core/validation_result_schemas/test_schemas_overrides.py -m unit """ + from __future__ import annotations import pytest From f1f88476a77dc2293d92f169ebcde406f904d25a Mon Sep 17 00:00:00 2001 From: Josh Stauffer <66793731+joshua-stauffer@users.noreply.github.com> Date: Thu, 7 May 2026 20:53:10 +0200 Subject: [PATCH 20/20] fix(validation-result-schemas): remove unused type: ignore in emitter; add test files to mypy exclude --- .../core/validation_result_schemas/findings_emitter.py | 6 +++--- pyproject.toml | 8 ++++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/great_expectations/core/validation_result_schemas/findings_emitter.py b/great_expectations/core/validation_result_schemas/findings_emitter.py index a72eee64fc4a..7ddf43d9b164 100644 --- a/great_expectations/core/validation_result_schemas/findings_emitter.py +++ b/great_expectations/core/validation_result_schemas/findings_emitter.py @@ -84,9 +84,9 @@ def close(self) -> None: sorted_findings: List[Finding] = sorted( self._findings, key=lambda f: ( - f.get("expectation_type", ""), # type: ignore[call-overload] - f.get("engine", ""), # type: ignore[call-overload] - f.get("result_format", ""), # type: ignore[call-overload] + f.get("expectation_type", ""), + f.get("engine", ""), + f.get("result_format", ""), ), ) diff --git a/pyproject.toml b/pyproject.toml index f01ecd3d2cef..9651f0e5998f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -171,6 +171,14 @@ exclude = [ 'tests/validator/test_metric_configuration\.py', 'tests/validator/test_metrics_calculator\.py', 'tests/validator/test_validation_graph\.py', + # validation_result_schemas: pydantic v1 **kwargs unpacking and intentional call-arg tests + 
'tests/unit/core/validation_result_schemas/test_format_config\.py', # 13 + 'tests/unit/core/validation_result_schemas/test_schemas_map\.py', # 57 + 'tests/unit/core/validation_result_schemas/test_schemas_aggregate\.py', # 71 + 'tests/unit/core/validation_result_schemas/test_schemas_overrides\.py', # 6 + 'tests/unit/core/validation_result_schemas/test_cases_table\.py', # 1 + 'tests/integration/data_sources_and_expectations/expectations/_validation_result_schemas_cases\.py', # 2 + 'tests/integration/data_sources_and_expectations/expectations/test_validation_result_schemas_matrix\.py', # 11 ] [[tool.mypy.overrides]]