All expectations fail if one fails with 'unrecognized condition_parser None for Spark execution engine' #10709

@iamamutt

Description

Describe the bug
Omitting condition_parser on a single expectation that uses a row_condition (on the Spark execution engine) causes every expectation in the suite to fail with the same MetricResolutionError, including expectations that do not use a row_condition at all.

To Reproduce

import tempfile

from pprint import pprint

import great_expectations as gx
import pandas as pd

data = {
    'col1': [1, 2, 3, 4, 5],
    'col2': ['A', 'B', 'C', 'D', None],
    'col3': [1.1, None, 3.3, 4.4, 5.5],
}


def validate(dir_path: str, file_name: str):
    context = gx.get_context(mode='ephemeral')
    suite = context.suites.add(
        gx.ExpectationSuite(
            name='test-suite',
            expectations=[
                gx.expectations.ExpectColumnValuesToNotBeNull(
                    column='col1', result_format='COMPLETE'
                ),
                gx.expectations.ExpectColumnValuesToBeInSet(
                    column='col2',
                    value_set=['A', 'B', 'C'],
                    row_condition='col3 IS NOT NULL',
                    mostly=0.665,
                    # condition_parser='spark',
                    result_format='COMPLETE',
                ),
            ],
        )
    )

    return gx.ValidationDefinition(
        name='test-validation',
        data=(
            context.data_sources.add_spark_filesystem(
                name='test-spark-fs',
                base_directory=dir_path,
            )
            .add_csv_asset(
                name='csv-asset',
                sep=',',
                header=True,
                infer_schema=True,
            )
            .add_batch_definition_path(
                name='test-data',
                path=file_name,
            )
        ),
        suite=suite,
    ).run()


with tempfile.TemporaryDirectory() as dir_path:
    file_name = 'data.csv'
    pd.DataFrame(data).to_csv(f'{dir_path}/{file_name}', index=False)
    result = validate(dir_path, file_name)
    pprint(result.to_json_dict(), sort_dicts=False, width=100)
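
As noted by the commented-out line above, explicitly passing condition_parser='spark' on the second expectation appears to avoid the error (an assumption based on the error message below; not verified across GX versions):

# Workaround sketch (assumption): always pass condition_parser together with
# row_condition when validating against the Spark execution engine.
gx.expectations.ExpectColumnValuesToBeInSet(
    column='col2',
    value_set=['A', 'B', 'C'],
    row_condition='col3 IS NOT NULL',
    condition_parser='spark',  # omitting this produces the failure reported below
    mostly=0.665,
    result_format='COMPLETE',
)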

Expected behavior
Only the misconfigured expectation (the one with a row_condition but no condition_parser) should fail and report exception_info; the other expectations in the suite should still be evaluated independently.

Environment (please complete the following information):

  • Operating System: Linux
  • Great Expectations Version: 1.2.4
  • Data Source: Spark file

Additional context

Full validation result (JSON):
{
  "success": false,
  "results": [
    {
      "success": false,
      "expectation_config": {
        "type": "expect_column_values_to_not_be_null",
        "kwargs": {
          "result_format": "COMPLETE",
          "column": "col1",
          "batch_id": "test-spark-fs-csv-asset"
        },
        "meta": {},
        "id": "0e734de0-e872-43e7-adc7-07c868c689d7"
      },
      "result": {},
      "meta": {},
      "exception_info": {
        "('table.row_count', '0dfa72ce94f9f181a7dc04305a6c30f7', ())": {
          "exception_traceback": "Traceback (most recent call last):\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 545, in _process_direct_and_bundled_metric_computation_configurations\n    self.resolve_metric_bundle(metric_fn_bundle=metric_fn_bundle_configurations)\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/sparkdf_execution_engine.py\", line 900, in resolve_metric_bundle\n    df: pyspark.DataFrame = self.get_domain_records(domain_kwargs=domain_kwargs)\n                            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/sparkdf_execution_engine.py\", line 681, in get_domain_records\n    raise GreatExpectationsError(  # noqa: TRY003\ngreat_expectations.exceptions.exceptions.GreatExpectationsError: unrecognized condition_parser None for Spark execution engine\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/validator/validation_graph.py\", line 276, in _resolve\n    self._execution_engine.resolve_metrics(\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 279, in resolve_metrics\n    return self._process_direct_and_bundled_metric_computation_configurations(\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 549, in _process_direct_and_bundled_metric_computation_configurations\n    raise gx_exceptions.MetricResolutionError(\ngreat_expectations.exceptions.exceptions.MetricResolutionError: unrecognized condition_parser None for Spark execution engine\n",
          "exception_message": "unrecognized condition_parser None for Spark execution engine",
          "raised_exception": true
        }
      }
    },
    {
      "success": false,
      "expectation_config": {
        "type": "expect_column_values_to_be_in_set",
        "kwargs": {
          "result_format": "COMPLETE",
          "column": "col2",
          "mostly": 0.665,
          "row_condition": "col3 IS NOT NULL",
          "value_set": [
            "A",
            "B",
            "C"
          ],
          "batch_id": "test-spark-fs-csv-asset"
        },
        "meta": {},
        "id": "c648e872-154c-4374-9cf1-cb8751e1c6d2"
      },
      "result": {},
      "meta": {},
      "exception_info": {
        "('table.column_types', 'e48bc318d7e9c92e270e3f7ab807c1b8', 'include_nested=True')": {
          "exception_traceback": "Traceback (most recent call last):\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 532, in _process_direct_and_bundled_metric_computation_configurations\n    metric_computation_configuration.metric_fn(  # type: ignore[misc] # F not callable\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/expectations/metrics/metric_provider.py\", line 60, in inner_func\n    return metric_fn(*args, **kwargs)\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/expectations/metrics/table_metrics/table_column_types.py\", line 81, in _spark\n    df, _, _ = execution_engine.get_compute_domain(\n               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/sparkdf_execution_engine.py\", line 800, in get_compute_domain\n    data: pyspark.DataFrame = self.get_domain_records(domain_kwargs=domain_kwargs)\n                              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/sparkdf_execution_engine.py\", line 681, in get_domain_records\n    raise GreatExpectationsError(  # noqa: TRY003\ngreat_expectations.exceptions.exceptions.GreatExpectationsError: unrecognized condition_parser None for Spark execution engine\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/validator/validation_graph.py\", line 276, in _resolve\n    self._execution_engine.resolve_metrics(\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 279, in resolve_metrics\n    return self._process_direct_and_bundled_metric_computation_configurations(\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 537, in _process_direct_and_bundled_metric_computation_configurations\n    raise gx_exceptions.MetricResolutionError(\ngreat_expectations.exceptions.exceptions.MetricResolutionError: unrecognized condition_parser None for Spark execution engine\n",
          "exception_message": "unrecognized condition_parser None for Spark execution engine",
          "raised_exception": true
        },
        "('table.row_count', 'e48bc318d7e9c92e270e3f7ab807c1b8', ())": {
          "exception_traceback": "Traceback (most recent call last):\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 545, in _process_direct_and_bundled_metric_computation_configurations\n    self.resolve_metric_bundle(metric_fn_bundle=metric_fn_bundle_configurations)\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/sparkdf_execution_engine.py\", line 900, in resolve_metric_bundle\n    df: pyspark.DataFrame = self.get_domain_records(domain_kwargs=domain_kwargs)\n                            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/sparkdf_execution_engine.py\", line 681, in get_domain_records\n    raise GreatExpectationsError(  # noqa: TRY003\ngreat_expectations.exceptions.exceptions.GreatExpectationsError: unrecognized condition_parser None for Spark execution engine\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/validator/validation_graph.py\", line 276, in _resolve\n    self._execution_engine.resolve_metrics(\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 279, in resolve_metrics\n    return self._process_direct_and_bundled_metric_computation_configurations(\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 549, in _process_direct_and_bundled_metric_computation_configurations\n    raise gx_exceptions.MetricResolutionError(\ngreat_expectations.exceptions.exceptions.MetricResolutionError: unrecognized condition_parser None for Spark execution engine\n",
          "exception_message": "unrecognized condition_parser None for Spark execution engine",
          "raised_exception": true
        }
      }
    }
  ],
  "suite_name": "test-suite",
  "suite_parameters": {},
  "statistics": {
    "evaluated_expectations": 2,
    "successful_expectations": 0,
    "unsuccessful_expectations": 2,
    "success_percent": 0.0
  },
  "meta": {
    "great_expectations_version": "1.2.4",
    "batch_spec": {
      "path": "/tmp/tmpgf8032g5/data.csv",
      "reader_method": "csv",
      "reader_options": {
        "sep": ",",
        "header": true,
        "inferSchema": true
      }
    },
    "batch_markers": {
      "ge_load_time": "20241126T222652.157081Z"
    },
    "active_batch_definition": {
      "datasource_name": "test-spark-fs",
      "data_connector_name": "fluent",
      "data_asset_name": "csv-asset",
      "batch_identifiers": {
        "path": "data.csv"
      },
      "batching_regex": "(?P<path>data.csv)"
    },
    "validation_id": "cd188ea7-bedf-4f8a-9898-1cf823b69b5f",
    "checkpoint_id": null,
    "batch_parameters": null
  },
  "id": null
}
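
Based only on the tracebacks, every failing metric bottoms out in get_domain_records in sparkdf_execution_engine.py (line 681 in 1.2.4), which rejects any domain carrying a row condition whose condition_parser it does not recognize. A rough paraphrase, reconstructed from the traceback rather than the actual source (the helper name and control flow are guesses; only the error text is verbatim):

from great_expectations.exceptions import GreatExpectationsError


def _apply_row_condition(data, domain_kwargs):
    # Hypothetical helper illustrating the branch the tracebacks point at.
    row_condition = domain_kwargs.get('row_condition')
    if not row_condition:
        return data
    condition_parser = domain_kwargs.get('condition_parser')
    if condition_parser == 'spark':
        # row_condition is a Spark SQL expression, e.g. 'col3 IS NOT NULL'
        return data.filter(row_condition)
    raise GreatExpectationsError(
        f'unrecognized condition_parser {condition_parser} for Spark execution engine'
    )

What stands out in the result above is that the first expectation has no row_condition at all, yet its table.row_count metric fails with the same "unrecognized condition_parser None" message, which is why the entire suite reports zero successful expectations instead of only the misconfigured expectation failing.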
