Describe the bug
Omitting condition_parser on a single expectation that uses row_condition causes every expectation in the suite to fail with a MetricResolutionError, not just the expectation that is missing the parser.
To Reproduce
import tempfile
from pprint import pprint

import great_expectations as gx
import pandas as pd

data = {
    'col1': [1, 2, 3, 4, 5],
    'col2': ['A', 'B', 'C', 'D', None],
    'col3': [1.1, None, 3.3, 4.4, 5.5],
}


def validate(dir_path: str, file_name: str):
    context = gx.get_context(mode='ephemeral')
    suite = context.suites.add(
        gx.ExpectationSuite(
            name='test-suite',
            expectations=[
                gx.expectations.ExpectColumnValuesToNotBeNull(
                    column='col1', result_format='COMPLETE'
                ),
                gx.expectations.ExpectColumnValuesToBeInSet(
                    column='col2',
                    value_set=['A', 'B', 'C'],
                    row_condition='col3 IS NOT NULL',
                    mostly=0.665,
                    # condition_parser='spark',
                    result_format='COMPLETE',
                ),
            ],
        )
    )
    return gx.ValidationDefinition(
        name='test-validation',
        data=(
            context.data_sources.add_spark_filesystem(
                name='test-spark-fs',
                base_directory=dir_path,
            )
            .add_csv_asset(
                name='csv-asset',
                sep=',',
                header=True,
                infer_schema=True,
            )
            .add_batch_definition_path(
                name='test-data',
                path=file_name,
            )
        ),
        suite=suite,
    ).run()


with tempfile.TemporaryDirectory() as dir_path:
    file_name = 'data.csv'
    pd.DataFrame(data).to_csv(f'{dir_path}/{file_name}', index=False)
    result = validate(dir_path, file_name)
    pprint(result.to_json_dict(), sort_dicts=False, width=100)

Expected behavior
Only the expectation missing condition_parser should fail and report exception info; the other expectations in the suite should still be evaluated normally.
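For comparison, explicitly setting the condition_parser (the line commented out in the reproduction above) avoids the error entirely. This is only the working variant of the same expectation, not a fix for the reporting behavior described in this issue:

    # Same expectation as in the repro, with condition_parser uncommented.
    # With this set, neither expectation raises MetricResolutionError.
    gx.expectations.ExpectColumnValuesToBeInSet(
        column='col2',
        value_set=['A', 'B', 'C'],
        row_condition='col3 IS NOT NULL',
        condition_parser='spark',
        mostly=0.665,
        result_format='COMPLETE',
    )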
Environment (please complete the following information):
- Operating System: Linux
- Great Expectations Version: 1.2.4
- Data Source: Spark file
Additional context
Full validation result (JSON):
{
"success": false,
"results": [
{
"success": false,
"expectation_config": {
"type": "expect_column_values_to_not_be_null",
"kwargs": {
"result_format": "COMPLETE",
"column": "col1",
"batch_id": "test-spark-fs-csv-asset"
},
"meta": {},
"id": "0e734de0-e872-43e7-adc7-07c868c689d7"
},
"result": {},
"meta": {},
"exception_info": {
"('table.row_count', '0dfa72ce94f9f181a7dc04305a6c30f7', ())": {
"exception_traceback": "Traceback (most recent call last):\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 545, in _process_direct_and_bundled_metric_computation_configurations\n self.resolve_metric_bundle(metric_fn_bundle=metric_fn_bundle_configurations)\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/sparkdf_execution_engine.py\", line 900, in resolve_metric_bundle\n df: pyspark.DataFrame = self.get_domain_records(domain_kwargs=domain_kwargs)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/sparkdf_execution_engine.py\", line 681, in get_domain_records\n raise GreatExpectationsError( # noqa: TRY003\ngreat_expectations.exceptions.exceptions.GreatExpectationsError: unrecognized condition_parser None for Spark execution engine\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/validator/validation_graph.py\", line 276, in _resolve\n self._execution_engine.resolve_metrics(\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 279, in resolve_metrics\n return self._process_direct_and_bundled_metric_computation_configurations(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 549, in _process_direct_and_bundled_metric_computation_configurations\n raise gx_exceptions.MetricResolutionError(\ngreat_expectations.exceptions.exceptions.MetricResolutionError: unrecognized condition_parser None for Spark execution engine\n",
"exception_message": "unrecognized condition_parser None for Spark execution engine",
"raised_exception": true
}
}
},
{
"success": false,
"expectation_config": {
"type": "expect_column_values_to_be_in_set",
"kwargs": {
"result_format": "COMPLETE",
"column": "col2",
"mostly": 0.665,
"row_condition": "col3 IS NOT NULL",
"value_set": [
"A",
"B",
"C"
],
"batch_id": "test-spark-fs-csv-asset"
},
"meta": {},
"id": "c648e872-154c-4374-9cf1-cb8751e1c6d2"
},
"result": {},
"meta": {},
"exception_info": {
"('table.column_types', 'e48bc318d7e9c92e270e3f7ab807c1b8', 'include_nested=True')": {
"exception_traceback": "Traceback (most recent call last):\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 532, in _process_direct_and_bundled_metric_computation_configurations\n metric_computation_configuration.metric_fn( # type: ignore[misc] # F not callable\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/expectations/metrics/metric_provider.py\", line 60, in inner_func\n return metric_fn(*args, **kwargs)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/expectations/metrics/table_metrics/table_column_types.py\", line 81, in _spark\n df, _, _ = execution_engine.get_compute_domain(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/sparkdf_execution_engine.py\", line 800, in get_compute_domain\n data: pyspark.DataFrame = self.get_domain_records(domain_kwargs=domain_kwargs)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/sparkdf_execution_engine.py\", line 681, in get_domain_records\n raise GreatExpectationsError( # noqa: TRY003\ngreat_expectations.exceptions.exceptions.GreatExpectationsError: unrecognized condition_parser None for Spark execution engine\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/validator/validation_graph.py\", line 276, in _resolve\n self._execution_engine.resolve_metrics(\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 279, in resolve_metrics\n return self._process_direct_and_bundled_metric_computation_configurations(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 537, in _process_direct_and_bundled_metric_computation_configurations\n raise gx_exceptions.MetricResolutionError(\ngreat_expectations.exceptions.exceptions.MetricResolutionError: unrecognized condition_parser None for Spark execution engine\n",
"exception_message": "unrecognized condition_parser None for Spark execution engine",
"raised_exception": true
},
"('table.row_count', 'e48bc318d7e9c92e270e3f7ab807c1b8', ())": {
"exception_traceback": "Traceback (most recent call last):\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 545, in _process_direct_and_bundled_metric_computation_configurations\n self.resolve_metric_bundle(metric_fn_bundle=metric_fn_bundle_configurations)\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/sparkdf_execution_engine.py\", line 900, in resolve_metric_bundle\n df: pyspark.DataFrame = self.get_domain_records(domain_kwargs=domain_kwargs)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/sparkdf_execution_engine.py\", line 681, in get_domain_records\n raise GreatExpectationsError( # noqa: TRY003\ngreat_expectations.exceptions.exceptions.GreatExpectationsError: unrecognized condition_parser None for Spark execution engine\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/validator/validation_graph.py\", line 276, in _resolve\n self._execution_engine.resolve_metrics(\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 279, in resolve_metrics\n return self._process_direct_and_bundled_metric_computation_configurations(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 549, in _process_direct_and_bundled_metric_computation_configurations\n raise gx_exceptions.MetricResolutionError(\ngreat_expectations.exceptions.exceptions.MetricResolutionError: unrecognized condition_parser None for Spark execution engine\n",
"exception_message": "unrecognized condition_parser None for Spark execution engine",
"raised_exception": true
}
}
}
],
"suite_name": "test-suite",
"suite_parameters": {},
"statistics": {
"evaluated_expectations": 2,
"successful_expectations": 0,
"unsuccessful_expectations": 2,
"success_percent": 0.0
},
"meta": {
"great_expectations_version": "1.2.4",
"batch_spec": {
"path": "/tmp/tmpgf8032g5/data.csv",
"reader_method": "csv",
"reader_options": {
"sep": ",",
"header": true,
"inferSchema": true
}
},
"batch_markers": {
"ge_load_time": "20241126T222652.157081Z"
},
"active_batch_definition": {
"datasource_name": "test-spark-fs",
"data_connector_name": "fluent",
"data_asset_name": "csv-asset",
"batch_identifiers": {
"path": "data.csv"
},
"batching_regex": "(?P<path>data.csv)"
},
"validation_id": "cd188ea7-bedf-4f8a-9898-1cf823b69b5f",
"checkpoint_id": null,
"batch_parameters": null
},
"id": null
}
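For triage, here is a small sketch (assuming the result object returned by validate() above) that separates expectations that raised exceptions from those that merely failed; the key names follow the JSON structure shown above:

    # Sketch: report which expectations raised exceptions vs. failed normally.
    # Assumes `result` is the validation result returned by validate() above.
    for r in result.to_json_dict()['results']:
        etype = r['expectation_config']['type']
        exceptions = [
            info['exception_message']
            for info in (r.get('exception_info') or {}).values()
            if info.get('raised_exception')
        ]
        if exceptions:
            print(f'{etype}: raised exception -> {exceptions[0]}')
        else:
            print(f'{etype}: success={r["success"]}')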