Skip to content

[MAINTENANCE] refactor to remove rule based profiler #11029

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Open

joshua-stauffer wants to merge 4 commits into develop from m/remove_rule_based_profiler

Conversation

joshua-stauffer
Copy link
Member

remove deprecated code.

Copy link

netlify bot commented Mar 14, 2025

Deploy Preview for niobium-lead-7998 canceled.

Name Link
🔨 Latest commit b0da768
🔍 Latest deploy log https://app.netlify.com/sites/niobium-lead-7998/deploys/67d3eb890169410008223c1c

Copy link

codecov bot commented Mar 14, 2025

❌ 2 Tests Failed:

Tests completed Failed Passed Skipped
21993 2 21991 5715
View the top 2 failed test(s) by shortest run time
tests.experimental.metric_repository.test_metric_list_metric_retriever_integration::test_get_metrics_full_cdm
Stack Traces | 0.055s run time
cloud_context_and_batch_request_with_simple_dataframe = ({
  "checkpoint_store_name": "default_checkpoint_store",
  "config_version": 4,
  "data_context_id": "12345678-1234-5...-01-02
2                   NaN                   NaN  ...    True 2020-01-03

[3 rows x 6 columns]}, partitioner=None))

    @pytest.mark.cloud
    def test_get_metrics_full_cdm(
        cloud_context_and_batch_request_with_simple_dataframe: tuple[CloudDataContext, BatchRequest],
    ):
        context, batch_request = cloud_context_and_batch_request_with_simple_dataframe
        cdm_metrics_list: List[MetricTypes] = [
            MetricTypes.TABLE_ROW_COUNT,
            MetricTypes.TABLE_COLUMNS,
            MetricTypes.TABLE_COLUMN_TYPES,
            MetricTypes.COLUMN_MIN,
            MetricTypes.COLUMN_MAX,
            MetricTypes.COLUMN_MEAN,
            MetricTypes.COLUMN_MEDIAN,
            MetricTypes.COLUMN_NULL_COUNT,
        ]
        metric_retriever = MetricListMetricRetriever(context)
>       metrics = metric_retriever.get_metrics(
            batch_request=batch_request, metric_list=cdm_metrics_list
        )

.../experimental/metric_repository/test_metric_list_metric_retriever_integration.py:124: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
.../hostedtoolcache/Python/3.9.21....../x64/lib/python3.9.../experimental/metric_repository/metric_list_metric_retriever.py:76: in get_metrics
    numeric_column_names = self._get_numeric_column_names(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <great_expectations.experimental.metric_repository.metric_list_metric_retriever.MetricListMetricRetriever object at 0x7fc44dd4a100>
batch_request = BatchRequest(datasource_name='my_pandas_datasource', data_asset_name='dataframe', options={'dataframe':    numeric_wit...0-01-02
2                   NaN                   NaN  ...    True 2020-01-03

[3 rows x 6 columns]}, partitioner=None)
exclude_column_names = []

    def _get_numeric_column_names(
        self,
        batch_request: BatchRequest,
        exclude_column_names: List[str],
    ) -> list[str]:
        """Get the names of all numeric columns in the batch."""
        table_column_types = self._get_table_column_types(batch_request)
        numeric_column_names = []
        for column_type in table_column_types.value:
>           if column_type.get("type") and column_type["type"].upper() in [
                "FLOAT",
                "INTEGER",
                "NUMERIC",
                "DECIMAL",
            ]:
E           AttributeError: 'numpy.dtypes.Float64DType' object has no attribute 'upper'

.../hostedtoolcache/Python/3.9.21....../x64/lib/python3.9.../experimental/metric_repository/metric_retriever.py:162: AttributeError
tests.experimental.metric_repository.test_metric_list_metric_retriever_integration::test_get_metrics_table_metrics_only
Stack Traces | 0.067s run time
cloud_context_and_batch_request_with_simple_dataframe = ({
  "checkpoint_store_name": "default_checkpoint_store",
  "config_version": 4,
  "data_context_id": "12345678-1234-5...-01-02
2                   NaN                   NaN  ...    True 2020-01-03

[3 rows x 6 columns]}, partitioner=None))

    @pytest.mark.cloud
    def test_get_metrics_table_metrics_only(
        cloud_context_and_batch_request_with_simple_dataframe: tuple[CloudDataContext, BatchRequest],
    ):
        context, batch_request = cloud_context_and_batch_request_with_simple_dataframe
        table_metrics_list: List[MetricTypes] = [
            MetricTypes.TABLE_ROW_COUNT,
            MetricTypes.TABLE_COLUMNS,
            MetricTypes.TABLE_COLUMN_TYPES,
        ]
        metric_retriever = MetricListMetricRetriever(context)
        metrics = metric_retriever.get_metrics(
            batch_request=batch_request, metric_list=table_metrics_list
        )
        validator = context.get_validator(batch_request=batch_request)
        batch_id = validator.active_batch.id
    
        expected_metrics = [
            TableMetric[int](
                batch_id=batch_id,
                metric_name="table.row_count",
                value=3,
                exception=None,
            ),
            TableMetric[List[str]](
                batch_id=batch_id,
                metric_name="table.columns",
                value=[
                    "numeric_with_nulls_1",
                    "numeric_with_nulls_2",
                    "string",
                    "string_with_nulls",
                    "boolean",
                    "datetime",
                ],
                exception=None,
            ),
            TableMetric[List[str]](
                batch_id=batch_id,
                metric_name="table.column_types",
                value=[
                    {"name": "numeric_with_nulls_1", "type": "float64"},
                    {"name": "numeric_with_nulls_2", "type": "float64"},
                    {"name": "string", "type": "object"},
                    {"name": "string_with_nulls", "type": "object"},
                    {"name": "boolean", "type": "bool"},
                    {"name": "datetime", "type": "datetime64[ns]"},
                ],
                exception=None,
            ),
        ]
    
        # Assert each metric so it is easier to see which one fails (instead of assert metrics == expected_metrics):  # noqa: E501 # FIXME CoP
        assert len(metrics) == len(expected_metrics)
        for metric in metrics:
>           assert metric.dict() in [expected_metric.dict() for expected_metric in expected_metrics]
E           AssertionError: assert {'batch_id': 'my_pandas_datasource-dataframe', 'metric_name': 'table.column_types', 'value': [{'name': 'numeric_with_nulls_1', 'type': dtype('float64')}, {'name': 'numeric_with_nulls_2', 'type': dtype('float64')}, {'name': 'string', 'type': dtype('O')}, {'name': 'string_with_nulls', 'type': dtype('O')}, {'name': 'boolean', 'type': dtype('bool')}, {'name': 'datetime', 'type': dtype('<M8[ns]')}], 'exception': None, '__orig_class__': great_expectations.experimental.metric_repository.metrics.TableMetric[list[dict[str, str]]], 'value_type': 'list[dict[str, str]]', 'metric_type': 'TableMetric'} in [{'batch_id': 'my_pandas_datasource-dataframe', 'metric_name': 'table.row_count', 'value': 3, 'exception': None, '__orig_class__': great_expectations.experimental.metric_repository.metrics.TableMetric[int], 'value_type': 'int', 'metric_type': 'TableMetric'}, {'batch_id': 'my_pandas_datasource-dataframe', 'metric_name': 'table.columns', 'value': ['numeric_with_nulls_1', 'numeric_with_nulls_2', 'string', 'string_with_nulls', 'boolean', 'datetime'], 'exception': None, '__orig_class__': great_expectations.experimental.metric_repository.metrics.TableMetric[typing.List[str]], 'value_type': 'typing.List[str]', 'metric_type': 'TableMetric'}, {'batch_id': 'my_pandas_datasource-dataframe', 'metric_name': 'table.column_types', 'value': [{'name': 'numeric_with_nulls_1', 'type': 'float64'}, {'name': 'numeric_with_nulls_2', 'type': 'float64'}, {'name': 'string', 'type': 'object'}, {'name': 'string_with_nulls', 'type': 'object'}, {'name': 'boolean', 'type': 'bool'}, {'name': 'datetime', 'type': 'datetime64[ns]'}], 'exception': None, '__orig_class__': great_expectations.experimental.metric_repository.metrics.TableMetric[typing.List[str]], 'value_type': 'typing.List[str]', 'metric_type': 'TableMetric'}]
E            +  where {'batch_id': 'my_pandas_datasource-dataframe', 'metric_name': 'table.column_types', 'value': [{'name': 'numeric_with_nulls_1', 'type': dtype('float64')}, {'name': 'numeric_with_nulls_2', 'type': dtype('float64')}, {'name': 'string', 'type': dtype('O')}, {'name': 'string_with_nulls', 'type': dtype('O')}, {'name': 'boolean', 'type': dtype('bool')}, {'name': 'datetime', 'type': dtype('<M8[ns]')}], 'exception': None, '__orig_class__': great_expectations.experimental.metric_repository.metrics.TableMetric[list[dict[str, str]]], 'value_type': 'list[dict[str, str]]', 'metric_type': 'TableMetric'} = dict()
E            +    where dict = TableMetric(batch_id='my_pandas_datasource-dataframe', metric_name='table.column_types', value=[{'name': 'numeric_with_nulls_1', 'type': dtype('float64')}, {'name': 'numeric_with_nulls_2', 'type': dtype('float64')}, {'name': 'string', 'type': dtype('O')}, {'name': 'string_with_nulls', 'type': dtype('O')}, {'name': 'boolean', 'type': dtype('bool')}, {'name': 'datetime', 'type': dtype('<M8[ns]')}], exception=None).dict

.../experimental/metric_repository/test_metric_list_metric_retriever_integration.py:105: AssertionError

To view more test analytics, go to the Test Analytics Dashboard
📋 Got 3 mins? Take this short survey to help us improve Test Analytics.

Merge branch 'develop' of github.com/great-expectations/great_expectations into m/remove_rule_based_profiler
@joshua-stauffer joshua-stauffer changed the title refactor to remove rule based profiler [MAINTENANCE] refactor to remove rule based profiler Mar 14, 2025
@joshua-stauffer joshua-stauffer marked this pull request as ready for review March 14, 2025 09:14
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

1 participant