Skip to content

Commit ecb41c7

Browse files
committed
remove pandas pin
1 parent f367c5b commit ecb41c7

File tree

11 files changed

+107
-33
lines changed

11 files changed

+107
-33
lines changed

.github/workflows/ci.yml

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -686,13 +686,10 @@ jobs:
686686
# TODO: would like to adopt `actionlint` pre-commit hook
687687
# but false positive here and inability to do an inline ignore
688688
# prevents this https://github.com/rhysd/actionlint/issues/237
689-
- python-version: ${{ github.event_name == 'pull_request_target' && '3.10' }}
690689
- python-version: ${{ github.event_name == 'pull_request_target' && '3.11' }}
691690
- python-version: ${{ github.event_name == 'pull_request_target' && '3.12' }}
692-
- python-version: ${{ github.event_name == 'merge_group' && '3.10' }}
693691
- python-version: ${{ github.event_name == 'merge_group' && '3.11' }}
694692
- python-version: ${{ github.event_name == 'merge_group' && '3.12' }}
695-
- python-version: ${{ github.event_name == 'workflow_dispatch' && '3.10' }}
696693
- python-version: ${{ github.event_name == 'workflow_dispatch' && '3.11' }}
697694
- python-version: ${{ github.event_name == 'workflow_dispatch' && '3.12' }}
698695
# clickhouse needs dependency update
@@ -785,7 +782,7 @@ jobs:
785782
markers:
786783
# - redshift
787784
- gx-redshift
788-
python-version: ["3.13"]
785+
python-version: ["3.10", "3.13"]
789786

790787
steps:
791788
- name: Checkout
@@ -950,6 +947,29 @@ jobs:
950947
- name: Run the tests
951948
run: invoke ci-tests -m unit --xdist --slowest=10 --timeout=2.0
952949

950+
pandas3-test:
951+
needs: [unit-tests, static-analysis, check-actor-permissions]
952+
if: github.event.pull_request.draft == false
953+
runs-on: ubuntu-latest
954+
steps:
955+
- name: Checkout
956+
uses: actions/checkout@v4
957+
with:
958+
ref: ${{ github.event.pull_request.head.sha }}
959+
960+
- name: Set up Python
961+
uses: actions/setup-python@v5
962+
with:
963+
python-version: "3.10"
964+
cache: "pip"
965+
cache-dependency-path: reqs/requirements-dev-test.txt
966+
967+
- name: Install dependencies
968+
run: pip install . -c ci/constraints-test/pandas3-min-install.txt -r reqs/requirements-dev-test.txt
969+
970+
- name: Run the tests
971+
run: invoke ci-tests -m unit --xdist --slowest=10 --timeout=2.0
972+
953973
airflow-min-versions:
954974
needs: [unit-tests, static-analysis, check-actor-permissions]
955975
runs-on: ubuntu-latest
@@ -1022,6 +1042,7 @@ jobs:
10221042
py312-min-versions,
10231043
py313-min-versions,
10241044
pydantic-v1,
1045+
pandas3-test,
10251046
airflow-min-versions,
10261047
import_gx,
10271048
]
ci/constraints-test/pandas3-min-install.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
pandas>=3.0.0

great_expectations/datasource/fluent/pandas_datasource.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,21 @@ def json( # noqa: PLR0913 # FIXME CoP
358358
"xml", _PandasDataAsset
359359
) # read_xml doesn't exist for pandas < 1.3
360360

361+
# GBQAsset may not be generated if read_gbq is not available (requires pandas-gbq package)
362+
# Create a manual GBQAsset class if it wasn't generated
363+
_GBQ_ASSET_MANUALLY_CREATED = False
364+
if GBQAsset is _PandasDataAsset:
365+
366+
class GBQAsset(_PandasDataAsset): # type: ignore[no-redef]
367+
# instance attributes
368+
type: Literal["gbq"] = "gbq"
369+
query: str
370+
371+
class Config:
372+
extra = pydantic.Extra.forbid
373+
374+
_GBQ_ASSET_MANUALLY_CREATED = True
375+
361376

362377
def _short_id() -> str:
363378
"""
@@ -617,6 +632,9 @@ def _add_asset(self, asset: _DataAssetT, connect_options: dict | None = None) ->
617632

618633

619634
_DYNAMIC_ASSET_TYPES = list(_PANDAS_ASSET_MODELS.values())
635+
# Add manually created GBQAsset if it wasn't generated
636+
if _GBQ_ASSET_MANUALLY_CREATED:
637+
_DYNAMIC_ASSET_TYPES.append(GBQAsset)
620638

621639

622640
@public_api
@@ -989,7 +1007,8 @@ def add_gbq_asset(
9891007
Args:
9901008
name: The name of the GBQ asset. This can be any arbitrary string.
9911009
query: The SQL query to send to Google BigQuery.
992-
**kwargs: Additional keyword arguments to pass to pandas.read_gbq().
1010+
**kwargs: Additional keyword arguments to pass to pandas.read_gbq()
1011+
(or pandas_gbq.read_gbq() for pandas 3.0+).
9931012
9941013
Returns:
9951014
The GBQAsset that has been added to this datasource.
@@ -1014,7 +1033,8 @@ def read_gbq(
10141033
Args:
10151034
query: The SQL query to send to Google BigQuery.
10161035
asset_name: The name of the GBQ asset, should you wish to use it again.
1017-
**kwargs: Additional keyword arguments to pass to pandas.read_gbq().
1036+
**kwargs: Additional keyword arguments to pass to pandas.read_gbq()
1037+
(or pandas_gbq.read_gbq() for pandas 3.0+).
10181038
10191039
Returns:
10201040
A Batch using an ephemeral GBQAsset.

great_expectations/execution_engine/pandas_execution_engine.py

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
)
2121

2222
import pandas as pd
23+
from packaging.version import Version
2324

2425
import great_expectations.exceptions as gx_exceptions
2526
from great_expectations.compatibility import aws, azure, google
@@ -484,15 +485,34 @@ def _get_reader_fn(
484485
"reader_options"
485486
) # This may not be there; use None in that case
486487

487-
try:
488-
reader_fn = getattr(pd, reader_method)
489-
if reader_options:
490-
reader_fn = partial(reader_fn, **reader_options)
491-
return reader_fn
492-
except AttributeError:
493-
raise gx_exceptions.ExecutionEngineError( # noqa: TRY003 # FIXME CoP
494-
f'Unable to find reader_method "{reader_method}" in pandas.'
495-
)
488+
# Handle read_gbq which was removed from pandas 3.0.0+
489+
# Use pandas_gbq.read_gbq instead
490+
if reader_method == "read_gbq":
491+
pandas_version = Version(pd.__version__)
492+
if pandas_version >= Version("3.0.0"):
493+
try:
494+
import pandas_gbq # type: ignore[import-not-found] # Import is only available when installing BigQuery Dependencies
495+
496+
reader_fn = pandas_gbq.read_gbq
497+
except ImportError:
498+
raise gx_exceptions.ExecutionEngineError( # noqa: TRY003 # FIXME CoP
499+
"pandas.read_gbq was removed in pandas 3.0.0. "
500+
"Please install pandas-gbq and use pandas_gbq.read_gbq instead. "
501+
"See https://pandas-gbq.readthedocs.io/ for more information."
502+
)
503+
else:
504+
reader_fn = getattr(pd, reader_method)
505+
else:
506+
try:
507+
reader_fn = getattr(pd, reader_method)
508+
except AttributeError:
509+
raise gx_exceptions.ExecutionEngineError( # noqa: TRY003 # FIXME CoP
510+
f'Unable to find reader_method "{reader_method}" in pandas.'
511+
)
512+
513+
if reader_options:
514+
reader_fn = partial(reader_fn, **reader_options)
515+
return reader_fn
496516

497517
@override
498518
def resolve_metric_bundle(self, metric_fn_bundle) -> dict[MetricConfigurationID, Any]:

great_expectations/expectations/core/expect_column_distinct_values_to_be_in_set.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -385,7 +385,7 @@ def _prescriptive_renderer(
385385

386386
@classmethod
387387
@renderer(renderer_type=LegacyDescriptiveRendererType.VALUE_COUNTS_BAR_CHART)
388-
def _descriptive_value_counts_bar_chart_renderer(
388+
def _descriptive_value_counts_bar_chart_renderer( # noqa: C901 # 134 lines
389389
cls,
390390
configuration: Optional[ExpectationConfiguration] = None,
391391
result: Optional[ExpectationValidationResult] = None,
@@ -406,6 +406,10 @@ def _descriptive_value_counts_bar_chart_renderer(
406406
"count": counts,
407407
}
408408
)
409+
# Convert StringDtype columns to object dtype for Altair compatibility
410+
for col in df.columns:
411+
if isinstance(df[col].dtype, pd.StringDtype):
412+
df[col] = df[col].astype("object")
409413

410414
if len(values) > 60: # noqa: PLR2004 # FIXME CoP
411415
return None

great_expectations/expectations/core/expect_column_kl_divergence_to_be_less_than.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1027,7 +1027,7 @@ def _get_kl_divergence_chart( # noqa: C901 # 13
10271027
return expected_distribution
10281028

10291029
@classmethod
1030-
def _atomic_kl_divergence_chart_template(cls, partition_object: dict) -> tuple:
1030+
def _atomic_kl_divergence_chart_template(cls, partition_object: dict) -> tuple: # noqa: C901 # 134 lines
10311031
weights = partition_object.get("weights", [])
10321032

10331033
chart_pixel_width = (len(weights) / 60.0) * 500
@@ -1079,6 +1079,10 @@ def _atomic_kl_divergence_chart_template(cls, partition_object: dict) -> tuple:
10791079
values = partition_object["values"]
10801080

10811081
df = pd.DataFrame({"values": values, "fraction": weights})
1082+
# Convert StringDtype columns to object dtype for Altair compatibility
1083+
for col in df.columns:
1084+
if isinstance(df[col].dtype, pd.StringDtype):
1085+
df[col] = df[col].astype("object")
10821086

10831087
bars = (
10841088
alt.Chart(df)

requirements.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ numpy>=1.22.4; python_version >= "3.10"
88
numpy>=1.26.0; python_version >= "3.12"
99
numpy>=2.1.0; python_version >= "3.13"
1010
packaging
11-
pandas>=1.3.0,<3.0.0; python_version >= "3.10"
12-
pandas>=2.2.3,<3.0.0; python_version >= "3.13"
13-
pandas>=1.3.0,<3.0.0; python_version >= "3.12"
11+
pandas>=1.3.0; python_version >= "3.10"
12+
pandas>=2.2.3; python_version >= "3.13"
13+
pandas>=1.3.0; python_version >= "3.12"
1414
# patch version updates `typing_extensions` to the needed version
1515
pydantic>=1.10.7
1616
pyparsing>=2.4,!=3.2.4
@@ -20,4 +20,4 @@ ruamel.yaml>=0.16
2020
scipy>=1.6.0
2121
tqdm>=4.59.0
2222
typing-extensions>=4.1.0 # Leverage type annotations from recent Python releases
23-
tzlocal>=1.2
23+
tzlocal>=1.2

tests/integration/data_sources_and_expectations/expectations/test_expect_column_distinct_values_to_equal_set.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ def test_datetime64_ns_with_str_value_set(batch_for_datasource: Batch) -> None:
108108
for d in pd.date_range(
109109
start=datetime(2025, 9, 1), # noqa: DTZ001 # FIXME CoP
110110
end=datetime(2025, 9, 3), # noqa: DTZ001 # FIXME CoP
111-
freq="1d",
111+
freq="1D",
112112
)
113113
]
114114
expectation = gxe.ExpectColumnDistinctValuesToEqualSet(column=COL_NAME, value_set=value_set)
@@ -153,7 +153,7 @@ def test_datetime64_ns_with_pd_timestamp_value_set(batch_for_datasource: Batch)
153153
value_set = pd.date_range(
154154
start=datetime(2025, 9, 1), # noqa: DTZ001 # FIXME CoP
155155
end=datetime(2025, 9, 3), # noqa: DTZ001 # FIXME CoP
156-
freq="1d",
156+
freq="1D",
157157
).tolist()
158158
expectation = gxe.ExpectColumnDistinctValuesToEqualSet(column=COL_NAME, value_set=value_set)
159159
result = batch_for_datasource.validate(expectation)

tests/integration/data_sources_and_expectations/test_misconfigured_expectations.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,12 @@ class TestNumericExpectationAgainstStrDataMisconfiguration:
5555
data=_DATA,
5656
)
5757
def test_pandas(self, batch_for_datasource) -> None:
58-
self._assert_misconfiguration(
59-
batch_for_datasource=batch_for_datasource,
60-
exception_message="could not convert string to float",
58+
result = batch_for_datasource.validate(self._EXPECTATION)
59+
assert not result.success
60+
exception_str = str(result.exception_info)
61+
assert (
62+
"could not convert string to float" in exception_str # pandas <3.0
63+
or "Cannot perform reduction 'std' with string dtype" in exception_str # pandas 3.x
6164
)
6265

6366
@parameterize_batch_for_data_sources(

tests/integration/metrics/batch/test_batch_column_types.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,13 @@ def test_pandas_success(batch_for_datasource: Batch) -> None:
3838
metric = BatchColumnTypes()
3939
metric_result = batch.compute_metrics(metric)
4040
assert isinstance(metric_result, BatchColumnTypesResult)
41-
assert metric_result.value == [
42-
{"name": "numbers", "type": dtype("int64")},
43-
{"name": "strings", "type": dtype("O")},
44-
]
41+
assert metric_result.value[0] == {"name": "numbers", "type": dtype("int64")}
42+
# pandas 3.x uses StringDtype for string columns instead of object dtype
43+
strings_entry = metric_result.value[1]
44+
assert strings_entry.name == "strings"
45+
assert strings_entry.type == dtype("O") or isinstance(strings_entry.type, pd.StringDtype), (
46+
f"Expected dtype('O') or StringDtype, got {strings_entry.type}"
47+
)
4548

4649

4750
@parameterize_batch_for_data_sources(

0 commit comments

Comments
 (0)