Skip to content

Commit ecb41c7

Browse files
committed
remove pandas pin
1 parent f367c5b commit ecb41c7

File tree

11 files changed

+107
-33
lines changed

11 files changed

+107
-33
lines changed

.github/workflows/ci.yml

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -686,13 +686,10 @@ jobs:
686686
# TODO: would like to adopt `actionlint` pre-commit hook
687687
# but false positive here and inability to do an inline ignore
688688
# prevents this https://github.com/rhysd/actionlint/issues/237
689-
- python-version: ${{ github.event_name == 'pull_request_target' && '3.10' }}
690689
- python-version: ${{ github.event_name == 'pull_request_target' && '3.11' }}
691690
- python-version: ${{ github.event_name == 'pull_request_target' && '3.12' }}
692-
- python-version: ${{ github.event_name == 'merge_group' && '3.10' }}
693691
- python-version: ${{ github.event_name == 'merge_group' && '3.11' }}
694692
- python-version: ${{ github.event_name == 'merge_group' && '3.12' }}
695-
- python-version: ${{ github.event_name == 'workflow_dispatch' && '3.10' }}
696693
- python-version: ${{ github.event_name == 'workflow_dispatch' && '3.11' }}
697694
- python-version: ${{ github.event_name == 'workflow_dispatch' && '3.12' }}
698695
# clickhouse needs dependency update
@@ -785,7 +782,7 @@ jobs:
785782
markers:
786783
# - redshift
787784
- gx-redshift
788-
python-version: ["3.13"]
785+
python-version: ["3.10", "3.13"]
789786

790787
steps:
791788
- name: Checkout
@@ -950,6 +947,29 @@ jobs:
950947
- name: Run the tests
951948
run: invoke ci-tests -m unit --xdist --slowest=10 --timeout=2.0
952949

950+
pandas3-test:
951+
needs: [unit-tests, static-analysis, check-actor-permissions]
952+
if: github.event.pull_request.draft == false
953+
runs-on: ubuntu-latest
954+
steps:
955+
- name: Checkout
956+
uses: actions/checkout@v4
957+
with:
958+
ref: ${{ github.event.pull_request.head.sha }}
959+
960+
- name: Set up Python
961+
uses: actions/setup-python@v5
962+
with:
963+
python-version: "3.10"
964+
cache: "pip"
965+
cache-dependency-path: reqs/requirements-dev-test.txt
966+
967+
- name: Install dependencies
968+
run: pip install . -c ci/constraints-test/pandas3-min-install.txt -r reqs/requirements-dev-test.txt
969+
970+
- name: Run the tests
971+
run: invoke ci-tests -m unit --xdist --slowest=10 --timeout=2.0
972+
953973
airflow-min-versions:
954974
needs: [unit-tests, static-analysis, check-actor-permissions]
955975
runs-on: ubuntu-latest
@@ -1022,6 +1042,7 @@ jobs:
10221042
py312-min-versions,
10231043
py313-min-versions,
10241044
pydantic-v1,
1045+
pandas3-test,
10251046
airflow-min-versions,
10261047
import_gx,
10271048
]
ci/constraints-test/pandas3-min-install.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
pandas>=3.0.0

great_expectations/datasource/fluent/pandas_datasource.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,21 @@ def json( # noqa: PLR0913 # FIXME CoP
358358
"xml", _PandasDataAsset
359359
) # read_xml doesn't exist for pandas < 1.3
360360

361+
# GBQAsset may not be generated if read_gbq is not available (requires pandas-gbq package)
362+
# Create a manual GBQAsset class if it wasn't generated
363+
_GBQ_ASSET_MANUALLY_CREATED = False
364+
if GBQAsset is _PandasDataAsset:
365+
366+
class GBQAsset(_PandasDataAsset): # type: ignore[no-redef]
367+
# instance attributes
368+
type: Literal["gbq"] = "gbq"
369+
query: str
370+
371+
class Config:
372+
extra = pydantic.Extra.forbid
373+
374+
_GBQ_ASSET_MANUALLY_CREATED = True
375+
361376

362377
def _short_id() -> str:
363378
"""
@@ -617,6 +632,9 @@ def _add_asset(self, asset: _DataAssetT, connect_options: dict | None = None) ->
617632

618633

619634
_DYNAMIC_ASSET_TYPES = list(_PANDAS_ASSET_MODELS.values())
635+
# Add manually created GBQAsset if it wasn't generated
636+
if _GBQ_ASSET_MANUALLY_CREATED:
637+
_DYNAMIC_ASSET_TYPES.append(GBQAsset)
620638

621639

622640
@public_api
@@ -989,7 +1007,8 @@ def add_gbq_asset(
9891007
Args:
9901008
name: The name of the GBQ asset. This can be any arbitrary string.
9911009
query: The SQL query to send to Google BigQuery.
992-
**kwargs: Additional keyword arguments to pass to pandas.read_gbq().
1010+
**kwargs: Additional keyword arguments to pass to pandas.read_gbq()
1011+
(or pandas_gbq.read_gbq() for pandas 3.0+).
9931012
9941013
Returns:
9951014
The GBQAsset that has been added to this datasource.
@@ -1014,7 +1033,8 @@ def read_gbq(
10141033
Args:
10151034
query: The SQL query to send to Google BigQuery.
10161035
asset_name: The name of the GBQ asset, should you wish to use it again.
1017-
**kwargs: Additional keyword arguments to pass to pandas.read_gbq().
1036+
**kwargs: Additional keyword arguments to pass to pandas.read_gbq()
1037+
(or pandas_gbq.read_gbq() for pandas 3.0+).
10181038
10191039
Returns:
10201040
A Batch using an ephemeral GBQAsset.

great_expectations/execution_engine/pandas_execution_engine.py

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
)
2121

2222
import pandas as pd
23+
from packaging.version import Version
2324

2425
import great_expectations.exceptions as gx_exceptions
2526
from great_expectations.compatibility import aws, azure, google
@@ -484,15 +485,34 @@ def _get_reader_fn(
484485
"reader_options"
485486
) # This may not be there; use None in that case
486487

487-
try:
488-
reader_fn = getattr(pd, reader_method)
489-
if reader_options:
490-
reader_fn = partial(reader_fn, **reader_options)
491-
return reader_fn
492-
except AttributeError:
493-
raise gx_exceptions.ExecutionEngineError( # noqa: TRY003 # FIXME CoP
494-
f'Unable to find reader_method "{reader_method}" in pandas.'
495-
)
488+
# Handle read_gbq which was removed from pandas 3.0.0+
489+
# Use pandas_gbq.read_gbq instead
490+
if reader_method == "read_gbq":
491+
pandas_version = Version(pd.__version__)
492+
if pandas_version >= Version("3.0.0"):
493+
try:
494+
import pandas_gbq # type: ignore[import-not-found] # Import is only available when installing BigQuery Dependencies
495+
496+
reader_fn = pandas_gbq.read_gbq
497+
except ImportError:
498+
raise gx_exceptions.ExecutionEngineError( # noqa: TRY003 # FIXME CoP
499+
"pandas.read_gbq was removed in pandas 3.0.0. "
500+
"Please install pandas-gbq and use pandas_gbq.read_gbq instead. "
501+
"See https://pandas-gbq.readthedocs.io/ for more information."
502+
)
503+
else:
504+
reader_fn = getattr(pd, reader_method)
505+
else:
506+
try:
507+
reader_fn = getattr(pd, reader_method)
508+
except AttributeError:
509+
raise gx_exceptions.ExecutionEngineError( # noqa: TRY003 # FIXME CoP
510+
f'Unable to find reader_method "{reader_method}" in pandas.'
511+
)
512+
513+
if reader_options:
514+
reader_fn = partial(reader_fn, **reader_options)
515+
return reader_fn
496516

497517
@override
498518
def resolve_metric_bundle(self, metric_fn_bundle) -> dict[MetricConfigurationID, Any]:

great_expectations/expectations/core/expect_column_distinct_values_to_be_in_set.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -385,7 +385,7 @@ def _prescriptive_renderer(
385385

386386
@classmethod
387387
@renderer(renderer_type=LegacyDescriptiveRendererType.VALUE_COUNTS_BAR_CHART)
388-
def _descriptive_value_counts_bar_chart_renderer(
388+
def _descriptive_value_counts_bar_chart_renderer( # noqa: C901 # 134 lines
389389
cls,
390390
configuration: Optional[ExpectationConfiguration] = None,
391391
result: Optional[ExpectationValidationResult] = None,
@@ -406,6 +406,10 @@ def _descriptive_value_counts_bar_chart_renderer(
406406
"count": counts,
407407
}
408408
)
409+
# Convert StringDtype columns to object dtype for Altair compatibility
410+
for col in df.columns:
411+
if isinstance(df[col].dtype, pd.StringDtype):
412+
df[col] = df[col].astype("object")
409413

410414
if len(values) > 60: # noqa: PLR2004 # FIXME CoP
411415
return None

great_expectations/expectations/core/expect_column_kl_divergence_to_be_less_than.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1027,7 +1027,7 @@ def _get_kl_divergence_chart( # noqa: C901 # 13
10271027
return expected_distribution
10281028

10291029
@classmethod
1030-
def _atomic_kl_divergence_chart_template(cls, partition_object: dict) -> tuple:
1030+
def _atomic_kl_divergence_chart_template(cls, partition_object: dict) -> tuple: # noqa: C901 # 134 lines
10311031
weights = partition_object.get("weights", [])
10321032

10331033
chart_pixel_width = (len(weights) / 60.0) * 500
@@ -1079,6 +1079,10 @@ def _atomic_kl_divergence_chart_template(cls, partition_object: dict) -> tuple:
10791079
values = partition_object["values"]
10801080

10811081
df = pd.DataFrame({"values": values, "fraction": weights})
1082+
# Convert StringDtype columns to object dtype for Altair compatibility
1083+
for col in df.columns:
1084+
if isinstance(df[col].dtype, pd.StringDtype):
1085+
df[col] = df[col].astype("object")
10821086

10831087
bars = (
10841088
alt.Chart(df)

requirements.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ numpy>=1.22.4; python_version >= "3.10"
88
numpy>=1.26.0; python_version >= "3.12"
99
numpy>=2.1.0; python_version >= "3.13"
1010
packaging
11-
pandas>=1.3.0,<3.0.0; python_version >= "3.10"
12-
pandas>=2.2.3,<3.0.0; python_version >= "3.13"
13-
pandas>=1.3.0,<3.0.0; python_version >= "3.12"
11+
pandas>=1.3.0; python_version >= "3.10"
12+
pandas>=2.2.3; python_version >= "3.13"
13+
pandas>=1.3.0; python_version >= "3.12"
1414
# patch version updates `typing_extensions` to the needed version
1515
pydantic>=1.10.7
1616
pyparsing>=2.4,!=3.2.4
@@ -20,4 +20,4 @@ ruamel.yaml>=0.16
2020
scipy>=1.6.0
2121
tqdm>=4.59.0
2222
typing-extensions>=4.1.0 # Leverage type annotations from recent Python releases
23-
tzlocal>=1.2
23+
tzlocal>=1.2

tests/integration/data_sources_and_expectations/expectations/test_expect_column_distinct_values_to_equal_set.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ def test_datetime64_ns_with_str_value_set(batch_for_datasource: Batch) -> None:
108108
for d in pd.date_range(
109109
start=datetime(2025, 9, 1), # noqa: DTZ001 # FIXME CoP
110110
end=datetime(2025, 9, 3), # noqa: DTZ001 # FIXME CoP
111-
freq="1d",
111+
freq="1D",
112112
)
113113
]
114114
expectation = gxe.ExpectColumnDistinctValuesToEqualSet(column=COL_NAME, value_set=value_set)
@@ -153,7 +153,7 @@ def test_datetime64_ns_with_pd_timestamp_value_set(batch_for_datasource: Batch)
153153
value_set = pd.date_range(
154154
start=datetime(2025, 9, 1), # noqa: DTZ001 # FIXME CoP
155155
end=datetime(2025, 9, 3), # noqa: DTZ001 # FIXME CoP
156-
freq="1d",
156+
freq="1D",
157157
).tolist()
158158
expectation = gxe.ExpectColumnDistinctValuesToEqualSet(column=COL_NAME, value_set=value_set)
159159
result = batch_for_datasource.validate(expectation)

tests/integration/data_sources_and_expectations/test_misconfigured_expectations.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,12 @@ class TestNumericExpectationAgainstStrDataMisconfiguration:
5555
data=_DATA,
5656
)
5757
def test_pandas(self, batch_for_datasource) -> None:
58-
self._assert_misconfiguration(
59-
batch_for_datasource=batch_for_datasource,
60-
exception_message="could not convert string to float",
58+
result = batch_for_datasource.validate(self._EXPECTATION)
59+
assert not result.success
60+
exception_str = str(result.exception_info)
61+
assert (
62+
"could not convert string to float" in exception_str # pandas <3.0
63+
or "Cannot perform reduction 'std' with string dtype" in exception_str # pandas 3.x
6164
)
6265

6366
@parameterize_batch_for_data_sources(

tests/integration/metrics/batch/test_batch_column_types.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,13 @@ def test_pandas_success(batch_for_datasource: Batch) -> None:
3838
metric = BatchColumnTypes()
3939
metric_result = batch.compute_metrics(metric)
4040
assert isinstance(metric_result, BatchColumnTypesResult)
41-
assert metric_result.value == [
42-
{"name": "numbers", "type": dtype("int64")},
43-
{"name": "strings", "type": dtype("O")},
44-
]
41+
assert metric_result.value[0] == {"name": "numbers", "type": dtype("int64")}
42+
# pandas 3.x uses StringDtype for string columns instead of object dtype
43+
strings_entry = metric_result.value[1]
44+
assert strings_entry.name == "strings"
45+
assert strings_entry.type == dtype("O") or isinstance(strings_entry.type, pd.StringDtype), (
46+
f"Expected dtype('O') or StringDtype, got {strings_entry.type}"
47+
)
4548

4649

4750
@parameterize_batch_for_data_sources(

0 commit comments

Comments
 (0)