Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH1088 Test Migration pt1 #1121

Merged
merged 2 commits into from
Feb 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 12 additions & 7 deletions tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
DatetimeIndexResampler,
Resampler,
)
from pandas.core.series import Series
import pytest
from typing_extensions import (
TypeAlias,
Expand Down Expand Up @@ -563,8 +562,11 @@ def test_types_median() -> None:

def test_types_iterrows() -> None:
df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]})
# TODO rewrite the below with check assert_type
vv: Iterable[tuple[Hashable, Series]] = df.iterrows()
check(
assert_type(df.iterrows(), "Iterable[tuple[Hashable, pd.Series]]"),
Iterable,
tuple,
)


def test_types_itertuples() -> None:
Expand Down Expand Up @@ -2158,13 +2160,16 @@ def test_types_regressions() -> None:
s1 = pd.Series([1, 2, 3])
s2 = pd.Series([4, 5, 6])
df = pd.concat([s1, s2], axis=1)
# TODO the inference here returns Any, should return Series
ts1: pd.Series = pd.concat([s1, s2], axis=0)
ts2: pd.Series = pd.concat([s1, s2])
ts1 = pd.concat([s1, s2], axis=0)
ts2 = pd.concat([s1, s2])

check(assert_type(ts1, pd.Series), pd.Series)
check(assert_type(ts2, pd.Series), pd.Series)

# https://github.com/microsoft/python-type-stubs/issues/110
check(assert_type(pd.Timestamp("2021-01-01"), pd.Timestamp), datetime.date)
tslist: list[pd.Timestamp] = list(pd.to_datetime(["2022-01-01", "2022-01-02"]))
tslist = list(pd.to_datetime(["2022-01-01", "2022-01-02"]))
check(assert_type(tslist, list[pd.Timestamp]), list, pd.Timestamp)
sseries = pd.Series(tslist)
with pytest_warns_bounded(FutureWarning, "'d' is deprecated", lower="2.2.99"):
sseries + pd.Timedelta(1, "d")
Expand Down
127 changes: 81 additions & 46 deletions tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,20 +146,20 @@ def test_types_all() -> None:

def test_types_csv() -> None:
s = pd.Series(data=[1, 2, 3])
csv_df: str = s.to_csv()
check(assert_type(s.to_csv(), str), str)

with ensure_clean() as path:
s.to_csv(path)
s2: pd.DataFrame = pd.read_csv(path)
check(assert_type(pd.read_csv(path), pd.DataFrame), pd.DataFrame)

with ensure_clean() as path:
s.to_csv(Path(path))
s3: pd.DataFrame = pd.read_csv(Path(path))
check(assert_type(pd.read_csv(Path(path)), pd.DataFrame), pd.DataFrame)

# The `errors` keyword was added in pandas 1.1.0: https://pandas.pydata.org/docs/whatsnew/v1.1.0.html
with ensure_clean() as path:
s.to_csv(path, errors="replace")
s4: pd.DataFrame = pd.read_csv(path)
check(assert_type(pd.read_csv(path), pd.DataFrame), pd.DataFrame)


def test_types_copy() -> None:
Expand All @@ -177,7 +177,7 @@ def test_types_select() -> None:
lower="2.0.99",
):
s[0]
s[1:]
check(assert_type(s[1:], "pd.Series[int]"), pd.Series, np.integer)


def test_types_iloc_iat() -> None:
Expand Down Expand Up @@ -230,11 +230,11 @@ def test_types_boolean_indexing() -> None:
def test_types_df_to_df_comparison() -> None:
s = pd.Series(data={"col1": [1, 2]})
s2 = pd.Series(data={"col1": [3, 2]})
res_gt: pd.Series = s > s2
res_ge: pd.Series = s >= s2
res_lt: pd.Series = s < s2
res_le: pd.Series = s <= s2
res_e: pd.Series = s == s2
check(assert_type(s > s2, "pd.Series[bool]"), pd.Series, np.bool)
check(assert_type(s >= s2, "pd.Series[bool]"), pd.Series, np.bool)
check(assert_type(s < s2, "pd.Series[bool]"), pd.Series, np.bool)
check(assert_type(s <= s2, "pd.Series[bool]"), pd.Series, np.bool)
check(assert_type(s == s2, "pd.Series[bool]"), pd.Series, np.bool)


def test_types_head_tail() -> None:
Expand Down Expand Up @@ -310,7 +310,11 @@ def test_types_drop_multilevel() -> None:
codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]],
)
s = pd.Series(data=[1, 2, 3, 4, 5, 6], index=index)
res: pd.Series = s.drop(labels="first", level=1)
check(
assert_type(s.drop(labels="first", level=1), "pd.Series[int]"),
pd.Series,
np.integer,
)


def test_types_drop_duplicates() -> None:
Expand Down Expand Up @@ -383,7 +387,11 @@ def test_types_sort_index() -> None:
# This was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html
def test_types_sort_index_with_key() -> None:
s = pd.Series([1, 2, 3], index=["a", "B", "c"])
res: pd.Series = s.sort_index(key=lambda k: k.str.lower())
check(
assert_type(s.sort_index(key=lambda k: k.str.lower()), "pd.Series[int]"),
pd.Series,
np.integer,
)


def test_types_sort_values() -> None:
Expand Down Expand Up @@ -413,7 +421,11 @@ def test_types_sort_values() -> None:
# This was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html
def test_types_sort_values_with_key() -> None:
s = pd.Series([1, 2, 3], index=[2, 3, 1])
res: pd.Series = s.sort_values(key=lambda k: -k)
check(
assert_type(s.sort_values(key=lambda k: -k), "pd.Series[int]"),
pd.Series,
np.integer,
)


def test_types_shift() -> None:
Expand Down Expand Up @@ -441,18 +453,26 @@ def test_types_rank() -> None:

def test_types_mean() -> None:
s = pd.Series([1, 2, 3, np.nan])
f1: float = s.mean()
s1: pd.Series = s.groupby(level=0).mean()
f2: float = s.mean(skipna=False)
f3: float = s.mean(numeric_only=False)
check(assert_type(s.mean(), float), float)
check(
assert_type(s.groupby(level=0).mean(), "pd.Series[float]"),
pd.Series,
np.float64,
)
check(assert_type(s.mean(skipna=False), float), float)
check(assert_type(s.mean(numeric_only=False), float), float)


def test_types_median() -> None:
s = pd.Series([1, 2, 3, np.nan])
f1: float = s.median()
s1: pd.Series = s.groupby(level=0).median()
f2: float = s.median(skipna=False)
f3: float = s.median(numeric_only=False)
check(assert_type(s.median(), float), float)
check(
assert_type(s.groupby(level=0).median(), "pd.Series[float]"),
pd.Series,
np.float64,
)
check(assert_type(s.median(skipna=False), float), float)
check(assert_type(s.median(numeric_only=False), float), float)


def test_types_sum() -> None:
Expand Down Expand Up @@ -630,17 +650,25 @@ def test_types_element_wise_arithmetic() -> None:
s = pd.Series([0, 1, -10])
s2 = pd.Series([7, -5, 10])

res_add1: pd.Series = s + s2
res_add2: pd.Series = s.add(s2, fill_value=0)
check(assert_type(s + s2, "pd.Series[int]"), pd.Series, np.integer)
check(assert_type(s.add(s2, fill_value=0), "pd.Series[int]"), pd.Series, np.integer)

res_sub: pd.Series = s - s2
res_sub2: pd.Series = s.sub(s2, fill_value=0)
# TODO: the assertion below should be typed as pd.Series[int] rather than bare pd.Series
check(assert_type(s - s2, pd.Series), pd.Series, np.integer)
check(assert_type(s.sub(s2, fill_value=0), "pd.Series[int]"), pd.Series, np.integer)

res_mul: pd.Series = s * s2
res_mul2: pd.Series = s.mul(s2, fill_value=0)
# TODO: the two assertions below should be typed as pd.Series[int] rather than bare pd.Series
# check(assert_type(s * s2, "pd.Series[int]"), pd.Series, np.integer )
check(assert_type(s * s2, pd.Series), pd.Series, np.integer)
# check(assert_type(s.mul(s2, fill_value=0), "pd.Series[int]"), pd.Series, np.integer)
check(assert_type(s.mul(s2, fill_value=0), pd.Series), pd.Series, np.integer)

res_div: pd.Series = s / s2
res_div2: pd.Series = s.div(s2, fill_value=0)
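# TODO: `s / s2` below should be typed as pd.Series[float] rather than bare pd.Series
# (`s.div(s2, fill_value=0)` is already typed as pd.Series[float])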
# check(assert_type(s / s2, "pd.Series[float]"), pd.Series, np.float64)
check(assert_type(s / s2, pd.Series), pd.Series, np.float64)
check(
assert_type(s.div(s2, fill_value=0), "pd.Series[float]"), pd.Series, np.float64
)

res_floordiv: pd.Series = s // s2
res_floordiv2: pd.Series = s.floordiv(s2, fill_value=0)
Expand All @@ -657,8 +685,8 @@ def test_types_element_wise_arithmetic() -> None:
def test_types_scalar_arithmetic() -> None:
s = pd.Series([0, 1, -10])

res_add1: pd.Series = s + 1
res_add2: pd.Series = s.add(1, fill_value=0)
check(assert_type(s + 1, "pd.Series[int]"), pd.Series, np.integer)
check(assert_type(s.add(1, fill_value=0), "pd.Series[int]"), pd.Series, np.integer)

res_sub: pd.Series = s - 1
res_sub2: pd.Series = s.sub(1, fill_value=0)
Expand All @@ -681,8 +709,8 @@ def test_types_scalar_arithmetic() -> None:
res_pow3: pd.Series = s.pow(0.5)


# GH 103
def test_types_complex_arithmetic() -> None:
# GH 103
c = 1 + 1j
s = pd.Series([1.0, 2.0, 3.0])
x = s + c
Expand Down Expand Up @@ -1111,8 +1139,8 @@ def test_types_getitem() -> None:
s = pd.Series({"key": [0, 1, 2, 3]})
key: list[int] = s["key"]
s2 = pd.Series([0, 1, 2, 3])
value: int = s2[0]
s3: pd.Series = s[:2]
check(assert_type(s2[0], int), np.integer)
check(assert_type(s[:2], pd.Series), pd.Series)


def test_types_getitem_by_timestamp() -> None:
Expand All @@ -1123,9 +1151,9 @@ def test_types_getitem_by_timestamp() -> None:

def test_types_eq() -> None:
s1 = pd.Series([1, 2, 3])
res1: pd.Series = s1 == 1
check(assert_type(s1 == 1, "pd.Series[bool]"), pd.Series, np.bool)
s2 = pd.Series([1, 2, 4])
res2: pd.Series = s1 == s2
check(assert_type(s1 == s2, "pd.Series[bool]"), pd.Series, np.bool)


def test_types_rename_axis() -> None:
Expand Down Expand Up @@ -1183,6 +1211,7 @@ def add1(x: int) -> int:
s5 = pd.Series([1, 2, 3]).rename({1: 10})
check(assert_type(s5, "pd.Series[int]"), pd.Series, np.integer)
# inplace
# TODO: fix the issue with inplace=True being typed as returning a Series; cf. pandas #60942
s6: None = pd.Series([1, 2, 3]).rename("A", inplace=True)

if TYPE_CHECKING_INVALID_USAGE:
Expand All @@ -1192,7 +1221,7 @@ def add1(x: int) -> int:
def test_types_ne() -> None:
s1 = pd.Series([1, 2, 3])
s2 = pd.Series([1, 2, 4])
s3: pd.Series = s1 != s2
check(assert_type(s1 != s2, "pd.Series[bool]"), pd.Series, np.bool)


def test_types_bfill() -> None:
Expand Down Expand Up @@ -1261,7 +1290,7 @@ def test_types_ffill() -> None:

def test_types_as_type() -> None:
s1 = pd.Series([1, 2, 8, 9])
s2: pd.Series = s1.astype("int32")
check(assert_type(s1.astype("int32"), "pd.Series[int]"), pd.Series, np.int32)


def test_types_dot() -> None:
Expand Down Expand Up @@ -1414,13 +1443,19 @@ def test_cat_accessor() -> None:


def test_cat_ctor_values() -> None:
c1 = pd.Categorical(["a", "b", "a"])
check(assert_type(pd.Categorical(["a", "b", "a"]), pd.Categorical), pd.Categorical)
# GH 95
c2 = pd.Categorical(pd.Series(["a", "b", "a"]))
check(
assert_type(pd.Categorical(pd.Series(["a", "b", "a"])), pd.Categorical),
pd.Categorical,
)
s: Sequence = cast(Sequence, ["a", "b", "a"])
c3 = pd.Categorical(s)
check(assert_type(pd.Categorical(s), pd.Categorical), pd.Categorical)
# GH 107
c4 = pd.Categorical(np.array([1, 2, 3, 1, 1]))
check(
assert_type(pd.Categorical(np.array([1, 2, 3, 1, 1])), pd.Categorical),
pd.Categorical,
)


def test_iloc_getitem_ndarray() -> None:
Expand Down Expand Up @@ -1478,8 +1513,8 @@ def test_iloc_setitem_ndarray() -> None:
def test_types_iter() -> None:
s = pd.Series([1, 2, 3], dtype=int)
iterable: Iterable[int] = s
assert_type(iter(s), Iterator[int])
assert_type(next(iter(s)), int)
check(assert_type(iter(s), Iterator[int]), Iterator, int)
check(assert_type(next(iter(s)), int), int)


def test_types_to_list() -> None:
Expand Down Expand Up @@ -2707,12 +2742,12 @@ def test_astype_bytes(cast_arg: BytesDtypeArg, target_type: type) -> None:
@pytest.mark.parametrize("cast_arg, target_type", ASTYPE_CATEGORICAL_ARGS, ids=repr)
def test_astype_categorical(cast_arg: CategoryDtypeArg, target_type: type) -> None:
s = pd.Series(["a", "b"])
check(s.astype("category"), pd.Series, target_type)
check(s.astype(cast_arg), pd.Series, target_type)

if TYPE_CHECKING:
# pandas category
assert_type(s.astype(pd.CategoricalDtype()), "pd.Series[pd.CategoricalDtype]")
assert_type(s.astype("category"), "pd.Series[pd.CategoricalDtype]")
assert_type(s.astype(cast_arg), "pd.Series[pd.CategoricalDtype]")
# pyarrow dictionary
# assert_type(s.astype("dictionary[pyarrow]"), "pd.Series[Categorical]")

Expand Down