|
49 | 49 | )
|
50 | 50 |
|
51 | 51 | from pandas.core import (
|
| 52 | + missing, |
52 | 53 | nanops,
|
53 | 54 | ops,
|
54 | 55 | )
|
@@ -870,6 +871,88 @@ def _reduce(
|
870 | 871 |
|
871 | 872 | raise TypeError(f"Cannot perform reduction '{name}' with string dtype")
|
872 | 873 |
|
def _accumulate(self, name: str, *, skipna: bool = True, **kwargs) -> StringArray:
    """
    Return a new array holding the running ``name`` of this string array.

    Parameters
    ----------
    name : str
        Name of the accumulation. Supported values are:
        - cummin
        - cummax
        - cumsum (running string concatenation)
        ``cumprod`` is explicitly rejected, see Raises.
    skipna : bool, default True
        If True, NA positions do not affect the running value and are
        NA in the result. If False, the result is NA from the first NA
        position onward.
    **kwargs
        Accepted for signature compatibility; not used here.

    Returns
    -------
    StringArray
        A new instance of ``type(self)`` with the same length as ``self``.
        The result is backed by its own buffer on every code path.

    Raises
    ------
    TypeError
        If ``name`` is "cumprod".
    KeyError
        If ``name`` is any other value missing from the dispatch table.
    """
    if name == "cumprod":
        msg = f"operation '{name}' not supported for dtype '{self.dtype}'"
        raise TypeError(msg)

    # ``tail`` is only set for skipna=False: it holds the NA values that
    # replace everything from the first NA position onward.
    tail: np.ndarray | None = None
    na_mask: np.ndarray | None = None
    ndarray = self._ndarray
    np_func = {
        "cumsum": np.cumsum,
        "cummin": np.minimum.accumulate,
        "cummax": np.maximum.accumulate,
    }[name]

    if self._hasna:
        na_mask = cast("npt.NDArray[np.bool_]", isna(ndarray))
        if na_mask.all():
            # Nothing to accumulate. Copy so that the returned array never
            # aliases this array's buffer; every other path below already
            # produces freshly allocated data.
            return type(self)(ndarray.copy())
        if skipna:
            if name == "cumsum":
                # The empty string is the identity for concatenation.
                ndarray = np.where(na_mask, "", ndarray)
            else:
                # We can retain the running min/max by forward/backward
                # filling: a filled slot repeats a neighbouring value and
                # therefore cannot introduce a new extreme. The backfill
                # covers leading NA values that the pad cannot reach.
                ndarray = ndarray.copy()
                missing.pad_or_backfill_inplace(
                    ndarray,
                    method="pad",
                    axis=0,
                )
                missing.pad_or_backfill_inplace(
                    ndarray,
                    method="backfill",
                    axis=0,
                )
        else:
            # When not skipping NA values, the result should be null from
            # the first NA value onward. argmax on a boolean mask gives the
            # position of the first True.
            idx = np.argmax(na_mask)
            tail = np.empty(len(ndarray) - idx, dtype="object")
            tail[:] = self.dtype.na_value
            ndarray = ndarray[:idx]

    # mypy: Cannot call function of unknown type
    np_result = np_func(ndarray)  # type: ignore[operator]

    if tail is not None:
        # skipna=False: re-attach the NA block after the accumulated prefix.
        np_result = np.hstack((np_result, tail))
    elif na_mask is not None:
        # skipna=True: restore NA at the originally missing positions.
        # Argument 2 to "where" has incompatible type "NAType | float"
        np_result = np.where(na_mask, self.dtype.na_value, np_result)  # type: ignore[arg-type]

    return type(self)(np_result)
| 955 | + |
873 | 956 | def _wrap_reduction_result(self, axis: AxisInt | None, result) -> Any:
|
874 | 957 | if self.dtype.na_value is np.nan and result is libmissing.NA:
|
875 | 958 | # the masked_reductions use pd.NA -> convert to np.nan
|
|
0 commit comments