|
46 | 46 | )
|
47 | 47 |
|
48 | 48 | from pandas.core import (
|
| 49 | + missing, |
49 | 50 | nanops,
|
50 | 51 | ops,
|
51 | 52 | )
|
@@ -865,6 +866,88 @@ def _reduce(
|
865 | 866 | return result
|
866 | 867 | raise TypeError(f"Cannot perform reduction '{name}' with string dtype")
|
867 | 868 |
|
| 869 | + def _accumulate(self, name: str, *, skipna: bool = True, **kwargs) -> StringArray: |
| 870 | + """ |
| 871 | + Return an ExtensionArray performing an accumulation operation. |
| 872 | +
|
| 873 | + The underlying data type might change. |
| 874 | +
|
| 875 | + Parameters |
| 876 | + ---------- |
| 877 | + name : str |
| 878 | + Name of the function, supported values are: |
| 879 | + - cummin |
| 880 | + - cummax |
| 881 | + - cumsum |
| 882 | + - cumprod |
| 883 | + skipna : bool, default True |
| 884 | + If True, skip NA values. |
| 885 | + **kwargs |
| 886 | + Additional keyword arguments passed to the accumulation function. |
| 887 | + Currently, there is no supported kwarg. |
| 888 | +
|
| 889 | + Returns |
| 890 | + ------- |
| 891 | + array |
| 892 | +
|
| 893 | + Raises |
| 894 | + ------ |
| 895 | + NotImplementedError : subclass does not define accumulations |
| 896 | + """ |
| 897 | + if name == "cumprod": |
| 898 | + msg = f"operation '{name}' not supported for dtype '{self.dtype}'" |
| 899 | + raise TypeError(msg) |
| 900 | + |
| 901 | + # We may need to strip out trailing NA values |
| 902 | + tail: np.ndarray | None = None |
| 903 | + na_mask: np.ndarray | None = None |
| 904 | + ndarray = self._ndarray |
| 905 | + np_func = { |
| 906 | + "cumsum": np.cumsum, |
| 907 | + "cummin": np.minimum.accumulate, |
| 908 | + "cummax": np.maximum.accumulate, |
| 909 | + }[name] |
| 910 | + |
| 911 | + if self._hasna: |
| 912 | + na_mask = cast("npt.NDArray[np.bool_]", isna(ndarray)) |
| 913 | + if np.all(na_mask): |
| 914 | + return type(self)(ndarray) |
| 915 | + if skipna: |
| 916 | + if name == "cumsum": |
| 917 | + ndarray = np.where(na_mask, "", ndarray) |
| 918 | + else: |
| 919 | + # We can retain the running min/max by forward/backward filling. |
| 920 | + ndarray = ndarray.copy() |
| 921 | + missing.pad_or_backfill_inplace( |
| 922 | + ndarray, |
| 923 | + method="pad", |
| 924 | + axis=0, |
| 925 | + ) |
| 926 | + missing.pad_or_backfill_inplace( |
| 927 | + ndarray, |
| 928 | + method="backfill", |
| 929 | + axis=0, |
| 930 | + ) |
| 931 | + else: |
| 932 | + # When not skipping NA values, the result should be null from |
| 933 | + # the first NA value onward. |
| 934 | + idx = np.argmax(na_mask) |
| 935 | + tail = np.empty(len(ndarray) - idx, dtype="object") |
| 936 | + tail[:] = self.dtype.na_value |
| 937 | + ndarray = ndarray[:idx] |
| 938 | + |
| 939 | + # mypy: Cannot call function of unknown type |
| 940 | + np_result = np_func(ndarray) # type: ignore[operator] |
| 941 | + |
| 942 | + if tail is not None: |
| 943 | + np_result = np.hstack((np_result, tail)) |
| 944 | + elif na_mask is not None: |
| 945 | + # Argument 2 to "where" has incompatible type "NAType | float" |
| 946 | + np_result = np.where(na_mask, self.dtype.na_value, np_result) # type: ignore[arg-type] |
| 947 | + |
| 948 | + result = type(self)(np_result) |
| 949 | + return result |
| 950 | + |
868 | 951 | def _wrap_reduction_result(self, axis: AxisInt | None, result) -> Any:
|
869 | 952 | if self.dtype.na_value is np.nan and result is libmissing.NA:
|
870 | 953 | # the masked_reductions use pd.NA -> convert to np.nan
|
|
0 commit comments