Skip to content

Commit 6a00eb6

Browse files
Added subsetting to aggregate and impute_base_prices
The subsetting, done with the function subset_shared_axis added to helpers.py, is useful when these functions are applied in a groupby. Because the other arguments are not also chunked into groups, this extra step subsets them so that large DataFrames aren't being computed on each time.
1 parent f970991 commit 6a00eb6

3 files changed

Lines changed: 62 additions & 3 deletions

File tree

precon/aggregation.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
)
1313

1414
from precon.weights import get_weight_shares, reindex_weights_to_indices
15-
from precon.helpers import flip
15+
from precon.helpers import flip, subset_shared_axis
1616
from precon._validation import _handle_axis
1717

1818

@@ -64,6 +64,10 @@ def aggregate(
6464
'geomean': _geo_mean_aggregate,
6565
}
6666
agg_method = methods_lib.get(method)
67+
68+
# Subset the metadata axis to match those of indices, for quicker
69+
# handling of function when applied by groupby.
70+
weights = subset_shared_axis(weights, indices, axis)
6771

6872
# Make sure that the indices and weights have the same time series
6973
# axis before aggregating.

precon/helpers.py

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# -*- coding: utf-8 -*-
2-
import pandas as pd
2+
from typing import Sequence, Union, Optional
33

4+
import pandas as pd
5+
from pandas._typing import Level
46

57
def reindex_and_fill(df, other, first='ffill', axis=0):
68
"""Reindex and fill the DataFrame or Series by given index and axis.
@@ -156,3 +158,48 @@ def reduce_to_only_differing_periods(df, axis):
156158
series to only the periods where the values have changed.
157159
"""
158160
return df[df.ne(df.shift(1, axis=axis))].dropna()
161+
162+
163+
def subset_shared_axis(
    df: pd.DataFrame,
    other: pd.DataFrame,
    axis: int = 0,
    droplevel: Optional[Union[Level, Sequence[Level]]] = None,
) -> pd.DataFrame:
    """Subset a DataFrame to the labels it shares with the other on an axis.

    If both objects already have equal labels on the given axis, the
    DataFrame is returned unchanged. Otherwise only the labels of
    ``df`` that also appear on the same axis of ``other`` are kept.

    Optionally, levels can be dropped from the axis of ``other``
    before looking for the shared labels.

    Parameters
    ----------
    df : DataFrame
        The frame to subset.
    other : Object of the same data type
        Its indices on the given axis are used to define the subset of
        indices for this object.
    axis : {0, 1} int, default 0
        The axis to subset the shared index.
    droplevel : int, str, or list-like, optional
        Levels to drop from the axis of ``other`` before matching.
        If a string is given, must be the name of a level. If
        list-like, elements must be names or positional indexes of
        levels. ``None`` or an empty list-like drops nothing.

    Returns
    -------
    DataFrame
        The subsetted frame, or ``df`` itself when the axes are
        already equal.

    """
    own_axis = df.axes[axis]
    other_axis = other.axes[axis]

    # Index.equals returns a single bool for the whole axis. Index has
    # no elementwise ``eq`` method, so the comparison must use equals.
    if own_axis.equals(other_axis):
        return df

    # Compare against None rather than truthiness so that the valid
    # positional level ``0`` is not silently ignored.
    empty_listlike = isinstance(droplevel, (list, tuple)) and not droplevel
    if droplevel is not None and not empty_listlike:
        other_axis = other_axis.droplevel(droplevel)

    # Boolean mask of this frame's labels that are present in other.
    shared_axis = own_axis.isin(other_axis)
    # NOTE(review): axis_slice is defined elsewhere in this module;
    # presumably it builds the .loc indexer that applies the mask on
    # the given axis -- confirm against its definition.
    return df.loc[axis_slice(shared_axis, axis)]
205+

precon/imputation.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
from precon._validation import _handle_axis
1313
from precon.index_methods import calculate_index
14-
from precon.helpers import flip, axis_slice
14+
from precon.helpers import flip, axis_slice, subset_shared_axis
1515
from precon.weights import reindex_weights_to_indices
1616

1717

@@ -64,6 +64,14 @@ def impute_base_prices(
6464
"""
6565
axis = _handle_axis(axis)
6666

67+
# Subset the metadata axis to match that of prices, for quicker
68+
# handling of function when applied by groupby.
69+
to_impute = subset_shared_axis(to_impute, prices, flip(axis))
70+
if weights is not None:
71+
weights = subset_shared_axis(weights, prices, flip(axis))
72+
if adjustments is not None:
73+
adjustments = subset_shared_axis(adjustments, prices, flip(axis))
74+
6775
# Ensure the weights are in the same shape as the prices and
6876
# exclude the prices to impute from the imputation index
6977
# calculation by setting weights to zero.

0 commit comments

Comments
 (0)