Skip to content

Commit 024b2ad

Browse files
Merge pull request #33 from ONSBigData/i28/revisit-index-calculator
I28/revisit index calculator
2 parents 823c0c1 + eb7e221 commit 024b2ad

3 files changed

Lines changed: 82 additions & 67 deletions

File tree

precon/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
impute_base_prices,
1111
get_base_prices,
1212
get_quality_adjusted_prices,
13-
base_price_fill_shift,
14-
13+
ffill_shift,
14+
1515
)
1616
from precon.chaining import chain, unchain
1717
from precon.contributions import contributions, contributions_with_double_update

precon/base_prices.py

Lines changed: 40 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,9 @@
1212
from precon._validation import _handle_axis, _list_convert
1313
from precon.index_methods import calculate_index
1414
from precon.helpers import (
15-
flip,
15+
axis_slice,
1616
axis_vals_as_frame,
17+
flip,
1718
subset_shared_axis,
1819
)
1920
from precon.weights import reindex_weights_to_indices
@@ -69,13 +70,13 @@ def impute_base_prices(
6970
axis = _handle_axis(axis)
7071

7172
# Subset the metadata axis to match those of indices, for quicker
72-
# handling of function when applied by groupby.
73+
# handling of function when applied by groupby.
7374
to_impute = subset_shared_axis(to_impute, prices, flip(axis))
7475
if weights is not None:
7576
weights = subset_shared_axis(weights, prices, flip(axis))
7677
if adjustments is not None:
7778
adjustments = subset_shared_axis(adjustments, prices, flip(axis))
78-
79+
7980
# Ensure the weights are in the same shape as the prices and
8081
# exclude the prices to impute from the imputation index
8182
# calculation by setting weights to zero.
@@ -84,7 +85,12 @@ def impute_base_prices(
8485
weights = weights.mask(to_impute, 0)
8586

8687
# Get the base prices to start with from given base period.
87-
start_prices = get_base_prices(prices, base_period, axis=axis, ffill_shift=False)
88+
start_prices = get_base_prices(
89+
prices,
90+
base_period,
91+
axis=axis,
92+
fill_shift=False,
93+
)
8894
base_prices = start_prices.copy()
8995

9096
if not shift_imputed_values:
@@ -153,27 +159,22 @@ def impute_base_prices(
153159

154160

155161
def get_base_prices(
156-
prices: pd.DataFrame,
157-
base_period: Union[int, Sequence[int]] = 1,
158-
axis: pd._typing.Axis = 0,
159-
ffill_shift: bool = True,
160-
) -> pd.DataFrame:
161-
"""Return prices at base month with optional ffill and shift.
162-
163-
Default behaviour is to fill forward values within the year and
164-
shift one period, since base prices usually start being used in
165-
the following period up to the next base period. Will return
166-
NaNs in non-base month if ffill=False.
162+
prices: pd.DataFrame,
163+
base_period: Union[int, Sequence[int]] = 1,
164+
axis: pd._typing.Axis = 0,
165+
fill_shift: bool = True,
166+
) -> pd.DataFrame:
167+
"""Return base prices with optional fill and shift.
167168
168169
Parameters
169170
----------
170171
prices : DataFrame
171-
base_period : int, or list of ints
172-
The base periods to select base prices from.
172+
base_period : int, or list of ints, defaults to 1
173+
Base period/s to select base prices from.
173174
axis : {0, 1} int, defaults to 0
174175
Fill and shift direction.
175-
ffill_shift : bool, defaults to True
176-
Switch to forward fill values within the year and shift by one
176+
fill_shift : bool, defaults to True
177+
Switch to forward fill base prices within year and shift one
177178
period.
178179
179180
Returns
@@ -183,35 +184,37 @@ def get_base_prices(
183184
184185
Notes
185186
-----
186-
The base prices are forward filled within each year so that base
187-
prices are not filled when prices have stopped being collected.
188-
When shifting, base prices are shifted by one period. So for a base
189-
period of Jan (int=1) base prices are shifted on to the Feb-Jan+1
190-
time delta in which they apply. A base price is needed for the Jan
191-
period at the start of the series, so the function fills the
192-
shifted values with the unshifted values to achieve this
193-
194-
TODO: Make this work for any base period.
195-
187+
When using fill_shift, the base prices are forward filled within
188+
each year so that base prices are not filled when prices have
189+
stopped being collected. Base prices are also shifted by one period,
190+
so for a base period of Jan (int=1) base prices are shifted on to
191+
the Feb-Jan+1 time delta in which they apply.
192+
196193
"""
197194
base_period = _list_convert(base_period)
198-
195+
199196
# Only prices in the base periods are not NaN.
200197
months = axis_vals_as_frame(prices, axis, converter=lambda x: x.month)
201198
base_prices = prices.where(months.isin(base_period))
202199

203-
if ffill_shift:
204-
# Fill base prices forward within the year and shift one.
205-
return base_price_fill_shift(base_prices, axis)
206-
207-
return base_prices
200+
# Ensure the prices in the first period are taken as base prices
201+
# even if not a period given by base_period parameter.
202+
first_period = axis_slice(0, axis)
208203

204+
if not base_prices.iloc[first_period].isna().all():
205+
base_prices.iloc[first_period] = prices.iloc[first_period]
209206

210-
def base_price_fill_shift(
207+
if fill_shift:
208+
return ffill_shift(base_prices, axis)
209+
else:
210+
return base_prices
211+
212+
213+
def ffill_shift(
211214
base_prices: pd.DataFrame,
212215
axis: int = 0
213216
) -> pd.DataFrame:
214-
"""Fill forward base prices and shift one period.
217+
"""Fill forward base prices within year and shift one period.
215218
216219
Parameters
217220
----------
@@ -233,8 +236,6 @@ def base_price_fill_shift(
233236
period at the start of the series, so the function fills the
234237
shifted values with the unshifted values to achieve this.
235238
236-
TODO: Make this work for any base period.
237-
238239
"""
239240
return (
240241
base_prices.groupby(lambda x: x.year, axis=axis)

precon/pipelines.py

Lines changed: 40 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,57 +1,65 @@
11
"""A set of common pipeline functions to create National Statistics."""
2-
from typing import Optional
2+
from typing import Optional, Union, Sequence
33

44
import pandas as pd
55
from pandas._typing import Axis
66

77
from precon._validation import _handle_axis
8-
from precon.base_prices import impute_base_prices, get_base_prices
8+
from precon.base_prices import (
9+
impute_base_prices,
10+
get_base_prices,
11+
ffill_shift,
12+
)
913
from precon.index_methods import calculate_index
1014
from precon.helpers import flip
1115

1216

1317
def index_calculator(
14-
prices: pd.DataFrame,
15-
index_method: str,
16-
shift_imputed_values: bool = False,
17-
to_impute: Optional[pd.DataFrame] = None,
18-
weights: Optional[pd.DataFrame] = None,
19-
adjustments: Optional[pd.DataFrame] = None,
20-
exclusions: Optional[pd.DataFrame] = None,
21-
base_period: int = 1,
22-
axis: Axis = 1,
23-
) -> pd.Series:
18+
prices: pd.DataFrame,
19+
index_method: str,
20+
shift_imputed_values: bool = False,
21+
to_impute: Optional[pd.DataFrame] = None,
22+
weights: Optional[pd.DataFrame] = None,
23+
adjustments: Optional[pd.DataFrame] = None,
24+
exclusions: Optional[pd.DataFrame] = None,
25+
base_prices: Optional[pd.DataFrame] = None,
26+
base_period: Union[int, Sequence[int]] = 1,
27+
axis: Axis = 1,
28+
) -> pd.Series:
2429
"""Calculates an index given prices and an index method, with
2530
optional arguments for base price imputation.
2631
2732
Parameters
2833
----------
29-
prices: DataFrame
34+
prices : DataFrame
3035
The prices with which to calculate the index.
31-
method: {'jevons', 'dutot', 'carli', 'laspeyres', 'geometric_laspeyres'}
36+
index_method : {'jevons', 'dutot', 'carli', 'laspeyres', 'geometric_laspeyres'}
3237
Method to calculate the index.
3338
shift_imputed_values : bool, defaults to False
3439
True if imputed values are shifted onto the following period.
35-
to_impute: DataFrame, optional
40+
to_impute : DataFrame, optional
3641
A boolean mask of where to impute.
37-
weights: DataFrame, optional
42+
weights : DataFrame, optional
3843
The weights to use if the index method requires it.
39-
adjustments: DataFrame, optional
40-
Adjustment values to apply to prices for quality adjustment. If
41-
there is no adjustment for a price then the adjustment value
42-
should be zero.
43-
exclusions: DataFrame, optional
44+
adjustments : DataFrame, optional
45+
Adjustment factors to multiply by base prices for quality
46+
adjustments. A factor of 1 means no adjustment takes place.
47+
exclusions : DataFrame, optional
4448
A boolean mask of prices to exclude from the final index
4549
calculation.
46-
base_period: int, defaults to 1
47-
Base period to select initial base prices from.
50+
base_prices : DataFrame, optional
51+
Base prices to be forward filled and shifted before used in
52+
index calculation.
53+
base_period : int, or list of ints, defaults to 1
54+
Base period/s to select initial base prices from.
4855
axis : {0 or 'index', 1 or 'columns'}, defaults to 1
4956
The axis that holds the time series values.
5057
5158
Returns
5259
-------
5360
Series
5461
The index.
62+
5563
"""
5664
axis = _handle_axis(axis)
5765

@@ -60,8 +68,7 @@ def index_calculator(
6068
# the final index calculation
6169
weights = weights.mask(exclusions, 0)
6270

63-
# Impute the base prices if necessary, if not just take the prices
64-
# in the base period and fill forward
71+
# Impute the base prices if necessary.
6572
if to_impute is not None:
6673
base_prices = impute_base_prices(
6774
prices,
@@ -74,7 +81,14 @@ def index_calculator(
7481
adjustments=adjustments,
7582
)
7683
else:
77-
base_prices = get_base_prices(prices, base_period, axis)
84+
if base_prices is not None:
85+
base_prices = ffill_shift(base_prices, axis)
86+
else:
87+
# Get base prices from prices.
88+
base_prices = get_base_prices(prices, base_period, axis)
89+
90+
if adjustments is not None:
91+
base_prices *= adjustments
7892

7993
return calculate_index(
8094
prices,

0 commit comments

Comments
 (0)