diff --git a/jobs/kpi-forecasting/README.md b/jobs/kpi-forecasting/README.md index 2f4c51e4..cffc3299 100644 --- a/jobs/kpi-forecasting/README.md +++ b/jobs/kpi-forecasting/README.md @@ -66,8 +66,7 @@ install_name_tool -add_rpath /PATH/TO/CONDA/envs/kpi-forecasting-dev/lib/cmdstan ### Running locally A metric can be forecasted by using a command line argument that passes the relevant YAML file to the `kpi_forecasting.py` script. -[Here are approaches for accessing a Docker container's terminal](https://docs.docker.com/desktop/use-desktop/container/#integrated-terminal). - +[Here are approaches for accessing a Docker container's terminal](https://docs.docker.com/desktop/use-desktop/container/#integrated-terminal). The `--no-write` argument can also be passed to essentially run a test and ensure the pipeline runs end-to-end. For example, the following command forecasts Desktop DAU numbers: ```sh @@ -182,3 +181,8 @@ The forecast objects in this repo implement an interface similar to `sklearn` or The `BaseEnsembleForecast` makes it possible to fit multiple models over the data, where different subsets of the data have different models applied to them. These subsets are referred to as "segments" in the code. Only one kind of model is supported, and different instances of this model are fit over the different segments. The type of model is set by the `model_class` argument, and should be a class that implements the same interface as `BaseForecast`. The `fit` and `predict` methods in `BaseEnsembleForecast` determine which segment each row of incoming data belongs to and uses the `fit` and `predict` methods of the model class on the segment. This can be seen in the `FunnelForecast` object, which uses the `BaseEnsembleForecast` with `ProphetAutotunerForecast` as the model_class. 
+## Testing +Before merging, run the pipeline with the `--no-write` flag to ensure it runs end-to-end, i.e.: + +`python ./kpi_forecasting.py --no-write -c ./kpi_forecasting/configs/dau_mobile.yaml` + diff --git a/jobs/kpi-forecasting/kpi_forecasting.py b/jobs/kpi-forecasting/kpi_forecasting.py index e340a567..a5ee32ce 100644 --- a/jobs/kpi-forecasting/kpi_forecasting.py +++ b/jobs/kpi-forecasting/kpi_forecasting.py @@ -1,7 +1,6 @@ import pandas as pd from datetime import datetime, timezone, timedelta import json -import pickle from kpi_forecasting.inputs import CLI, load_yaml from kpi_forecasting.models.prophet_forecast import ( @@ -71,7 +70,7 @@ def get_predict_dates(self, observed_df): or self._default_end_date() ) return pd.DataFrame( - {"submission_date": pd.date_range(start_date, end_date).date} + {"submission_date": pd.date_range(self.start_date, self.end_date).date} ) def fit(self, observed_df): @@ -160,6 +159,7 @@ def _default_end_date(self) -> str: def main() -> None: # Load the config config_path = CLI().args.config + will_write = CLI().args.write pipeline = KPIPipeline(config_path) @@ -169,10 +169,8 @@ def main() -> None: summarized = pipeline.predict_and_summarize( fit_model, predict_dates.copy(), observed_df ) - pipeline.write_results(fit_model, summarized, predict_dates.copy()) - - with open("main_model.pkl", "wb") as f: - pickle.dump(fit_model, f) + if will_write: + pipeline.write_results(fit_model, summarized, predict_dates.copy()) if __name__ == "__main__": diff --git a/jobs/kpi-forecasting/kpi_forecasting/inputs.py b/jobs/kpi-forecasting/kpi_forecasting/inputs.py index 14da5545..4271e47e 100644 --- a/jobs/kpi-forecasting/kpi_forecasting/inputs.py +++ b/jobs/kpi-forecasting/kpi_forecasting/inputs.py @@ -16,6 +16,13 @@ def __post_init__(self) -> None: self.parser.add_argument( "-c", "--config", type=str, help="Path to configuration yaml file" ) + self.parser.add_argument( + "--write", + type=bool, + help="If true, write results", + default=True, + 
action=argparse.BooleanOptionalAction, + ) self.args = self.parser.parse_args()