Open
Description
I am trying to run my own dataset through the Python version. It works as expected until I get to the clustering step, where I run into an error: `KeyError: 'dep_var'`.
I believe it traces back to line 673, which uses a dataset created with this method. I believe the error is caused by a hard-coded `'dep_var'` in the data generator, while the dataset used in the transformation method is the original dataset, which is not guaranteed to have this as a column. The full trace is below:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File /opt/miniconda3/envs/robyn_mmm/lib/python3.12/site-packages/pandas/core/indexes/base.py:3805, in Index.get_loc(self, key)
3804 try:
-> 3805 return self._engine.get_loc(casted_key)
3806 except KeyError as err:
File index.pyx:167, in pandas._libs.index.IndexEngine.get_loc()
File index.pyx:196, in pandas._libs.index.IndexEngine.get_loc()
File pandas/_libs/hashtable_class_helper.pxi:7081, in pandas._libs.hashtable.PyObjectHashTable.get_item()
File pandas/_libs/hashtable_class_helper.pxi:7089, in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'dep_var'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
Cell In[51], line 10
1 # %% Evaluate Models
2 configs = ClusteringConfig(
3 dep_var_type=DependentVarType(mmm_data.mmmdata_spec.dep_var_type),
4 cluster_by=ClusterBy.HYPERPARAMETERS,
(...) 7 weights=[1.0, 1.0, 1.0],
8 )
---> 10 robyn.evaluate_models(cluster_config=configs)
File /opt/miniconda3/envs/robyn_mmm/lib/python3.12/site-packages/robyn/robyn.py:260, in Robyn.evaluate_models(self, pareto_config, cluster_config, display_plots, export_plots)
249 pareto_config = pareto_config or {
250 "pareto_fronts": "auto",
251 "min_candidates": 100,
252 }
253 pareto_optimizer = ParetoOptimizer(
254 mmm_data=self.mmm_data,
255 model_outputs=self.model_outputs,
(...) 258 holidays_data=self.holidays_data,
259 )
--> 260 self.pareto_result = pareto_optimizer.optimize(**pareto_config)
261 unfiltered_pareto_result = copy.deepcopy(self.pareto_result)
263 # Optional clustering
File /opt/miniconda3/envs/robyn_mmm/lib/python3.12/site-packages/robyn/modeling/pareto/pareto_optimizer.py:142, in ParetoOptimizer.optimize(self, pareto_fronts, min_candidates, calibration_constraint, calibrated)
136 pareto_data = self.prepare_pareto_data(
137 aggregated_data, pareto_fronts, min_candidates, calibrated
138 )
139 pareto_data = self.response_curve_calculator.compute_response_curves(
140 pareto_data, aggregated_data
141 )
--> 142 plotting_data = self.plot_data_generator.generate_plot_data(
143 aggregated_data, pareto_data
144 )
146 self.logger.info("Pareto optimization completed successfully")
147 return ParetoResult(
148 pareto_solutions=plotting_data["pareto_solutions"],
149 pareto_fronts=max(pareto_data.pareto_fronts),
(...) 156 df_caov_pct_all=plotting_data["df_caov_pct_all"],
157 )
File /opt/miniconda3/envs/robyn_mmm/lib/python3.12/site-packages/robyn/modeling/pareto/plot_data_generator.py:104, in PlotDataGenerator.generate_plot_data(self, aggregated_data, pareto_data)
102 except Exception as e:
103 self.logger.error(f"Error processing solution {sid}: {str(e)}")
--> 104 raise e
106 pareto_solutions = set()
107 if "sol_id" in xDecompVecCollect.columns:
File /opt/miniconda3/envs/robyn_mmm/lib/python3.12/site-packages/robyn/modeling/pareto/plot_data_generator.py:77, in PlotDataGenerator.generate_plot_data(self, aggregated_data, pareto_data)
75 for sid in tqdm(uniqueSol, desc="Processing Solutions", unit="solution"):
76 try:
---> 77 plot_results = self._process_single_solution(
78 sid,
79 plotMediaShare,
80 plotWaterfall,
81 pareto_data,
82 aggregated_data,
83 dt_mod,
84 dt_modRollWind,
85 rw_start_loc,
86 rw_end_loc,
87 )
89 mediaVecCollect = pd.concat(
90 [mediaVecCollect, plot_results["mediaVecCollect"]],
91 ignore_index=True,
92 )
93 xDecompVecCollect = pd.concat(
94 [xDecompVecCollect, plot_results["xDecompVec"]],
95 ignore_index=True,
96 )
File /opt/miniconda3/envs/robyn_mmm/lib/python3.12/site-packages/robyn/modeling/pareto/plot_data_generator.py:293, in PlotDataGenerator._process_single_solution(self, sid, plotMediaShare, plotWaterfall, pareto_data, aggregated_data, dt_mod, dt_modRollWind, rw_start_loc, rw_end_loc)
290 self.logger.debug(f"Generated plot6data, fitted vs residual, for sid: {sid}")
292 # 7. Immediate vs carryover response
--> 293 plot7data = self.robyn_immcarr(
294 pareto_data, aggregated_data["result_hyp_param"], sid
295 )
296 self.logger.debug(
297 f"Generated plot7data, immediate vs carryover, for sid: {sid}"
298 )
299 mediaVecCollect = pd.concat(
300 [
301 dt_transformPlot.assign(type="rawMedia", sol_id=sid),
(...) 311 ignore_index=True,
312 )
File /opt/miniconda3/envs/robyn_mmm/lib/python3.12/site-packages/robyn/modeling/pareto/plot_data_generator.py:637, in PlotDataGenerator.robyn_immcarr(self, pareto_data, result_hyp_param, sol_id, start_date, end_date)
630 # print("Column 'revenue' renamed to 'dep_var'.")
631 else:
632 # print("Column 'revenue' does not exist.")
633 pass
634 decompCollect = self._model_decomp(
635 inputs={
636 "coefs": coefs_df,
--> 637 "y_pred": dt_saturated_dfs.dt_modSaturated["dep_var"].iloc[
638 rollingWindow
639 ],
640 "dt_modSaturated": dt_saturated_dfs.dt_modSaturated.iloc[rollingWindow],
641 "dt_saturatedImmediate": dt_saturated_dfs.dt_saturatedImmediate.iloc[
642 rollingWindow
643 ],
644 "dt_saturatedCarryover": dt_saturated_dfs.dt_saturatedCarryover.iloc[
645 rollingWindow
646 ],
647 "dt_modRollWind": self.featurized_mmm_data.dt_modRollWind.iloc[
648 rollingWindow
649 ],
650 "refreshAddedStart": start_date,
651 }
652 )
654 # Media decomposition
655 mediaDecompImmediate = decompCollect["mediaDecompImmediate"].drop(
656 columns=["ds", "y"], errors="ignore"
657 )
File /opt/miniconda3/envs/robyn_mmm/lib/python3.12/site-packages/pandas/core/frame.py:4102, in DataFrame.__getitem__(self, key)
4100 if self.columns.nlevels > 1:
4101 return self._getitem_multilevel(key)
-> 4102 indexer = self.columns.get_loc(key)
4103 if is_integer(indexer):
4104 indexer = [indexer]
File /opt/miniconda3/envs/robyn_mmm/lib/python3.12/site-packages/pandas/core/indexes/base.py:3812, in Index.get_loc(self, key)
3807 if isinstance(casted_key, slice) or (
3808 isinstance(casted_key, abc.Iterable)
3809 and any(isinstance(x, slice) for x in casted_key)
3810 ):
3811 raise InvalidIndexError(key)
-> 3812 raise KeyError(key) from err
3813 except TypeError:
3814 # If we have a listlike key, _check_indexing_error will raise
3815 # InvalidIndexError. Otherwise we fall through and re-raise
3816 # the TypeError.
3817 self._check_indexing_error(key)
KeyError: 'dep_var'
Metadata
Metadata
Assignees
Labels
No labels