Skip to content

Error in robyn.evaluate_models #1240

Open
@JJohnson-DA

Description

@JJohnson-DA

I am trying to run my own dataset through the Python version. It works as expected until I get to the clustering step (the `robyn.evaluate_models` call), where I run into an error: KeyError: 'dep_var'.

I believe it traces back to line 673, which uses a dataset created with this method. I believe the error is caused by a hard-coded 'dep_var' in the data generator, while the transformation method works on the original dataset, which is not guaranteed to have a column with that name. The full trace is below:

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File /opt/miniconda3/envs/robyn_mmm/lib/python3.12/site-packages/pandas/core/indexes/base.py:3805, in Index.get_loc(self, key)
   3804 try:
-> 3805     return self._engine.get_loc(casted_key)
   3806 except KeyError as err:

File index.pyx:167, in pandas._libs.index.IndexEngine.get_loc()

File index.pyx:196, in pandas._libs.index.IndexEngine.get_loc()

File pandas/_libs/hashtable_class_helper.pxi:7081, in pandas._libs.hashtable.PyObjectHashTable.get_item()

File pandas/_libs/hashtable_class_helper.pxi:7089, in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'dep_var'

The above exception was the direct cause of the following exception:

KeyError                                  Traceback (most recent call last)
Cell In[51], line 10
      1 # %% Evaluate Models
      2 configs = ClusteringConfig(
      3     dep_var_type=DependentVarType(mmm_data.mmmdata_spec.dep_var_type),
      4     cluster_by=ClusterBy.HYPERPARAMETERS,
   (...)      7     weights=[1.0, 1.0, 1.0],
      8 )
---> 10 robyn.evaluate_models(cluster_config=configs)

File /opt/miniconda3/envs/robyn_mmm/lib/python3.12/site-packages/robyn/robyn.py:260, in Robyn.evaluate_models(self, pareto_config, cluster_config, display_plots, export_plots)
    249 pareto_config = pareto_config or {
    250     "pareto_fronts": "auto",
    251     "min_candidates": 100,
    252 }
    253 pareto_optimizer = ParetoOptimizer(
    254     mmm_data=self.mmm_data,
    255     model_outputs=self.model_outputs,
   (...)    258     holidays_data=self.holidays_data,
    259 )
--> 260 self.pareto_result = pareto_optimizer.optimize(**pareto_config)
    261 unfiltered_pareto_result = copy.deepcopy(self.pareto_result)
    263 # Optional clustering

File /opt/miniconda3/envs/robyn_mmm/lib/python3.12/site-packages/robyn/modeling/pareto/pareto_optimizer.py:142, in ParetoOptimizer.optimize(self, pareto_fronts, min_candidates, calibration_constraint, calibrated)
    136 pareto_data = self.prepare_pareto_data(
    137     aggregated_data, pareto_fronts, min_candidates, calibrated
    138 )
    139 pareto_data = self.response_curve_calculator.compute_response_curves(
    140     pareto_data, aggregated_data
    141 )
--> 142 plotting_data = self.plot_data_generator.generate_plot_data(
    143     aggregated_data, pareto_data
    144 )
    146 self.logger.info("Pareto optimization completed successfully")
    147 return ParetoResult(
    148     pareto_solutions=plotting_data["pareto_solutions"],
    149     pareto_fronts=max(pareto_data.pareto_fronts),
   (...)    156     df_caov_pct_all=plotting_data["df_caov_pct_all"],
    157 )

File /opt/miniconda3/envs/robyn_mmm/lib/python3.12/site-packages/robyn/modeling/pareto/plot_data_generator.py:104, in PlotDataGenerator.generate_plot_data(self, aggregated_data, pareto_data)
    102         except Exception as e:
    103             self.logger.error(f"Error processing solution {sid}: {str(e)}")
--> 104             raise e
    106 pareto_solutions = set()
    107 if "sol_id" in xDecompVecCollect.columns:

File /opt/miniconda3/envs/robyn_mmm/lib/python3.12/site-packages/robyn/modeling/pareto/plot_data_generator.py:77, in PlotDataGenerator.generate_plot_data(self, aggregated_data, pareto_data)
     75 for sid in tqdm(uniqueSol, desc="Processing Solutions", unit="solution"):
     76     try:
---> 77         plot_results = self._process_single_solution(
     78             sid,
     79             plotMediaShare,
     80             plotWaterfall,
     81             pareto_data,
     82             aggregated_data,
     83             dt_mod,
     84             dt_modRollWind,
     85             rw_start_loc,
     86             rw_end_loc,
     87         )
     89         mediaVecCollect = pd.concat(
     90             [mediaVecCollect, plot_results["mediaVecCollect"]],
     91             ignore_index=True,
     92         )
     93         xDecompVecCollect = pd.concat(
     94             [xDecompVecCollect, plot_results["xDecompVec"]],
     95             ignore_index=True,
     96         )

File /opt/miniconda3/envs/robyn_mmm/lib/python3.12/site-packages/robyn/modeling/pareto/plot_data_generator.py:293, in PlotDataGenerator._process_single_solution(self, sid, plotMediaShare, plotWaterfall, pareto_data, aggregated_data, dt_mod, dt_modRollWind, rw_start_loc, rw_end_loc)
    290 self.logger.debug(f"Generated plot6data, fitted vs residual, for sid: {sid}")
    292 # 7. Immediate vs carryover response
--> 293 plot7data = self.robyn_immcarr(
    294     pareto_data, aggregated_data["result_hyp_param"], sid
    295 )
    296 self.logger.debug(
    297     f"Generated plot7data, immediate vs carryover, for sid: {sid}"
    298 )
    299 mediaVecCollect = pd.concat(
    300     [
    301         dt_transformPlot.assign(type="rawMedia", sol_id=sid),
   (...)    311     ignore_index=True,
    312 )

File /opt/miniconda3/envs/robyn_mmm/lib/python3.12/site-packages/robyn/modeling/pareto/plot_data_generator.py:637, in PlotDataGenerator.robyn_immcarr(self, pareto_data, result_hyp_param, sol_id, start_date, end_date)
    630     # print("Column 'revenue' renamed to 'dep_var'.")
    631 else:
    632     # print("Column 'revenue' does not exist.")
    633     pass
    634 decompCollect = self._model_decomp(
    635     inputs={
    636         "coefs": coefs_df,
--> 637         "y_pred": dt_saturated_dfs.dt_modSaturated["dep_var"].iloc[
    638             rollingWindow
    639         ],
    640         "dt_modSaturated": dt_saturated_dfs.dt_modSaturated.iloc[rollingWindow],
    641         "dt_saturatedImmediate": dt_saturated_dfs.dt_saturatedImmediate.iloc[
    642             rollingWindow
    643         ],
    644         "dt_saturatedCarryover": dt_saturated_dfs.dt_saturatedCarryover.iloc[
    645             rollingWindow
    646         ],
    647         "dt_modRollWind": self.featurized_mmm_data.dt_modRollWind.iloc[
    648             rollingWindow
    649         ],
    650         "refreshAddedStart": start_date,
    651     }
    652 )
    654 # Media decomposition
    655 mediaDecompImmediate = decompCollect["mediaDecompImmediate"].drop(
    656     columns=["ds", "y"], errors="ignore"
    657 )

File /opt/miniconda3/envs/robyn_mmm/lib/python3.12/site-packages/pandas/core/frame.py:4102, in DataFrame.__getitem__(self, key)
   4100 if self.columns.nlevels > 1:
   4101     return self._getitem_multilevel(key)
-> 4102 indexer = self.columns.get_loc(key)
   4103 if is_integer(indexer):
   4104     indexer = [indexer]

File /opt/miniconda3/envs/robyn_mmm/lib/python3.12/site-packages/pandas/core/indexes/base.py:3812, in Index.get_loc(self, key)
   3807     if isinstance(casted_key, slice) or (
   3808         isinstance(casted_key, abc.Iterable)
   3809         and any(isinstance(x, slice) for x in casted_key)
   3810     ):
   3811         raise InvalidIndexError(key)
-> 3812     raise KeyError(key) from err
   3813 except TypeError:
   3814     # If we have a listlike key, _check_indexing_error will raise
   3815     #  InvalidIndexError. Otherwise we fall through and re-raise
   3816     #  the TypeError.
   3817     self._check_indexing_error(key)

KeyError: 'dep_var'

Metadata

Metadata

Assignees

Labels

No labels
No labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions