MAINT Replace R^2 with MAE in exercise M3.02 #830

Merged: 11 commits, Apr 22, 2025
5 changes: 5 additions & 0 deletions notebooks/parameter_tuning_ex_03.ipynb
@@ -62,6 +62,11 @@
"- the parameter `with_std` of the `StandardScaler` with possible values `True`\n",
"  or `False`.\n",
"\n",
"Use the mean absolute error as the model performance metric. Remember that a\n",
"score function should always return higher values for better performance.\n",
"Therefore, we should choose its negated version as the scoring metric:\n",
"`neg_mean_absolute_error`.\n",
"\n",
"Notice that in the notebook \"Hyperparameter tuning by randomized-search\" we\n",
"pass distributions to be sampled by the `RandomizedSearchCV`. In this case we\n",
"define a fixed grid of hyperparameters to be explored. Using a `GridSearchCV`\n",
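For reference, a minimal sketch of the kind of search these instructions describe, with the negated MAE passed as the `scoring` parameter of a `GridSearchCV`. The `KNeighborsRegressor`, the dataset, and the step names are assumptions, since they are not visible in this hunk:

# Hedged sketch only: the regressor, dataset, and step names are assumptions.
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

X, y = fetch_california_housing(return_X_y=True, as_frame=True)

model = Pipeline(
    [("scaler", StandardScaler()), ("regressor", KNeighborsRegressor())]
)
param_grid = {"scaler__with_std": [True, False]}  # extend with other parameters

model_grid_search = GridSearchCV(
    model,
    param_grid=param_grid,
    scoring="neg_mean_absolute_error",  # higher (less negative) means better
    n_jobs=2,
)
model_grid_search.fit(X, y)
# best_score_ is the negated MAE of the best candidate, hence the sign flip:
print(f"Best MAE: {-model_grid_search.best_score_:.2f}")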
34 changes: 30 additions & 4 deletions notebooks/parameter_tuning_sol_03.ipynb
@@ -68,6 +68,11 @@
"- the parameter `with_std` of the `StandardScaler` with possible values `True`\n",
"  or `False`.\n",
"\n",
"Use the mean absolute error as the model performance metric. Remember that a\n",
"score function should always return higher values for better performance.\n",
"Therefore, we should choose its negated version as the scoring metric:\n",
"`neg_mean_absolute_error`.\n",
"\n",
"Notice that in the notebook \"Hyperparameter tuning by randomized-search\" we\n",
"pass distributions to be sampled by the `RandomizedSearchCV`. In this case we\n",
"define a fixed grid of hyperparameters to be explored. Using a `GridSearchCV`\n",
@@ -103,6 +108,7 @@
"model_random_search = RandomizedSearchCV(\n",
" model,\n",
" param_distributions=param_distributions,\n",
" scoring=\"neg_mean_absolute_error\",\n",
" n_iter=20,\n",
" n_jobs=2,\n",
" verbose=1,\n",
@@ -139,6 +145,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"lines_to_next_cell": 0,
"tags": [
"solution"
]
@@ -150,6 +157,27 @@
"cv_results = pd.DataFrame(model_random_search.cv_results_)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"As we used `neg_mean_absolute_error` as the scoring metric, we multiply the\n",
"resulting scores by -1 to recover the mean absolute error values:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"solution"
]
},
"outputs": [],
"source": [
"cv_results[\"mean_test_score\"] *= -1"
]
},
{
"cell_type": "markdown",
"metadata": {
@@ -180,9 +208,7 @@
"}\n",
"\n",
"cv_results = cv_results.rename(columns=column_name_mapping)\n",
"cv_results = cv_results[column_name_mapping.values()].sort_values(\n",
" \"mean test score\", ascending=False\n",
")"
"cv_results = cv_results[column_name_mapping.values()].sort_values(\"mean test score\")"
]
},
{
@@ -249,7 +275,7 @@
"holding on any axis of the parallel coordinate plot. You can then slide (move)\n",
"the range selection and cross two selections to see the intersections.\n",
"\n",
"Selecting the best performing models (i.e. with a MAE below ~47), we\n",
"Selecting the best performing models (i.e. below MEA score of ~47), we\n",
"observe that **in this case**:\n",
"\n",
"- scaling the data is important. All the best performing models use scaled\n",
5 changes: 5 additions & 0 deletions python_scripts/parameter_tuning_ex_03.py
@@ -50,6 +50,11 @@
# - the parameter `with_std` of the `StandardScaler` with possible values `True`
# or `False`.
#
# Use the mean absolute error as the model performance metric. Remember that a
# score function should always return higher values for better performance.
# Therefore, we should choose its negated version as the scoring metric:
# `neg_mean_absolute_error`.
#
# Notice that in the notebook "Hyperparameter tuning by randomized-search" we
# pass distributions to be sampled by the `RandomizedSearchCV`. In this case we
# define a fixed grid of hyperparameters to be explored. Using a `GridSearchCV`
16 changes: 14 additions & 2 deletions python_scripts/parameter_tuning_sol_03.py
@@ -50,6 +50,11 @@
# - the parameter `with_std` of the `StandardScaler` with possible values `True`
# or `False`.
#
# Use the mean absolute error as the model performance metric. Remember that a
# score function should always return higher values for better performance.
# Therefore, we should choose its negated version as the scoring metric:
# `neg_mean_absolute_error`.
#
# Notice that in the notebook "Hyperparameter tuning by randomized-search" we
# pass distributions to be sampled by the `RandomizedSearchCV`. In this case we
# define a fixed grid of hyperparameters to be explored. Using a `GridSearchCV`
@@ -79,6 +84,7 @@
model_random_search = RandomizedSearchCV(
model,
param_distributions=param_distributions,
scoring="neg_mean_absolute_error",
n_iter=20,
n_jobs=2,
verbose=1,
@@ -106,6 +112,12 @@
import pandas as pd

cv_results = pd.DataFrame(model_random_search.cv_results_)
# %% [markdown]
# As we used `neg_mean_absolute_error` as the scoring metric, we multiply the
# resulting scores by -1 to recover the mean absolute error values:

# %% tags=["solution"]
cv_results["mean_test_score"] *= -1
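For intuition (this is not part of the diff), the built-in `neg_mean_absolute_error` scorer is exactly the negated `mean_absolute_error`, which is why flipping the sign recovers the MAE. A tiny standalone check, using toy data and a dummy model as placeholders:

# Toy data and a dummy model as placeholders, only to show the sign convention.
import numpy as np
from sklearn.dummy import DummyRegressor
from sklearn.metrics import get_scorer, mean_absolute_error

X = np.arange(10).reshape(-1, 1)
y = np.arange(10, dtype=float)

model = DummyRegressor(strategy="mean").fit(X, y)
scorer = get_scorer("neg_mean_absolute_error")

neg_mae = scorer(model, X, y)                   # negative: higher is better
mae = mean_absolute_error(y, model.predict(X))  # positive: lower is better
assert np.isclose(neg_mae, -mae)
print(neg_mae, mae)  # -2.5 and 2.5 for this toy example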

# %% [markdown] tags=["solution"]
# To simplify the axis of the plot, we rename the column of the dataframe and
@@ -121,7 +133,7 @@

cv_results = cv_results.rename(columns=column_name_mapping)
cv_results = cv_results[column_name_mapping.values()].sort_values(
"mean test score", ascending=False
"mean test score"
)
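A side note on dropping `ascending=False` above: since the reported metric is now an error (lower is better) rather than R2, the default ascending sort of `sort_values` already lists the best models first. A small illustration with made-up MAE values:

# Made-up MAE values, only to illustrate the sort direction.
import pandas as pd

demo = pd.DataFrame({"mean test score": [52.1, 45.3, 47.8]})
print(demo.sort_values("mean test score"))  # lowest (best) MAE comes first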

# %% [markdown] tags=["solution"]
@@ -153,7 +165,7 @@
# holding on any axis of the parallel coordinate plot. You can then slide (move)
# the range selection and cross two selections to see the intersections.
#
# Selecting the best performing models (i.e. with a MAE below ~47), we
# Selecting the best performing models (i.e. below MEA score of ~47), we
# observe that **in this case**:
#
# - scaling the data is important. All the best performing models use scaled
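The interactive parallel coordinate plot discussed above can be produced with `plotly.express`; a hedged sketch with made-up values follows (the column names and exact plotting call in the notebook may differ, and in the solution the renamed `cv_results` dataframe would be passed instead of the demo frame):

# Hedged sketch with made-up results, only to show the plotting call.
# Column names are hypothetical; boolean hyperparameters are encoded as 0/1
# so that every axis is numeric, and "mean test score" holds the MAE.
import pandas as pd
import plotly.express as px

demo_results = pd.DataFrame(
    {
        "centering": [1, 0, 1, 0],                    # with_mean cast to int
        "scaling": [1, 1, 0, 0],                      # with_std cast to int
        "mean test score": [45.8, 46.5, 51.2, 52.9],  # MAE, lower is better
    }
)
fig = px.parallel_coordinates(demo_results, color="mean test score")
fig.show()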