|
9 | 9 | "\n", |
10 | 10 | "This notebook demonstrates key concepts and tools for training ensemble models.\n", |
11 | 11 | "\n", |
| 12 | + "1. Baseline models\n", |
| 13 | + " - Logistic regression\n", |
| 14 | + " - Decision tree\n", |
| 15 | + "2. Parallel ensembles\n", |
| 16 | + " - Voting ensemble\n", |
| 17 | + " - Bagging ensemble\n", |
| 18 | + " - Random forest\n", |
| 19 | + "3. Serial (sequential) ensembles\n", |
| 20 | + " - AdaBoost\n", |
| 21 | + " - Gradient boosting\n", |
| 22 | + " - Stacking ensemble\n", |
| 23 | + "4. Model comparison\n", |
| 24 | + " - Score comparison\n", |
| 25 | + " - Confusion matrix comparison\n", |
| 26 | + "5. Model metric optimization\n", |
| 27 | + " - ROC_AUC optimized thresholds\n", |
| 28 | + " - F1 optimized thresholds\n", |
| 29 | + "\n", |
12 | 30 | "## Notebook setup\n",
13 | 31 | "\n", |
14 | 32 | "### Imports" |
|
21 | 39 | "metadata": {}, |
22 | 40 | "outputs": [], |
23 | 41 | "source": [ |
24 | | - "import pandas as pd\n", |
25 | | - "# import numpy as np\n", |
26 | 42 | "import matplotlib.pyplot as plt\n", |
| 43 | + "import pandas as pd\n", |
27 | 44 | "\n", |
28 | 45 | "from sklearn.datasets import make_classification\n", |
29 | | - "from sklearn.model_selection import train_test_split, cross_validate, TunedThresholdClassifierCV\n", |
30 | 46 | "from sklearn.linear_model import LogisticRegression\n", |
31 | | - "from sklearn.tree import DecisionTreeClassifier\n", |
| 47 | + "from sklearn.metrics import ConfusionMatrixDisplay\n", |
| 48 | + "from sklearn.model_selection import train_test_split, cross_validate, TunedThresholdClassifierCV\n", |
32 | 49 | "from sklearn.svm import SVC\n", |
| 50 | + "from sklearn.tree import DecisionTreeClassifier\n", |
| 51 | + "\n", |
| 52 | + "# Ensemble models\n", |
33 | 53 | "from sklearn.ensemble import (\n", |
34 | | - " VotingClassifier,\n", |
35 | | - " BaggingClassifier,\n", |
36 | | - " RandomForestClassifier,\n", |
37 | 54 | " AdaBoostClassifier,\n", |
| 55 | + " BaggingClassifier,\n", |
38 | 56 | " GradientBoostingClassifier,\n", |
39 | | - " StackingClassifier\n", |
40 | | - ")\n", |
41 | | - "from sklearn.metrics import (\n", |
42 | | - " # roc_auc_score,\n", |
43 | | - " # f1_score,\n", |
44 | | - " # confusion_matrix,\n", |
45 | | - " ConfusionMatrixDisplay\n", |
| 57 | + " RandomForestClassifier,\n", |
| 58 | + " StackingClassifier,\n", |
| 59 | + " VotingClassifier,\n", |
46 | 60 | ")" |
47 | 61 | ] |
48 | 62 | }, |
|
602 | 616 | "id": "78cb3b9b", |
603 | 617 | "metadata": {}, |
604 | 618 | "source": [ |
605 | | - "### 3.2. Gradient Boosting\n", |
| 619 | + "### 3.2. Gradient boosting\n", |
606 | 620 | "\n", |
607 | 621 | "Builds models sequentially where each new model is trained to predict the residual errors of the previous ensemble, using gradient descent to minimize a loss function.\n", |
608 | 622 | "\n", |
|
1016 | 1030 | "id": "5e23b03a", |
1017 | 1031 | "metadata": {}, |
1018 | 1032 | "source": [ |
1019 | | - "## 5. Model Metric Optimization" |
| 1033 | + "## 5. Model metric optimization" |
1020 | 1034 | ] |
1021 | 1035 | }, |
1022 | 1036 | { |
|
1034 | 1048 | "id": "fa9ee276", |
1035 | 1049 | "metadata": {}, |
1036 | 1050 | "source": [ |
1037 | | - "### 5.1. ROC_AUC Optimized Thresholds" |
| 1051 | + "### 5.1. ROC_AUC optimized thresholds" |
1038 | 1052 | ] |
1039 | 1053 | }, |
1040 | 1054 | { |
|
1196 | 1210 | "id": "783e5fef", |
1197 | 1211 | "metadata": {}, |
1198 | 1212 | "source": [ |
1199 | | - "### 5.2. F1 Optimized Thresholds" |
| 1213 | + "### 5.2. F1 optimized thresholds" |
1200 | 1214 | ] |
1201 | 1215 | }, |
1202 | 1216 | { |
|
0 commit comments