|
80 | 80 | "metadata": {}, |
81 | 81 | "outputs": [], |
82 | 82 | "source": [ |
83 | | - "batch_size = 10000 # Training images come in 5 batches of 10,000\n", |
| 83 | + "batch_size = 1000 # Mini-batch size (the dataset ships in 5 files of 10,000 images)\n", |
84 | 84 | "learning_rate = 1e-3\n", |
85 | 85 | "epochs = 30\n", |
86 | 86 | "print_every = 5 # Print training progress every n epochs\n", |
|
218 | 218 | "metadata": {}, |
219 | 219 | "outputs": [], |
220 | 220 | "source": [ |
221 | | - "# TODO: Modify this architecture\n", |
222 | 221 | "num_classes = 10\n", |
223 | 222 | "\n", |
224 | 223 | "model_exp1 = nn.Sequential(\n", |
225 | | - " # Conv block: 1 -> 32 channels, 32 x 32 -> 16 x 16\n", |
| 224 | + "\n", |
| 225 | + " # Conv block: grayscale input\n", |
226 | 226 | " nn.Conv2d(1, 32, kernel_size=3, padding=1),\n", |
227 | 227 | " nn.BatchNorm2d(32),\n", |
228 | 228 | " nn.ReLU(),\n", |
|
234 | 234 | " \n", |
235 | 235 | " # Classifier\n", |
236 | 236 | " nn.Flatten(),\n", |
237 | | - " nn.Linear(32 * 16 * 16, 128), # TODO: Update input size if you add more layers\n", |
| 237 | + " nn.Linear(32 * 16 * 16, 128),\n", |
238 | 238 | " nn.ReLU(),\n", |
239 | 239 | " nn.Dropout(0.5),\n", |
240 | 240 | " nn.Linear(128, num_classes)\n", |
| 241 | + "\n", |
241 | 242 | ").to(device)\n", |
242 | 243 | "\n", |
243 | 244 | "trainable_params = sum(p.numel() for p in model_exp1.parameters() if p.requires_grad)\n", |
|
267 | 268 | " criterion: nn.Module,\n", |
268 | 269 | " optimizer: optim.Optimizer,\n", |
269 | 270 | " epochs: int = 10,\n", |
270 | | - " print_every: int = 1\n", |
| 271 | + " print_every: int = 1,\n", |
| 272 | + " device: torch.device = None\n", |
271 | 273 | ") -> dict[str, list[float]]:\n", |
272 | 274 | " '''Training loop for PyTorch classification model.\n", |
273 | 275 | " \n", |
274 | | - " Note: Assumes data is already on the correct device.\n", |
| 276 | + " Args:\n", |
| 277 | + " device: If provided, moves batches to this device on-the-fly.\n", |
| 278 | + " If None, assumes data is already on the correct device.\n", |
275 | 279 | " '''\n", |
276 | 280 | "\n", |
277 | 281 | " history = {'train_loss': [], 'val_loss': [], 'train_accuracy': [], 'val_accuracy': []}\n", |
|
285 | 289 | " total = 0\n", |
286 | 290 | "\n", |
287 | 291 | " for images, labels in train_loader:\n", |
| 292 | + " \n", |
| 293 | + " # Move batch to device if specified\n", |
| 294 | + " if device is not None:\n", |
| 295 | + " images, labels = images.to(device), labels.to(device)\n", |
288 | 296 | "\n", |
289 | 297 | " # Forward pass\n", |
290 | 298 | " optimizer.zero_grad()\n", |
|
314 | 322 | " with torch.no_grad():\n", |
315 | 323 | "\n", |
316 | 324 | " for images, labels in val_loader:\n", |
| 325 | + " \n", |
| 326 | + " # Move batch to device if specified\n", |
| 327 | + " if device is not None:\n", |
| 328 | + " images, labels = images.to(device), labels.to(device)\n", |
317 | 329 | "\n", |
318 | 330 | " outputs = model(images)\n", |
319 | 331 | " loss = criterion(outputs, labels)\n", |
|
388 | 400 | "source": [ |
389 | 401 | "def evaluate_model(\n", |
390 | 402 | " model: nn.Module,\n", |
391 | | - " test_loader: DataLoader\n", |
| 403 | + " test_loader: DataLoader,\n", |
| 404 | + " device: torch.device = None\n", |
392 | 405 | ") -> tuple[float, np.ndarray, np.ndarray]:\n", |
393 | 406 | " '''Evaluate model on test set.\n", |
394 | 407 | " \n", |
395 | | - " Note: Assumes data is already on the correct device.\n", |
| 408 | + " Args:\n", |
| 409 | + " device: If provided, moves batches to this device on-the-fly.\n", |
| 410 | + " If None, assumes data is already on the correct device.\n", |
396 | 411 | " '''\n", |
397 | 412 | "\n", |
398 | 413 | " model.eval()\n", |
|
404 | 419 | " with torch.no_grad():\n", |
405 | 420 | "\n", |
406 | 421 | " for images, labels in test_loader:\n", |
| 422 | + " \n", |
| 423 | + " # Move batch to device if specified\n", |
| 424 | + " if device is not None:\n", |
| 425 | + " images, labels = images.to(device), labels.to(device)\n", |
407 | 426 | "\n", |
408 | 427 | " outputs = model(images)\n", |
409 | 428 | " _, predicted = torch.max(outputs.data, 1)\n", |
|
495 | 514 | "source": [ |
496 | 515 | "# TODO: Modify this transform to use RGB instead of grayscale\n", |
497 | 516 | "transform_exp2 = transforms.Compose([\n", |
498 | | - " # TODO: Remove Grayscale transform\n", |
499 | | - " transforms.Grayscale(num_output_channels=1), # Remove this line\n", |
500 | 517 | " transforms.ToTensor(),\n", |
501 | | - " transforms.Normalize((0.5,), (0.5,)) # TODO: Update normalization for 3 channels\n", |
| 518 | + " transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))\n", |
502 | 519 | "])\n", |
503 | 520 | "\n", |
504 | 521 | "# Load training and test datasets with RGB\n", |
|
617 | 634 | "source": [ |
618 | 635 | "# TODO: Update the first conv layer to accept 3 channels\n", |
619 | 636 | "model_exp2 = nn.Sequential(\n", |
620 | | - " # Conv block: TODO: Change input channels from 1 to 3\n", |
| 637 | + "\n", |
| 638 | + " # Conv block: RGB input\n", |
621 | | - "    nn.Conv2d(1, 32, kernel_size=3, padding=1),\n", |
| 639 | + "    nn.Conv2d(3, 32, kernel_size=3, padding=1),\n", |
622 | 640 | " nn.BatchNorm2d(32),\n", |
623 | 641 | " nn.ReLU(),\n", |
|
633 | 651 | " nn.ReLU(),\n", |
634 | 652 | " nn.Dropout(0.5),\n", |
635 | 653 | " nn.Linear(128, num_classes)\n", |
| 654 | + "\n", |
636 | 655 | ").to(device)\n", |
637 | 656 | "\n", |
638 | 657 | "trainable_params = sum(p.numel() for p in model_exp2.parameters() if p.requires_grad)\n", |
|
761 | 780 | "transform_train_exp3 = transforms.Compose([\n", |
762 | 781 | " # TODO: Add augmentation transforms here (before ToTensor) \n", |
763 | 782 | " transforms.ToTensor(),\n", |
764 | | - " transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) # Using RGB\n", |
| 783 | + " transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))\n", |
765 | 784 | "])\n", |
766 | 785 | "\n", |
767 | 786 | "# Validation and test transforms (no augmentation)\n", |
|
833 | 852 | "id": "ba0683e5", |
834 | 853 | "metadata": {}, |
835 | 854 | "source": [ |
836 | | - "### 3.3. Create Data Loaders with Augmentation\n", |
837 | | - "\n", |
838 | | - "**Note**: For augmented data, we cannot preload to GPU because each epoch needs different augmentations. We'll use regular DataLoaders." |
| 855 | + "### 3.3. Create Data Loaders with Augmentation" |
839 | 856 | ] |
840 | 857 | }, |
841 | 858 | { |
|
845 | 862 | "metadata": {}, |
846 | 863 | "outputs": [], |
847 | 864 | "source": [ |
848 | | - "# Create DataLoaders (cannot preload augmented data to GPU)\n", |
| 865 | + "# For data augmentation, we must NOT preload data to GPU as tensors.\n", |
| 866 | + "# Transforms need to be applied on-the-fly during each epoch so each \n", |
| 867 | + "# batch sees different augmented versions of the images.\n", |
| 868 | + "\n", |
| 869 | + "# Split training data into train and validation sets using Subset\n", |
| 870 | + "n_train = int(0.8 * len(train_dataset_exp3))\n", |
| 871 | + "n_val = len(train_dataset_exp3) - n_train\n", |
| 872 | + "indices = torch.randperm(len(train_dataset_exp3)).tolist()\n", |
| 873 | + "\n", |
| 874 | + "train_subset_exp3 = torch.utils.data.Subset(train_dataset_exp3, indices[:n_train])\n", |
| 875 | + "val_subset_exp3 = torch.utils.data.Subset(train_dataset_exp3, indices[n_train:])\n", |
| 876 | + "\n", |
| 877 | + "print(f'Training samples: {len(train_subset_exp3)}')\n", |
| 878 | + "print(f'Validation samples: {len(val_subset_exp3)}')\n", |
| 879 | + "print(f'Test samples: {len(test_dataset_exp3)}')" |
| 880 | + ] |
| 881 | + }, |
| 882 | + { |
| 883 | + "cell_type": "code", |
| 884 | + "execution_count": null, |
| 885 | + "id": "70daf63e", |
| 886 | + "metadata": {}, |
| 887 | + "outputs": [], |
| 888 | + "source": [ |
| 889 | + "# Create DataLoaders directly from Dataset/Subset objects\n", |
| 890 | + "# Transforms are applied on-the-fly when batches are loaded\n", |
849 | 891 | "train_loader_exp3 = DataLoader(\n", |
850 | | - " train_dataset_exp3,\n", |
| 892 | + " train_subset_exp3,\n", |
851 | 893 | " batch_size=batch_size,\n", |
852 | 894 | " shuffle=True\n", |
853 | 895 | ")\n", |
854 | 896 | "\n", |
855 | | - "# For validation/test, we can use the same approach as experiment 2\n", |
856 | | - "X_test_exp3 = torch.stack([img for img, _ in test_dataset_exp3]).to(device)\n", |
857 | | - "y_test_exp3 = torch.tensor([label for _, label in test_dataset_exp3]).to(device)\n", |
858 | | - "\n", |
859 | | - "# Create validation split from training data\n", |
860 | | - "n_val = int(0.2 * len(train_dataset_exp3))\n", |
861 | | - "n_train = len(train_dataset_exp3) - n_val\n", |
862 | | - "\n", |
863 | | - "train_subset_exp3, val_subset_exp3 = torch.utils.data.random_split(\n", |
864 | | - " train_dataset_exp3,\n", |
865 | | - " [n_train, n_val]\n", |
| 897 | + "val_loader_exp3 = DataLoader(\n", |
| 898 | + " val_subset_exp3,\n", |
| 899 | + " batch_size=batch_size,\n", |
| 900 | + " shuffle=False\n", |
866 | 901 | ")\n", |
867 | 902 | "\n", |
868 | | - "val_loader_exp3 = DataLoader(val_subset_exp3, batch_size=batch_size, shuffle=False)\n", |
869 | | - "test_tensor_dataset_exp3 = torch.utils.data.TensorDataset(X_test_exp3, y_test_exp3)\n", |
870 | | - "test_loader_exp3 = DataLoader(test_tensor_dataset_exp3, batch_size=batch_size, shuffle=False)\n", |
| 903 | + "test_loader_exp3 = DataLoader(\n", |
| 904 | + " test_dataset_exp3,\n", |
| 905 | + " batch_size=batch_size,\n", |
| 906 | + " shuffle=False\n", |
| 907 | + ")\n", |
871 | 908 | "\n", |
872 | 909 | "print(f'Training batches: {len(train_loader_exp3)}')\n", |
873 | | - "print(f'Validation batches: {len(val_loader_exp3)}')" |
| 910 | + "print(f'Validation batches: {len(val_loader_exp3)}')\n", |
| 911 | + "print(f'Test batches: {len(test_loader_exp3)}')" |
874 | 912 | ] |
875 | 913 | }, |
876 | 914 | { |
|
890 | 928 | "source": [ |
891 | 929 | "# Same architecture as Experiment 2 (RGB)\n", |
892 | 930 | "model_exp3 = nn.Sequential(\n", |
| 931 | + "\n", |
893 | 932 | " # Conv block: RGB input\n", |
894 | 933 | " nn.Conv2d(3, 32, kernel_size=3, padding=1),\n", |
895 | 934 | " nn.BatchNorm2d(32),\n", |
|
906 | 945 | " nn.ReLU(),\n", |
907 | 946 | " nn.Dropout(0.5),\n", |
908 | 947 | " nn.Linear(128, num_classes)\n", |
| 948 | + "\n", |
909 | 949 | ").to(device)\n", |
910 | 950 | "\n", |
911 | 951 | "trainable_params = sum(p.numel() for p in model_exp3.parameters() if p.requires_grad)\n", |
|
932 | 972 | "criterion_exp3 = nn.CrossEntropyLoss()\n", |
933 | 973 | "optimizer_exp3 = optim.Adam(model_exp3.parameters(), lr=learning_rate)\n", |
934 | 974 | "\n", |
| 975 | + "# Pass device to move batches on-the-fly (required for on-the-fly augmentation)\n", |
935 | 976 | "history_exp3 = train_model(\n", |
936 | 977 | " model=model_exp3,\n", |
937 | 978 | " train_loader=train_loader_exp3,\n", |
938 | 979 | " val_loader=val_loader_exp3,\n", |
939 | 980 | " criterion=criterion_exp3,\n", |
940 | 981 | " optimizer=optimizer_exp3,\n", |
941 | 982 | " epochs=epochs,\n", |
942 | | - " print_every=print_every\n", |
| 983 | + " print_every=print_every,\n", |
| 984 | + " device=device\n", |
943 | 985 | ")" |
944 | 986 | ] |
945 | 987 | }, |
946 | | - { |
947 | | - "cell_type": "markdown", |
948 | | - "id": "0faf27d3", |
949 | | - "metadata": {}, |
950 | | - "source": [ |
951 | | - "### 3.5. Train Model with Augmented Data" |
952 | | - ] |
953 | | - }, |
954 | 988 | { |
955 | 989 | "cell_type": "markdown", |
956 | 990 | "id": "85ad8659", |
|
986 | 1020 | "plt.tight_layout()\n", |
987 | 1021 | "plt.show()\n", |
988 | 1022 | "\n", |
989 | | - "# Test accuracy\n", |
990 | | - "test_accuracy_exp3, predictions_exp3, true_labels_exp3 = evaluate_model(model_exp3, test_loader_exp3)\n", |
| 1023 | + "# Test accuracy (pass device for on-the-fly batch loading)\n", |
| 1024 | + "test_accuracy_exp3, predictions_exp3, true_labels_exp3 = evaluate_model(\n", |
| 1025 | + " model_exp3, test_loader_exp3, device=device\n", |
| 1026 | + ")\n", |
991 | 1027 | "print(f'\\nExperiment 3 Test Accuracy: {test_accuracy_exp3:.2f}%')" |
992 | 1028 | ] |
993 | 1029 | }, |
|
1024 | 1060 | "\n", |
1025 | 1061 | "| Experiment | Description | Test Accuracy | Notes |\n", |
1026 | 1062 | "|------------|-------------|---------------|-------|\n", |
1027 | | - "| Baseline (demo) | Grayscale, simple architecture | ~45% | From demo notebook |\n", |
| 1063 | + "| Baseline (demo) | Grayscale, simple architecture | ~60% | From demo notebook |\n", |
1028 | 1064 | "| Experiment 1 | Modified architecture | _% | |\n", |
1029 | 1065 | "| Experiment 2 | RGB images | _% | |\n", |
1030 | 1066 | "| Experiment 3 | Image augmentation | _% | |\n", |
|
1044 | 1080 | "*Your reflections here:*\n", |
1045 | 1081 | "\n" |
1046 | 1082 | ] |
| 1083 | + }, |
| 1084 | + { |
| 1085 | + "cell_type": "markdown", |
| 1086 | + "id": "b8155ac6", |
| 1087 | + "metadata": {}, |
| 1088 | + "source": [] |
1047 | 1089 | } |
1048 | 1090 | ], |
1049 | 1091 | "metadata": { |
| 1092 | + "kernelspec": { |
| 1093 | + "display_name": "Python 3", |
| 1094 | + "language": "python", |
| 1095 | + "name": "python3" |
| 1096 | + }, |
1050 | 1097 | "language_info": { |
1051 | | - "name": "python" |
| 1098 | + "codemirror_mode": { |
| 1099 | + "name": "ipython", |
| 1100 | + "version": 3 |
| 1101 | + }, |
| 1102 | + "file_extension": ".py", |
| 1103 | + "mimetype": "text/x-python", |
| 1104 | + "name": "python", |
| 1105 | + "nbconvert_exporter": "python", |
| 1106 | + "pygments_lexer": "ipython3", |
| 1107 | + "version": "3.10.12" |
1052 | 1108 | } |
1053 | 1109 | }, |
1054 | 1110 | "nbformat": 4, |
|
0 commit comments