|
80 | 80 | "metadata": {}, |
81 | 81 | "outputs": [], |
82 | 82 | "source": [ |
83 | | - "batch_size = 10000 # Training images come in 5 batches of 10,000\n", |
| 83 | + "batch_size = 1000 # Mini-batch size (the dataset ships in 5 files of 10,000 images)\n", |
84 | 84 | "learning_rate = 1e-3\n", |
85 | 85 | "epochs = 30\n", |
86 | 86 | "print_every = 5 # Print training progress every n epochs\n", |
|
218 | 218 | "metadata": {}, |
219 | 219 | "outputs": [], |
220 | 220 | "source": [ |
221 | | - "# TODO: Modify this architecture\n", |
222 | 221 | "num_classes = 10\n", |
223 | 222 | "\n", |
224 | 223 | "model_exp1 = nn.Sequential(\n", |
225 | | - " # Conv block: 1 -> 32 channels, 32 x 32 -> 16 x 16\n", |
| 224 | + "\n", |
| 225 | + " # Conv block: grayscale input\n", |
226 | 226 | " nn.Conv2d(1, 32, kernel_size=3, padding=1),\n", |
227 | 227 | " nn.BatchNorm2d(32),\n", |
228 | 228 | " nn.ReLU(),\n", |
|
234 | 234 | " \n", |
235 | 235 | " # Classifier\n", |
236 | 236 | " nn.Flatten(),\n", |
237 | | - " nn.Linear(32 * 16 * 16, 128), # TODO: Update input size if you add more layers\n", |
| 237 | + " nn.Linear(32 * 16 * 16, 128),\n", |
238 | 238 | " nn.ReLU(),\n", |
239 | 239 | " nn.Dropout(0.5),\n", |
240 | 240 | " nn.Linear(128, num_classes)\n", |
| 241 | + "\n", |
241 | 242 | ").to(device)\n", |
242 | 243 | "\n", |
243 | 244 | "trainable_params = sum(p.numel() for p in model_exp1.parameters() if p.requires_grad)\n", |
|
267 | 268 | " criterion: nn.Module,\n", |
268 | 269 | " optimizer: optim.Optimizer,\n", |
269 | 270 | " epochs: int = 10,\n", |
270 | | - " print_every: int = 1\n", |
| 271 | + " print_every: int = 1,\n", |
| 272 | + " device: torch.device = None\n", |
271 | 273 | ") -> dict[str, list[float]]:\n", |
272 | 274 | " '''Training loop for PyTorch classification model.\n", |
273 | 275 | " \n", |
274 | | - " Note: Assumes data is already on the correct device.\n", |
| 276 | + " Args:\n", |
| 277 | + " device: If provided, moves batches to this device on-the-fly.\n", |
| 278 | + " If None, assumes data is already on the correct device.\n", |
275 | 279 | " '''\n", |
276 | 280 | "\n", |
277 | 281 | " history = {'train_loss': [], 'val_loss': [], 'train_accuracy': [], 'val_accuracy': []}\n", |
|
285 | 289 | " total = 0\n", |
286 | 290 | "\n", |
287 | 291 | " for images, labels in train_loader:\n", |
| 292 | + " \n", |
| 293 | + " # Move batch to device if specified\n", |
| 294 | + " if device is not None:\n", |
| 295 | + " images, labels = images.to(device), labels.to(device)\n", |
288 | 296 | "\n", |
289 | 297 | " # Forward pass\n", |
290 | 298 | " optimizer.zero_grad()\n", |
|
314 | 322 | " with torch.no_grad():\n", |
315 | 323 | "\n", |
316 | 324 | " for images, labels in val_loader:\n", |
| 325 | + " \n", |
| 326 | + " # Move batch to device if specified\n", |
| 327 | + " if device is not None:\n", |
| 328 | + " images, labels = images.to(device), labels.to(device)\n", |
317 | 329 | "\n", |
318 | 330 | " outputs = model(images)\n", |
319 | 331 | " loss = criterion(outputs, labels)\n", |
|
388 | 400 | "source": [ |
389 | 401 | "def evaluate_model(\n", |
390 | 402 | " model: nn.Module,\n", |
391 | | - " test_loader: DataLoader\n", |
| 403 | + " test_loader: DataLoader,\n", |
| 404 | + " device: torch.device = None\n", |
392 | 405 | ") -> tuple[float, np.ndarray, np.ndarray]:\n", |
393 | 406 | " '''Evaluate model on test set.\n", |
394 | 407 | " \n", |
395 | | - " Note: Assumes data is already on the correct device.\n", |
| 408 | + " Args:\n", |
| 409 | + " device: If provided, moves batches to this device on-the-fly.\n", |
| 410 | + " If None, assumes data is already on the correct device.\n", |
396 | 411 | " '''\n", |
397 | 412 | "\n", |
398 | 413 | " model.eval()\n", |
|
404 | 419 | " with torch.no_grad():\n", |
405 | 420 | "\n", |
406 | 421 | " for images, labels in test_loader:\n", |
| 422 | + " \n", |
| 423 | + " # Move batch to device if specified\n", |
| 424 | + " if device is not None:\n", |
| 425 | + " images, labels = images.to(device), labels.to(device)\n", |
407 | 426 | "\n", |
408 | 427 | " outputs = model(images)\n", |
409 | 428 | " _, predicted = torch.max(outputs.data, 1)\n", |
|
495 | 514 | "source": [ |
496 | 515 | "# TODO: Modify this transform to use RGB instead of grayscale\n", |
497 | 516 | "transform_exp2 = transforms.Compose([\n", |
498 | | - " # TODO: Remove Grayscale transform\n", |
499 | | - " transforms.Grayscale(num_output_channels=1), # Remove this line\n", |
500 | 517 | " transforms.ToTensor(),\n", |
501 | | - " transforms.Normalize((0.5,), (0.5,)) # TODO: Update normalization for 3 channels\n", |
| 518 | + " transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))\n", |
502 | 519 | "])\n", |
503 | 520 | "\n", |
504 | 521 | "# Load training and test datasets with RGB\n", |
|
617 | 634 | "source": [ |
618 | 635 | "# TODO: Update the first conv layer to accept 3 channels\n", |
619 | 636 | "model_exp2 = nn.Sequential(\n", |
620 | | - " # Conv block: TODO: Change input channels from 1 to 3\n", |
| 637 | + "\n", |
| 638 | + " # Conv block: RGB input\n", |
621 | | - "    nn.Conv2d(1, 32, kernel_size=3, padding=1),\n", |
| 639 | + "    nn.Conv2d(3, 32, kernel_size=3, padding=1),\n", |
622 | 640 | " nn.BatchNorm2d(32),\n", |
623 | 641 | " nn.ReLU(),\n", |
|
633 | 651 | " nn.ReLU(),\n", |
634 | 652 | " nn.Dropout(0.5),\n", |
635 | 653 | " nn.Linear(128, num_classes)\n", |
| 654 | + "\n", |
636 | 655 | ").to(device)\n", |
637 | 656 | "\n", |
638 | 657 | "trainable_params = sum(p.numel() for p in model_exp2.parameters() if p.requires_grad)\n", |
|
761 | 780 | "transform_train_exp3 = transforms.Compose([\n", |
762 | 781 | " # TODO: Add augmentation transforms here (before ToTensor) \n", |
763 | 782 | " transforms.ToTensor(),\n", |
764 | | - " transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) # Using RGB\n", |
| 783 | + " transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))\n", |
765 | 784 | "])\n", |
766 | 785 | "\n", |
767 | 786 | "# Validation and test transforms (no augmentation)\n", |
|
833 | 852 | "id": "ba0683e5", |
834 | 853 | "metadata": {}, |
835 | 854 | "source": [ |
836 | | - "### 3.3. Create Data Loaders with Augmentation\n", |
837 | | - "\n", |
838 | | - "**Note**: For augmented data, we cannot preload to GPU because each epoch needs different augmentations. We'll use regular DataLoaders." |
| 855 | + "### 3.3. Create Data Loaders with Augmentation" |
839 | 856 | ] |
840 | 857 | }, |
841 | 858 | { |
|
845 | 862 | "metadata": {}, |
846 | 863 | "outputs": [], |
847 | 864 | "source": [ |
848 | | - "# Create DataLoaders (cannot preload augmented data to GPU)\n", |
| 865 | + "# For data augmentation, we must NOT preload data to GPU as tensors.\n", |
| 866 | + "# Transforms need to be applied on-the-fly during each epoch so each \n", |
| 867 | + "# batch sees different augmented versions of the images.\n", |
| 868 | + "\n", |
| 869 | + "# Split training data into train and validation sets using Subset\n", |
| 870 | + "n_train = int(0.8 * len(train_dataset_exp3))\n", |
| 871 | + "n_val = len(train_dataset_exp3) - n_train\n", |
| 872 | + "indices = torch.randperm(len(train_dataset_exp3)).tolist()\n", |
| 873 | + "\n", |
| 874 | + "train_subset_exp3 = torch.utils.data.Subset(train_dataset_exp3, indices[:n_train])\n", |
| 875 | + "val_subset_exp3 = torch.utils.data.Subset(train_dataset_exp3, indices[n_train:])\n", |
| 876 | + "\n", |
| 877 | + "print(f'Training samples: {len(train_subset_exp3)}')\n", |
| 878 | + "print(f'Validation samples: {len(val_subset_exp3)}')\n", |
| 879 | + "print(f'Test samples: {len(test_dataset_exp3)}')" |
| 880 | + ] |
| 881 | + }, |
| 882 | + { |
| 883 | + "cell_type": "code", |
| 884 | + "execution_count": null, |
| 885 | + "id": "70daf63e", |
| 886 | + "metadata": {}, |
| 887 | + "outputs": [], |
| 888 | + "source": [ |
| 889 | + "# Create DataLoaders directly from Dataset/Subset objects\n", |
| 890 | + "# Transforms are applied on-the-fly when batches are loaded\n", |
849 | 891 | "train_loader_exp3 = DataLoader(\n", |
850 | | - " train_dataset_exp3,\n", |
| 892 | + " train_subset_exp3,\n", |
851 | 893 | " batch_size=batch_size,\n", |
852 | 894 | " shuffle=True\n", |
853 | 895 | ")\n", |
854 | 896 | "\n", |
855 | | - "# For validation/test, we can use the same approach as experiment 2\n", |
856 | | - "X_test_exp3 = torch.stack([img for img, _ in test_dataset_exp3]).to(device)\n", |
857 | | - "y_test_exp3 = torch.tensor([label for _, label in test_dataset_exp3]).to(device)\n", |
858 | | - "\n", |
859 | | - "# Create validation split from training data\n", |
860 | | - "n_val = int(0.2 * len(train_dataset_exp3))\n", |
861 | | - "n_train = len(train_dataset_exp3) - n_val\n", |
862 | | - "\n", |
863 | | - "train_subset_exp3, val_subset_exp3 = torch.utils.data.random_split(\n", |
864 | | - " train_dataset_exp3,\n", |
865 | | - " [n_train, n_val]\n", |
| 897 | + "val_loader_exp3 = DataLoader(\n", |
| 898 | + " val_subset_exp3,\n", |
| 899 | + " batch_size=batch_size,\n", |
| 900 | + " shuffle=False\n", |
866 | 901 | ")\n", |
867 | 902 | "\n", |
868 | | - "val_loader_exp3 = DataLoader(val_subset_exp3, batch_size=batch_size, shuffle=False)\n", |
869 | | - "test_tensor_dataset_exp3 = torch.utils.data.TensorDataset(X_test_exp3, y_test_exp3)\n", |
870 | | - "test_loader_exp3 = DataLoader(test_tensor_dataset_exp3, batch_size=batch_size, shuffle=False)\n", |
| 903 | + "test_loader_exp3 = DataLoader(\n", |
| 904 | + " test_dataset_exp3,\n", |
| 905 | + " batch_size=batch_size,\n", |
| 906 | + " shuffle=False\n", |
| 907 | + ")\n", |
871 | 908 | "\n", |
872 | 909 | "print(f'Training batches: {len(train_loader_exp3)}')\n", |
873 | | - "print(f'Validation batches: {len(val_loader_exp3)}')" |
| 910 | + "print(f'Validation batches: {len(val_loader_exp3)}')\n", |
| 911 | + "print(f'Test batches: {len(test_loader_exp3)}')" |
874 | 912 | ] |
875 | 913 | }, |
876 | 914 | { |
|
890 | 928 | "source": [ |
891 | 929 | "# Same architecture as Experiment 2 (RGB)\n", |
892 | 930 | "model_exp3 = nn.Sequential(\n", |
| 931 | + "\n", |
893 | 932 | " # Conv block: RGB input\n", |
894 | 933 | " nn.Conv2d(3, 32, kernel_size=3, padding=1),\n", |
895 | 934 | " nn.BatchNorm2d(32),\n", |
|
906 | 945 | " nn.ReLU(),\n", |
907 | 946 | " nn.Dropout(0.5),\n", |
908 | 947 | " nn.Linear(128, num_classes)\n", |
| 948 | + "\n", |
909 | 949 | ").to(device)\n", |
910 | 950 | "\n", |
911 | 951 | "trainable_params = sum(p.numel() for p in model_exp3.parameters() if p.requires_grad)\n", |
|
932 | 972 | "criterion_exp3 = nn.CrossEntropyLoss()\n", |
933 | 973 | "optimizer_exp3 = optim.Adam(model_exp3.parameters(), lr=learning_rate)\n", |
934 | 974 | "\n", |
| 975 | + "# Pass device to move batches on-the-fly (required for on-the-fly augmentation)\n", |
935 | 976 | "history_exp3 = train_model(\n", |
936 | 977 | " model=model_exp3,\n", |
937 | 978 | " train_loader=train_loader_exp3,\n", |
938 | 979 | " val_loader=val_loader_exp3,\n", |
939 | 980 | " criterion=criterion_exp3,\n", |
940 | 981 | " optimizer=optimizer_exp3,\n", |
941 | 982 | " epochs=epochs,\n", |
942 | | - " print_every=print_every\n", |
| 983 | + " print_every=print_every,\n", |
| 984 | + " device=device\n", |
943 | 985 | ")" |
944 | 986 | ] |
945 | 987 | }, |
946 | | - { |
947 | | - "cell_type": "markdown", |
948 | | - "id": "0faf27d3", |
949 | | - "metadata": {}, |
950 | | - "source": [ |
951 | | - "### 3.5. Train Model with Augmented Data" |
952 | | - ] |
953 | | - }, |
954 | 988 | { |
955 | 989 | "cell_type": "markdown", |
956 | 990 | "id": "85ad8659", |
|
986 | 1020 | "plt.tight_layout()\n", |
987 | 1021 | "plt.show()\n", |
988 | 1022 | "\n", |
989 | | - "# Test accuracy\n", |
990 | | - "test_accuracy_exp3, predictions_exp3, true_labels_exp3 = evaluate_model(model_exp3, test_loader_exp3)\n", |
| 1023 | + "# Test accuracy (pass device for on-the-fly batch loading)\n", |
| 1024 | + "test_accuracy_exp3, predictions_exp3, true_labels_exp3 = evaluate_model(\n", |
| 1025 | + " model_exp3, test_loader_exp3, device=device\n", |
| 1026 | + ")\n", |
991 | 1027 | "print(f'\\nExperiment 3 Test Accuracy: {test_accuracy_exp3:.2f}%')" |
992 | 1028 | ] |
993 | 1029 | }, |
|
1024 | 1060 | "\n", |
1025 | 1061 | "| Experiment | Description | Test Accuracy | Notes |\n", |
1026 | 1062 | "|------------|-------------|---------------|-------|\n", |
1027 | | - "| Baseline (demo) | Grayscale, simple architecture | ~45% | From demo notebook |\n", |
| 1063 | + "| Baseline (demo) | Grayscale, simple architecture | ~60% | From demo notebook |\n", |
1028 | 1064 | "| Experiment 1 | Modified architecture | _% | |\n", |
1029 | 1065 | "| Experiment 2 | RGB images | _% | |\n", |
1030 | 1066 | "| Experiment 3 | Image augmentation | _% | |\n", |
|
1044 | 1080 | "*Your reflections here:*\n", |
1045 | 1081 | "\n" |
1046 | 1082 | ] |
| 1083 | + }, |
| 1084 | + { |
| 1085 | + "cell_type": "markdown", |
| 1086 | + "id": "b8155ac6", |
| 1087 | + "metadata": {}, |
| 1088 | + "source": [] |
1047 | 1089 | } |
1048 | 1090 | ], |
1049 | 1091 | "metadata": { |
| 1092 | + "kernelspec": { |
| 1093 | + "display_name": "Python 3", |
| 1094 | + "language": "python", |
| 1095 | + "name": "python3" |
| 1096 | + }, |
1050 | 1097 | "language_info": { |
1051 | | - "name": "python" |
| 1098 | + "codemirror_mode": { |
| 1099 | + "name": "ipython", |
| 1100 | + "version": 3 |
| 1101 | + }, |
| 1102 | + "file_extension": ".py", |
| 1103 | + "mimetype": "text/x-python", |
| 1104 | + "name": "python", |
| 1105 | + "nbconvert_exporter": "python", |
| 1106 | + "pygments_lexer": "ipython3", |
| 1107 | + "version": "3.10.12" |
1052 | 1108 | } |
1053 | 1109 | }, |
1054 | 1110 | "nbformat": 4, |
|
0 commit comments