
Parametrized DNN, mass fitting, Datacard #26


Open
wants to merge 95 commits into base: main

95 commits
2c296ad
added QCD Samples and its cross-section
raj2022 Jan 8, 2025
afba296
Added QCD samples in the analyzer and further plotting.c
raj2022 Jan 8, 2025
1754edd
Updated hhbbgg_Plotter
raj2022 Jan 9, 2025
b90b0eb
restructured folders and files
raj2022 Jan 14, 2025
73fd653
restructured folder
raj2022 Jan 14, 2025
1bd3777
Added parametrized dnn files python
raj2022 Jan 15, 2025
3eea284
organization
raj2022 Jan 15, 2025
cc2f42a
added fitting notebook
Jan 17, 2025
6541ff8
updated signal study
Jan 18, 2025
b7e6e9f
updated folders
Jan 18, 2025
d868ea3
updated folder
Jan 19, 2025
943a669
stat study
Jan 21, 2025
798c995
added stat study
Jan 22, 2025
b7c0b26
Added variation json file
Jan 22, 2025
3f751cc
added signal
Jan 22, 2025
98f5c4d
updated signal mass fitting
Jan 22, 2025
59c7a62
updated stat study
Jan 24, 2025
7d81582
updated correlation plot
Jan 24, 2025
5473ea4
For hhbbgg_Analyzer added parquet reading as well
raj2022 Jan 24, 2025
7680173
modified: hhbbgg_Analyzer.py
raj2022 Jan 24, 2025
17fcfc5
updated to avoid memory issues.
raj2022 Jan 24, 2025
a1edbc6
corrected Analyzer to get parquet files
raj2022 Jan 24, 2025
ef84f52
updated variable reading
Jan 25, 2025
9ba94b6
Merge branch 'QCD' of github.com:raj2022/hhbbgg_AwkwardAnalyzer into QCD
Jan 25, 2025
2e89aa7
updated variable reading
Jan 25, 2025
54f77c3
updated the Analyzer
raj2022 Jan 25, 2025
971a4e6
Merge branch 'QCD' of github.com:raj2022/hhbbgg_AwkwardAnalyzer into QCD
raj2022 Jan 25, 2025
f34952f
updated bin
Jan 27, 2025
fb2642c
Merge branch 'QCD' of github.com:raj2022/hhbbgg_AwkwardAnalyzer into QCD
Jan 27, 2025
95ef386
added README stats
Jan 28, 2025
fb121be
added a new file for parquet reading
raj2022 Jan 28, 2025
3caad11
updated bin
Jan 25, 2025
edc8b47
Merge branch 'QCD' of github.com:raj2022/hhbbgg_AwkwardAnalyzer into QCD
raj2022 Jan 29, 2025
f61c1a2
updated bin
raj2022 Jan 29, 2025
09c74f0
updated and restructured parquet file reading
raj2022 Jan 29, 2025
286a747
finishing merging
raj2022 Jan 29, 2025
36995e2
updated parquet file reading
raj2022 Jan 29, 2025
4a7b47b
added stat study
Jan 29, 2025
a58612a
parquet file reading
raj2022 Jan 29, 2025
f3b2d38
Reading parquet file
raj2022 Jan 29, 2025
3f491db
Merge branch 'QCD' of github.com:raj2022/hhbbgg_AwkwardAnalyzer into QCD
raj2022 Jan 29, 2025
b4fee4a
updated stat study signal
Jan 30, 2025
8bc9e74
Merge branch 'QCD' of github.com:raj2022/hhbbgg_AwkwardAnalyzer into QCD
Jan 30, 2025
41de0f1
restructured signal study
Jan 30, 2025
60830e2
updated stat study for signal
Jan 30, 2025
3ca2b1f
updated README
Jan 30, 2025
9346589
updated README
Jan 30, 2025
2eae630
updated README
Jan 30, 2025
a0235ea
updated signal stats study
Jan 31, 2025
8a8da8c
updated dibjets
Feb 3, 2025
efdd894
updated dibjets
Feb 3, 2025
261b3ba
updated dijets plot
Feb 3, 2025
555c6aa
updated dijets plot
Feb 3, 2025
c75cf18
updated dijets script
Feb 3, 2025
53fea0b
added stats reading background
Feb 5, 2025
6d8f923
updated resonant and non-resonant backgrounds
Feb 5, 2025
e8936e0
added datacard folder
raj2022 Feb 6, 2025
b3783ff
updated datacard reading
Feb 9, 2025
c0fa4de
added datacard
Feb 9, 2025
5a63ff3
updated all
raj2022 Feb 12, 2025
0c5c51e
removed GGJETs reading
raj2022 Feb 12, 2025
f390c38
updated Plotter with working files
raj2022 Feb 13, 2025
01ee97f
updated hhbbgg plotter
raj2022 Feb 13, 2025
21451d5
Removed error signal sample in plotter
raj2022 Feb 13, 2025
4e12eab
Update .gitignore
raj2022 Feb 14, 2025
69cee63
updated parametrized DNN
Feb 14, 2025
30d5016
added parametrized DNN
Feb 18, 2025
d8ddb74
updated pDNN
Feb 18, 2025
52c9df2
updated all signal files pDNN
raj2022 Feb 18, 2025
ad69e1d
trying to fix overtraining issues
raj2022 Feb 18, 2025
8329de8
removed device error
raj2022 Feb 19, 2025
97dbdf8
updated pDNN
Feb 19, 2025
7cbfdcb
updated stats dof
Feb 25, 2025
6513042
signal efficiency:
Feb 27, 2025
e77aca8
updated signal efficiency
raj2022 Feb 28, 2025
ac0693e
updated combine limit study
Mar 7, 2025
679932b
Merge branch 'QCD' of github.com:raj2022/hhbbgg_AwkwardAnalyzer into QCD
Mar 7, 2025
ff83b22
changed binning
raj2022 Mar 20, 2025
6a73ce4
updated binning
raj2022 Mar 22, 2025
7bacfdd
new folder 1D
raj2022 Mar 22, 2025
44674a3
updated 1D README
raj2022 Mar 22, 2025
8ec9642
updated README
raj2022 Mar 22, 2025
ce90e2b
updated README
raj2022 Mar 24, 2025
ebb55fc
Update .gitignore
raj2022 Mar 24, 2025
d3b3b19
added 1D fitting
Mar 24, 2025
39e18a8
updated pDNN README
raj2022 Mar 24, 2025
ac57ffe
updated 1D
Mar 28, 2025
c8f58d3
restructured folder
Mar 28, 2025
076c2ec
updated 1D mass fitting
raj2022 Apr 8, 2025
6fa4267
updated signal README
raj2022 Apr 8, 2025
641b0b2
updated mass grid
Apr 28, 2025
e565095
Merge branch 'QCD' of github.com:raj2022/hhbbgg_AwkwardAnalyzer into QCD
Apr 28, 2025
79582e9
updated v3 README
May 27, 2025
5943a76
v3 README updated
May 27, 2025
4f49827
updated README for v3 with error
raj2022 May 28, 2025
2 changes: 2 additions & 0 deletions .gitignore
@@ -10,3 +10,5 @@ outputfiles/*.ipynb
**.vscode
bdt/notebook/**/*.pt
bdt/notebook/**/*.pth
stats_study/CMSSW_*/
stats_study/datacards/CMSSW*
File renamed without changes.
File renamed without changes.
24 changes: 24 additions & 0 deletions ML_Application/parametrized_DNN/README.md
@@ -0,0 +1,24 @@
# Parametrized DNN on the v2 HiggsDNA files
For the plain DNN we trained a separate network for each mass point. For the parametrized DNN, the mass point is provided to the model as an additional input during training, so a single training covers all samples at once. We divided the mass points into different ranges based on their kinematics.

The parametrized DNN can be implemented on the dataset as follows:
* The pNN method is employed for a wide range of mass points
* Train on all signal MC {m1, m2, m3, ...}
* Give background MC random values of mass from {m1, m2, m3, ...}
* Provide the same input variables as the DNN
* Split the MC signal in half, with one half used as input for the classifier, and the other half (weight ×2) used for the final signal model construction
$$
f(\vec{x}; m) =
\begin{cases}
f^1(\vec{x}) & \text{if } m = m_1 \\
f^2(\vec{x}) & \text{if } m = m_2 \\
\vdots
\end{cases}
$$
The conditional form above is taken from this presentation: [here](https://indico.cern.ch/event/1507349/contributions/6364202/attachments/3009726/5317821/preapproval.pdf)
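As a rough sketch of the input construction described above (toy NumPy data with hypothetical array names, not the analysis code): the mass-parametrization amounts to appending the mass point as an extra feature column, where signal events carry their true mass and background events are assigned a random mass from the same grid, so the mass column alone cannot separate the classes:

```python
import numpy as np

rng = np.random.default_rng(42)
mass_points = [300, 400, 500, 550, 600, 650, 700, 900]

# Toy stand-ins for the 25 kinematic input variables (hypothetical data).
n_sig, n_bkg, n_feat = 1000, 1000, 25
sig_x = rng.normal(size=(n_sig, n_feat))
bkg_x = rng.normal(size=(n_bkg, n_feat))

# Signal events keep their generated mass point...
sig_m = rng.choice(mass_points, size=n_sig)
# ...while background events draw a random mass from the same grid.
bkg_m = rng.choice(mass_points, size=n_bkg)

# The parametrized input is simply [features, m]: one network f(x; m)
# then covers the whole mass grid instead of one classifier per point.
X = np.vstack([np.column_stack([sig_x, sig_m]),
               np.column_stack([bkg_x, bkg_m])])
y = np.concatenate([np.ones(n_sig), np.zeros(n_bkg)])
```

At evaluation time the same network is queried at a fixed mass hypothesis by setting the last column to that value for every event.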


## Ref
1. https://link.springer.com/article/10.1140/epjc/s10052-016-4099-4
2. https://arxiv.org/pdf/2202.00424
5 changes: 5 additions & 0 deletions ML_Application/parametrized_DNN/output.log
@@ -0,0 +1,5 @@
nohup: ignoring input
/eos/home-s/sraj/Work_/CUA_20--/Analysis/hhbbgg_AwkwardAnalyzer/ML_Application/parametrized_DNN/pDNN.py:121: FutureWarning: In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.
df_balanced = pd.concat([df_majority_downsampled, df_minority])
/cvmfs/sft.cern.ch/lcg/views/LCG_105_cuda/x86_64-el9-gcc11-opt/lib/python3.9/site-packages/torch/cuda/__init__.py:138: UserWarning: CUDA initialization: CUDA unknown error - this may be due to an incorrectly set up environment, e.g. changing env variable CUDA_VISIBLE_DEVICES after program start. Setting the available devices to be zero. (Triggered internally at /build/jenkins/workspace/lcg_release_pipeline/build/pyexternals/torch-2.1.1/src/torch/2.1.1/c10/cuda/CUDAFunctions.cpp:108.)
return torch._C._cuda_getDeviceCount() > 0
295 changes: 295 additions & 0 deletions ML_Application/parametrized_DNN/pDNN.py
@@ -0,0 +1,295 @@
import os
import pandas as pd
import uproot
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from torch.optim import Adam
from torch.nn import BCEWithLogitsLoss



# Taking mass X and corresponding Y mass points
mass_points = [300, 400, 500, 550, 600, 650, 700, 900] # Example mass points
y_values = [100, 125, 150, 200, 300, 400, 500, 600] # Example Y values

# Initialize list to store data and a dictionary for missing files
signal_data = []
missing_files = {}

# Load signal data from Parquet files
for mass in mass_points:
    for y in y_values:
        file_path = f"../../../output_parquet/final_production_Syst/merged/NMSSM_X{mass}_Y{y}/nominal/NOTAG_merged.parquet"

        if os.path.exists(file_path):  # Check if file exists
            try:
                df = pd.read_parquet(file_path)  # Load the Parquet file
                df["mass"] = mass
                df["y_value"] = y  # Store Y value if needed
                df["label"] = 1  # Signal label
                signal_data.append(df)
            except Exception as e:
                print(f"Warning: Could not read {file_path}. Error: {e}")
        else:
            print(f"Warning: File {file_path} does not exist.")
            # Track missing files
            if mass not in missing_files:
                missing_files[mass] = []
            missing_files[mass].append(y)

# Combine all signal data into a single DataFrame
signal_df = pd.concat(signal_data, ignore_index=True) if signal_data else pd.DataFrame()

# print the missing files
if missing_files:
    print("Missing files for the following mass points and Y values:")
    for mass, ys in missing_files.items():
        print(f"Mass point {mass} is missing Y values: {ys}")

print(f"Signal shape is {signal_df.shape}")

# Reading background files
# Load background data from ROOT files
background_files = [
("../../outputfiles/hhbbgg_analyzer-v2-trees.root", "/GGJets/preselection"),
("../../outputfiles/hhbbgg_analyzer-v2-trees.root", "/GJetPt20To40/preselection"),
("../../outputfiles/hhbbgg_analyzer-v2-trees.root", "/GJetPt40/preselection"),
]
background_data = []
for file_path, tree_name in background_files:
    try:
        with uproot.open(file_path) as file:
            tree = file[tree_name]
            df = tree.arrays(library="pd")
            df["mass"] = np.random.choice(mass_points, len(df))  # Random mass assignment
            df["label"] = 0
            background_data.append(df)
    except Exception as e:
        print(f"Warning: Could not read {file_path}. Error: {e}")

df_background = pd.concat(background_data, ignore_index=True) if background_data else pd.DataFrame()

# Define features and labels
features = [
'bbgg_eta', 'bbgg_phi', 'lead_pho_phi', 'sublead_pho_eta',
'sublead_pho_phi', 'diphoton_eta', 'diphoton_phi', 'dibjet_eta', 'dibjet_phi',
'lead_bjet_pt', 'sublead_bjet_pt', 'lead_bjet_eta', 'lead_bjet_phi', 'sublead_bjet_eta',
'sublead_bjet_phi', 'sublead_bjet_PNetB', 'lead_bjet_PNetB', 'CosThetaStar_gg',
'CosThetaStar_jj', 'CosThetaStar_CS', 'DeltaR_jg_min', 'pholead_PtOverM',
'phosublead_PtOverM', 'lead_pho_mvaID', 'sublead_pho_mvaID'
]

# Reduce background dataset size by random sampling
background_fraction = 0.2 # 20% of the background
df_background = df_background.sample(frac=background_fraction, random_state=42)

# Combine signal and background
df_combined = pd.concat([signal_df, df_background], ignore_index=True)

# Ensure df_combined is not empty
if df_combined.empty:
    raise ValueError("Error: Combined DataFrame is empty. Check input files.")

# Convert feature data to DataFrame to prevent AttributeError
df_features = df_combined[features]

# Fill missing values with column mean
df_features = df_features.fillna(df_features.mean())

# Extract features (X) and labels (y)
X = df_features.values
y = df_combined["label"].values

print(f"Total features: {df_features.shape}")

# Undersampling the Majority Class

from sklearn.utils import resample

df_majority = df_combined[df_combined["label"] == 0]
df_minority = df_combined[df_combined["label"] == 1]

df_majority_downsampled = resample(df_majority,
                                   replace=False,
                                   n_samples=len(df_minority),
                                   random_state=42)

# NOTE: df_balanced is currently unused; X and y above are built from df_combined.
df_balanced = pd.concat([df_majority_downsampled, df_minority])


# Standardize features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Convert to PyTorch tensors
X_tensor = torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.float32)

# Check for GPU
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# print(f"Using device: {device}")

# Move data to GPU
# X_tensor = X_tensor.to(device)
# y_tensor = y_tensor.to(device)

# Create DataLoader
dataset = TensorDataset(X_tensor, y_tensor)
dataloader = DataLoader(dataset, batch_size=64, shuffle=True)

# Checking class imbalance
class_counts = np.bincount(y)
print(f"Class distribution: {dict(enumerate(class_counts))}")



class ParameterizedDNN(nn.Module):
    def __init__(self, input_dim):
        super(ParameterizedDNN, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.ReLU(),
            nn.Dropout(0.3),

            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.3),

            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.2),

            nn.Linear(64, 1)  # Output layer: raw logits, no activation
        )

    def forward(self, x):
        return self.model(x)  # No sigmoid here; BCEWithLogitsLoss expects logits



# Initialize model
input_dim = X.shape[1]
model = ParameterizedDNN(input_dim)
# criterion = nn.BCEWithLogitsLoss() # Expecting raw logits
# criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([weight]))
optimizer = Adam(model.parameters(), lr=0.0001, weight_decay=1e-5) # Reduce learning rate
# Compute class weights
pos_weight = torch.tensor([class_counts[0] / class_counts[1]], dtype=torch.float32)

# Update loss function
criterion = BCEWithLogitsLoss(pos_weight=pos_weight)


from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve
import matplotlib.pyplot as plt

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

num_epochs = 100
train_losses = []
train_accuracies = []
train_aucs = []
fpr_all, tpr_all, thresholds_all = [], [], []

for epoch in range(num_epochs):
    epoch_loss = 0
    y_true = []
    y_pred = []

    model.train()  # Set to training mode
    for batch in dataloader:
        X_batch, y_batch = batch
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)  # Move data to device

        optimizer.zero_grad()
        outputs = model(X_batch).squeeze()  # Raw logits

        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

        # Store predictions for accuracy & AUC calculation
        y_true.extend(y_batch.cpu().numpy())  # True labels
        y_pred.extend(torch.sigmoid(outputs).detach().cpu().numpy())  # Sigmoid applied only for metrics

    # Compute metrics
    avg_loss = epoch_loss / len(dataloader)
    y_pred_binary = [1 if p > 0.5 else 0 for p in y_pred]  # Convert to 0/1 labels
    accuracy = accuracy_score(y_true, y_pred_binary)
    auc = roc_auc_score(y_true, y_pred)  # Use probabilities, not logits

    # Store metrics
    train_losses.append(avg_loss)
    train_accuracies.append(accuracy)
    train_aucs.append(auc)

    # Compute ROC curve for the current epoch (for plotting)
    fpr, tpr, thresholds = roc_curve(y_true, y_pred)
    fpr_all.append(fpr)
    tpr_all.append(tpr)
    thresholds_all.append(thresholds)

    print(f"Epoch {epoch+1}/{num_epochs} - Loss: {avg_loss:.4f}, Accuracy: {accuracy:.4f}, AUC: {auc:.4f}")




# Plot Loss
plt.figure(figsize=(12, 4))
plt.subplot(1, 3, 1)
plt.plot(range(1, num_epochs+1), train_losses, marker='o', linestyle='-', color='blue')
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Loss vs. Epochs")


plt.tight_layout()
plt.savefig("loss_vs_epochs.png")
plt.savefig("loss_vs_epochs.pdf")


# Plot Accuracy
plt.subplot(1, 3, 2)
plt.plot(range(1, num_epochs+1), train_accuracies, marker='o', linestyle='-', color='green')
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.title("Accuracy vs. Epochs")

plt.tight_layout()
plt.savefig("accuracy_vs_epochs.png")
plt.savefig("accuracy_vs_epochs.pdf")


# Plot AUC


# Plot the final ROC curve
# Select the ROC curve from the last epoch
fpr_last = fpr_all[-1]
tpr_last = tpr_all[-1]

plt.figure(figsize=(10, 6))
plt.plot(fpr_last, tpr_last, color='darkorange', lw=2, label=f'ROC curve (AUC = {train_aucs[-1]:.2f})')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--') # Random classifier line
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title(f'Final ROC Curve (AUC = {train_aucs[-1]:.2f})')
plt.legend(loc="lower right")
plt.savefig("AUC.png")
plt.savefig("AUC.pdf")

