Skip to content

Commit 2be891d

Browse files
committed
added comments for clarity and fixed typos and formatting issues
1 parent 9ce1b32 commit 2be891d

18 files changed

+597
-457
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ __pycache__/
2828
/mlruns/
2929

3030
# --- Generated Data & Models ---
31+
3132
# The following rules ensure we commit the FOLDER structure but not its CONTENTS.
3233

3334
# 1. Ignore all content inside the /data/ directory...

config.yaml

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,71 @@
11
# ===================================================================
22
# Master Configuration for the Quantum-Enhanced MLOps Pipeline
33
# ===================================================================
4+
# This file is the single source of truth for all experiment parameters.
5+
# Changing a value here will change how the pipeline runs without
6+
# needing to modify any Python source code.
7+
8+
# --- Project & MLflow Settings ---
9+
# These settings control the overall project and experiment tracking.
10+
11+
# The name of the project, used to create the experiment in the MLflow UI.
412
project_name: "quantum-enhanced-MLOps"
13+
14+
# The local folder where MLflow will store all its logs, metrics, and artifacts.
515
mlflow_tracking_uri: "mlruns"
16+
17+
# The quantum backend to use. In this version, it is set to the local simulator.
618
quantum_backend: "simulator"
19+
20+
# Quantum Error Mitigation (QEM) Simulation flag.
21+
# If set to 'True', the local simulator will mimic a real, noisy quantum computer.
22+
# This is a powerful research feature but will make the pipeline run slower.
723
use_qem: False
824

925
# ===================================================================
1026
# Pipeline Stage Parameters (with UNIQUE keys)
1127
# ===================================================================
28+
# Each section below controls the parameters for one stage of the MLOps pipeline.
29+
1230
stage_1_feature_engineering:
31+
# Number of images to use for training the Hybrid Autoencoder.
1332
stage_1_n_samples: 400
33+
# Number of times the autoencoder will see the entire training dataset.
1434
stage_1_epochs: 5
35+
# The step size for the model's learning process.
1536
stage_1_learning_rate: 0.005
37+
# Number of images to process in a single batch during training.
1638
stage_1_batch_size: 16
39+
# The size of the "quantum feature". This directly corresponds to the number of qubits in the PQC.
1740
stage_1_latent_dim: 4
41+
# The height and width (in pixels) to which the input MNIST images are resized.
1842
stage_1_img_size: 14
1943

2044
stage_2_hyperparameter_tuning:
45+
# Number of quantum features to generate for the downstream classifier's tuning process.
2146
stage_2_n_samples: 1000
47+
# The number of repetitions (the 'p' value) in the QAOA circuit. Higher values can lead to
48+
# better results but increase the circuit depth and runtime.
2249
stage_2_qaoa_reps: 2
50+
51+
# This defines the "menu" of options for the QAOA algorithm to search through.
52+
# It will find the best combination of these values.
2353
hyperparameter_space:
2454
hidden_dim: [32, 64, 128]
2555
learning_rate: [0.01, 0.005, 0.001]
2656
dropout: [0.2, 0.4, 0.6]
2757

2858
stage_3_production_monitoring:
59+
# Number of "good" (normal) data points to use for training the anomaly detector.
2960
stage_3_n_samples: 200
61+
# An SVM-specific parameter (nu). It is an upper bound on the fraction of training
62+
# points treated as outliers and a lower bound on the fraction of support vectors;
# it controls the tightness of the decision boundary.
3063
stage_3_nu_param: 0.1
64+
65+
# This switch controls the speed and quality of the Stage 3 plots.
66+
# 'fast': Generates a simplified plot in < 1 minute for quick checks.
67+
# 'high_quality': Generates the full, detailed plot, which can take 5-15+ minutes.
3168
visualization_mode: "fast"
69+
70+
# The number of data points to use ONLY when visualization_mode is set to 'fast'.
3271
stage_3_fast_plot_n_samples: 50

generate_visuals_only.py

Lines changed: 0 additions & 43 deletions
This file was deleted.

requirements.txt

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,21 @@
1-
torch
2-
torchvision
3-
qiskit
4-
qiskit_machine_learning
5-
qiskit_algorithms
6-
qiskit-aer
7-
qiskit-optimization
8-
numpy
9-
matplotlib
10-
scikit-learn
11-
tqdm
12-
mlflow
13-
PyYAML
1+
# --- Core Frameworks & Libraries ---
2+
3+
torch # For building classical neural networks (Encoder, Decoder, etc.).
4+
torchvision # Provides the MNIST dataset and image tools.
5+
qiskit # The main SDK for creating and working with quantum circuits.
6+
qiskit_machine_learning # Provides tools for quantum machine learning models (QNN, QSVM).
7+
qiskit_algorithms # Contains high-level quantum algorithms like VQE/QAOA.
8+
qiskit-aer # The fast, local simulator for running quantum circuits.
9+
qiskit-optimization # A specialized library for solving optimization problems (QUBO).
10+
11+
# --- Scientific Computing & Utilities ---
12+
13+
numpy # The fundamental library for all numerical and array operations.
14+
matplotlib # The primary library used for creating all plots and visualizations.
15+
scikit-learn # Used for its classical SVM, PCA, and performance metrics.
16+
tqdm # Creates the smart progress bars for training loops.
17+
18+
# --- MLOps & Configuration ---
19+
20+
mlflow # The platform for logging and tracking all experiment runs.
21+
PyYAML # Used to read and parse the main `config.yaml` file.

run_pipeline.py

Lines changed: 37 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
import os
2-
import yaml
3-
import mlflow
1+
import os # For interacting with the operating system (e.g., file paths).
2+
import yaml # For reading the main `config.yaml` file.
3+
import mlflow # The main library for experiment tracking.
44

5+
# --- Import the primary function from each stage's dedicated module ---
56
from src.feature_engineering.build_feature_extractor import run_feature_engineering
67
from src.hyperparameter_tuning.tune_with_qaoa import run_hyperparameter_tuning
78
from src.production_monitoring.monitor_with_qsvm import run_drift_detection
@@ -10,57 +11,65 @@
1011
from src.visualization.plot_stage_3 import create_drift_detection_plots
1112

1213
def main():
13-
with open('config.yaml', 'r') as f:
14-
config = yaml.safe_load(f)
14+
# --- Step 1: Load the Master Configuration ---
15+
with open('config.yaml', 'r') as f: # Open the config file for reading.
16+
config = yaml.safe_load(f) # Load all parameters into a dictionary.
1517

16-
mlflow.set_tracking_uri(config['mlflow_tracking_uri'])
17-
mlflow.set_experiment(config['project_name'])
18+
# --- Step 2: Initialize MLflow Experiment Tracking ---
19+
mlflow.set_tracking_uri(config['mlflow_tracking_uri']) # Set the folder to store MLflow logs.
20+
mlflow.set_experiment(config['project_name']) # Set the experiment name in the MLflow UI.
1821

19-
with mlflow.start_run() as run:
20-
run_id = run.info.run_id
22+
# --- Step 3: Start the Master MLflow Run ---
23+
with mlflow.start_run() as run: # Start a new experiment run context.
24+
run_id = run.info.run_id # Get the unique ID for this execution.
2125
print(f"==========================================================")
2226
print(f"=== STARTING MLFLOW RUN ID: {run_id} ===")
2327
print(f"==========================================================")
2428

25-
# --- THE FIX: Create a single dictionary with unique keys before logging ---
29+
# --- Flatten all config parameters into a single dictionary for logging ---
2630
params_to_log = {
2731
**config['stage_1_feature_engineering'],
2832
**config['stage_2_hyperparameter_tuning'],
2933
**config['stage_3_production_monitoring']
3034
}
31-
# MLflow cannot log nested dictionaries, so we remove this one.
32-
params_to_log.pop('hyperparameter_space', None)
35+
params_to_log.pop('hyperparameter_space', None) # Remove nested dictionaries, as MLflow can't log them.
3336

3437
print("Logging configuration parameters to MLflow...")
35-
mlflow.log_params(params_to_log)
38+
mlflow.log_params(params_to_log) # Log all parameters to the MLflow run.
3639

37-
run_feature_engineering(config)
38-
run_hyperparameter_tuning(config)
39-
run_drift_detection(config)
40+
# --- Step 4: Execute the MLOps Pipeline Stages Sequentially ---
41+
run_feature_engineering(config) # Run Stage 1: Build the feature extractor.
42+
run_hyperparameter_tuning(config) # Run Stage 2: Find the best model parameters.
43+
run_drift_detection(config) # Run Stage 3: Monitor for data drift.
4044

45+
# --- Step 5: Generate Storytelling Visualizations ---
4146
print("\n==========================================================")
4247
print("=== MAIN PIPELINE COMPLETE. NOW GENERATING VISUALS... ===")
4348
print("==========================================================")
4449

45-
create_feature_space_plot(config)
46-
create_hpo_search_plot(config)
47-
create_drift_detection_plots(config)
50+
create_feature_space_plot(config) # Generate the plot for Stage 1.
51+
create_hpo_search_plot(config) # Generate the plot for Stage 2.
52+
create_drift_detection_plots(config) # Generate the plot(s) for Stage 3.
4853

54+
# --- Step 6: Archive Visualizations in MLflow ---
4955
print("\nLogging visualization artifacts to MLflow...")
50-
mlflow.log_artifact("visualization_stage_1_feature_space.png")
51-
mlflow.log_artifact("visualization_stage_2_hpo_search.png")
52-
mode = config['stage_3_production_monitoring']['visualization_mode']
53-
if mode == 'fast':
54-
mlflow.log_artifact("visualization_stage_3_drift_FAST.png")
56+
mlflow.log_artifact("visualization_stage_1_feature_space.png") # Save Stage 1 plot to MLflow.
57+
mlflow.log_artifact("visualization_stage_2_hpo_search.png") # Save Stage 2 plot to MLflow.
58+
59+
mode = config['stage_3_production_monitoring']['visualization_mode'] # Check which viz mode was used.
60+
if mode == 'fast': # If fast mode was used...
61+
mlflow.log_artifact("visualization_stage_3_drift_FAST.png") # ...log the fast plot.
5562
mlflow.log_artifact(f"visualization_stage_3_confusion_matrix_{mode.upper()}.png")
56-
else:
57-
mlflow.log_artifact("visualization_stage_3_drift_boundary.png")
63+
else: # Otherwise...
64+
mlflow.log_artifact("visualization_stage_3_drift_boundary.png") # ...log the high-quality plot.
5865
mlflow.log_artifact(f"visualization_stage_3_confusion_matrix_{mode.upper()}.png")
5966

67+
# --- Final confirmation message ---
6068
print("\n==========================================================")
6169
print(f"=== MLOPS PIPELINE EXECUTION COMPLETE FOR RUN ID: {run_id} ===")
6270
print(f"=== View results in the MLflow UI: `mlflow ui` ===")
6371
print(f"==========================================================")
6472

65-
if __name__ == '__main__':
66-
main()
73+
# --- Standard Python entry point ---
74+
if __name__ == '__main__': # If the script is run directly...
75+
main() # ...call the main function.

0 commit comments

Comments
 (0)