Skip to content

Commit 9ce1b32

Browse files
committed
Added MLflow experiment tracking and a YAML config file for pipeline parameters
1 parent 964d9f9 commit 9ce1b32

File tree

15 files changed

+423
-384
lines changed

15 files changed

+423
-384
lines changed

.gitignore

Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,34 @@
1-
# .gitignore for the Quantum MLOps Pipeline
1+
# ===================================================================
2+
# .gitignore for the Quantum-Enhanced MLOps Pipeline
3+
# ===================================================================
24

35
# --- Python Virtual Environments ---
6+
# Never commit your virtual environment. It's large, OS-specific,
7+
# and should be recreated from requirements.txt.
48
/qenv/
5-
venv/
6-
.venv/
7-
env/
8-
ENV/
9+
/venv/
10+
/.venv/
11+
/env/
12+
/ENV/
913

10-
# --- Python Cache Files ---
14+
# --- Python Cache & Compiled Files ---
15+
# These are temporary files Python creates to run your code faster.
1116
__pycache__/
12-
*.pyc
13-
*.pyo
14-
*.pyd
17+
*.py[cod]
18+
*.egg-info/
1519

16-
# --- Generated Artifacts & Data ---
17-
# This section is modified to keep the directory structure.
20+
# ===================================================================
21+
# MLOps & Data Artifacts (The most important section for this project)
22+
# ===================================================================
23+
24+
# --- MLflow ---
25+
# ** NEW **
26+
# Ignore the entire MLflow tracking directory. This contains all experiment
27+
# logs, metrics, params, and artifacts, and can grow very large.
28+
/mlruns/
29+
30+
# --- Generated Data & Models ---
31+
# The following rules ensure we commit the FOLDER structure but not its CONTENTS.
1832

1933
# 1. Ignore all content inside the /data/ directory...
2034
data/*
@@ -31,12 +45,22 @@ saved_models/tuned_classifier/*
3145
# 6. ...but DO NOT ignore its .gitkeep placeholder.
3246
!saved_models/tuned_classifier/.gitkeep
3347

34-
# Ignore all generated visualization images
48+
# --- Generated Visualizations ---
49+
# Ignore all plot images generated by the pipeline. These are artifacts.
3550
*.png
3651

52+
# ===================================================================
53+
# OS, Editor, and Test Artifacts
54+
# ===================================================================
55+
56+
# --- Common Test Artifacts ---
57+
.pytest_cache/
58+
htmlcov/
59+
.coverage
60+
3761
# --- OS & Editor Specific Files ---
3862
.DS_Store
3963
Thumbs.db
4064
.vscode/
4165
.idea/
42-
.ipynb_checkpoints
66+
.ipynb_checkpoints/

config.yaml

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# ===================================================================
2+
# Master Configuration for the Quantum-Enhanced MLOps Pipeline
3+
# ===================================================================
4+
project_name: "quantum-enhanced-MLOps"
5+
mlflow_tracking_uri: "mlruns"
6+
quantum_backend: "simulator"
7+
use_qem: False
8+
9+
# ===================================================================
10+
# Pipeline Stage Parameters (with UNIQUE keys)
11+
# ===================================================================
12+
stage_1_feature_engineering:
13+
stage_1_n_samples: 400
14+
stage_1_epochs: 5
15+
stage_1_learning_rate: 0.005
16+
stage_1_batch_size: 16
17+
stage_1_latent_dim: 4
18+
stage_1_img_size: 14
19+
20+
stage_2_hyperparameter_tuning:
21+
stage_2_n_samples: 1000
22+
stage_2_qaoa_reps: 2
23+
hyperparameter_space:
24+
hidden_dim: [32, 64, 128]
25+
learning_rate: [0.01, 0.005, 0.001]
26+
dropout: [0.2, 0.4, 0.6]
27+
28+
stage_3_production_monitoring:
29+
stage_3_n_samples: 200
30+
stage_3_nu_param: 0.1
31+
visualization_mode: "fast"
32+
stage_3_fast_plot_n_samples: 50

generate_visuals_only.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# ===================================================================
2+
# Standalone Script to Generate All Visualizations
3+
# ===================================================================
4+
# Use this script to regenerate plots without re-running the entire
5+
# MLOps pipeline. It requires that `run_pipeline.py` has been
6+
# successfully run at least once to create the necessary saved models.
7+
8+
import yaml
9+
10+
# Import the individual, modular plotting scripts
11+
from src.visualization.plot_stage_1 import create_feature_space_plot
12+
from src.visualization.plot_stage_2 import create_hpo_search_plot
13+
from src.visualization.plot_stage_3 import create_drift_detection_plots
14+
15+
def main():
    """
    Load the project configuration and regenerate all pipeline visualizations.

    Use this instead of re-running the entire MLOps pipeline; it requires that
    `run_pipeline.py` has already been run at least once so the saved models
    the plotting functions depend on exist on disk.
    """
    # 1. Load the project configuration from the YAML file.
    #    safe_load is used so the config file cannot execute arbitrary code.
    try:
        with open('config.yaml', 'r', encoding='utf-8') as f:
            config = yaml.safe_load(f)
    except FileNotFoundError:
        print("[ERROR] `config.yaml` not found! Please ensure it's in the root directory.")
        return
    except yaml.YAMLError as exc:
        # A malformed config should fail with a readable message, not a traceback,
        # matching how a missing file is already handled above.
        print(f"[ERROR] `config.yaml` could not be parsed: {exc}")
        return

    print("==========================================================")
    print("=== RE-GENERATING ALL STORYTELLING VISUALS... ===")
    print("==========================================================")

    # 2. Call each visualization function, passing the config object
    #    so they know which settings to use (e.g., 'fast' vs 'high_quality').
    create_feature_space_plot(config)
    create_hpo_search_plot(config)
    create_drift_detection_plots(config)

    print("\n==========================================================")
    print("=== ALL VISUALS RE-GENERATED SUCCESSFULLY ===")
    print("==========================================================")


if __name__ == '__main__':
    main()

requirements.txt

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
1-
# Core Frameworks
21
torch
32
torchvision
43
qiskit
54
qiskit_machine_learning
65
qiskit_algorithms
76
qiskit-aer
87
qiskit-optimization
9-
10-
# Utility and Data Handling
118
numpy
129
matplotlib
1310
scikit-learn
14-
tqdm
11+
tqdm
12+
mlflow
13+
PyYAML

run_pipeline.py

Lines changed: 54 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,52 +1,66 @@
11
import os
2+
import yaml
3+
import mlflow
4+
25
from src.feature_engineering.build_feature_extractor import run_feature_engineering
36
from src.hyperparameter_tuning.tune_with_qaoa import run_hyperparameter_tuning
47
from src.production_monitoring.monitor_with_qsvm import run_drift_detection
5-
6-
# --- ADD IMPORTS FOR THE NEW VISUALIZATION MODULES ---
7-
from src.visualisation.plot_stage_1 import create_feature_space_plot
8-
from src.visualisation.plot_stage_2 import create_hpo_search_plot
9-
from src.visualisation.plot_stage_3 import create_drift_detection_plots
10-
from src.visualisation.plot_stage_3_fast import create_drift_detection_plot_fast
8+
from src.visualization.plot_stage_1 import create_feature_space_plot
9+
from src.visualization.plot_stage_2 import create_hpo_search_plot
10+
from src.visualization.plot_stage_3 import create_drift_detection_plots
1111

1212
def main():
    """
    Execute the full quantum-enhanced MLOps pipeline end to end.

    Loads `config.yaml`, opens a single MLflow run, logs the flattened
    per-stage parameters, executes the three pipeline stages (feature
    engineering, hyperparameter tuning, drift detection), regenerates the
    visualizations, and logs every plot as an MLflow artifact.
    """
    with open('config.yaml', 'r', encoding='utf-8') as f:
        config = yaml.safe_load(f)

    mlflow.set_tracking_uri(config['mlflow_tracking_uri'])
    mlflow.set_experiment(config['project_name'])

    with mlflow.start_run() as run:
        run_id = run.info.run_id
        print("==========================================================")
        print(f"=== STARTING MLFLOW RUN ID: {run_id} ===")
        print("==========================================================")

        # Merge the per-stage config sections into one flat dict. The keys are
        # prefixed per stage (stage_1_*, stage_2_*, stage_3_*) so merging
        # cannot silently clobber a parameter.
        params_to_log = {
            **config['stage_1_feature_engineering'],
            **config['stage_2_hyperparameter_tuning'],
            **config['stage_3_production_monitoring'],
        }
        # MLflow cannot log nested dictionaries, so drop the HPO search space.
        params_to_log.pop('hyperparameter_space', None)

        print("Logging configuration parameters to MLflow...")
        mlflow.log_params(params_to_log)

        run_feature_engineering(config)
        run_hyperparameter_tuning(config)
        run_drift_detection(config)

        print("\n==========================================================")
        print("=== MAIN PIPELINE COMPLETE. NOW GENERATING VISUALS... ===")
        print("==========================================================")

        create_feature_space_plot(config)
        create_hpo_search_plot(config)
        create_drift_detection_plots(config)

        print("\nLogging visualization artifacts to MLflow...")
        mlflow.log_artifact("visualization_stage_1_feature_space.png")
        mlflow.log_artifact("visualization_stage_2_hpo_search.png")

        mode = config['stage_3_production_monitoring']['visualization_mode']
        # Only the drift-plot filename depends on the mode; the confusion
        # matrix follows a single naming scheme, so log it once after the
        # branch (previously duplicated in both branches).
        if mode == 'fast':
            mlflow.log_artifact("visualization_stage_3_drift_FAST.png")
        else:
            mlflow.log_artifact("visualization_stage_3_drift_boundary.png")
        mlflow.log_artifact(f"visualization_stage_3_confusion_matrix_{mode.upper()}.png")

        print("\n==========================================================")
        print(f"=== MLOPS PIPELINE EXECUTION COMPLETE FOR RUN ID: {run_id} ===")
        print("=== View results in the MLflow UI: `mlflow ui` ===")
        print("==========================================================")


if __name__ == '__main__':
    main()

src/feature_engineering/build_feature_extractor.py

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import numpy as np
55
import os
66
from tqdm import tqdm
7+
import mlflow
78

89
from .data_setup import get_data_loaders
910
from .classical_components import Encoder, Decoder
@@ -23,28 +24,29 @@ def forward(self, x):
2324
reconstructed_image = self.decoder(quantum_output)
2425
return reconstructed_image
2526

26-
def run_feature_engineering(latent_dim=4, epochs=5, lr=0.001, batch_size=32, n_samples=600, img_size=14):
27-
"""Main function to orchestrate the training of the Hybrid Autoencoder."""
27+
def run_feature_engineering(config):
    """
    MLOps Stage 1: train the Hybrid Autoencoder feature extractor.

    Reads hyperparameters from the `stage_1_feature_engineering` section of
    the config, trains the encoder -> quantum layer -> decoder model on a
    reconstruction (MSE) loss, saves the trained classical encoder weights
    and PQC weights to `saved_models/feature_extractor/`, and logs the loss
    curve plus both weight files to MLflow.

    Args:
        config: Parsed project configuration (dict from config.yaml). Must
            contain a 'stage_1_feature_engineering' section with the
            `stage_1_*` hyperparameter keys.
    """
    print("--- MLOps Stage 1: Building Quantum-Native Feature Extractor ---")
    cfg = config['stage_1_feature_engineering']
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    train_loader, _ = get_data_loaders(
        batch_size=cfg['stage_1_batch_size'],
        n_samples=cfg['stage_1_n_samples'],
        img_size=cfg['stage_1_img_size']
    )
    encoder = Encoder(cfg['stage_1_latent_dim'], cfg['stage_1_img_size']).to(device)
    quantum_layer = get_quantum_torch_layer(cfg['stage_1_latent_dim']).to(device)
    decoder = Decoder(cfg['stage_1_latent_dim'], cfg['stage_1_img_size']).to(device)
    model = HybridAutoencoder(encoder, quantum_layer, decoder).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=cfg['stage_1_learning_rate'])

    print("\nStarting Hybrid Autoencoder training...")
    # Hoist the epoch count out of the loop instead of re-reading the dict
    # on every iteration and in every f-string.
    epochs = cfg['stage_1_epochs']
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
        for images, _ in progress_bar:
            images = images.to(device)
            optimizer.zero_grad()
            # NOTE(review): the forward/backward lines here were elided by the
            # diff context; reconstructed as a standard autoencoder MSE
            # reconstruction step — confirm against the original file.
            outputs = model(images)
            loss = criterion(outputs, images)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            progress_bar.set_postfix(loss=loss.item())
        avg_loss = running_loss / len(train_loader)
        print(f"Epoch [{epoch+1}/{epochs}], Average Training Loss: {avg_loss:.4f}")
        # FIX: log one metric series with `step` rather than a distinct metric
        # name per epoch (stage_1_epoch_N_loss) — a per-epoch name fragments
        # the loss curve into N single-point metrics in the MLflow UI.
        mlflow.log_metric("stage_1_avg_train_loss", avg_loss, step=epoch + 1)

    print("\nTraining finished.")
    save_dir = "saved_models/feature_extractor"
    os.makedirs(save_dir, exist_ok=True)
    encoder_path = os.path.join(save_dir, "hae_encoder.pth")
    torch.save(model.encoder.state_dict(), encoder_path)
    print(f"Trained classical encoder saved to {encoder_path}")
    pqc_weights_path = os.path.join(save_dir, "hae_pqc_weights.npy")
    pqc_weights = model.quantum_layer.weight.cpu().detach().numpy()
    np.save(pqc_weights_path, pqc_weights)
    print(f"Trained PQC weights saved to {pqc_weights_path}")

    print("Logging model artifacts to MLflow...")
    mlflow.log_artifact(encoder_path, artifact_path="stage_1_feature_extractor")
    mlflow.log_artifact(pqc_weights_path, artifact_path="stage_1_feature_extractor")

    print("\n--- Feature Engineering Stage Complete ---")

0 commit comments

Comments
 (0)