Skip to content

Commit 2be891d

Browse files
committed
added comments for clarity and fixed typos and formatting issues
1 parent 9ce1b32 commit 2be891d

18 files changed

+597
-457
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ __pycache__/
2828
/mlruns/
2929

3030
# --- Generated Data & Models ---
31+
3132
# The following rules ensure we commit the FOLDER structure but not its CONTENTS.
3233

3334
# 1. Ignore all content inside the /data/ directory...

config.yaml

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,71 @@
11
# ===================================================================
22
# Master Configuration for the Quantum-Enhanced MLOps Pipeline
33
# ===================================================================
4+
# This file is the single source of truth for all experiment parameters.
5+
# Changing a value here will change how the pipeline runs without
6+
# needing to modify any Python source code.
7+
8+
# --- Project & MLflow Settings ---
9+
# These settings control the overall project and experiment tracking.
10+
11+
# The name of the project, used to create the experiment in the MLflow UI.
412
project_name: "quantum-enhanced-MLOps"
13+
14+
# The local folder where MLflow will store all its logs, metrics, and artifacts.
515
mlflow_tracking_uri: "mlruns"
16+
17+
# The quantum backend to use. In this version, it is set to the local simulator.
618
quantum_backend: "simulator"
19+
20+
# Quantum Error Mitigation (QEM) Simulation flag.
21+
# If set to 'True', the local simulator will mimic a real, noisy quantum computer.
22+
# This is a powerful research feature but will make the pipeline run slower.
723
use_qem: False
824

925
# ===================================================================
1026
# Pipeline Stage Parameters (with UNIQUE keys)
1127
# ===================================================================
28+
# Each section below controls the parameters for one stage of the MLOps pipeline.
29+
1230
stage_1_feature_engineering:
31+
# Number of images to use for training the Hybrid Autoencoder.
1332
stage_1_n_samples: 400
33+
# Number of times the autoencoder will see the entire training dataset.
1434
stage_1_epochs: 5
35+
# The step size for the model's learning process.
1536
stage_1_learning_rate: 0.005
37+
# Number of images to process in a single batch during training.
1638
stage_1_batch_size: 16
39+
# The size of the "quantum feature". This directly corresponds to the number of qubits in the PQC.
1740
stage_1_latent_dim: 4
41+
# The height and width (in pixels) to which the input MNIST images are resized.
1842
stage_1_img_size: 14
1943

2044
stage_2_hyperparameter_tuning:
45+
# Number of quantum features to generate for the downstream classifier's tuning process.
2146
stage_2_n_samples: 1000
47+
# The number of repetitions (the 'p' value) in the QAOA circuit. Higher values can lead to
48+
# better results but increase the circuit depth and runtime.
2249
stage_2_qaoa_reps: 2
50+
51+
# This defines the "menu" of options for the QAOA algorithm to search through.
52+
# It will find the best combination of these values.
2353
hyperparameter_space:
2454
hidden_dim: [32, 64, 128]
2555
learning_rate: [0.01, 0.005, 0.001]
2656
dropout: [0.2, 0.4, 0.6]
2757

2858
stage_3_production_monitoring:
59+
# Number of "good" (normal) data points to use for training the anomaly detector.
2960
stage_3_n_samples: 200
61+
# An SVM-specific parameter (nu). It is an upper bound on the fraction of training
62+
# points treated as outliers and a lower bound on the fraction of support vectors;
# it controls the tightness of the decision boundary.
3063
stage_3_nu_param: 0.1
64+
65+
# This switch controls the speed and quality of the Stage 3 plots.
66+
# 'fast': Generates a simplified plot in < 1 minute for quick checks.
67+
# 'high_quality': Generates the full, detailed plot, which can take 5-15+ minutes.
3168
visualization_mode: "fast"
69+
70+
# The number of data points to use ONLY when visualization_mode is set to 'fast'.
3271
stage_3_fast_plot_n_samples: 50

generate_visuals_only.py

Lines changed: 0 additions & 43 deletions
This file was deleted.

requirements.txt

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,21 @@
1-
torch
2-
torchvision
3-
qiskit
4-
qiskit_machine_learning
5-
qiskit_algorithms
6-
qiskit-aer
7-
qiskit-optimization
8-
numpy
9-
matplotlib
10-
scikit-learn
11-
tqdm
12-
mlflow
13-
PyYAML
1+
# --- Core Frameworks & Libraries ---
2+
3+
torch # For building classical neural networks (Encoder, Decoder, etc.).
4+
torchvision # Provides the MNIST dataset and image tools.
5+
qiskit # The main SDK for creating and working with quantum circuits.
6+
qiskit_machine_learning # Provides tools for quantum machine learning models (QNN, QSVM).
7+
qiskit_algorithms # Contains high-level quantum algorithms like VQE/QAOA.
8+
qiskit-aer # The fast, local simulator for running quantum circuits.
9+
qiskit-optimization # A specialized library for solving optimization problems (QUBO).
10+
11+
# --- Scientific Computing & Utilities ---
12+
13+
numpy # The fundamental library for all numerical and array operations.
14+
matplotlib # The primary library used for creating all plots and visualizations.
15+
scikit-learn # Used for its classical SVM, PCA, and performance metrics.
16+
tqdm # Creates the smart progress bars for training loops.
17+
18+
# --- MLOps & Configuration ---
19+
20+
mlflow # The platform for logging and tracking all experiment runs.
21+
PyYAML # Used to read and parse the main `config.yaml` file.

run_pipeline.py

Lines changed: 37 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
import os
2-
import yaml
3-
import mlflow
1+
import os # For interacting with the operating system (e.g., file paths).
2+
import yaml # For reading the main `config.yaml` file.
3+
import mlflow # The main library for experiment tracking.
44

5+
# --- Import the primary function from each stage's dedicated module ---
56
from src.feature_engineering.build_feature_extractor import run_feature_engineering
67
from src.hyperparameter_tuning.tune_with_qaoa import run_hyperparameter_tuning
78
from src.production_monitoring.monitor_with_qsvm import run_drift_detection
@@ -10,57 +11,65 @@
1011
from src.visualization.plot_stage_3 import create_drift_detection_plots
1112

1213
def main():
13-
with open('config.yaml', 'r') as f:
14-
config = yaml.safe_load(f)
14+
# --- Step 1: Load the Master Configuration ---
15+
with open('config.yaml', 'r') as f: # Open the config file for reading.
16+
config = yaml.safe_load(f) # Load all parameters into a dictionary.
1517

16-
mlflow.set_tracking_uri(config['mlflow_tracking_uri'])
17-
mlflow.set_experiment(config['project_name'])
18+
# --- Step 2: Initialize MLflow Experiment Tracking ---
19+
mlflow.set_tracking_uri(config['mlflow_tracking_uri']) # Set the folder to store MLflow logs.
20+
mlflow.set_experiment(config['project_name']) # Set the experiment name in the MLflow UI.
1821

19-
with mlflow.start_run() as run:
20-
run_id = run.info.run_id
22+
# --- Step 3: Start the Master MLflow Run ---
23+
with mlflow.start_run() as run: # Start a new experiment run context.
24+
run_id = run.info.run_id # Get the unique ID for this execution.
2125
print(f"==========================================================")
2226
print(f"=== STARTING MLFLOW RUN ID: {run_id} ===")
2327
print(f"==========================================================")
2428

25-
# --- THE FIX: Create a single dictionary with unique keys before logging ---
29+
# --- Flatten all config parameters into a single dictionary for logging ---
2630
params_to_log = {
2731
**config['stage_1_feature_engineering'],
2832
**config['stage_2_hyperparameter_tuning'],
2933
**config['stage_3_production_monitoring']
3034
}
31-
# MLflow cannot log nested dictionaries, so we remove this one.
32-
params_to_log.pop('hyperparameter_space', None)
35+
params_to_log.pop('hyperparameter_space', None) # Remove nested dictionaries, as MLflow can't log them.
3336

3437
print("Logging configuration parameters to MLflow...")
35-
mlflow.log_params(params_to_log)
38+
mlflow.log_params(params_to_log) # Log all parameters to the MLflow run.
3639

37-
run_feature_engineering(config)
38-
run_hyperparameter_tuning(config)
39-
run_drift_detection(config)
40+
# --- Step 4: Execute the MLOps Pipeline Stages Sequentially ---
41+
run_feature_engineering(config) # Run Stage 1: Build the feature extractor.
42+
run_hyperparameter_tuning(config) # Run Stage 2: Find the best model parameters.
43+
run_drift_detection(config) # Run Stage 3: Monitor for data drift.
4044

45+
# --- Step 5: Generate Storytelling Visualizations ---
4146
print("\n==========================================================")
4247
print("=== MAIN PIPELINE COMPLETE. NOW GENERATING VISUALS... ===")
4348
print("==========================================================")
4449

45-
create_feature_space_plot(config)
46-
create_hpo_search_plot(config)
47-
create_drift_detection_plots(config)
50+
create_feature_space_plot(config) # Generate the plot for Stage 1.
51+
create_hpo_search_plot(config) # Generate the plot for Stage 2.
52+
create_drift_detection_plots(config) # Generate the plot(s) for Stage 3.
4853

54+
# --- Step 6: Archive Visualizations in MLflow ---
4955
print("\nLogging visualization artifacts to MLflow...")
50-
mlflow.log_artifact("visualization_stage_1_feature_space.png")
51-
mlflow.log_artifact("visualization_stage_2_hpo_search.png")
52-
mode = config['stage_3_production_monitoring']['visualization_mode']
53-
if mode == 'fast':
54-
mlflow.log_artifact("visualization_stage_3_drift_FAST.png")
56+
mlflow.log_artifact("visualization_stage_1_feature_space.png") # Save Stage 1 plot to MLflow.
57+
mlflow.log_artifact("visualization_stage_2_hpo_search.png") # Save Stage 2 plot to MLflow.
58+
59+
mode = config['stage_3_production_monitoring']['visualization_mode'] # Check which viz mode was used.
60+
if mode == 'fast': # If fast mode was used...
61+
mlflow.log_artifact("visualization_stage_3_drift_FAST.png") # ...log the fast plot.
5562
mlflow.log_artifact(f"visualization_stage_3_confusion_matrix_{mode.upper()}.png")
56-
else:
57-
mlflow.log_artifact("visualization_stage_3_drift_boundary.png")
63+
else: # Otherwise...
64+
mlflow.log_artifact("visualization_stage_3_drift_boundary.png") # ...log the high-quality plot.
5865
mlflow.log_artifact(f"visualization_stage_3_confusion_matrix_{mode.upper()}.png")
5966

67+
# --- Final confirmation message ---
6068
print("\n==========================================================")
6169
print(f"=== MLOPS PIPELINE EXECUTION COMPLETE FOR RUN ID: {run_id} ===")
6270
print(f"=== View results in the MLflow UI: `mlflow ui` ===")
6371
print(f"==========================================================")
6472

65-
if __name__ == '__main__':
66-
main()
73+
# --- Standard Python entry point ---
74+
if __name__ == '__main__': # If the script is run directly...
75+
main() # ...call the main function.

0 commit comments

Comments
 (0)