Skip to content

Commit 9ce1b32

Browse files
committed
Added MLflow experiment tracking and a YAML config file for pipeline parameters
1 parent 964d9f9 commit 9ce1b32

File tree

15 files changed

+423
-384
lines changed

15 files changed

+423
-384
lines changed

.gitignore

Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,34 @@
1-
# .gitignore for the Quantum MLOps Pipeline
1+
# ===================================================================
2+
# .gitignore for the Quantum-Enhanced MLOps Pipeline
3+
# ===================================================================
24

35
# --- Python Virtual Environments ---
6+
# Never commit your virtual environment. It's large, OS-specific,
7+
# and should be recreated from requirements.txt.
48
/qenv/
5-
venv/
6-
.venv/
7-
env/
8-
ENV/
9+
/venv/
10+
/.venv/
11+
/env/
12+
/ENV/
913

10-
# --- Python Cache Files ---
14+
# --- Python Cache & Compiled Files ---
15+
# These are temporary files Python creates to run your code faster.
1116
__pycache__/
12-
*.pyc
13-
*.pyo
14-
*.pyd
17+
*.py[cod]
18+
*.egg-info/
1519

16-
# --- Generated Artifacts & Data ---
17-
# This section is modified to keep the directory structure.
20+
# ===================================================================
21+
# MLOps & Data Artifacts (The most important section for this project)
22+
# ===================================================================
23+
24+
# --- MLflow ---
25+
# ** NEW **
26+
# Ignore the entire MLflow tracking directory. This contains all experiment
27+
# logs, metrics, params, and artifacts, and can grow very large.
28+
/mlruns/
29+
30+
# --- Generated Data & Models ---
31+
# The following rules ensure we commit the FOLDER structure but not its CONTENTS.
1832

1933
# 1. Ignore all content inside the /data/ directory...
2034
data/*
@@ -31,12 +45,22 @@ saved_models/tuned_classifier/*
3145
# 6. ...but DO NOT ignore its .gitkeep placeholder.
3246
!saved_models/tuned_classifier/.gitkeep
3347

34-
# Ignore all generated visualization images
48+
# --- Generated Visualizations ---
49+
# Ignore all plot images generated by the pipeline. These are artifacts.
3550
*.png
3651

52+
# ===================================================================
53+
# OS, Editor, and Test Artifacts
54+
# ===================================================================
55+
56+
# --- Common Test Artifacts ---
57+
.pytest_cache/
58+
htmlcov/
59+
.coverage
60+
3761
# --- OS & Editor Specific Files ---
3862
.DS_Store
3963
Thumbs.db
4064
.vscode/
4165
.idea/
42-
.ipynb_checkpoints
66+
.ipynb_checkpoints/

config.yaml

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# ===================================================================
2+
# Master Configuration for the Quantum-Enhanced MLOps Pipeline
3+
# ===================================================================
4+
project_name: "quantum-enhanced-MLOps"
5+
mlflow_tracking_uri: "mlruns"
6+
quantum_backend: "simulator"
7+
use_qem: False
8+
9+
# ===================================================================
10+
# Pipeline Stage Parameters (with UNIQUE keys)
11+
# ===================================================================
12+
stage_1_feature_engineering:
13+
stage_1_n_samples: 400
14+
stage_1_epochs: 5
15+
stage_1_learning_rate: 0.005
16+
stage_1_batch_size: 16
17+
stage_1_latent_dim: 4
18+
stage_1_img_size: 14
19+
20+
stage_2_hyperparameter_tuning:
21+
stage_2_n_samples: 1000
22+
stage_2_qaoa_reps: 2
23+
hyperparameter_space:
24+
hidden_dim: [32, 64, 128]
25+
learning_rate: [0.01, 0.005, 0.001]
26+
dropout: [0.2, 0.4, 0.6]
27+
28+
stage_3_production_monitoring:
29+
stage_3_n_samples: 200
30+
stage_3_nu_param: 0.1
31+
visualization_mode: "fast"
32+
stage_3_fast_plot_n_samples: 50

generate_visuals_only.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# ===================================================================
2+
# Standalone Script to Generate All Visualizations
3+
# ===================================================================
4+
# Use this script to regenerate plots without re-running the entire
5+
# MLOps pipeline. It requires that `run_pipeline.py` has been
6+
# successfully run at least once to create the necessary saved models.
7+
8+
import yaml
9+
10+
# Import the individual, modular plotting scripts
11+
from src.visualization.plot_stage_1 import create_feature_space_plot
12+
from src.visualization.plot_stage_2 import create_hpo_search_plot
13+
from src.visualization.plot_stage_3 import create_drift_detection_plots
14+
15+
def main():
    """
    Load the project configuration and regenerate all pipeline visualizations.

    Use this instead of re-running the entire MLOps pipeline; it requires that
    `run_pipeline.py` has already been run at least once so the saved models
    the plotting functions depend on exist on disk.
    """
    # 1. Load the project configuration from the YAML file.
    #    safe_load is used so the config file cannot execute arbitrary code.
    try:
        with open('config.yaml', 'r', encoding='utf-8') as f:
            config = yaml.safe_load(f)
    except FileNotFoundError:
        print("[ERROR] `config.yaml` not found! Please ensure it's in the root directory.")
        return
    except yaml.YAMLError as exc:
        # A malformed config should fail with a readable message, not a traceback,
        # matching how a missing file is already handled above.
        print(f"[ERROR] `config.yaml` could not be parsed: {exc}")
        return

    print("==========================================================")
    print("=== RE-GENERATING ALL STORYTELLING VISUALS... ===")
    print("==========================================================")

    # 2. Call each visualization function, passing the config object
    #    so they know which settings to use (e.g., 'fast' vs 'high_quality').
    create_feature_space_plot(config)
    create_hpo_search_plot(config)
    create_drift_detection_plots(config)

    print("\n==========================================================")
    print("=== ALL VISUALS RE-GENERATED SUCCESSFULLY ===")
    print("==========================================================")


if __name__ == '__main__':
    main()

requirements.txt

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
1-
# Core Frameworks
21
torch
32
torchvision
43
qiskit
54
qiskit_machine_learning
65
qiskit_algorithms
76
qiskit-aer
87
qiskit-optimization
9-
10-
# Utility and Data Handling
118
numpy
129
matplotlib
1310
scikit-learn
14-
tqdm
11+
tqdm
12+
mlflow
13+
PyYAML

run_pipeline.py

Lines changed: 54 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,52 +1,66 @@
11
import os
2+
import yaml
3+
import mlflow
4+
25
from src.feature_engineering.build_feature_extractor import run_feature_engineering
36
from src.hyperparameter_tuning.tune_with_qaoa import run_hyperparameter_tuning
47
from src.production_monitoring.monitor_with_qsvm import run_drift_detection
5-
6-
# --- ADD IMPORTS FOR THE NEW VISUALIZATION MODULES ---
7-
from src.visualisation.plot_stage_1 import create_feature_space_plot
8-
from src.visualisation.plot_stage_2 import create_hpo_search_plot
9-
from src.visualisation.plot_stage_3 import create_drift_detection_plots
10-
from src.visualisation.plot_stage_3_fast import create_drift_detection_plot_fast
8+
from src.visualization.plot_stage_1 import create_feature_space_plot
9+
from src.visualization.plot_stage_2 import create_hpo_search_plot
10+
from src.visualization.plot_stage_3 import create_drift_detection_plots
1111

1212
def main():
    """
    Execute the full quantum-enhanced MLOps pipeline end to end.

    Loads `config.yaml`, opens a single MLflow run, logs the flattened
    per-stage parameters, executes the three pipeline stages (feature
    engineering, hyperparameter tuning, drift detection), regenerates the
    visualizations, and logs every plot as an MLflow artifact.
    """
    with open('config.yaml', 'r', encoding='utf-8') as f:
        config = yaml.safe_load(f)

    mlflow.set_tracking_uri(config['mlflow_tracking_uri'])
    mlflow.set_experiment(config['project_name'])

    with mlflow.start_run() as run:
        run_id = run.info.run_id
        print("==========================================================")
        print(f"=== STARTING MLFLOW RUN ID: {run_id} ===")
        print("==========================================================")

        # Merge the per-stage config sections into one flat dict. The keys are
        # prefixed per stage (stage_1_*, stage_2_*, stage_3_*) so merging
        # cannot silently clobber a parameter.
        params_to_log = {
            **config['stage_1_feature_engineering'],
            **config['stage_2_hyperparameter_tuning'],
            **config['stage_3_production_monitoring'],
        }
        # MLflow cannot log nested dictionaries, so drop the HPO search space.
        params_to_log.pop('hyperparameter_space', None)

        print("Logging configuration parameters to MLflow...")
        mlflow.log_params(params_to_log)

        run_feature_engineering(config)
        run_hyperparameter_tuning(config)
        run_drift_detection(config)

        print("\n==========================================================")
        print("=== MAIN PIPELINE COMPLETE. NOW GENERATING VISUALS... ===")
        print("==========================================================")

        create_feature_space_plot(config)
        create_hpo_search_plot(config)
        create_drift_detection_plots(config)

        print("\nLogging visualization artifacts to MLflow...")
        mlflow.log_artifact("visualization_stage_1_feature_space.png")
        mlflow.log_artifact("visualization_stage_2_hpo_search.png")

        mode = config['stage_3_production_monitoring']['visualization_mode']
        # Only the drift-plot filename depends on the mode; the confusion
        # matrix follows a single naming scheme, so log it once after the
        # branch (previously duplicated in both branches).
        if mode == 'fast':
            mlflow.log_artifact("visualization_stage_3_drift_FAST.png")
        else:
            mlflow.log_artifact("visualization_stage_3_drift_boundary.png")
        mlflow.log_artifact(f"visualization_stage_3_confusion_matrix_{mode.upper()}.png")

        print("\n==========================================================")
        print(f"=== MLOPS PIPELINE EXECUTION COMPLETE FOR RUN ID: {run_id} ===")
        print("=== View results in the MLflow UI: `mlflow ui` ===")
        print("==========================================================")


if __name__ == '__main__':
    main()

src/feature_engineering/build_feature_extractor.py

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import numpy as np
55
import os
66
from tqdm import tqdm
7+
import mlflow
78

89
from .data_setup import get_data_loaders
910
from .classical_components import Encoder, Decoder
@@ -23,28 +24,29 @@ def forward(self, x):
2324
reconstructed_image = self.decoder(quantum_output)
2425
return reconstructed_image
2526

26-
def run_feature_engineering(latent_dim=4, epochs=5, lr=0.001, batch_size=32, n_samples=600, img_size=14):
27-
"""Main function to orchestrate the training of the Hybrid Autoencoder."""
27+
def run_feature_engineering(config):
    """
    MLOps Stage 1: train the Hybrid Autoencoder feature extractor.

    Reads hyperparameters from the `stage_1_feature_engineering` section of
    the config, trains the encoder -> quantum layer -> decoder model on a
    reconstruction (MSE) loss, saves the trained classical encoder weights
    and PQC weights to `saved_models/feature_extractor/`, and logs the loss
    curve plus both weight files to MLflow.

    Args:
        config: Parsed project configuration (dict from config.yaml). Must
            contain a 'stage_1_feature_engineering' section with the
            `stage_1_*` hyperparameter keys.
    """
    print("--- MLOps Stage 1: Building Quantum-Native Feature Extractor ---")
    cfg = config['stage_1_feature_engineering']
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    train_loader, _ = get_data_loaders(
        batch_size=cfg['stage_1_batch_size'],
        n_samples=cfg['stage_1_n_samples'],
        img_size=cfg['stage_1_img_size']
    )
    encoder = Encoder(cfg['stage_1_latent_dim'], cfg['stage_1_img_size']).to(device)
    quantum_layer = get_quantum_torch_layer(cfg['stage_1_latent_dim']).to(device)
    decoder = Decoder(cfg['stage_1_latent_dim'], cfg['stage_1_img_size']).to(device)
    model = HybridAutoencoder(encoder, quantum_layer, decoder).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=cfg['stage_1_learning_rate'])

    print("\nStarting Hybrid Autoencoder training...")
    # Hoist the epoch count out of the loop instead of re-reading the dict
    # on every iteration and in every f-string.
    epochs = cfg['stage_1_epochs']
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
        for images, _ in progress_bar:
            images = images.to(device)
            optimizer.zero_grad()
            # NOTE(review): the forward/backward lines here were elided by the
            # diff context; reconstructed as a standard autoencoder MSE
            # reconstruction step — confirm against the original file.
            outputs = model(images)
            loss = criterion(outputs, images)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            progress_bar.set_postfix(loss=loss.item())
        avg_loss = running_loss / len(train_loader)
        print(f"Epoch [{epoch+1}/{epochs}], Average Training Loss: {avg_loss:.4f}")
        # FIX: log one metric series with `step` rather than a distinct metric
        # name per epoch (stage_1_epoch_N_loss) — a per-epoch name fragments
        # the loss curve into N single-point metrics in the MLflow UI.
        mlflow.log_metric("stage_1_avg_train_loss", avg_loss, step=epoch + 1)

    print("\nTraining finished.")
    save_dir = "saved_models/feature_extractor"
    os.makedirs(save_dir, exist_ok=True)
    encoder_path = os.path.join(save_dir, "hae_encoder.pth")
    torch.save(model.encoder.state_dict(), encoder_path)
    print(f"Trained classical encoder saved to {encoder_path}")
    pqc_weights_path = os.path.join(save_dir, "hae_pqc_weights.npy")
    pqc_weights = model.quantum_layer.weight.cpu().detach().numpy()
    np.save(pqc_weights_path, pqc_weights)
    print(f"Trained PQC weights saved to {pqc_weights_path}")

    print("Logging model artifacts to MLflow...")
    mlflow.log_artifact(encoder_path, artifact_path="stage_1_feature_extractor")
    mlflow.log_artifact(pqc_weights_path, artifact_path="stage_1_feature_extractor")

    print("\n--- Feature Engineering Stage Complete ---")

0 commit comments

Comments
 (0)