1- import os
2- import yaml
3- import mlflow
1+ import os # For interacting with the operating system (e.g., file paths).
2+ import yaml # For reading the main `config.yaml` file.
3+ import mlflow # The main library for experiment tracking.
44
5+ # --- Import the primary function from each stage's dedicated module ---
56from src .feature_engineering .build_feature_extractor import run_feature_engineering
67from src .hyperparameter_tuning .tune_with_qaoa import run_hyperparameter_tuning
78from src .production_monitoring .monitor_with_qsvm import run_drift_detection
1011from src .visualization .plot_stage_3 import create_drift_detection_plots
1112
def main():
    """Run the full MLOps pipeline inside a single MLflow run.

    Reads ``config.yaml`` from the current working directory, points MLflow
    at the configured tracking URI and experiment, logs the per-stage
    parameters, executes the three pipeline stages in order, renders the
    stage plots and finally attaches the resulting PNG files to the run as
    artifacts.

    Raises:
        FileNotFoundError: If ``config.yaml`` is not in the working directory.
        KeyError: If the loaded configuration lacks one of the keys read
            below (``mlflow_tracking_uri``, ``project_name``, the three
            ``stage_*`` sections, or ``visualization_mode``).
    """
    # --- Step 1: Load the master configuration ---
    # Explicit encoding so the file is read the same way on every platform
    # instead of depending on the locale's default.
    with open('config.yaml', 'r', encoding='utf-8') as f:
        config = yaml.safe_load(f)

    # --- Step 2: Initialize MLflow experiment tracking ---
    mlflow.set_tracking_uri(config['mlflow_tracking_uri'])
    mlflow.set_experiment(config['project_name'])

    # --- Step 3: Start the master MLflow run ---
    with mlflow.start_run() as run:
        run_id = run.info.run_id
        print("==========================================================")
        print(f"=== STARTING MLFLOW RUN ID: {run_id} ===")
        print("==========================================================")

        # Flatten the three stage sections into one dictionary for logging.
        # With dict unpacking, a key that appears in more than one stage
        # keeps the value from the later stage.
        params_to_log = {
            **config['stage_1_feature_engineering'],
            **config['stage_2_hyperparameter_tuning'],
            **config['stage_3_production_monitoring'],
        }
        # The nested search-space mapping is dropped before logging; the
        # default of None keeps this a no-op when the key is absent.
        params_to_log.pop('hyperparameter_space', None)

        print("Logging configuration parameters to MLflow...")
        mlflow.log_params(params_to_log)

        # --- Step 4: Execute the pipeline stages sequentially ---
        run_feature_engineering(config)      # Stage 1
        run_hyperparameter_tuning(config)    # Stage 2
        run_drift_detection(config)          # Stage 3

        # --- Step 5: Generate the visualizations ---
        print("\n ==========================================================")
        print("=== MAIN PIPELINE COMPLETE. NOW GENERATING VISUALS... ===")
        print("==========================================================")

        create_feature_space_plot(config)
        create_hpo_search_plot(config)
        create_drift_detection_plots(config)

        # --- Step 6: Archive the visualizations in MLflow ---
        print("\n Logging visualization artifacts to MLflow...")
        mlflow.log_artifact("visualization_stage_1_feature_space.png")
        mlflow.log_artifact("visualization_stage_2_hpo_search.png")

        # Only the drift plot's file name depends on the visualization mode;
        # the confusion matrix is logged for every mode, so it is logged once
        # after the branch rather than repeated in each arm.
        mode = config['stage_3_production_monitoring']['visualization_mode']
        if mode == 'fast':
            drift_plot = "visualization_stage_3_drift_FAST.png"
        else:
            drift_plot = "visualization_stage_3_drift_boundary.png"
        mlflow.log_artifact(drift_plot)
        # NOTE(review): assumes the plotting stage writes this file with no
        # space before ".png", matching the other artifact names - confirm.
        mlflow.log_artifact(
            f"visualization_stage_3_confusion_matrix_{mode.upper()}.png"
        )

        # --- Final confirmation message ---
        print("\n ==========================================================")
        print(f"=== MLOPS PIPELINE EXECUTION COMPLETE FOR RUN ID: {run_id} ===")
        print("=== View results in the MLflow UI: `mlflow ui` ===")
        print("==========================================================")

# --- Standard Python entry point ---
# Run the pipeline only when this file is executed directly, not on import.
if __name__ == '__main__':
    main()
0 commit comments