@@ -32,17 +32,23 @@ class _MlflowOps:
3232
3333 from evalhub.adapter.mlflow import MlflowArtifact
3434
35- callbacks.mlflow.save(
35+ rid = callbacks.mlflow.save(
3636 results,
3737 job_spec,
3838 artifacts=[
3939 MlflowArtifact("results.json", json_bytes, "application/json"),
4040 MlflowArtifact("report.html", html_bytes, "text/html"),
4141 ],
4242 )
43+ if rid:
44+ results.mlflow_run_id = rid
4345
4446 Metrics, params, and all artifacts are saved in a single MLflow run.
45- Does nothing if ``job_spec.experiment_name`` is not set.
47+ Does nothing if ``job_spec.experiment_name`` is not set (returns ``None``).
48+
49+ Returns the MLflow run id when a run is created. Assign it to
50+ ``results.mlflow_run_id`` before ``callbacks.report_results(results)`` so
51+ Eval Hub stores the link.
4652
4753 The backend is controlled by the ``backend`` constructor argument or the
4854 ``EVALHUB_MLFLOW_BACKEND`` environment variable:
@@ -60,16 +66,15 @@ def save(
6066 results : JobResults ,
6167 job_spec : JobSpec ,
6268 artifacts : list [MlflowArtifact ] | None = None ,
63- ) -> None :
69+ ) -> str | None :
6470 if not job_spec .experiment_name :
6571 logger .debug ("No MLflow experiment configured, skipping" )
66- return
72+ return None
6773
6874 try :
6975 if self ._backend == MlflowBackend .UPSTREAM :
70- self ._save_upstream (results , job_spec , artifacts )
71- else :
72- self ._save_odh (results , job_spec , artifacts )
76+ return self ._save_upstream (results , job_spec , artifacts )
77+ return self ._save_odh (results , job_spec , artifacts )
7378 except Exception as e :
7479 logger .error ("Failed to save to MLflow: %s" , e )
7580 raise RuntimeError (f"MLflow save failed: { e } " ) from e
@@ -82,16 +87,17 @@ def save(
8287 def _build_params_metrics (
8388 results : JobResults ,
8489 ) -> tuple [list , list ]:
85- from .mlflow import Metric , Param
90+ from .mlflow import Metric , Param , sanitize_metric_key_for_api
8691
8792 params = [
8893 Param ("benchmark_id" , results .benchmark_id ),
8994 Param ("model_name" , results .model_name ),
9095 Param ("num_examples_evaluated" , str (results .num_examples_evaluated )),
9196 Param ("duration_seconds" , str (results .duration_seconds )),
9297 ]
98+ # MLflow rejects commas etc. in metric keys; Eval Hub keeps r.metric_name as-is.
9399 metrics : list [Metric ] = [
94- Metric (r .metric_name , float (r .metric_value ))
100+ Metric (sanitize_metric_key_for_api ( r .metric_name ) , float (r .metric_value ))
95101 for r in results .results
96102 if isinstance (r .metric_value , int | float )
97103 ]
@@ -104,21 +110,23 @@ def _save_odh(
104110 results : JobResults ,
105111 job_spec : JobSpec ,
106112 artifacts : list [MlflowArtifact ] | None ,
107- ) -> None :
113+ ) -> str :
108114 from .mlflow import MlflowClient
109115
110116 params , metrics = self ._build_params_metrics (results )
111117 run_tags : dict [str , str ] = {
112118 tag ["key" ]: tag ["value" ] for tag in (job_spec .tags or [])
113119 }
114120
121+ run_id : str = ""
115122 with MlflowClient () as client :
116123 experiment_id = client .get_or_create_experiment (
117124 job_spec .experiment_name or ""
118125 )
119126 with client .start_run (
120127 experiment_id , run_name = job_spec .id , tags = run_tags
121- ) as run_id :
128+ ) as rid :
129+ run_id = rid
122130 client .log_batch (run_id , metrics = metrics , params = params )
123131 for artifact in artifacts or []:
124132 client .upload_artifact (
@@ -129,20 +137,21 @@ def _save_odh(
129137 )
130138
131139 logger .info (
132- "Saved to MLflow (odh) experiment '%s' (run : %s) — "
140+ "Saved to MLflow (odh) experiment '%s' (run_id : %s) — "
133141 "%d metric(s), %d artifact(s)" ,
134142 job_spec .experiment_name ,
135- job_spec . id ,
143+ run_id ,
136144 len (metrics ),
137145 len (artifacts or []),
138146 )
147+ return run_id
139148
140149 def _save_upstream (
141150 self ,
142151 results : JobResults ,
143152 job_spec : JobSpec ,
144153 artifacts : list [MlflowArtifact ] | None ,
145- ) -> None :
154+ ) -> str :
146155 import tempfile
147156 from pathlib import Path as _Path
148157
@@ -160,7 +169,9 @@ def _save_upstream(
160169 }
161170
162171 mlflow .set_experiment (job_spec .experiment_name )
163- with mlflow .start_run (run_name = job_spec .id , tags = run_tags ):
172+ run_id = ""
173+ with mlflow .start_run (run_name = job_spec .id , tags = run_tags ) as active_run :
174+ run_id = active_run .info .run_id
164175 mlflow .log_params ({p .key : p .value for p in params })
165176 mlflow .log_metrics ({m .key : m .value for m in metrics })
166177
@@ -177,13 +188,14 @@ def _save_upstream(
177188 mlflow .log_artifact (str (tmp_file ), artifact_path = artifact_dir )
178189
179190 logger .info (
180- "Saved to MLflow (upstream) experiment '%s' (run : %s) — "
191+ "Saved to MLflow (upstream) experiment '%s' (run_id : %s) — "
181192 "%d metric(s), %d artifact(s)" ,
182193 job_spec .experiment_name ,
183- job_spec . id ,
194+ run_id ,
184195 len (metrics ),
185196 len (artifacts or []),
186197 )
198+ return run_id
187199
188200
189201class DefaultCallbacks (JobCallbacks ):
@@ -192,6 +204,16 @@ class DefaultCallbacks(JobCallbacks):
192204 This implementation:
193205 - Reports status updates to sidecar (if available) or logs them
194206 - Pushes OCI artifacts directly using OCIArtifactPersister
207+ - ``report_results(results)``: POSTs final results to Eval Hub. When
208+ ``results.mlflow_run_id`` is set (for example, from the return value of
209+ ``callbacks.mlflow.save()``), that id is included; otherwise the field is omitted.
210+
211+ Example::
212+
213+ rid = callbacks.mlflow.save(results, job_spec)
214+ if rid:
215+ results.mlflow_run_id = rid
216+ callbacks.report_results(results)
195217
196218 This is the recommended callback implementation for both production and development.
197219
@@ -612,6 +634,9 @@ def report_results(self, results: JobResults) -> None:
612634 if self .provider_id :
613635 status_event ["provider_id" ] = self .provider_id
614636
637+ if results .mlflow_run_id :
638+ status_event ["mlflow_run_id" ] = results .mlflow_run_id
639+
615640 # Include OCI artifact reference if available
616641 if results .oci_artifact :
617642 status_event ["artifacts" ] = {
0 commit comments