66Eval Hub, and polls for results. Both resources are cleaned up after evaluation.
77"""
88
9- import kfp
109from kfp import dsl
1110
12-
1311RHOAI_VLLM_IMAGE = (
14- "registry.redhat.io/rhaii/vllm-cuda-rhel9"
15- "@sha256:ad06abf3bb5235ebb5b2df84cd1b9fd09e823f0ff2eebfc82bb4590275ccfe0b"
12+ "registry.redhat.io/rhaii/vllm-cuda-rhel9@sha256:ad06abf3bb5235ebb5b2df84cd1b9fd09e823f0ff2eebfc82bb4590275ccfe0b"
1613)
1714
1815
@@ -48,6 +45,8 @@ def evalhub_evaluator_kserve(
4845 memory : str = "8Gi" ,
4946 cpu : str = "2" ,
5047 runtime_image : str = RHOAI_VLLM_IMAGE ,
48+ trust_remote_code : bool = False ,
49+ verify_tls : bool = False ,
5150 isvc_ready_timeout : int = 600 ,
5251):
5352 """Evaluate a model via Eval Hub with a KServe InferenceService.
@@ -58,12 +57,14 @@ def evalhub_evaluator_kserve(
5857 benchmark evaluation. Both resources are cleaned up after completion.
5958
6059 Args:
60+ output_metrics: KFP Metrics artifact for evaluation scores.
61+ output_results: KFP Artifact for full evaluation results JSON.
6162 evalhub_url: Eval Hub API endpoint (empty = skip evaluation).
6263 benchmarks: List of benchmark specs [{"provider_id": "...", "id": "..."}].
6364 collection_id: Eval Hub collection ID (alternative to benchmarks list).
6465 pvc_mount_path: Workspace PVC mount path (triggers KFP PVC mount).
6566 model_artifact: Model artifact from upstream training step.
66- model_path: HuggingFace model ID or local path (if no artifact).
67+ model_path: Local filesystem path to model directory (if no artifact).
6768 evalhub_tenant: Eval Hub tenant / namespace header (X-Tenant).
6869 evalhub_auth_token: Bearer token for Eval Hub auth.
6970 evalhub_model_name: Display name for the model in Eval Hub.
@@ -76,6 +77,8 @@ def evalhub_evaluator_kserve(
7677 memory: Pod memory request/limit for the predictor (e.g. "8Gi", "32Gi").
7778 cpu: CPU request/limit for the predictor (e.g. "2").
7879 runtime_image: Container image for the ServingRuntime (RHOAI vLLM default).
80+ trust_remote_code: Pass --trust-remote-code to vLLM (enables arbitrary code from model repos).
81+ verify_tls: Verify TLS certificates for Eval Hub API calls (False for self-signed certs).
7982 isvc_ready_timeout: Max seconds to wait for InferenceService readiness.
8083 """
8184 import json
@@ -112,8 +115,12 @@ def _k8s_api(method, path, body=None):
112115 "Content-Type" : "application/json" ,
113116 }
114117 resp = requests .request (
115- method , url , headers = headers ,
116- json = body , verify = SA_CA_PATH , timeout = 30 ,
118+ method ,
119+ url ,
120+ headers = headers ,
121+ json = body ,
122+ verify = SA_CA_PATH ,
123+ timeout = 30 ,
117124 )
118125 if resp .status_code >= 400 :
119126 logger .error (f"K8s API { method } { path } -> { resp .status_code } : { resp .text [:500 ]} " )
@@ -123,6 +130,7 @@ def _get_own_pod(namespace):
123130 hostname = os .environ .get ("HOSTNAME" , "" )
124131 if not hostname :
125132 import socket
133+
126134 hostname = socket .gethostname ()
127135 resp = _k8s_api ("GET" , f"/api/v1/namespaces/{ namespace } /pods/{ hostname } " )
128136 if resp .status_code == 200 :
@@ -151,21 +159,26 @@ def _find_workspace_pvc(pod_spec, model_path):
151159 for vm in c .get ("volumeMounts" , []):
152160 vol_name = vm ["name" ]
153161 mount_path = vm ["mountPath" ]
154- if vol_name in pvc_volumes and model_path .startswith (mount_path ):
162+ normalized_mount = mount_path .rstrip ("/" ) + "/"
163+ if vol_name in pvc_volumes and (model_path + "/" ).startswith (normalized_mount ):
155164 return pvc_volumes [vol_name ], mount_path
156165
157- raise RuntimeError (
158- f"Could not find workspace PVC for model path { model_path } . "
159- f"PVC volumes: { pvc_volumes } "
160- )
166+ raise RuntimeError (f"Could not find workspace PVC for model path { model_path } . PVC volumes: { pvc_volumes } " )
161167
162168 # =========================================================================
163169 # KServe resource helpers
164170 # =========================================================================
165171 KSERVE_SR_API = "/apis/serving.kserve.io/v1alpha1"
166172 KSERVE_ISVC_API = "/apis/serving.kserve.io/v1beta1"
167173
168- def _create_serving_runtime (namespace , name , image , served_model_name ):
174+ def _create_serving_runtime (namespace , name , image , served_model_name , enable_trust_remote_code = False ):
175+ vllm_args = [
176+ "--port=8080" ,
177+ "--model=/mnt/models" ,
178+ f"--served-model-name={ served_model_name } " ,
179+ ]
180+ if enable_trust_remote_code :
181+ vllm_args .append ("--trust-remote-code" )
169182 sr = {
170183 "apiVersion" : "serving.kserve.io/v1alpha1" ,
171184 "kind" : "ServingRuntime" ,
@@ -187,21 +200,18 @@ def _create_serving_runtime(namespace, name, image, served_model_name):
187200 "prometheus.io/path" : "/metrics" ,
188201 "prometheus.io/port" : "8080" ,
189202 },
190- "containers" : [{
191- "name" : "kserve-container" ,
192- "image" : image ,
193- "command" : ["python" , "-m" , "vllm.entrypoints.openai.api_server" ],
194- "args" : [
195- "--port=8080" ,
196- "--model=/mnt/models" ,
197- f"--served-model-name={ served_model_name } " ,
198- "--trust-remote-code" ,
199- ],
200- "env" : [
201- {"name" : "HF_HOME" , "value" : "/tmp/hf_home" },
202- ],
203- "ports" : [{"containerPort" : 8080 , "protocol" : "TCP" }],
204- }],
203+ "containers" : [
204+ {
205+ "name" : "kserve-container" ,
206+ "image" : image ,
207+ "command" : ["python" , "-m" , "vllm.entrypoints.openai.api_server" ],
208+ "args" : vllm_args ,
209+ "env" : [
210+ {"name" : "HF_HOME" , "value" : "/tmp/hf_home" },
211+ ],
212+ "ports" : [{"containerPort" : 8080 , "protocol" : "TCP" }],
213+ }
214+ ],
205215 "multiModel" : False ,
206216 "supportedModelFormats" : [
207217 {"autoSelect" : True , "name" : "vLLM" },
@@ -214,8 +224,7 @@ def _create_serving_runtime(namespace, name, image, served_model_name):
214224 logger .info (f"Created ServingRuntime { name } " )
215225 return resp .json ()
216226
217- def _create_inference_service (namespace , name , runtime_name , pvc_name ,
218- model_relative_path , n_gpu , mem , n_cpu ):
227+ def _create_inference_service (namespace , name , runtime_name , pvc_name , model_relative_path , n_gpu , mem , n_cpu ):
219228 storage_uri = f"pvc://{ pvc_name } /{ model_relative_path } "
220229
221230 isvc = {
@@ -278,7 +287,7 @@ def _wait_for_isvc_ready(namespace, name, timeout_s=600):
278287 duration = time .time () - start
279288 logger .info (f"InferenceService { name } ready in { duration :.1f} s" )
280289 return duration
281- reasons = [f"{ c ['type' ]} ={ c .get ('status' ,'?' )} ({ c .get ('reason' ,'' )} )" for c in conditions ]
290+ reasons = [f"{ c ['type' ]} ={ c .get ('status' , '?' )} ({ c .get ('reason' , '' )} )" for c in conditions ]
282291 logger .info (f" ISVC conditions: { ', ' .join (reasons ) if reasons else 'none yet' } " )
283292 time .sleep (15 )
284293 raise TimeoutError (f"InferenceService { name } did not become Ready within { timeout_s } s" )
@@ -307,8 +316,15 @@ def _get_isvc_url(namespace, name):
307316
308317 def _cleanup_kserve (namespace , sr_name , isvc_name ):
309318 logger .info (f"Cleaning up InferenceService { isvc_name } and ServingRuntime { sr_name } " )
310- _k8s_api ("DELETE" , f"{ KSERVE_ISVC_API } /namespaces/{ namespace } /inferenceservices/{ isvc_name } " )
311- _k8s_api ("DELETE" , f"{ KSERVE_SR_API } /namespaces/{ namespace } /servingruntimes/{ sr_name } " )
319+ for kind , api , name in [
320+ ("InferenceService" , KSERVE_ISVC_API , isvc_name ),
321+ ("ServingRuntime" , KSERVE_SR_API , sr_name ),
322+ ]:
323+ resp = _k8s_api ("DELETE" , f"{ api } /namespaces/{ namespace } /{ kind .lower ()} s/{ name } " )
324+ if resp .status_code >= 400 and resp .status_code != 404 :
325+ logger .warning (f"Failed to delete { kind } { name } : { resp .status_code } { resp .text [:200 ]} " )
326+ else :
327+ logger .info (f"Deleted { kind } { name } " )
312328 logger .info (f"Cleanup complete for { isvc_name } / { sr_name } " )
313329
314330 # =========================================================================
@@ -372,11 +388,11 @@ def _cleanup_kserve(namespace, sr_name, isvc_name):
372388
373389 model_relative_path = final_model_path
374390 if final_model_path .startswith (workspace_mount ):
375- model_relative_path = final_model_path [len (workspace_mount ):].lstrip ("/" )
391+ model_relative_path = final_model_path [len (workspace_mount ) :].lstrip ("/" )
376392 logger .info (f"Model relative path in PVC: { model_relative_path } " )
377393 logger .info (f"storageUri will be: pvc://{ workspace_pvc_name } /{ model_relative_path } " )
378394
379- _create_serving_runtime (namespace , sr_name , runtime_image , resolved_model_name )
395+ _create_serving_runtime (namespace , sr_name , runtime_image , resolved_model_name , trust_remote_code )
380396
381397 try :
382398 _create_inference_service (
@@ -443,7 +459,7 @@ def _cleanup_kserve(namespace, sr_name, isvc_name):
443459 logger .info (f"Submitting evaluation job to { submit_url } " )
444460 logger .info (f"Config: { json .dumps (eval_config , indent = 2 )} " )
445461
446- resp = requests .post (submit_url , json = eval_config , headers = headers , timeout = 30 , verify = False )
462+ resp = requests .post (submit_url , json = eval_config , headers = headers , timeout = 30 , verify = verify_tls )
447463 if resp .status_code not in (200 , 201 , 202 ):
448464 raise RuntimeError (f"Eval Hub returned { resp .status_code } : { resp .text } " )
449465
@@ -462,7 +478,7 @@ def _cleanup_kserve(namespace, sr_name, isvc_name):
462478 time .sleep (evalhub_poll_interval )
463479
464480 try :
465- resp = requests .get (job_url , headers = headers , timeout = 30 , verify = False )
481+ resp = requests .get (job_url , headers = headers , timeout = 30 , verify = verify_tls )
466482 if resp .status_code != 200 :
467483 logger .warning (f"Poll returned { resp .status_code } , retrying..." )
468484 continue
@@ -484,7 +500,7 @@ def _cleanup_kserve(namespace, sr_name, isvc_name):
484500 logger .error (f"Evaluation timed out after { evalhub_timeout } s" )
485501 try :
486502 cancel_url = f"{ evalhub_url .rstrip ('/' )} /api/v1/evaluations/jobs/{ job_id } "
487- requests .delete (cancel_url , headers = headers , timeout = 10 , verify = False )
503+ requests .delete (cancel_url , headers = headers , timeout = 10 , verify = verify_tls )
488504 logger .info (f"Cancelled job { job_id } " )
489505 except Exception :
490506 pass
0 commit comments