302302 -H " Content-Type: application/json" \
303303 -H " Authorization: Bearer ${TOKEN} " \
304304 " ${ENDPOINT} " 2> /dev/null || echo " " )
305-
305+ echo " $MODELS_RESPONSE "
306306 HTTP_CODE=$( echo " $MODELS_RESPONSE " | tail -n1)
307307 RESPONSE_BODY=$( echo " $MODELS_RESPONSE " | sed ' $d' )
308308
@@ -312,51 +312,56 @@ else
312312 " The endpoint is not reachable (VPN/firewall/DNS issue)" \
313313 " Check Gateway and HTTPRoute configuration"
314314 MODEL_NAME=" "
315- MODEL_URL =" "
315+ MODEL_CHAT_ENDPOINT =" "
316316 elif [ " $HTTP_CODE " = " 200" ]; then
317317 MODEL_COUNT=$( echo " $RESPONSE_BODY " | jq -r ' .data | length' 2> /dev/null || echo " 0" )
318318 if [ " $MODEL_COUNT " -gt 0 ]; then
319- print_success " Models endpoint accessible, found $MODEL_COUNT model(s)"
320319 MODEL_NAME=$( echo " $RESPONSE_BODY " | jq -r ' .data[0].id' 2> /dev/null || echo " " )
321- MODEL_URL=$( echo " $RESPONSE_BODY " | jq -r ' .data[0].url' 2> /dev/null || echo " " )
320+ MODEL_CHAT=$( echo " $RESPONSE_BODY " | jq -r ' .data[0].url' 2> /dev/null || echo " " )
321+ if [ -n " $MODEL_CHAT " ]; then
322+ MODEL_CHAT_ENDPOINT=" ${MODEL_CHAT} /v1/chat/completions"
323+ else
324+ print_warning " Model chat endpoint not found" " Model chat endpoint not found for $MODEL_NAME " " Check model HTTPRoute configuration: kubectl get httproute -n llm"
325+ fi
326+ print_success " Models endpoint accessible, found $MODEL_COUNT model(s) using $MODEL_NAME for validation"
322327 else
323328 print_warning " Models endpoint accessible but no models found" " You may need to deploy a model a simulated model can be deployed with the following command:" " kustomize build docs/samples/models/simulator | kubectl apply --server-side=true --force-conflicts -f -"
324329 MODEL_NAME=" "
325- MODEL_URL =" "
330+ MODEL_CHAT_ENDPOINT =" "
326331 fi
327332 elif [ " $HTTP_CODE " = " 404" ]; then
328333 print_fail " Endpoint not found (HTTP 404)" \
329334 " Path is incorrect - traffic reaching pods but wrong path" \
330335 " Check HTTPRoute: kubectl describe httproute maas-api-route -n maas-api"
331336 MODEL_NAME=" "
332- MODEL_URL =" "
337+ MODEL_CHAT_ENDPOINT =" "
333338 elif [ " $HTTP_CODE " = " 502" ] || [ " $HTTP_CODE " = " 503" ]; then
334339 print_fail " Gateway/Service error (HTTP $HTTP_CODE )" \
335340 " Gateway cannot reach backend service" \
336341 " Check MaaS API pods and service: kubectl get pods,svc -n maas-api"
337342 MODEL_NAME=" "
338- MODEL_URL =" "
343+ MODEL_CHAT_ENDPOINT =" "
339344 else
340345 print_fail " Models endpoint failed (HTTP $HTTP_CODE )" " Response: $( echo $RESPONSE_BODY | head -c 100) " " Check MaaS API service and logs"
341346 MODEL_NAME=" "
342- MODEL_URL =" "
347+ MODEL_CHAT_ENDPOINT =" "
343348 fi
344349 else
345350 print_warning " Skipping models endpoint test" " No authentication token available"
346351 MODEL_NAME=" "
347- MODEL_URL =" "
352+ MODEL_CHAT_ENDPOINT =" "
348353 fi
349354
350355 # Test model inference endpoint (if model exists)
351- if [ -n " $TOKEN " ] && [ -n " $MODEL_NAME " ] && [ -n " $MODEL_URL " ]; then
356+ if [ -n " $TOKEN " ] && [ -n " $MODEL_NAME " ] && [ -n " $MODEL_CHAT_ENDPOINT " ]; then
352357 print_check " Model inference endpoint"
353- print_info " Testing: curl -sSk -X POST ${MODEL_URL } -H 'Authorization: Bearer \$ TOKEN' -H 'Content-Type: application/json' -d '{\" model\" : \" ${MODEL_NAME} \" , \" prompt\" : \" Hello\" , \" max_tokens\" : 5}'"
358+ print_info " Testing: curl -sSk -X POST ${MODEL_CHAT_ENDPOINT } -H 'Authorization: Bearer \$ TOKEN' -H 'Content-Type: application/json' -d '{\" model\" : \" ${MODEL_NAME} \" , \" prompt\" : \" Hello\" , \" max_tokens\" : 5}'"
354359
355360 INFERENCE_RESPONSE=$( curl -sSk --connect-timeout 10 --max-time 30 -w " \n%{http_code}" \
356361 -H " Authorization: Bearer ${TOKEN} " \
357362 -H " Content-Type: application/json" \
358363 -d " {\" model\" : \" ${MODEL_NAME} \" , \" prompt\" : \" Hello\" , \" max_tokens\" : 5}" \
359- " ${MODEL_URL } " 2> /dev/null || echo " " )
364+ " ${MODEL_CHAT_ENDPOINT } " 2> /dev/null || echo " " )
360365
361366 HTTP_CODE=$( echo " $INFERENCE_RESPONSE " | tail -n1)
362367 RESPONSE_BODY=$( echo " $INFERENCE_RESPONSE " | sed ' $d' )
382387 fi
383388
384389 # Test rate limiting
385- if [ -n " $TOKEN " ] && [ -n " $MODEL_NAME " ] && [ -n " $MODEL_URL " ]; then
390+ if [ -n " $TOKEN " ] && [ -n " $MODEL_NAME " ] && [ -n " $MODEL_CHAT_ENDPOINT " ]; then
386391 print_check " Rate limiting"
387392 print_info " Sending 10 rapid requests to test rate limiting..."
388393
394399 -H " Authorization: Bearer ${TOKEN} " \
395400 -H " Content-Type: application/json" \
396401 -d " {\" model\" : \" ${MODEL_NAME} \" , \" prompt\" : \" Test\" , \" max_tokens\" : 1}" \
397- " ${MODEL_URL } " 2> /dev/null || echo " 000" )
402+ " ${MODEL_CHAT_ENDPOINT } " 2> /dev/null || echo " 000" )
398403
399404 if [ " $HTTP_CODE " = " 200" ]; then
400405 (( SUCCESS_COUNT++ ))
@@ -414,11 +419,11 @@ else
414419
415420 # Test unauthorized access
416421 print_check " Authorization enforcement (401 without token)"
417- if [ -n " $MODEL_NAME " ] && [ -n " $MODEL_URL " ]; then
422+ if [ -n " $MODEL_NAME " ] && [ -n " $MODEL_CHAT_ENDPOINT " ]; then
418423 UNAUTH_CODE=$( curl -sSk --connect-timeout 5 --max-time 15 -o /dev/null -w " %{http_code}" \
419424 -H " Content-Type: application/json" \
420425 -d " {\" model\" : \" ${MODEL_NAME} \" , \" prompt\" : \" Test\" , \" max_tokens\" : 1}" \
421- " ${MODEL_URL } " 2> /dev/null || echo " 000" )
426+ " ${MODEL_CHAT_ENDPOINT } " 2> /dev/null || echo " 000" )
422427
423428 if [ " $UNAUTH_CODE " = " 401" ]; then
424429 print_success " Authorization is enforced (got 401 without token)"
0 commit comments