Skip to content

Commit f6dfab9

Browse files
committed
wip
1 parent b8798d4 commit f6dfab9

File tree

3 files changed

+14
-8
lines changed

3 files changed

+14
-8
lines changed

.github/workflows/redhat-distro-container.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ jobs:
7373
LLAMA_STACK_VERSION: ${{ env.LLAMA_STACK_COMMIT_SHA }}
7474
run: |
7575
tmp_build_dir=$(mktemp -d)
76-
git clone --filter=blob:none --no-checkout https://github.com/llamastack/llama-stack.git "$tmp_build_dir"
76+
git clone --filter=blob:none --no-checkout https://github.com/opendatahub-io/llama-stack.git "$tmp_build_dir"
7777
cd "$tmp_build_dir"
7878
git checkout "$LLAMA_STACK_VERSION"
7979
python3 -m venv .venv

tests/run_integration_tests.sh

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,15 @@ function clone_llama_stack() {
3838
cd "$WORK_DIR"
3939
# fetch origin in case we didn't clone a fresh repo
4040
git fetch origin
41-
if ! git checkout "v$LLAMA_STACK_VERSION"; then
42-
echo "Error: Could not checkout tag v$LLAMA_STACK_VERSION"
41+
if [ "$LLAMA_STACK_VERSION" == "main" ]; then
42+
checkout_to="main"
43+
else
44+
checkout_to="v$LLAMA_STACK_VERSION"
45+
fi
46+
if ! git checkout "$checkout_to"; then
47+
echo "Error: Could not checkout $checkout_to"
4348
echo "Available tags:"
44-
git tag | grep "^v" | tail -10
49+
git tag | tail -10
4550
exit 1
4651
fi
4752
}
@@ -64,9 +69,8 @@ function run_integration_tests() {
6469
exit 1
6570
fi
6671

67-
# TODO: remove this once we have a stable version of llama-stack client
68-
# Currently, LLS client version is 0.3.0, while the server version is 0.3.0rc3+rhai0
69-
uv run --with llama-stack-client==0.3.0 pytest -s -v tests/integration/inference/ \
72+
uv pip install llama-stack-client
73+
uv run pytest -s -v tests/integration/inference/ \
7074
--stack-config=server:"$STACK_CONFIG_PATH" \
7175
--text-model=vllm-inference/"$INFERENCE_MODEL" \
7276
--embedding-model=granite-embedding-125m \

tests/smoke.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ function test_model_list {
4242
return
4343
else
4444
echo "Model $INFERENCE_MODEL was not found :("
45+
echo "Response: $resp"
4546
echo "Container logs:"
4647
docker logs llama-stack || true
4748
exit 1
@@ -50,12 +51,13 @@ function test_model_list {
5051

5152
function test_model_openai_inference {
5253
echo "===> Attempting to chat with model $INFERENCE_MODEL..."
53-
resp=$(curl -fsS http://127.0.0.1:8321/v1/chat/completions -H "Content-Type: application/json" -d "{\"model\": \"$INFERENCE_MODEL\",\"messages\": [{\"role\": \"user\", \"content\": \"What color is grass?\"}], \"max_tokens\": 128, \"temperature\": 0.0}")
54+
resp=$(curl -fsS http://127.0.0.1:8321/v1/chat/completions -H "Content-Type: application/json" -d "{\"model\": \"vllm-inference/$INFERENCE_MODEL\",\"messages\": [{\"role\": \"user\", \"content\": \"What color is grass?\"}], \"max_tokens\": 128, \"temperature\": 0.0}")
5455
if echo "$resp" | grep -q "green"; then
5556
echo "===> Inference is working :)"
5657
return
5758
else
5859
echo "===> Inference is not working :("
60+
echo "Response: $resp"
5961
echo "Container logs:"
6062
docker logs llama-stack || true
6163
exit 1

0 commit comments

Comments
 (0)