Skip to content

Commit 6af7265

Browse files
committed
fix: various fixes
Signed-off-by: Sébastien Han <seb@redhat.com>
1 parent b8798d4 commit 6af7265

File tree

3 files changed

+19
-10
lines changed

3 files changed

+19
-10
lines changed

.github/workflows/redhat-distro-container.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ jobs:
4444
runs-on: ubuntu-latest
4545
env:
4646
INFERENCE_MODEL: Qwen/Qwen3-0.6B
47-
EMBEDDING_MODEL: granite-embedding-125m
47+
EMBEDDING_MODEL: nomic-embed-text-v1.5
4848
VLLM_URL: http://localhost:8000/v1
4949
LLAMA_STACK_COMMIT_SHA: ${{ github.event.inputs.llama_stack_commit_sha || 'main' }}
5050
strategy:
@@ -73,7 +73,7 @@ jobs:
7373
LLAMA_STACK_VERSION: ${{ env.LLAMA_STACK_COMMIT_SHA }}
7474
run: |
7575
tmp_build_dir=$(mktemp -d)
76-
git clone --filter=blob:none --no-checkout https://github.com/llamastack/llama-stack.git "$tmp_build_dir"
76+
git clone --filter=blob:none --no-checkout https://github.com/opendatahub-io/llama-stack.git "$tmp_build_dir"
7777
cd "$tmp_build_dir"
7878
git checkout "$LLAMA_STACK_VERSION"
7979
python3 -m venv .venv

tests/run_integration_tests.sh

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ set -exuo pipefail
55
# Configuration
66
WORK_DIR="/tmp/llama-stack-integration-tests"
77
INFERENCE_MODEL="${INFERENCE_MODEL:-Qwen/Qwen3-0.6B}"
8+
EMBEDDING_MODEL="${EMBEDDING_MODEL:-nomic-embed-text-v1.5}"
89

910
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
1011

@@ -38,10 +39,15 @@ function clone_llama_stack() {
3839
cd "$WORK_DIR"
3940
# fetch origin in case we didn't clone a fresh repo
4041
git fetch origin
41-
if ! git checkout "v$LLAMA_STACK_VERSION"; then
42-
echo "Error: Could not checkout tag v$LLAMA_STACK_VERSION"
42+
if [ "$LLAMA_STACK_VERSION" == "main" ]; then
43+
checkout_to="main"
44+
else
45+
checkout_to="v$LLAMA_STACK_VERSION"
46+
fi
47+
if ! git checkout "$checkout_to"; then
48+
echo "Error: Could not checkout $checkout_to"
4349
echo "Available tags:"
44-
git tag | grep "^v" | tail -10
50+
git tag | tail -10
4551
exit 1
4652
fi
4753
}
@@ -64,12 +70,13 @@ function run_integration_tests() {
6470
exit 1
6571
fi
6672

67-
# TODO: remove this once we have a stable version of llama-stack client
68-
# Currently, LLS client version is 0.3.0, while the server version is 0.3.0rc3+rhai0
69-
uv run --with llama-stack-client==0.3.0 pytest -s -v tests/integration/inference/ \
73+
uv venv
74+
source .venv/bin/activate
75+
uv pip install llama-stack-client
76+
uv run pytest -s -v tests/integration/inference/ \
7077
--stack-config=server:"$STACK_CONFIG_PATH" \
7178
--text-model=vllm-inference/"$INFERENCE_MODEL" \
72-
--embedding-model=granite-embedding-125m \
79+
--embedding-model=nomic-ai/"$EMBEDDING_MODEL" \
7380
-k "not ($SKIP_TESTS)"
7481
}
7582

tests/smoke.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ function test_model_list {
4242
return
4343
else
4444
echo "Model $INFERENCE_MODEL was not found :("
45+
echo "Response: $resp"
4546
echo "Container logs:"
4647
docker logs llama-stack || true
4748
exit 1
@@ -50,12 +51,13 @@ function test_model_list {
5051

5152
function test_model_openai_inference {
5253
echo "===> Attempting to chat with model $INFERENCE_MODEL..."
53-
resp=$(curl -fsS http://127.0.0.1:8321/v1/chat/completions -H "Content-Type: application/json" -d "{\"model\": \"$INFERENCE_MODEL\",\"messages\": [{\"role\": \"user\", \"content\": \"What color is grass?\"}], \"max_tokens\": 128, \"temperature\": 0.0}")
54+
resp=$(curl -fsS http://127.0.0.1:8321/v1/chat/completions -H "Content-Type: application/json" -d "{\"model\": \"vllm-inference/$INFERENCE_MODEL\",\"messages\": [{\"role\": \"user\", \"content\": \"What color is grass?\"}], \"max_tokens\": 128, \"temperature\": 0.0}")
5455
if echo "$resp" | grep -q "green"; then
5556
echo "===> Inference is working :)"
5657
return
5758
else
5859
echo "===> Inference is not working :("
60+
echo "Response: $resp"
5961
echo "Container logs:"
6062
docker logs llama-stack || true
6163
exit 1

0 commit comments

Comments (0)