Skip to content

Commit 4f78bc5

Browse files
committed
fix: various fixes
Signed-off-by: Sébastien Han <seb@redhat.com>
1 parent b8798d4 commit 4f78bc5

File tree

4 files changed

+62
-43
lines changed

4 files changed

+62
-43
lines changed

.github/workflows/redhat-distro-container.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ jobs:
4444
runs-on: ubuntu-latest
4545
env:
4646
INFERENCE_MODEL: Qwen/Qwen3-0.6B
47-
EMBEDDING_MODEL: granite-embedding-125m
47+
EMBEDDING_MODEL: ibm-granite/granite-embedding-125m-english
4848
VLLM_URL: http://localhost:8000/v1
4949
LLAMA_STACK_COMMIT_SHA: ${{ github.event.inputs.llama_stack_commit_sha || 'main' }}
5050
strategy:
@@ -73,7 +73,7 @@ jobs:
7373
LLAMA_STACK_VERSION: ${{ env.LLAMA_STACK_COMMIT_SHA }}
7474
run: |
7575
tmp_build_dir=$(mktemp -d)
76-
git clone --filter=blob:none --no-checkout https://github.com/llamastack/llama-stack.git "$tmp_build_dir"
76+
git clone --filter=blob:none --no-checkout https://github.com/opendatahub-io/llama-stack.git "$tmp_build_dir"
7777
cd "$tmp_build_dir"
7878
git checkout "$LLAMA_STACK_VERSION"
7979
python3 -m venv .venv

distribution/run.yaml

Lines changed: 23 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -238,27 +238,29 @@ storage:
238238
prompts:
239239
namespace: prompts
240240
backend: kv_default
241-
models:
242-
- metadata: {}
243-
model_id: ${env.INFERENCE_MODEL}
244-
provider_id: vllm-inference
245-
model_type: llm
246-
- metadata:
247-
embedding_dimension: 768
248-
model_id: granite-embedding-125m
249-
provider_id: sentence-transformers
250-
provider_model_id: ibm-granite/granite-embedding-125m-english
251-
model_type: embedding
252-
shields: []
253-
vector_dbs: []
254-
datasets: []
255-
scoring_fns: []
256-
benchmarks: []
257-
tool_groups:
258-
- toolgroup_id: builtin::websearch
259-
provider_id: tavily-search
260-
- toolgroup_id: builtin::rag
261-
provider_id: rag-runtime
241+
registered_resources:
242+
models:
243+
- metadata: {}
244+
model_id: ${env.INFERENCE_MODEL}
245+
provider_id: vllm-inference
246+
model_type: llm
247+
248+
- metadata:
249+
embedding_dimension: 768
250+
model_id: granite-embedding-125m
251+
provider_id: sentence-transformers
252+
provider_model_id: ibm-granite/granite-embedding-125m-english
253+
model_type: embedding
254+
shields: []
255+
vector_dbs: []
256+
datasets: []
257+
scoring_fns: []
258+
benchmarks: []
259+
tool_groups:
260+
- toolgroup_id: builtin::websearch
261+
provider_id: tavily-search
262+
- toolgroup_id: builtin::rag
263+
provider_id: rag-runtime
262264
telemetry:
263265
enabled: true
264266
server:

tests/run_integration_tests.sh

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ set -exuo pipefail
55
# Configuration
66
WORK_DIR="/tmp/llama-stack-integration-tests"
77
INFERENCE_MODEL="${INFERENCE_MODEL:-Qwen/Qwen3-0.6B}"
8+
EMBEDDING_MODEL="${EMBEDDING_MODEL:-ibm-granite/granite-embedding-125m-english}"
89

910
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
1011

@@ -38,10 +39,15 @@ function clone_llama_stack() {
3839
cd "$WORK_DIR"
3940
# fetch origin in case we didn't clone a fresh repo
4041
git fetch origin
41-
if ! git checkout "v$LLAMA_STACK_VERSION"; then
42-
echo "Error: Could not checkout tag v$LLAMA_STACK_VERSION"
42+
if [ "$LLAMA_STACK_VERSION" == "main" ]; then
43+
checkout_to="main"
44+
else
45+
checkout_to="v$LLAMA_STACK_VERSION"
46+
fi
47+
if ! git checkout "$checkout_to"; then
48+
echo "Error: Could not checkout $checkout_to"
4349
echo "Available tags:"
44-
git tag | grep "^v" | tail -10
50+
git tag | tail -10
4551
exit 1
4652
fi
4753
}
@@ -64,12 +70,14 @@ function run_integration_tests() {
6470
exit 1
6571
fi
6672

67-
# TODO: remove this once we have a stable version of llama-stack client
68-
# Currently, LLS client version is 0.3.0, while the server version is 0.3.0rc3+rhai0
69-
uv run --with llama-stack-client==0.3.0 pytest -s -v tests/integration/inference/ \
73+
uv venv
74+
# shellcheck source=/dev/null
75+
source .venv/bin/activate
76+
uv pip install llama-stack-client
77+
uv run pytest -s -v tests/integration/inference/ \
7078
--stack-config=server:"$STACK_CONFIG_PATH" \
7179
--text-model=vllm-inference/"$INFERENCE_MODEL" \
72-
--embedding-model=granite-embedding-125m \
80+
--embedding-model=sentence-transformers/"$EMBEDDING_MODEL" \
7381
-k "not ($SKIP_TESTS)"
7482
}
7583

tests/smoke.sh

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -35,27 +35,33 @@ function start_and_wait_for_llama_stack_container {
3535
}
3636

3737
function test_model_list {
38-
echo "===> Looking for model $INFERENCE_MODEL..."
39-
resp=$(curl -fsS http://127.0.0.1:8321/v1/models)
40-
if echo "$resp" | grep -q "$INFERENCE_MODEL"; then
41-
echo "Model $INFERENCE_MODEL was found :)"
42-
return
43-
else
44-
echo "Model $INFERENCE_MODEL was not found :("
45-
echo "Container logs:"
46-
docker logs llama-stack || true
47-
exit 1
48-
fi
38+
for model in "$INFERENCE_MODEL" "$EMBEDDING_MODEL"; do
39+
echo "===> Looking for model $model..."
40+
resp=$(curl -fsS http://127.0.0.1:8321/v1/models)
41+
echo "Response: $resp"
42+
if echo "$resp" | grep -q "$model"; then
43+
echo "Model $model was found :)"
44+
continue
45+
else
46+
echo "Model $model was not found :("
47+
echo "Response: $resp"
48+
echo "Container logs:"
49+
docker logs llama-stack || true
50+
return 1
51+
fi
52+
done
53+
return 0
4954
}
5055

5156
function test_model_openai_inference {
5257
echo "===> Attempting to chat with model $INFERENCE_MODEL..."
53-
resp=$(curl -fsS http://127.0.0.1:8321/v1/chat/completions -H "Content-Type: application/json" -d "{\"model\": \"$INFERENCE_MODEL\",\"messages\": [{\"role\": \"user\", \"content\": \"What color is grass?\"}], \"max_tokens\": 128, \"temperature\": 0.0}")
58+
resp=$(curl -fsS http://127.0.0.1:8321/v1/chat/completions -H "Content-Type: application/json" -d "{\"model\": \"vllm-inference/$INFERENCE_MODEL\",\"messages\": [{\"role\": \"user\", \"content\": \"What color is grass?\"}], \"max_tokens\": 128, \"temperature\": 0.0}")
5459
if echo "$resp" | grep -q "green"; then
5560
echo "===> Inference is working :)"
5661
return
5762
else
5863
echo "===> Inference is not working :("
64+
echo "Response: $resp"
5965
echo "Container logs:"
6066
docker logs llama-stack || true
6167
exit 1
@@ -65,7 +71,10 @@ function test_model_openai_inference {
6571
main() {
6672
echo "===> Starting smoke test..."
6773
start_and_wait_for_llama_stack_container
68-
test_model_list
74+
if ! test_model_list; then
75+
echo "Model list test failed :("
76+
exit 1
77+
fi
6978
test_model_openai_inference
7079
echo "===> Smoke test completed successfully!"
7180
}

0 commit comments

Comments
 (0)