Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ repos:
always_run: true
files: ^distribution/.*$
additional_dependencies:
- llama-stack==0.2.23
- git+https://github.com/opendatahub-io/llama-stack.git@v0.3.0rc3+rhai0

- id: doc-gen
name: Distribution Documentation
Expand Down
10 changes: 8 additions & 2 deletions distribution/Containerfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@ RUN pip install \
autoevals \
boto3 \
chardet \
einops \
faiss-cpu \
fastapi \
fire \
google-cloud-aiplatform \
httpx \
ibm_watsonx_ai \
litellm \
matplotlib \
nltk \
Expand All @@ -39,10 +39,12 @@ RUN pip install \
pypdf \
redis \
requests \
safetensors \
scikit-learn \
scipy \
sentencepiece \
sqlalchemy[asyncio] \
tokenizers \
tqdm \
transformers \
uvicorn
Expand All @@ -56,7 +58,11 @@ RUN pip install \
llama_stack_provider_trustyai_fms==0.2.3
RUN pip install 'torchao>=0.12.0' --extra-index-url https://download.pytorch.org/whl/cpu torch torchvision
RUN pip install --no-deps sentence-transformers
RUN pip install --no-cache llama-stack==0.2.23
RUN pip install --no-cache --no-deps git+https://github.com/opendatahub-io/llama-stack.git@v0.3.0rc3+rhai0
RUN mkdir -p ${HOME}/.llama ${HOME}/.cache
COPY distribution/run.yaml ${APP_ROOT}/run.yaml
#TODO: remove this once we have a stable version of llama-stack
# LLS server version is not aligned with the client version, so we disable the version check
# Currently, LLS client version is 0.3.0, while the server version is 0.3.0rc3+rhai0
ENV LLAMA_STACK_DISABLE_VERSION_CHECK=true
ENTRYPOINT ["llama", "stack", "run", "/opt/app-root/run.yaml"]
5 changes: 4 additions & 1 deletion distribution/Containerfile.in
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@ WORKDIR /opt/app-root

RUN pip install sqlalchemy # somehow sqlalchemy[asyncio] is not sufficient
{dependencies}
RUN pip install --no-cache llama-stack==0.2.23
{llama_stack_install_source}
RUN mkdir -p ${{HOME}}/.llama ${{HOME}}/.cache
COPY distribution/run.yaml ${{APP_ROOT}}/run.yaml
#TODO: remove this once we have a stable version of llama-stack
# LLS server version is not aligned with the client version, so we disable the version check
# Currently, LLS client version is 0.3.0, while the server version is 0.3.0rc3+rhai0
ENV LLAMA_STACK_DISABLE_VERSION_CHECK=true

ENTRYPOINT ["llama", "stack", "run", "/opt/app-root/run.yaml"]
2 changes: 1 addition & 1 deletion distribution/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

This image contains the official Open Data Hub Llama Stack distribution, with all the packages and configuration needed to run a Llama Stack server in a containerized environment.

The image is currently shipping with upstream Llama Stack version [0.2.23](https://github.com/llamastack/llama-stack/releases/tag/v0.2.23)
The image is currently shipping with the Open Data Hub version of Llama Stack version [0.3.0rc3+rhai0](https://github.com/opendatahub-io/llama-stack/releases/tag/v0.3.0rc3+rhai0)

You can see an overview of the APIs and Providers the image ships with in the table below.

Expand Down
12 changes: 4 additions & 8 deletions distribution/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import os
from pathlib import Path

CURRENT_LLAMA_STACK_VERSION = "0.2.23"
CURRENT_LLAMA_STACK_VERSION = "0.3.0rc3+rhai0"
LLAMA_STACK_VERSION = os.getenv("LLAMA_STACK_VERSION", CURRENT_LLAMA_STACK_VERSION)
BASE_REQUIREMENTS = [
f"llama-stack=={LLAMA_STACK_VERSION}",
Expand All @@ -30,11 +30,7 @@
"'ibm-cos-sdk==2.14.2'",
]

source_install_command = """RUN tmp_build_dir=$(mktemp -d) && \\
git clone --filter=blob:none --no-checkout https://github.com/llamastack/llama-stack.git $tmp_build_dir && \\
cd $tmp_build_dir && \\
git checkout {llama_stack_version} && \\
pip install --no-cache -e ."""
source_install_command = """RUN pip install --no-cache --no-deps git+https://github.com/opendatahub-io/llama-stack.git@v{llama_stack_version}"""


def get_llama_stack_install(llama_stack_version):
Expand All @@ -47,8 +43,8 @@ def get_llama_stack_install(llama_stack_version):


def is_install_from_source(llama_stack_version):
"""Check if version string is a git commit SHA (no dots = SHA, has dots = version)."""
return "." not in llama_stack_version
"""Check if version string is a git commit SHA (no dots = SHA, has dots = version) or a custom version (contains +rhai)."""
return "." not in llama_stack_version or "+rhai" in llama_stack_version


def check_llama_installed():
Expand Down
19 changes: 13 additions & 6 deletions scripts/gen_distro_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,22 @@ def extract_llama_stack_version():
content = file.read()

# Look for llama-stack version in pip install commands
# Pattern matches: llama-stack==X.Y.Z
pattern = r"llama-stack==([0-9]+\.[0-9]+\.[0-9]+)"
# Pattern matches: llama-stack==X.Y.Z or llama-stack==X.Y.ZrcN+rhaiM
pattern = r"llama-stack==([0-9]+\.[0-9]+\.[0-9]+(?:rc[0-9]+)?(?:\+rhai[0-9]+)?)"
match = re.search(pattern, content)

if match:
return match.group(1)
else:
print("Error: Could not find llama-stack version in Containerfile")
exit(1)

# Look for git URL format: git+https://github.com/*/llama-stack.git@vVERSION or @VERSION
git_pattern = r"git\+https://github\.com/[^/]+/llama-stack\.git@v?([0-9]+\.[0-9]+\.[0-9]+(?:rc[0-9]+)?(?:\+rhai[0-9]+)?)"
git_match = re.search(git_pattern, content)

if git_match:
return git_match.group(1)

print("Error: Could not find llama-stack version in Containerfile")
exit(1)

except Exception as e:
print(f"Error reading Containerfile: {e}")
Expand Down Expand Up @@ -170,7 +177,7 @@ def gen_distro_docs():

This image contains the official Open Data Hub Llama Stack distribution, with all the packages and configuration needed to run a Llama Stack server in a containerized environment.

The image is currently shipping with upstream Llama Stack version [{version}](https://github.com/llamastack/llama-stack/releases/tag/v{version})
The image is currently shipping with the Open Data Hub version of Llama Stack version [{version}](https://github.com/opendatahub-io/llama-stack/releases/tag/v{version})

You can see an overview of the APIs and Providers the image ships with in the table below.

Expand Down
31 changes: 23 additions & 8 deletions tests/run_integration_tests.sh
Original file line number Diff line number Diff line change
@@ -1,19 +1,30 @@
#!/usr/bin/env bash

set -euo pipefail
set -exuo pipefail

# Configuration
LLAMA_STACK_REPO="https://github.com/meta-llama/llama-stack.git"
WORK_DIR="/tmp/llama-stack-integration-tests"
INFERENCE_MODEL="${INFERENCE_MODEL:-Qwen/Qwen3-0.6B}"

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Get version dynamically from Containerfile.in (look in parent directory)
CONTAINERFILE_IN="$SCRIPT_DIR/../distribution/Containerfile.in"
LLAMA_STACK_VERSION=$(grep -o 'llama-stack==[0-9]\+\.[0-9]\+\.[0-9]\+' "$CONTAINERFILE_IN" | cut -d'=' -f3)
# Get repository and version dynamically from Containerfile
# Look for git URL format: git+https://github.com/*/llama-stack.git@vVERSION or @VERSION
CONTAINERFILE="$SCRIPT_DIR/../distribution/Containerfile"
GIT_URL=$(grep -o 'git+https://github\.com/[^/]\+/llama-stack\.git@v\?[0-9.+a-z]\+' "$CONTAINERFILE")
if [ -z "$GIT_URL" ]; then
echo "Error: Could not extract llama-stack git URL from Containerfile"
exit 1
fi

# Extract repo URL (remove git+ prefix and @version suffix)
LLAMA_STACK_REPO=${GIT_URL#git+}
LLAMA_STACK_REPO=${LLAMA_STACK_REPO%%@*}
# Extract version (remove git+ prefix and everything before @, and optional v prefix)
LLAMA_STACK_VERSION=${GIT_URL##*@}
LLAMA_STACK_VERSION=${LLAMA_STACK_VERSION#v}
if [ -z "$LLAMA_STACK_VERSION" ]; then
echo "Error: Could not extract llama-stack version from Containerfile.in"
echo "Error: Could not extract llama-stack version from Containerfile"
exit 1
fi

Expand Down Expand Up @@ -41,8 +52,10 @@ function run_integration_tests() {
cd "$WORK_DIR"

# Test to skip
# TODO: enable these when we have a stable version of llama-stack client and server versions are aligned
RC2_SKIP_TESTS=" or test_openai_completion_logprobs or test_openai_completion_logprobs_streaming or test_openai_chat_completion_structured_output or test_multiple_tools_with_different_schemas or test_mcp_tools_in_inference or test_tool_with_complex_schema or test_tool_without_schema"
# TODO: re-enable the 2 chat_completion_non_streaming tests once they contain include max tokens (to prevent them from rambling)
SKIP_TESTS="test_text_chat_completion_tool_calling_tools_not_in_request or test_text_chat_completion_structured_output or test_text_chat_completion_non_streaming or test_openai_chat_completion_non_streaming"
SKIP_TESTS="test_text_chat_completion_tool_calling_tools_not_in_request or test_text_chat_completion_structured_output or test_text_chat_completion_non_streaming or test_openai_chat_completion_non_streaming$RC2_SKIP_TESTS"

# Dynamically determine the path to run.yaml from the original script directory
STACK_CONFIG_PATH="$SCRIPT_DIR/../distribution/run.yaml"
Expand All @@ -51,7 +64,9 @@ function run_integration_tests() {
exit 1
fi

uv run pytest -s -v tests/integration/inference/ \
# TODO: remove this once we have a stable version of llama-stack client
# Currently, LLS client version is 0.3.0, while the server version is 0.3.0rc3+rhai0
uv run --with llama-stack-client==0.3.0 pytest -s -v tests/integration/inference/ \
--stack-config=server:"$STACK_CONFIG_PATH" \
--text-model=vllm-inference/"$INFERENCE_MODEL" \
--embedding-model=granite-embedding-125m \
Expand Down
Loading