opendatahub-io · mergify · Nov 12, 2025 · Oct 31, 2025
@@ -56,7 +56,7 @@ repos:
         always_run: true
         files: ^distribution/.*$
         additional_dependencies:
-          - llama-stack==0.2.23
+          - git+https://github.com/opendatahub-io/llama-stack.git@v0.3.0rc3+rhai0
 
       - id: doc-gen
         name: Distribution Documentation

@@ -20,12 +20,12 @@ RUN pip install \
     autoevals \
     boto3 \
     chardet \
+    einops \
     faiss-cpu \
     fastapi \
     fire \
     google-cloud-aiplatform \
     httpx \
-    ibm_watsonx_ai \
     litellm \
     matplotlib \
     nltk \
@@ -39,10 +39,12 @@ RUN pip install \
     pypdf \
     redis \
     requests \
+    safetensors \
     scikit-learn \
     scipy \
     sentencepiece \
     sqlalchemy[asyncio] \
+    tokenizers \
     tqdm \
     transformers \
     uvicorn
@@ -56,7 +58,11 @@ RUN pip install \
     llama_stack_provider_trustyai_fms==0.2.3
 RUN pip install 'torchao>=0.12.0' --extra-index-url https://download.pytorch.org/whl/cpu torch torchvision
 RUN pip install --no-deps sentence-transformers
-RUN pip install --no-cache llama-stack==0.2.23
+RUN pip install --no-cache --no-deps git+https://github.com/opendatahub-io/llama-stack.git@v0.3.0rc3+rhai0
 RUN mkdir -p ${HOME}/.llama ${HOME}/.cache
 COPY distribution/run.yaml ${APP_ROOT}/run.yaml
+#TODO: remove this once we have a stable version of llama-stack
+# LLS server version is not aligned with the client version, so we disable the version check
+# Currently, LLS client version is 0.3.0, while the server version is 0.3.0rc3+rhai0
+ENV LLAMA_STACK_DISABLE_VERSION_CHECK=true
 ENTRYPOINT ["llama", "stack", "run", "/opt/app-root/run.yaml"]
@@ -3,9 +3,12 @@ WORKDIR /opt/app-root
 
 RUN pip install sqlalchemy # somehow sqlalchemy[asyncio] is not sufficient
 {dependencies}
-RUN pip install --no-cache llama-stack==0.2.23
 {llama_stack_install_source}
 RUN mkdir -p ${{HOME}}/.llama ${{HOME}}/.cache
 COPY distribution/run.yaml ${{APP_ROOT}}/run.yaml
+#TODO: remove this once we have a stable version of llama-stack
+# LLS server version is not aligned with the client version, so we disable the version check
+# Currently, LLS client version is 0.3.0, while the server version is 0.3.0rc3+rhai0
+ENV LLAMA_STACK_DISABLE_VERSION_CHECK=true
 
 ENTRYPOINT ["llama", "stack", "run", "/opt/app-root/run.yaml"]
@@ -4,7 +4,7 @@
 
 This image contains the official Open Data Hub Llama Stack distribution, with all the packages and configuration needed to run a Llama Stack server in a containerized environment.
 
-The image is currently shipping with upstream Llama Stack version [0.2.23](https://github.com/llamastack/llama-stack/releases/tag/v0.2.23)
+The image is currently shipping with the Open Data Hub version of Llama Stack version [0.3.0rc3+rhai0](https://github.com/opendatahub-io/llama-stack/releases/tag/v0.3.0rc3+rhai0)
 
 You can see an overview of the APIs and Providers the image ships with in the table below.
 

@@ -13,7 +13,7 @@
 import os
 from pathlib import Path
 
-CURRENT_LLAMA_STACK_VERSION = "0.2.23"
+CURRENT_LLAMA_STACK_VERSION = "0.3.0rc3+rhai0"
 LLAMA_STACK_VERSION = os.getenv("LLAMA_STACK_VERSION", CURRENT_LLAMA_STACK_VERSION)
 BASE_REQUIREMENTS = [
     f"llama-stack=={LLAMA_STACK_VERSION}",
@@ -30,11 +30,7 @@
     "'ibm-cos-sdk==2.14.2'",
 ]
 
-source_install_command = """RUN tmp_build_dir=$(mktemp -d) && \\
-    git clone --filter=blob:none --no-checkout https://github.com/llamastack/llama-stack.git $tmp_build_dir && \\
-    cd $tmp_build_dir && \\
-    git checkout {llama_stack_version} && \\
-    pip install --no-cache -e ."""
+source_install_command = """RUN pip install --no-cache --no-deps git+https://github.com/opendatahub-io/llama-stack.git@v{llama_stack_version}"""
 
 
 def get_llama_stack_install(llama_stack_version):
@@ -47,8 +43,8 @@ def get_llama_stack_install(llama_stack_version):
 
 
 def is_install_from_source(llama_stack_version):
-    """Check if version string is a git commit SHA (no dots = SHA, has dots = version)."""
-    return "." not in llama_stack_version
+    """Check if version string is a git commit SHA (no dots = SHA, has dots = version) or a custom version (contains +rhai)."""
+    return "." not in llama_stack_version or "+rhai" in llama_stack_version
 
 
 def check_llama_installed():

@@ -21,15 +21,22 @@ def extract_llama_stack_version():
             content = file.read()
 
         # Look for llama-stack version in pip install commands
-        # Pattern matches: llama-stack==X.Y.Z
-        pattern = r"llama-stack==([0-9]+\.[0-9]+\.[0-9]+)"
+        # Pattern matches: llama-stack==X.Y.Z or llama-stack==X.Y.ZrcN+rhaiM
+        pattern = r"llama-stack==([0-9]+\.[0-9]+\.[0-9]+(?:rc[0-9]+)?(?:\+rhai[0-9]+)?)"
         match = re.search(pattern, content)
 
         if match:
             return match.group(1)
-        else:
-            print("Error: Could not find llama-stack version in Containerfile")
-            exit(1)
+
+        # Look for git URL format: git+https://github.com/*/llama-stack.git@vVERSION or @VERSION
+        git_pattern = r"git\+https://github\.com/[^/]+/llama-stack\.git@v?([0-9]+\.[0-9]+\.[0-9]+(?:rc[0-9]+)?(?:\+rhai[0-9]+)?)"
+        git_match = re.search(git_pattern, content)
+
+        if git_match:
+            return git_match.group(1)
+
+        print("Error: Could not find llama-stack version in Containerfile")
+        exit(1)
 
     except Exception as e:
         print(f"Error reading Containerfile: {e}")
@@ -170,7 +177,7 @@ def gen_distro_docs():
 
 This image contains the official Open Data Hub Llama Stack distribution, with all the packages and configuration needed to run a Llama Stack server in a containerized environment.
 
-The image is currently shipping with upstream Llama Stack version [{version}](https://github.com/llamastack/llama-stack/releases/tag/v{version})
+The image is currently shipping with the Open Data Hub version of Llama Stack version [{version}](https://github.com/opendatahub-io/llama-stack/releases/tag/v{version})
 
 You can see an overview of the APIs and Providers the image ships with in the table below.
 

@@ -1,19 +1,30 @@
 #!/usr/bin/env bash
 
-set -euo pipefail
+set -exuo pipefail
 
 # Configuration
-LLAMA_STACK_REPO="https://github.com/meta-llama/llama-stack.git"
 WORK_DIR="/tmp/llama-stack-integration-tests"
 INFERENCE_MODEL="${INFERENCE_MODEL:-Qwen/Qwen3-0.6B}"
 
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 
-# Get version dynamically from Containerfile.in (look in parent directory)
-CONTAINERFILE_IN="$SCRIPT_DIR/../distribution/Containerfile.in"
-LLAMA_STACK_VERSION=$(grep -o 'llama-stack==[0-9]\+\.[0-9]\+\.[0-9]\+' "$CONTAINERFILE_IN" | cut -d'=' -f3)
+# Get repository and version dynamically from Containerfile
+# Look for git URL format: git+https://github.com/*/llama-stack.git@vVERSION or @VERSION
+CONTAINERFILE="$SCRIPT_DIR/../distribution/Containerfile"
+GIT_URL=$(grep -o 'git+https://github\.com/[^/]\+/llama-stack\.git@v\?[0-9.+a-z]\+' "$CONTAINERFILE")
+if [ -z "$GIT_URL" ]; then
+    echo "Error: Could not extract llama-stack git URL from Containerfile"
+    exit 1
+fi
+
+# Extract repo URL (remove git+ prefix and @version suffix)
+LLAMA_STACK_REPO=${GIT_URL#git+}
+LLAMA_STACK_REPO=${LLAMA_STACK_REPO%%@*}
+# Extract version (remove git+ prefix and everything before @, and optional v prefix)
+LLAMA_STACK_VERSION=${GIT_URL##*@}
+LLAMA_STACK_VERSION=${LLAMA_STACK_VERSION#v}
 if [ -z "$LLAMA_STACK_VERSION" ]; then
-    echo "Error: Could not extract llama-stack version from Containerfile.in"
+    echo "Error: Could not extract llama-stack version from Containerfile"
     exit 1
 fi
 
@@ -41,8 +52,10 @@ function run_integration_tests() {
     cd "$WORK_DIR"
 
     # Test to skip
+    # TODO: enable these when we have a stable version of llama-stack client and server versions are aligned
+    RC2_SKIP_TESTS=" or test_openai_completion_logprobs or test_openai_completion_logprobs_streaming or test_openai_chat_completion_structured_output or test_multiple_tools_with_different_schemas or test_mcp_tools_in_inference or test_tool_with_complex_schema or test_tool_without_schema"
     # TODO: re-enable the 2 chat_completion_non_streaming tests once they contain include max tokens (to prevent them from rambling)
-    SKIP_TESTS="test_text_chat_completion_tool_calling_tools_not_in_request or test_text_chat_completion_structured_output or test_text_chat_completion_non_streaming or test_openai_chat_completion_non_streaming"
+    SKIP_TESTS="test_text_chat_completion_tool_calling_tools_not_in_request or test_text_chat_completion_structured_output or test_text_chat_completion_non_streaming or test_openai_chat_completion_non_streaming$RC2_SKIP_TESTS"
 
     # Dynamically determine the path to run.yaml from the original script directory
     STACK_CONFIG_PATH="$SCRIPT_DIR/../distribution/run.yaml"
@@ -51,7 +64,9 @@ function run_integration_tests() {
         exit 1
     fi
 
-    uv run pytest -s -v tests/integration/inference/ \
+    # TODO: remove this once we have a stable version of llama-stack client
+    # Currently, LLS client version is 0.3.0, while the server version is 0.3.0rc3+rhai0
+    uv run --with llama-stack-client==0.3.0 pytest -s -v tests/integration/inference/ \
         --stack-config=server:"$STACK_CONFIG_PATH" \
         --text-model=vllm-inference/"$INFERENCE_MODEL" \
         --embedding-model=granite-embedding-125m \