Merged
Changes from 5 commits
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -56,7 +56,7 @@ repos:
always_run: true
files: ^distribution/.*$
additional_dependencies:
- git+https://github.com/opendatahub-io/llama-stack.git@v0.3.0rc3+rhai0
- uv>=0.9.0

- id: doc-gen
name: Distribution Documentation
27 changes: 14 additions & 13 deletions distribution/Containerfile
@@ -2,16 +2,17 @@
# Generated by: distribution/build.py
FROM registry.access.redhat.com/ubi9/python-312@sha256:95ec8d3ee9f875da011639213fd254256c29bc58861ac0b11f290a291fa04435
WORKDIR /opt/app-root
RUN pip install uv
RUN pip install sqlalchemy # somehow sqlalchemy[asyncio] is not sufficient
RUN pip install --upgrade \
RUN uv pip install --upgrade \
'kfp-kubernetes==2.14.6' \
'pyarrow>=21.0.0' \
'botocore==1.35.88' \
'boto3==1.35.88' \
'aiobotocore==2.16.1' \
'ibm-cos-sdk-core==2.14.2' \
'ibm-cos-sdk==2.14.2'
RUN pip install \
RUN uv pip install \
'datasets>=4.0.0' \
'mcp>=1.8.1' \
'pymilvus[milvus-lite]>=2.4.10' \
@@ -48,17 +49,17 @@ RUN pip install \
tqdm \
transformers \
uvicorn
RUN pip install \
llama_stack_provider_lmeval==0.3.1
RUN pip install \
llama_stack_provider_ragas==0.4.2
RUN pip install \
llama_stack_provider_ragas[remote]==0.4.2
RUN pip install \
llama_stack_provider_trustyai_fms==0.2.3
RUN pip install 'torchao>=0.12.0' --extra-index-url https://download.pytorch.org/whl/cpu torch torchvision
RUN pip install --no-deps sentence-transformers
RUN pip install --no-cache --no-deps git+https://github.com/opendatahub-io/llama-stack.git@v0.3.0rc3+rhai0
RUN uv pip install \
llama_stack_provider_lmeval==0.4.1
RUN uv pip install \
llama_stack_provider_ragas==0.5.1
RUN uv pip install \
llama_stack_provider_ragas[remote]==0.5.1
RUN uv pip install \
llama_stack_provider_trustyai_fms==0.3.0
RUN uv pip install --extra-index-url https://download.pytorch.org/whl/cpu 'torchao>=0.12.0' torch torchvision
RUN uv pip install --no-deps sentence-transformers
RUN uv pip install --no-cache --no-deps git+https://github.com/opendatahub-io/llama-stack.git@main
RUN mkdir -p ${HOME}/.llama ${HOME}/.cache
COPY distribution/run.yaml ${APP_ROOT}/run.yaml
COPY --chmod=755 distribution/entrypoint.sh ${APP_ROOT}/entrypoint.sh
1 change: 1 addition & 0 deletions distribution/Containerfile.in
@@ -1,6 +1,7 @@
FROM registry.access.redhat.com/ubi9/python-312@sha256:95ec8d3ee9f875da011639213fd254256c29bc58861ac0b11f290a291fa04435
WORKDIR /opt/app-root

RUN pip install uv
RUN pip install sqlalchemy # somehow sqlalchemy[asyncio] is not sufficient
{dependencies}
{llama_stack_install_source}
12 changes: 6 additions & 6 deletions distribution/README.md
@@ -4,18 +4,19 @@

This image contains the official Open Data Hub Llama Stack distribution, with all the packages and configuration needed to run a Llama Stack server in a containerized environment.

The image is currently shipping with the Open Data Hub version of Llama Stack version [0.3.0rc3+rhai0](https://github.com/opendatahub-io/llama-stack/releases/tag/v0.3.0rc3+rhai0)
The image is currently shipping with the Open Data Hub version of Llama Stack version [57b22a0](https://github.com/opendatahub-io/llama-stack/commit/57b22a07f798c09713ee80836306ff1584c376fa)

You can see an overview of the APIs and Providers the image ships with in the table below.

| API | Provider | External? | Enabled by default? | How to enable |
|-----|----------|-----------|---------------------|---------------|
| agents | inline::meta-reference | No | ✅ | N/A |
| batches | inline::reference | No | ✅ | N/A |
| datasetio | inline::localfs | No | ✅ | N/A |
| datasetio | remote::huggingface | No | ✅ | N/A |
| eval | inline::trustyai_ragas | Yes (version 0.4.2) | ❌ | Set the `EMBEDDING_MODEL` environment variable |
| eval | remote::trustyai_lmeval | Yes (version 0.3.1) | ✅ | N/A |
| eval | remote::trustyai_ragas | Yes (version 0.4.2) | ❌ | Set the `KUBEFLOW_LLAMA_STACK_URL` environment variable |
| eval | inline::trustyai_ragas | Yes (version 0.5.1) | ❌ | Set the `EMBEDDING_MODEL` environment variable |
| eval | remote::trustyai_lmeval | Yes (version 0.4.1) | ✅ | N/A |
| eval | remote::trustyai_ragas | Yes (version 0.5.1) | ❌ | Set the `KUBEFLOW_LLAMA_STACK_URL` environment variable |
| files | inline::localfs | No | ✅ | N/A |
| inference | inline::sentence-transformers | No | ✅ | N/A |
| inference | remote::azure | No | ❌ | Set the `AZURE_API_KEY` environment variable |
@@ -24,11 +25,10 @@ You can see an overview of the APIs and Providers the image ships with in the ta
| inference | remote::vertexai | No | ❌ | Set the `VERTEX_AI_PROJECT` environment variable |
| inference | remote::vllm | No | ❌ | Set the `VLLM_URL` environment variable |
| inference | remote::watsonx | No | ❌ | Set the `WATSONX_API_KEY` environment variable |
| safety | remote::trustyai_fms | Yes (version 0.2.3) | ✅ | N/A |
| safety | remote::trustyai_fms | Yes (version 0.3.0) | ✅ | N/A |
| scoring | inline::basic | No | ✅ | N/A |
| scoring | inline::braintrust | No | ✅ | N/A |
| scoring | inline::llm-as-judge | No | ✅ | N/A |
| telemetry | inline::meta-reference | No | ✅ | N/A |
| tool_runtime | inline::rag-runtime | No | ✅ | N/A |
| tool_runtime | remote::brave-search | No | ✅ | N/A |
| tool_runtime | remote::model-context-protocol | No | ✅ | N/A |
154 changes: 108 additions & 46 deletions distribution/build.py
@@ -11,9 +11,10 @@
import subprocess
import sys
import os
import shlex
from pathlib import Path

CURRENT_LLAMA_STACK_VERSION = "0.3.0rc3+rhai0"
CURRENT_LLAMA_STACK_VERSION = "main"
LLAMA_STACK_VERSION = os.getenv("LLAMA_STACK_VERSION", CURRENT_LLAMA_STACK_VERSION)
Comment on lines +17 to 18
Contributor

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

Verify version change from pre-release to "main" branch.

CURRENT_LLAMA_STACK_VERSION changed from "0.3.0rc3+rhai0" to "main". This switches from a specific pre-release version to tracking the main branch, which could introduce instability if the upstream main branch has breaking changes.

Consider:

  1. Is your team prepared to handle potential breaking changes from upstream main?
  2. Should you use a specific commit SHA instead for reproducible builds?
  3. Update documentation to reflect this tracking strategy

🌐 Web query:

What are the best practices for pinning dependencies to git branches vs tags in Python projects

💡 Result:

Short answer: pin immutable refs (tags or commit hashes) for production / reproducible builds; use branches only for tracking live development or when you deliberately want moving targets — but validate them with CI and lockfiles. Key practices:

  • Prefer tags (semantic-version tags) or exact commit SHAs for reproducible builds — tags are stable release markers, SHAs are completely immutable. Use these in git: URLs (or Poetry/Pipenv rev/tag) when installing from VCS. [1][2][3]
  • Only use branches when you need a moving target (e.g., testing against a library’s main/next branch). Treat branch pins as risky for production because they change under you. If you must use a branch, pin it in CI and test frequently. [2][4]
  • Always maintain and commit a lock file (poetry.lock, Pipfile.lock, requirements.txt with hashes) so CI and colleagues reproduce identical dependency trees even if manifests point at ranges or branches. Lockfiles + VCS-tagged deps give best reproducibility. [5][1]
  • Prefer semantic version tags for published packages and keep a clear tagging policy (vMAJOR.MINOR.PATCH, annotated tags, changelog). This supports automation and traceability. [4][3]
  • Automate pin maintenance and security updates (Dependabot, Renovate, CI jobs). Configure rules to update pinned git-tag or SHA references (or open PRs when a new tag appears) and always run your test suite before merging. [6][7]
  • When referencing VCS deps in manifests, be explicit: use @tag or @ (or Poetry’s tag/rev keys) rather than leaving the default branch unspecified. Example: requests @ git+https://github.com/owner/requests.git@v1.2.3 or @38eb5d3b. [1][2]
  • Record provenance and release notes: annotate tags and keep changelogs so consumers know what each pinned tag/sha represents. Use CI to publish artifacts from tags so releases are reproducible. [4][3]
  • Policy checklist for projects:
    • Production packages: pin to tag or commit SHA + commit lockfile. [1][2]
    • Development/test only: branch pins acceptable but run frequent CI + automated update checks. [2][6]
    • Use automated tools (Dependabot/Renovate) to surface updates and security fixes; review & test before updating pins. [6][7]

Selected sources:

  • Poetry git dependency docs (how to pin branch/tag/rev). [1]
  • Pipenv best-practices / lockfile guidance. [5]
  • Why use git tags / immutability and SemVer guidance. [4][3]
  • General recommendation to pin for reproducibility (community discussion). [8]
  • GitHub Dependabot / maintaining dependencies guidance (automation & update policy). [6]

If you want, I can: (a) show concrete manifest examples for pip/Poetry/Pipenv, or (b) draft a short CI job that enforces and tests git-pinned deps.
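
For reference, the three pinning styles discussed above can be written as pip-style requirement strings. The repo URL is the one used in this PR; the tag and SHA below are illustrative examples, not a recommendation of specific refs:

```python
# Three ways to pin a VCS dependency, in decreasing order of immutability.
# Repo URL is the one used in this PR; the tag and SHA are examples.
REPO = "git+https://github.com/opendatahub-io/llama-stack.git"

pin_by_sha = f"llama-stack @ {REPO}@57b22a07f798c09713ee80836306ff1584c376fa"  # immutable
pin_by_tag = f"llama-stack @ {REPO}@v0.3.0rc3+rhai0"  # stable release marker
track_branch = f"llama-stack @ {REPO}@main"  # moving target: CI/testing only

for req in (pin_by_sha, pin_by_tag, track_branch):
    print(req)
```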


I'll investigate the codebase context to understand the implications of this version change and whether there are existing mitigations (lockfiles, CI validation, etc.).


🏁 Script executed:

#!/bin/bash
# Investigate build.py and version usage patterns

# 1. Check build.py context around lines 17-18
echo "=== build.py context (lines 10-25) ==="
head -30 distribution/build.py | tail -20

# 2. Check for lockfiles
echo -e "\n=== Looking for lockfiles ==="
find . -maxdepth 2 -type f \( -name "*.lock" -o -name "Pipfile*" -o -name "poetry.lock" \) 2>/dev/null | head -10

# 3. Check for CI configuration
echo -e "\n=== CI configuration files ==="
find . -maxdepth 2 -type f \( -name ".github" -o -name "*.yml" -o -name "*.yaml" \) 2>/dev/null | grep -E "(workflows|\.github)" | head -10

# 4. Search for LLAMA_STACK_VERSION usage
echo -e "\n=== LLAMA_STACK_VERSION usage ==="
grep -r "LLAMA_STACK_VERSION" --include="*.py" 2>/dev/null | head -15

# 5. Check README or docs for version strategy
echo -e "\n=== README mention of versions ==="
grep -i "version\|main\|branch" README.md 2>/dev/null | head -10

Length of output: 1761



🏁 Script executed:

#!/bin/bash
# Check how LLAMA_STACK_VERSION is actually used in installation logic

# 1. Find and show install_llama_stack_from_source function
echo "=== install_llama_stack_from_source implementation ==="
grep -n -A 20 "def install_llama_stack_from_source" distribution/build.py

# 2. Check is_install_from_source
echo -e "\n=== is_install_from_source implementation ==="
grep -n -A 10 "def is_install_from_source" distribution/build.py

# 3. Look for any git clone or VCS-based installation
echo -e "\n=== Git/VCS installation patterns ==="
grep -n "git\|clone\|vcs\|@git" distribution/build.py | head -20

# 4. Check if there's a full requirements file or setup.py
echo -e "\n=== Requirements files ==="
find distribution -name "requirements*.txt" -o -name "setup.py" -o -name "setup.cfg"

# 5. Show full BASE_REQUIREMENTS and PINNED_DEPENDENCIES
echo -e "\n=== Full BASE_REQUIREMENTS and PINNED_DEPENDENCIES ==="
grep -n -A 50 "BASE_REQUIREMENTS" distribution/build.py | head -60

Length of output: 4378




Code correctly routes "main" to git-based installation; review concerns remain valid but not critical.

The code detects that "main" is not a version string (has no dots) and routes it to git installation: git+https://github.com/opendatahub-io/llama-stack.git@main. This works correctly, so the invalid pip syntax in BASE_REQUIREMENTS is bypassed.

However, the review comment's concerns are valid:

  • No lockfile or commit-level pinning: Each build pulls the latest from main, creating reproducibility gaps compared to pinning specific commit SHAs (per best practices: immutable refs (tags or commit hashes) are preferred for reproducible builds; branches should only be used when deliberately tracking a moving target).
  • Breaking changes risk: Upstream main can introduce breaking changes between builds.
  • Documentation needed: The build strategy (main branch tracking) is not documented; the README mentions SHA-based image tags but doesn't clarify version pinning strategy.

The three original suggestions remain sound:

  1. Breaking change preparation: Ensure the team monitors upstream main regularly.
  2. Consider commit SHA pinning: More reproducible than branch tracking; aligns with distribution best practices.
  3. Update documentation: Clarify version strategy in README or build docs.
🤖 Prompt for AI Agents
In distribution/build.py around lines 17-18, CURRENT_LLAMA_STACK_VERSION is set
to "main" which causes builds to pull the moving branch (git+...@main) and
creates non-reproducible images and risk of breakages; change the build flow to
default to a pinned immutable ref (commit SHA or tag) instead of "main", add an
environment/CI mechanism to inject a specific commit SHA for
LLAMA_STACK_VERSION, update README/build docs to state whether builds track
branch vs pinned SHAs, and add a CI/alerting step or checklist to regularly
monitor upstream main if branch tracking is intentionally used.
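
A minimal sketch of the env-injection idea from the prompt above — default to an immutable commit SHA and let CI override it (the default SHA and the variable handling here are assumptions for illustration, not the PR's code):

```python
import os

# Hypothetical default: an immutable commit SHA rather than the "main" branch.
DEFAULT_LLAMA_STACK_REF = "57b22a07f798c09713ee80836306ff1584c376fa"


def resolve_llama_stack_ref() -> str:
    """CI can export LLAMA_STACK_VERSION to override the pinned default."""
    return os.getenv("LLAMA_STACK_VERSION", DEFAULT_LLAMA_STACK_REF)


def source_install_url(ref: str) -> str:
    """Build the VCS URL consumed by `uv pip install`."""
    return f"git+https://github.com/opendatahub-io/llama-stack.git@{ref}"


print(source_install_url(resolve_llama_stack_ref()))
```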

BASE_REQUIREMENTS = [
f"llama-stack=={LLAMA_STACK_VERSION}",
@@ -30,7 +31,7 @@
"'ibm-cos-sdk==2.14.2'",
]

source_install_command = """RUN pip install --no-cache --no-deps git+https://github.com/opendatahub-io/llama-stack.git@v{llama_stack_version}"""
source_install_command = """RUN uv pip install --no-cache --no-deps git+https://github.com/opendatahub-io/llama-stack.git@{llama_stack_version}"""


def get_llama_stack_install(llama_stack_version):
@@ -47,10 +48,10 @@ def is_install_from_source(llama_stack_version):
return "." not in llama_stack_version or "+rhai" in llama_stack_version


def check_llama_installed():
def check_package_installed(package_name):
"""Check if llama binary is installed and accessible."""
if not shutil.which("llama"):
print("Error: llama binary not found. Please install it first.")
if not shutil.which(package_name):
print(f"Error: {package_name} not found. Please install it first.")
sys.exit(1)


@@ -87,9 +88,32 @@ def check_llama_stack_version():
print("Continuing without version validation...")


def install_llama_stack_from_source(llama_stack_version):
"""Install llama-stack from source using git."""
print("installing llama-stack from source...")
try:
result = subprocess.run(
f"uv pip install git+https://github.com/opendatahub-io/llama-stack.git@{llama_stack_version}",
shell=True,
check=True,
capture_output=True,
text=True,
)
# Print stdout if there's any output
if result.stdout:
print(result.stdout)
except subprocess.CalledProcessError as e:
print(f"Error installing llama-stack: {e}")
if e.stdout:
print(f"stdout: {e.stdout}")
if e.stderr:
print(f"stderr: {e.stderr}")
sys.exit(1)


def get_dependencies():
"""Execute the llama stack build command and capture dependencies."""
cmd = "llama stack build --config distribution/build.yaml --print-deps-only"
cmd = "llama stack list-deps distribution/build.yaml"
Contributor

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

Verify the new command syntax works correctly.

The command changed from llama stack build --config ... --print-deps-only to llama stack list-deps ..., removing the --config flag. Ensure this new command:

  • Exists in the version being used
  • Accepts the config path as a positional argument
  • Returns the expected output format

🏁 Script executed:

#!/bin/bash
# Description: Verify the new list-deps command exists and check its output format

# Check if the command is available and what format it returns
llama stack list-deps --help || echo "Error: list-deps command not found"

# Try running the command to see the output format
if [ -f "distribution/build.yaml" ]; then
  echo "=== Testing list-deps command output ==="
  llama stack list-deps distribution/build.yaml | head -20
fi

Length of output: 164


I'll investigate this further by examining the codebase and checking for documentation on the command change.

Good news: the llama stack list-deps command exists in the official Llama Stack documentation. Now I need to verify whether it accepts a file path as an argument. Let me search for more specific usage examples.

I found important documentation. Let me now examine the actual code to understand the context better.


🏁 Script executed:

#!/bin/bash
# Find and examine the get_dependencies function in distribution/build.py

rg -n "get_dependencies" --type=py -A 10 -B 5

Length of output: 2004


Fix the command syntax: llama stack list-deps requires a distribution name, not a file path.

Official documentation shows llama stack list-deps takes a distribution name (e.g., <distro-name>), not a file path. Examples from PyPI show usage as llama stack list-deps meta-reference-gpu or llama stack list-deps starter.

The current code at line 93 uses llama stack list-deps distribution/build.yaml, which will fail. Either:

  • Pass a distribution name instead of a file path, or
  • Revert to the original llama stack build --config distribution/build.yaml --print-deps-only syntax if config file support is needed

Also update the docstring on line 92 from "Execute the llama stack build command" to reflect the new command being used.
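
A sketch of the first suggested fix — building the argv for the documented `llama stack list-deps <distro-name>` form (the distro name below is illustrative; substitute whatever name this repo's build config registers):

```python
import shlex


def build_list_deps_cmd(distro_name: str) -> list[str]:
    # Constructs the documented `llama stack list-deps <distro-name>` invocation;
    # pass the resulting argv to subprocess.run without shell=True.
    return shlex.split(f"llama stack list-deps {shlex.quote(distro_name)}")


print(build_list_deps_cmd("starter"))
```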

try:
result = subprocess.run(
cmd, shell=True, capture_output=True, text=True, check=True
@@ -101,56 +125,91 @@ def get_dependencies():
no_cache = []

for line in result.stdout.splitlines():
if line.strip().startswith("uv pip"):
# Split the line into command and packages
parts = line.replace("uv ", "RUN ", 1).split(" ", 3)
line = line.strip()
if not line: # Skip empty lines
continue

# Handle both "uv pip" format and direct package list format
if line.startswith("uv pip"):
# Legacy format: "uv pip install ..."
line = line.replace("uv ", "RUN ", 1)
parts = line.split(" ", 3)
if len(parts) >= 4: # We have packages to sort
cmd_parts = parts[:3] # "RUN pip install"
packages = sorted(
set(parts[3].split())
) # Sort the package names and remove duplicates

# Add quotes to packages with > or < to prevent bash redirection
packages = [
f"'{package}'"
if (">" in package or "<" in package)
else package
for package in packages
]

# Modify pymilvus package to include milvus-lite extra
packages = [
package.replace("pymilvus", "pymilvus[milvus-lite]")
if "pymilvus" in package and "[milvus-lite]" not in package
else package
for package in packages
]
packages = sorted(set(packages))

# Determine command type and format accordingly
if ("--index-url" in line) or ("--extra-index-url" in line):
full_cmd = " ".join(cmd_parts + [" ".join(packages)])
torch_deps.append(full_cmd)
elif "--no-deps" in line:
full_cmd = " ".join(cmd_parts + [" ".join(packages)])
no_deps.append(full_cmd)
elif "--no-cache" in line:
full_cmd = " ".join(cmd_parts + [" ".join(packages)])
no_cache.append(full_cmd)
else:
formatted_packages = " \\\n ".join(packages)
full_cmd = f"{' '.join(cmd_parts)} \\\n {formatted_packages}"
standard_deps.append(full_cmd)
packages_str = parts[3]
else:
standard_deps.append(" ".join(parts))
continue
else:
# New format: just packages, possibly with flags
cmd_parts = ["RUN", "uv", "pip", "install"]
packages_str = line

# Parse packages and flags from the line
# Use shlex.split to properly handle quoted package names
parts_list = shlex.split(packages_str)
packages = []
flags = []
extra_index_url = None

i = 0
while i < len(parts_list):
if parts_list[i] == "--extra-index-url" and i + 1 < len(parts_list):
extra_index_url = parts_list[i + 1]
flags.extend([parts_list[i], parts_list[i + 1]])
i += 2
elif parts_list[i] == "--index-url" and i + 1 < len(parts_list):
flags.extend([parts_list[i], parts_list[i + 1]])
i += 2
elif parts_list[i] in ["--no-deps", "--no-cache"]:
flags.append(parts_list[i])
i += 1
else:
packages.append(parts_list[i])
i += 1

# Sort and deduplicate packages
packages = sorted(set(packages))

# Add quotes to packages with > or < to prevent bash redirection
packages = [
f"'{package}'" if (">" in package or "<" in package) else package
for package in packages
]

# Modify pymilvus package to include milvus-lite extra
packages = [
package.replace("pymilvus", "pymilvus[milvus-lite]")
if "pymilvus" in package and "[milvus-lite]" not in package
else package
for package in packages
]
packages = sorted(set(packages))
Comment on lines +180 to +187
Contributor

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

Verify and document the pymilvus package modification.

The code automatically replaces pymilvus with pymilvus[milvus-lite], which is a significant functional change that:

  • Adds the milvus-lite extra to all pymilvus installations
  • Could affect package size and dependencies
  • May not be necessary or desired in all deployment scenarios

Additionally, note that line 164 sorts the packages again after they were already sorted on line 149, which is redundant.

Consider:

  • Documenting why this extra is required
  • Making this behavior configurable if different deployment targets need different extras
  • Removing the redundant sort on line 164

🌐 Web query:

Is pymilvus[milvus-lite] the recommended way to install pymilvus, and what are the differences from the base package?

💡 Result:

Short answer: Yes — the Milvus docs recommend installing pymilvus (with the milvus‑lite extra or the bundled milvus‑lite) for a local/single‑file "Milvus Lite" experience. Use either pip install -U pymilvus (recent pymilvus releases include milvus‑lite) or pip install -U "pymilvus[milvus-lite]" to explicitly request the extra. [1][2][3]

What changes vs. the base package

  • pymilvus (base): the Python SDK that talks to a remote Milvus server (Standalone/Distributed/Cloud). If you install plain pymilvus versions before milvus‑lite was bundled, milvus‑lite may not be present. [2]
  • pymilvus[milvus-lite] (extra) or pymilvus versions >= 2.4.2: also installs the milvus‑lite implementation (an embedded, SQLite‑backed local Milvus server), its CLI/tools (milvus-lite), and additional dependencies needed to run Milvus Lite locally (suitable for prototyping, demos, notebooks). Milvus docs note Milvus Lite is not recommended for production. [1][2][3]

Recommendation: For local development/prototyping, install either pip install -U pymilvus (if using a recent release that bundles milvus‑lite) or pip install -U "pymilvus[milvus-lite]" to be explicit. For production or large‑scale deployments use a standalone/distributed Milvus server instead. [1][2][3]

Sources:
[1] Milvus docs — Run Milvus Lite locally (installation notes).
[2] Milvus docs (v2.4.x) — milvus‑lite included in pymilvus >= 2.4.2.
[3] milvus‑lite PyPI (usage and CLI info).


I have the web search results clarifying that pymilvus[milvus-lite] is indeed a recommended approach. Now I need to examine the build.py file to verify the redundant sort claim and understand the deployment context.


---

🏁 Script executed:

#!/bin/bash
# View lines 140-170 of distribution/build.py to see sorting context
fd -t f "build.py" --path "*/distribution/*" -x cat {} | head -170 | tail -40

Length of output: 233



---

🏁 Script executed:

#!/bin/bash
# Find and view distribution/build.py around lines 140-170
find . -name "build.py" -path "*/distribution/*" -exec sed -n '140,170p' {} \;

Length of output: 1237



---

🏁 Script executed:

#!/bin/bash
# View broader context of distribution/build.py (first 80 lines + lines around 140-165)
sed -n '1,80p' distribution/build.py && echo "---" && sed -n '130,170p' distribution/build.py

Length of output: 4430



---

🏁 Script executed:

#!/bin/bash
# Search for environment variables, deployment flags, or scenario logic in build.py
grep -n "environment\|deploy\|production\|dev\|demo\|scenario\|ENV\|getenv" distribution/build.py | head -20

Length of output: 86


Remove redundant sort on line 164 and document the pymilvus modification.

The pymilvus[milvus-lite] extra is indeed recommended for local development and prototyping, but Milvus Lite is not recommended for production. Given this unconditional modification in a build script with no deployment-specific logic, consider:

  • Remove the redundant packages = sorted(set(packages)) on line 164 (packages were already sorted on line 149; the quote-adding and replacement operations don't create new duplicates)
  • Add a comment explaining why pymilvus[milvus-lite] is required (e.g., "for local development/demos")
  • If this script supports production deployments, make the pymilvus modification conditional or remove it entirely in favor of a standalone Milvus server
🤖 Prompt for AI Agents
In distribution/build.py around lines 157 to 164, remove the redundant final
sorting call (delete the packages = sorted(set(packages)) on line 164) because
packages were already sorted earlier and the replacement won't introduce
duplicates; add a short inline comment above the pymilvus replacement explaining
the reason (e.g., "# Add pymilvus[milvus-lite] for local development/demos; not
recommended for production"); and if this build script may be used for
production, make the pymilvus modification conditional (e.g., guard it with an
explicit flag or environment variable like USE_MILVUS_LITE) so production builds
keep the plain pymilvus package.
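
A sketch of the conditional rewrite suggested above; the `USE_MILVUS_LITE` flag is hypothetical, and the rewrite logic mirrors the list comprehension in the diff:

```python
import os


def maybe_add_milvus_lite(packages, use_milvus_lite=None):
    # Add pymilvus[milvus-lite] for local development/demos; Milvus Lite is
    # not recommended for production, so gate the rewrite behind a
    # (hypothetical) USE_MILVUS_LITE environment flag.
    if use_milvus_lite is None:
        use_milvus_lite = os.getenv("USE_MILVUS_LITE", "1") == "1"
    if not use_milvus_lite:
        return packages
    return [
        p.replace("pymilvus", "pymilvus[milvus-lite]")
        if "pymilvus" in p and "[milvus-lite]" not in p
        else p
        for p in packages
    ]


print(maybe_add_milvus_lite(["pymilvus>=2.4.10"], use_milvus_lite=True))
```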


# Build the command based on flags
if extra_index_url or "--index-url" in flags:
# Torch dependencies with extra index URL
full_cmd = " ".join(cmd_parts + flags + packages)
torch_deps.append(full_cmd)
elif "--no-deps" in flags:
full_cmd = " ".join(cmd_parts + flags + packages)
no_deps.append(full_cmd)
elif "--no-cache" in flags:
full_cmd = " ".join(cmd_parts + flags + packages)
no_cache.append(full_cmd)
else:
# Standard dependencies with multi-line formatting
formatted_packages = " \\\n ".join(packages)
full_cmd = f"{' '.join(cmd_parts)} \\\n {formatted_packages}"
standard_deps.append(full_cmd)

# Combine all dependencies in specific order
all_deps = []

# Add pinned dependencies FIRST to ensure version compatibility
if PINNED_DEPENDENCIES:
pinned_packages = " \\\n ".join(PINNED_DEPENDENCIES)
pinned_cmd = f"RUN pip install --upgrade \\\n {pinned_packages}"
pinned_cmd = f"RUN uv pip install --upgrade \\\n {pinned_packages}"
all_deps.append(pinned_cmd)

all_deps.extend(sorted(standard_deps)) # Regular pip installs
@@ -203,8 +262,11 @@ def generate_containerfile(dependencies, llama_stack_install):


def main():
check_package_installed("uv")
install_llama_stack_from_source(LLAMA_STACK_VERSION)

print("Checking llama installation...")
check_llama_installed()
check_package_installed("llama")

# Do not perform version check if installing from source
if not is_install_from_source(LLAMA_STACK_VERSION):
10 changes: 4 additions & 6 deletions distribution/build.yaml
@@ -16,25 +16,23 @@ distribution_spec:
- provider_type: inline::faiss
safety:
- provider_type: remote::trustyai_fms
module: llama_stack_provider_trustyai_fms==0.2.3
module: llama_stack_provider_trustyai_fms==0.3.0
agents:
- provider_type: inline::meta-reference
eval:
- provider_type: remote::trustyai_lmeval
module: llama_stack_provider_lmeval==0.3.1
module: llama_stack_provider_lmeval==0.4.1
- provider_type: inline::trustyai_ragas
module: llama_stack_provider_ragas==0.4.2
module: llama_stack_provider_ragas==0.5.1
- provider_type: remote::trustyai_ragas
module: llama_stack_provider_ragas[remote]==0.4.2
module: llama_stack_provider_ragas[remote]==0.5.1
datasetio:
- provider_type: remote::huggingface
- provider_type: inline::localfs
scoring:
- provider_type: inline::basic
- provider_type: inline::llm-as-judge
- provider_type: inline::braintrust
telemetry:
- provider_type: inline::meta-reference
tool_runtime:
- provider_type: remote::brave-search
- provider_type: remote::tavily-search