forked from opendatahub-io/llama-stack-distribution
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbuild.py
More file actions
executable file
·289 lines (238 loc) · 10.3 KB
/
build.py
File metadata and controls
executable file
·289 lines (238 loc) · 10.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
# Usage: ./distribution/build.py
import shutil
import subprocess
import sys
import os
import shlex
from pathlib import Path
# Default llama-stack git ref/version to build; "main" tracks the fork's
# development branch. Override at build time with the LLAMA_STACK_VERSION
# environment variable.
CURRENT_LLAMA_STACK_VERSION = "main"
LLAMA_STACK_VERSION = os.getenv("LLAMA_STACK_VERSION", CURRENT_LLAMA_STACK_VERSION)
# Requirements always expected in the image; the llama-stack pin is derived
# from the version selected above.
BASE_REQUIREMENTS = [
    f"llama-stack=={LLAMA_STACK_VERSION}",
]
# Constrain packages we are patching to ensure reliable and repeatable build
# NOTE: entries are pre-quoted ('pkg>=x') so that '>'/'<' in version specs
# are not treated as shell redirection inside the generated RUN directives.
PINNED_DEPENDENCIES = [
    "'kfp-kubernetes==2.14.6'",
    "'pyarrow>=21.0.0'",
    "'botocore==1.35.88'",
    "'boto3==1.35.88'",
    "'aiobotocore==2.16.1'",
    "'ibm-cos-sdk-core==2.14.2'",
    "'ibm-cos-sdk==2.14.2'",
]
# Template for the Containerfile directive that installs llama-stack from a
# git ref instead of a released wheel.
source_install_command = """RUN uv pip install --no-cache --no-deps git+https://github.com/opendatahub-io/llama-stack.git@{llama_stack_version}"""


def get_llama_stack_install(llama_stack_version):
    """Return the Containerfile RUN directive for a source install of llama-stack.

    Args:
        llama_stack_version: git SHA, branch name, or version string.

    Returns:
        The formatted RUN directive when *llama_stack_version* refers to a
        git ref (see is_install_from_source), otherwise None — release
        versions are installed through the regular dependency list instead.
    """
    if not is_install_from_source(llama_stack_version):
        # Explicitly return None (the original fell through implicitly);
        # generate_containerfile() substitutes an empty string for it.
        return None
    print(f"Installing llama-stack from source: {llama_stack_version}")
    return source_install_command.format(
        llama_stack_version=llama_stack_version
    ).rstrip()
def is_install_from_source(llama_stack_version):
    """Return True when *llama_stack_version* names a git ref rather than a release.

    A bare commit SHA or branch name contains no dots, and RHAI custom
    builds carry a "+rhai" local-version suffix; both are installed from
    source instead of from a released wheel.
    """
    looks_like_git_ref = "." not in llama_stack_version
    is_rhai_build = "+rhai" in llama_stack_version
    return looks_like_git_ref or is_rhai_build
def check_package_installed(package_name):
    """Exit with status 1 when *package_name* is not an executable on PATH."""
    if shutil.which(package_name):
        return
    print(f"Error: {package_name} not found. Please install it first.")
    sys.exit(1)
def check_llama_stack_version():
    """Check if the llama-stack version in BASE_REQUIREMENTS matches the installed version.

    Exits with status 1 on a mismatch. If the llama CLI cannot be run at
    all, prints a warning and skips the check instead of failing.
    """
    try:
        # Argument list with shell=False (the original passed a single
        # "llama stack --version" string inside a list with shell=True):
        # no shell re-parsing, and failures are reported directly.
        result = subprocess.run(
            ["llama", "stack", "--version"],
            capture_output=True,
            text=True,
            check=True,
        )
    except (subprocess.CalledProcessError, FileNotFoundError) as e:
        # FileNotFoundError can now occur without a shell when the llama
        # binary is missing; treat it the same as a failed invocation.
        print(f"Warning: Could not check llama-stack version: {e}")
        print("Continuing without version validation...")
        return
    installed_version = result.stdout.strip()
    # Extract version from BASE_REQUIREMENTS (the llama-stack== pin).
    expected_version = None
    for req in BASE_REQUIREMENTS:
        if req.startswith("llama-stack=="):
            expected_version = req.split("==")[1]
            break
    if expected_version and installed_version != expected_version:
        print("Error: llama-stack version mismatch!")
        print(f"  Expected: {expected_version}")
        print(f"  Installed: {installed_version}")
        print(
            "  If you just bumped the llama-stack version in BASE_REQUIREMENTS, you must update the version from .pre-commit-config.yaml"
        )
        sys.exit(1)
def install_llama_stack_from_source(llama_stack_version):
    """Install llama-stack from source using git.

    Args:
        llama_stack_version: git SHA, branch, or tag to install.

    Exits with status 1 when the installation fails.
    """
    print("installing llama-stack from source...")
    # Argument list + shell=False (the original interpolated the ref into a
    # shell string): the version lands in a single argv entry and cannot be
    # re-parsed by a shell.
    cmd = [
        "uv",
        "pip",
        "install",
        f"git+https://github.com/opendatahub-io/llama-stack.git@{llama_stack_version}",
    ]
    try:
        result = subprocess.run(cmd, check=True, capture_output=True, text=True)
        # Print stdout if there's any output
        if result.stdout:
            print(result.stdout)
    except subprocess.CalledProcessError as e:
        print(f"Error installing llama-stack: {e}")
        if e.stdout:
            print(f"stdout: {e.stdout}")
        if e.stderr:
            print(f"stderr: {e.stderr}")
        sys.exit(1)
def _partition_install_args(tokens):
    """Split pip-install tokens into (packages, flags, extra_index_url)."""
    packages = []
    flags = []
    extra_index_url = None
    i = 0
    while i < len(tokens):
        token = tokens[i]
        if token == "--extra-index-url" and i + 1 < len(tokens):
            extra_index_url = tokens[i + 1]
            flags.extend(tokens[i : i + 2])
            i += 2
        elif token == "--index-url" and i + 1 < len(tokens):
            flags.extend(tokens[i : i + 2])
            i += 2
        elif token in ("--no-deps", "--no-cache"):
            flags.append(token)
            i += 1
        else:
            packages.append(token)
            i += 1
    return packages, flags, extra_index_url


def _normalize_packages(packages):
    """Sort/deduplicate package specs, quote those containing < or > (to
    prevent bash redirection in RUN directives), and add the milvus-lite
    extra to pymilvus."""
    packages = sorted(set(packages))
    packages = [
        f"'{package}'" if (">" in package or "<" in package) else package
        for package in packages
    ]
    # Modify pymilvus package to include milvus-lite extra
    packages = [
        package.replace("pymilvus", "pymilvus[milvus-lite]")
        if "pymilvus" in package and "[milvus-lite]" not in package
        else package
        for package in packages
    ]
    return sorted(set(packages))


def _format_dependencies(output):
    """Convert `llama stack list-deps` stdout into categorized RUN directives.

    Order: pinned dependencies first, then standard installs, torch
    (index-url) installs, --no-deps installs, and --no-cache installs.
    """
    standard_deps = []
    torch_deps = []
    no_deps = []
    no_cache = []
    for line in output.splitlines():
        line = line.strip()
        if not line:  # Skip empty lines
            continue
        # Handle both "uv pip" format and direct package list format
        if line.startswith("uv pip"):
            # Legacy format: "uv pip install ..."
            line = line.replace("uv ", "RUN ", 1)
            parts = line.split(" ", 3)
            if len(parts) >= 4:  # We have packages to sort
                cmd_parts = parts[:3]
                packages_str = parts[3]
            else:
                standard_deps.append(" ".join(parts))
                continue
        else:
            # New format: just packages, possibly with flags
            cmd_parts = ["RUN", "uv", "pip", "install"]
            packages_str = line
        # shlex.split properly handles quoted package names
        packages, flags, extra_index_url = _partition_install_args(
            shlex.split(packages_str)
        )
        packages = _normalize_packages(packages)
        # Build the command based on flags
        if extra_index_url or "--index-url" in flags:
            # Torch dependencies with extra index URL
            torch_deps.append(" ".join(cmd_parts + flags + packages))
        elif "--no-deps" in flags:
            no_deps.append(" ".join(cmd_parts + flags + packages))
        elif "--no-cache" in flags:
            no_cache.append(" ".join(cmd_parts + flags + packages))
        else:
            # Standard dependencies with multi-line formatting
            formatted_packages = " \\\n    ".join(packages)
            standard_deps.append(f"{' '.join(cmd_parts)} \\\n    {formatted_packages}")
    # Combine all dependencies in specific order
    all_deps = []
    # Add pinned dependencies FIRST to ensure version compatibility
    if PINNED_DEPENDENCIES:
        pinned_packages = " \\\n    ".join(PINNED_DEPENDENCIES)
        all_deps.append(f"RUN uv pip install --upgrade \\\n    {pinned_packages}")
    all_deps.extend(sorted(standard_deps))  # Regular pip installs
    all_deps.extend(sorted(torch_deps))  # PyTorch specific installs
    all_deps.extend(sorted(no_deps))  # No-deps installs
    all_deps.extend(sorted(no_cache))  # No-cache installs
    return "\n".join(all_deps)


def get_dependencies():
    """Execute the llama stack build command and capture dependencies.

    Returns:
        Newline-joined RUN directives for the Containerfile (see
        _format_dependencies for ordering). Exits with status 1 if the
        llama command fails.
    """
    cmd = ["llama", "stack", "list-deps", "distribution/build.yaml"]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
    except (subprocess.CalledProcessError, FileNotFoundError) as e:
        print(f"Error executing command: {e}")
        print(f"Command output: {getattr(e, 'output', '')}")
        print(f"Command stderr: {getattr(e, 'stderr', '')}")
        sys.exit(1)
    return _format_dependencies(result.stdout)
def generate_containerfile(dependencies, llama_stack_install):
    """Render distribution/Containerfile from its template.

    Substitutes the dependency RUN directives and the optional source
    install directive into distribution/Containerfile.in, drops blank
    lines left by empty substitutions, and writes the result. Exits with
    status 1 when the template is missing.
    """
    template_path = Path("distribution/Containerfile.in")
    output_path = Path("distribution/Containerfile")
    if not template_path.exists():
        print(f"Error: Template file {template_path} not found")
        sys.exit(1)
    template_content = template_path.read_text()
    # Warn future readers that edits belong in the template, not the output.
    warning = "# WARNING: This file is auto-generated. Do not modify it manually.\n# Generated by: distribution/build.py\n\n"
    rendered = warning + template_content.format(
        dependencies=dependencies.rstrip(),
        llama_stack_install_source=llama_stack_install if llama_stack_install else "",
    )
    kept = [line for line in rendered.splitlines() if line.strip()]
    output_path.write_text("\n".join(kept) + "\n")
    print(f"Successfully generated {output_path}")
def main():
    """Drive the Containerfile generation end to end."""
    # Tooling prerequisites: uv for installs, then the llama CLI itself.
    check_package_installed("uv")
    install_llama_stack_from_source(LLAMA_STACK_VERSION)
    print("Checking llama installation...")
    check_package_installed("llama")

    # Do not perform version check if installing from source
    from_source = is_install_from_source(LLAMA_STACK_VERSION)
    if not from_source:
        print("Checking llama-stack version...")
        check_llama_stack_version()

    print("Getting dependencies...")
    run_directives = get_dependencies()
    print("Getting llama-stack install...")
    source_directive = get_llama_stack_install(LLAMA_STACK_VERSION)
    print("Generating Containerfile...")
    generate_containerfile(run_directives, source_directive)
    print("Done!")
# Script entry point: regenerate distribution/Containerfile when run directly.
if __name__ == "__main__":
    main()