diff --git a/distribution/Containerfile b/distribution/Containerfile index 35f12427d..70cb42891 100644 --- a/distribution/Containerfile +++ b/distribution/Containerfile @@ -7,6 +7,7 @@ WORKDIR /opt/app-root RUN pip install sqlalchemy # somehow sqlalchemy[asyncio] is not sufficient RUN pip install \ aiosqlite \ + asyncpg \ autoevals \ boto3 \ chardet \ @@ -14,9 +15,6 @@ RUN pip install \ fastapi \ fire \ httpx \ - kubernetes \ - llama_stack_provider_lmeval==0.2.4 \ - llama_stack_provider_trustyai_fms==0.2.1 \ matplotlib \ mcp>=1.8.1 \ nltk \ @@ -39,6 +37,10 @@ RUN pip install \ tqdm \ transformers \ uvicorn +RUN pip install \ + llama_stack_provider_lmeval==0.2.4 +RUN pip install \ + llama_stack_provider_trustyai_fms==0.2.1 RUN pip install --extra-index-url https://download.pytorch.org/whl/cpu torch torchao>=0.12.0 torchvision RUN pip install --no-deps sentence-transformers RUN pip install --no-cache llama-stack==0.2.21 diff --git a/distribution/build.yaml b/distribution/build.yaml index 8b34b0720..188db93bf 100644 --- a/distribution/build.yaml +++ b/distribution/build.yaml @@ -8,12 +8,15 @@ distribution_spec: - provider_type: inline::sentence-transformers vector_io: - provider_type: inline::milvus + - provider_type: remote::milvus safety: - provider_type: remote::trustyai_fms + module: llama_stack_provider_trustyai_fms==0.2.1 agents: - provider_type: inline::meta-reference eval: - provider_type: remote::trustyai_lmeval + module: llama_stack_provider_lmeval==0.2.4 datasetio: - provider_type: remote::huggingface - provider_type: inline::localfs @@ -28,10 +31,14 @@ distribution_spec: - provider_type: remote::tavily-search - provider_type: inline::rag-runtime - provider_type: remote::model-context-protocol + files: + - provider_type: inline::localfs container_image: registry.redhat.io/ubi9/python-311:9.6-1749631027 additional_pip_packages: - aiosqlite - sqlalchemy[asyncio] +- asyncpg +- psycopg2-binary image_type: container image_name: llama-stack-rh external_providers_dir: distribution/providers.d diff --git a/distribution/run.yaml b/distribution/run.yaml index ddb7151a0..0d8eee44d 100644 --- a/distribution/run.yaml +++ b/distribution/run.yaml @@ -10,6 +10,7 @@ apis: - telemetry - tool_runtime - vector_io +- files providers: inference: - provider_id: vllm-inference @@ -44,6 +45,20 @@ providers: type: sqlite namespace: null db_path: /opt/app-root/src/.llama/distributions/rh/milvus_registry.db + - provider_id: ${env.MILVUS_ENDPOINT:+milvus-remote} + provider_type: remote::milvus + config: + uri: ${env.MILVUS_ENDPOINT:=} + token: ${env.MILVUS_TOKEN:=} + secure: ${env.MILVUS_SECURE:=} + consistency_level: ${env.MILVUS_CONSISTENCY_LEVEL:=} + ca_pem_path: ${env.MILVUS_CA_PEM_PATH:=} + client_pem_path: ${env.MILVUS_CLIENT_PEM_PATH:=} + client_key_path: ${env.MILVUS_CLIENT_KEY_PATH:=} + kvstore: + type: sqlite + namespace: null + db_path: /opt/app-root/src/.llama/distributions/rh/milvus_remote_registry.db safety: - provider_id: trustyai_fms provider_type: remote::trustyai_fms @@ -66,7 +81,7 @@ providers: - provider_id: trustyai_lmeval provider_type: remote::trustyai_lmeval config: - use_k8s: True + use_k8s: ${env.TRUSTYAI_LMEVAL_USE_K8S:=true} base_url: ${env.VLLM_URL:=http://localhost:8000/v1} datasetio: - provider_id: huggingface diff --git a/tests/smoke.sh b/tests/smoke.sh index 971a84e59..3bac53332 100755 --- a/tests/smoke.sh +++ b/tests/smoke.sh @@ -12,7 +12,6 @@ function start_and_wait_for_llama_stack_container { --env INFERENCE_MODEL="$INFERENCE_MODEL" \ --env VLLM_URL="$VLLM_URL" \ --env TRUSTYAI_LMEVAL_USE_K8S=False \ - --env TRUSTYAI_LM_EVAL_NAMESPACE=dummy \ --name llama-stack \ "$IMAGE_NAME:$GITHUB_SHA" echo "Started Llama Stack container..."