Commit 4b2d68f
fix: use env to configure vLLM (#49)
# What does this PR do?

This PR allows running LLS without the `vLLM` provider and makes the `vLLM` URL configurable through environment variables. Currently, the default `run.yaml` config requires `vLLM` unconditionally. That is not always correct: once other providers are configured, `vLLM` is not needed, and using a different provider does _not_ require a running vLLM instance.

cc @leseb @derekhiggins

## Summary by CodeRabbit

- New Features
  - Conditional activation of the VLLM inference provider and related models based on environment variables, for opt-in usage.
- Bug Fixes
  - Avoids unintended connections to a localhost inference endpoint by removing hardcoded default URLs.
- Chores
  - Simplified configuration defaults for inference and evaluation endpoints (empty unless set), and a fallback model ID to ensure predictable startup.

Approved-by: nathan-weinberg
Approved-by: derekhiggins
2 parents 468e850 + 3bf862c commit 4b2d68f
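The "unintended localhost connections" fix can be illustrated with a small sketch. Llama Stack's real placeholder resolution is more involved; the toy `resolve` helper below is hypothetical and only mirrors the POSIX-shell-like `:=` (use the variable when set, else the default) semantics that the `run.yaml` placeholders suggest.

```python
import re

def resolve(template: str, env: dict) -> str:
    """Toy stand-in for llama-stack env substitution (hypothetical helper).

    ${env.VAR:=default} -> VAR's value if set, else the default
    ${env.VAR:+alt}     -> alt if VAR is set, else empty
    """
    def repl(m):
        var, op, word = m.groups()
        val = env.get(var)
        if op == ":=":
            return val if val else word   # default applies when unset/empty
        return word if val else ""        # ":+" alternate applies when set
    return re.sub(r"\$\{env\.(\w+)(:=|:\+)([^}]*)\}", repl, template)

# Before this commit: an unset VLLM_URL silently resolved to localhost.
old = "url: ${env.VLLM_URL:=http://localhost:8000/v1}"
assert resolve(old, {}) == "url: http://localhost:8000/v1"

# After: the URL stays empty unless the operator explicitly sets VLLM_URL.
new = "url: ${env.VLLM_URL:=}"
assert resolve(new, {}) == "url: "
assert resolve(new, {"VLLM_URL": "https://vllm.example.com/v1"}) \
    == "url: https://vllm.example.com/v1"
```

The URL `https://vllm.example.com/v1` is an illustrative placeholder, not a value from the config.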

File tree

1 file changed (+5 −5 lines)
distribution/run.yaml

Lines changed: 5 additions & 5 deletions
```diff
@@ -13,10 +13,10 @@ apis:
 - files
 providers:
   inference:
-  - provider_id: vllm-inference
+  - provider_id: ${env.VLLM_URL:+vllm-inference}
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_URL:=http://localhost:8000/v1}
+      url: ${env.VLLM_URL:=}
       max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
       api_token: ${env.VLLM_API_TOKEN:=fake}
       tls_verify: ${env.VLLM_TLS_VERIFY:=true}
@@ -107,7 +107,7 @@ providers:
     module: llama_stack_provider_lmeval==0.2.4
     config:
       use_k8s: ${env.TRUSTYAI_LMEVAL_USE_K8S:=true}
-      base_url: ${env.VLLM_URL:=http://localhost:8000/v1}
+      base_url: ${env.VLLM_URL:=}
   datasetio:
   - provider_id: huggingface
     provider_type: remote::huggingface
@@ -175,8 +175,8 @@ inference_store:
   db_path: /opt/app-root/src/.llama/distributions/rh/inference_store.db
 models:
 - metadata: {}
-  model_id: ${env.INFERENCE_MODEL}
-  provider_id: vllm-inference
+  model_id: ${env.INFERENCE_MODEL:=dummy}
+  provider_id: ${env.VLLM_URL:+vllm-inference}
   model_type: llm
 - metadata:
     embedding_dimension: 768
```
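The opt-in mechanics in the diff hinge on the `:+` operator: `${env.VLLM_URL:+vllm-inference}` resolves to `vllm-inference` only when `VLLM_URL` is set, and to an empty string otherwise. A minimal sketch, assuming the placeholders follow shell-style expansion rules (the `resolve` helper is a hypothetical toy, not llama-stack's actual resolver):

```python
import re

def resolve(template: str, env: dict) -> str:
    """Toy stand-in for llama-stack env substitution (hypothetical helper).

    ${env.VAR:=default} -> VAR's value if set, else the default
    ${env.VAR:+alt}     -> alt if VAR is set, else empty (conditional opt-in)
    """
    def repl(m):
        var, op, word = m.groups()
        val = env.get(var)
        if op == ":=":
            return val if val else word
        return word if val else ""
    return re.sub(r"\$\{env\.(\w+)(:=|:\+)([^}]*)\}", repl, template)

# With VLLM_URL unset, the provider id resolves to empty, so the vLLM
# provider is effectively switched off and the model id falls back to "dummy".
assert resolve("${env.VLLM_URL:+vllm-inference}", {}) == ""
assert resolve("${env.INFERENCE_MODEL:=dummy}", {}) == "dummy"

# Exporting VLLM_URL opts the provider (and its model binding) back in.
env = {"VLLM_URL": "https://vllm.example.com/v1", "INFERENCE_MODEL": "llama-3"}
assert resolve("${env.VLLM_URL:+vllm-inference}", env) == "vllm-inference"
assert resolve("${env.INFERENCE_MODEL:=dummy}", env) == "llama-3"
```

The example values (`https://vllm.example.com/v1`, `llama-3`) are illustrative; this is why an unset `VLLM_URL` no longer forces a vLLM dependency at startup.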
