Merged
25 commits
a1cb769  upgrade to llama-stack 0.3.0. (dmaniloff, Oct 10, 2025)
abf61a3  Use SamplingParams in wrappers. (dmaniloff, Oct 13, 2025)
b8cfeae  Conditionally include remote if dependencies are installed. (dmaniloff, Oct 13, 2025)
d6e39ed  Update basic_demo notebook and constants for Ragas evaluation. (dmaniloff, Oct 13, 2025)
7c39ed9  uv sync. (dmaniloff, Oct 13, 2025)
ee7c63e  Use constants for provider types in inline and remote providers. (dmaniloff, Oct 13, 2025)
8314f45  Update README & docs. (dmaniloff, Oct 13, 2025)
7f5a56c  Update pandas dependency to version <2.3.0 in pyproject.toml and uv.l… (dmaniloff, Oct 14, 2025)
73e0496  Pin to LLS 0.2.23. (dmaniloff, Oct 14, 2025)
36e613c  Version bump. (dmaniloff, Oct 14, 2025)
af80f2f  revert changes to provider.py (dmaniloff, Oct 15, 2025)
3b9ec66  Update dependencies in pyproject.toml: remove llama-stack-client, upg… (dmaniloff, Oct 15, 2025)
e1b82aa  revert changes to pyproject. (dmaniloff, Oct 15, 2025)
a95e028  revert changes to uv.lock (dmaniloff, Oct 15, 2025)
ca9c978  Merge branch 'main' into openai-api-downgrade-pandas (dmaniloff, Oct 15, 2025)
cb233cb  revert changes to provider.py modules. (dmaniloff, Oct 15, 2025)
79453f4  revert changes to vscode config. (dmaniloff, Oct 15, 2025)
c487948  revert distro & demo changes. (dmaniloff, Oct 15, 2025)
5a055bb  revert changes to constants.py (dmaniloff, Oct 15, 2025)
0d14b2a  revert doc changes. (dmaniloff, Oct 15, 2025)
6982453  part of revert changes to constants.py. (dmaniloff, Oct 15, 2025)
ab62532  version bump. (dmaniloff, Oct 15, 2025)
405c110  Merge branch 'main' into openai-api-downgrade-pandas (dmaniloff, Oct 16, 2025)
208c880  Update llama-stack dependency to allow versions >=0.2.23 in pyproject… (dmaniloff, Oct 16, 2025)
34cbd69  Merge branch 'main' into openai-api-downgrade-pandas (dmaniloff, Oct 17, 2025)
15 changes: 2 additions & 13 deletions .vscode/launch.json
@@ -4,23 +4,12 @@
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
-
{
-    "name": "Debug Ragas Distribution -- Remote",
+    "name": "Debug Ragas Distribution",
"type": "debugpy",
"request": "launch",
"module": "llama_stack.cli.llama",
-    "args": ["stack", "run", "distribution/run-remote.yaml"],
-    "cwd": "${workspaceFolder}",
-    "envFile": "${workspaceFolder}/.env",
-    "justMyCode": false
-},
-{
-    "name": "Debug Ragas Distribution -- Inline",
-    "type": "debugpy",
-    "request": "launch",
-    "module": "llama_stack.cli.llama",
-    "args": ["stack", "run", "distribution/run-inline.yaml"],
+    "args": ["stack", "run", "distribution/run.yaml"],
"cwd": "${workspaceFolder}",
"envFile": "${workspaceFolder}/.env",
"justMyCode": false
40 changes: 22 additions & 18 deletions README.md
@@ -14,8 +14,8 @@ This repository implements [Ragas](https://github.com/explodinggradients/ragas)
The goal is to provide all of Ragas' evaluation functionality over Llama Stack's eval API, while leveraging Llama Stack's built-in APIs for inference (LLMs and embeddings), datasets, and benchmarks.

There are two versions of the provider:
-- `inline`: runs the Ragas evaluation in the same process as the Llama Stack server.
-- `remote`: runs the Ragas evaluation in a remote process, using Kubeflow Pipelines.
+- `inline`: runs the Ragas evaluation in the same process as the Llama Stack server. This is always available with the base installation.
+- `remote`: runs the Ragas evaluation in a remote process, using Kubeflow Pipelines. Only available when remote dependencies are installed with `pip install llama-stack-provider-ragas[remote]`.
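For reference, this choice surfaces as eval provider entries in `distribution/run.yaml`; a minimal sketch of the inline entry, using the provider IDs this PR introduces:

```yaml
eval:
  - provider_id: trustyai_ragas_inline
    provider_type: inline::trustyai_ragas_inline
    module: llama_stack_provider_ragas.inline
    config:
      embedding_model: ${env.EMBEDDING_MODEL}
```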

## Prerequisites
- Python 3.12
@@ -41,12 +41,29 @@ There are two versions of the provider:
```
- The sample LS distribution is a simple LS distribution that uses Ollama for inference and embeddings. See the provider-specific sections below for setup and run commands.

-### Remote provider (default)
+### Inline provider (default with base installation)
+
+Create a `.env` file with the required environment variable:
+```bash
+EMBEDDING_MODEL=ollama/all-minilm:l6-v2
+```
+
+Run the server:
+```bash
+dotenv run uv run llama stack run distribution/run.yaml
+```
+
+### Remote provider (requires optional dependencies)
+
+First install the remote dependencies:
+```bash
+uv pip install -e ".[remote]"
+```

Create a `.env` file with the following:
```bash
# Required for both inline and remote
-EMBEDDING_MODEL=all-MiniLM-L6-v2
+EMBEDDING_MODEL=ollama/all-minilm:l6-v2

# Required for remote provider
KUBEFLOW_LLAMA_STACK_URL=<your-llama-stack-url>
@@ -73,22 +90,9 @@ Where:

Run the server:
```bash
-dotenv run uv run llama stack run distribution/run-remote.yaml
-```
-
-### Inline provider (need to specify `.inline` in the module name)
-
-Create a `.env` file with the required environment variable:
-```bash
-EMBEDDING_MODEL=all-MiniLM-L6-v2
-```
-
-Run the server:
-```bash
-dotenv run uv run llama stack run distribution/run-inline.yaml
+dotenv run uv run llama stack run distribution/run.yaml
```

-You will notice that `run-inline.yaml` file has the module name as `llama_stack_provider_ragas.inline`, in order to specify the inline provider.

## Usage
See the demos in the `demos` directory.
711 changes: 447 additions & 264 deletions demos/remote_demo.ipynb → demos/basic_demo.ipynb

Large diffs are not rendered by default.

880 changes: 0 additions & 880 deletions demos/inline_demo.ipynb

This file was deleted.

61 changes: 0 additions & 61 deletions distribution/run-inline.yaml

This file was deleted.

11 changes: 8 additions & 3 deletions distribution/run-remote.yaml → distribution/run.yaml
dmaniloff (author): @nathan-weinberg I will post a PR to the distro to reflect these naming (provider_id/type) changes.
@@ -9,8 +9,8 @@ apis:
- datasetio
providers:
eval:
-  - provider_id: trustyai_ragas
-    provider_type: remote::trustyai_ragas
+  - provider_id: trustyai_ragas_remote
+    provider_type: remote::trustyai_ragas_remote
module: llama_stack_provider_ragas
config:
embedding_model: ${env.EMBEDDING_MODEL}
@@ -20,7 +20,12 @@ providers:
pipelines_endpoint: ${env.KUBEFLOW_PIPELINES_ENDPOINT}
namespace: ${env.KUBEFLOW_NAMESPACE}
llama_stack_url: ${env.KUBEFLOW_LLAMA_STACK_URL}
-      base_image: ${env.KUBEFLOW_BASE_IMAGE}
+      base_image: ${env.KUBEFLOW_BASE_IMAGE:=}
+  - provider_id: trustyai_ragas_inline
+    provider_type: inline::trustyai_ragas_inline
+    module: llama_stack_provider_ragas.inline
+    config:
+      embedding_model: ${env.EMBEDDING_MODEL}
datasetio:
- provider_id: localfs
provider_type: inline::localfs
4 changes: 2 additions & 2 deletions docs/modules/ROOT/pages/index.adoc
@@ -15,8 +15,8 @@ The goal is to provide all of Ragas' evaluation functionality over Llama Stack's

There are two versions of the provider:

-* `remote`: runs the Ragas evaluation in a remote process, using Kubeflow Pipelines. This is the *default* when using the module-based import.
-* `inline`: runs the Ragas evaluation in the same process as the Llama Stack server.
+* `inline`: runs the Ragas evaluation in the same process as the Llama Stack server. This is always available with the base installation.
+* `remote`: runs the Ragas evaluation in a remote process, using Kubeflow Pipelines. Only available when remote dependencies are installed with `pip install llama-stack-provider-ragas[remote]`.

== Getting Started

4 changes: 2 additions & 2 deletions docs/modules/ROOT/pages/inline-provider.adoc
@@ -139,8 +139,8 @@ The inline provider is setup in the following lines of the `run-inline.yaml`:
[,yaml]
----
eval:
-  - provider_id: trustyai_ragas
-    provider_type: inline::trustyai_ragas
+  - provider_id: trustyai_ragas_inline
+    provider_type: inline::trustyai_ragas_inline
module: llama_stack_provider_ragas.inline
config:
embedding_model: ${env.EMBEDDING_MODEL}
6 changes: 3 additions & 3 deletions docs/modules/ROOT/pages/remote-provider.adoc
@@ -195,9 +195,9 @@ The remote provider is setup in the following lines of the `run-remote.yaml`:
[,yaml]
----
eval:
-  - provider_id: trustyai_ragas
-    provider_type: remote::trustyai_ragas
-    module: llama_stack_provider_ragas.remote # can also just be llama_stack_provider_ragas and it will default to remote
+  - provider_id: trustyai_ragas_remote
+    provider_type: remote::trustyai_ragas_remote
+    module: llama_stack_provider_ragas.remote
config:
embedding_model: ${env.EMBEDDING_MODEL}
kubeflow_config:
7 changes: 5 additions & 2 deletions pyproject.toml
@@ -25,7 +25,8 @@ authors = [
keywords = ["llama-stack", "ragas", "evaluation"]
dependencies = [
"setuptools-scm",
-    "llama-stack==0.2.23",
+    "llama-stack @ git+https://github.com/llamastack/llama-stack.git",
+    "llama-stack-client @ git+https://github.com/llamastack/llama-stack-client-python.git",
dmaniloff (author, conversation resolved): replace w/ 0.3.0 once ready.

"greenlet==3.2.4", # inline/files/localfs errors saying greenlet not found
"ragas==0.3.0",
"pandas==2.3.0",
@@ -84,8 +85,10 @@
"C901", # too complex
]

+[tool.hatch.metadata]
+allow-direct-references = true

dmaniloff (author): only temporary to allow git+https://github.com in the dependencies.

[tool.mypy]
python_version = "3.12"
warn_return_any = true
warn_unused_configs = true
disallow_untyped_defs = false
4 changes: 3 additions & 1 deletion src/llama_stack_provider_ragas/constants.py
@@ -5,6 +5,9 @@
faithfulness,
)

+PROVIDER_ID_INLINE = "trustyai_ragas_inline"
+PROVIDER_ID_REMOTE = "trustyai_ragas_remote"

METRIC_MAPPING = {
metric_func.name: metric_func
for metric_func in [
@@ -20,7 +23,6 @@
# "rouge_score": RougeScore(),
]
}

AVAILABLE_METRICS = list(METRIC_MAPPING.keys())

# Kubeflow ConfigMap keys and defaults for base image resolution
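The `METRIC_MAPPING` shown in this diff is a small registry pattern: a name-keyed dict built from a list of metric objects, with `AVAILABLE_METRICS` derived from its keys. A standalone sketch of the same pattern, using stand-in objects rather than the real Ragas metric imports:

```python
from dataclasses import dataclass


@dataclass(frozen=True)
class FakeMetric:
    """Stand-in for a Ragas metric object, which exposes a .name attribute."""
    name: str


# Same shape as METRIC_MAPPING in constants.py, with placeholder metrics.
METRIC_MAPPING = {
    metric_func.name: metric_func
    for metric_func in [
        FakeMetric("answer_relevancy"),
        FakeMetric("faithfulness"),
    ]
}

AVAILABLE_METRICS = list(METRIC_MAPPING.keys())


def resolve_metric(name: str) -> FakeMetric:
    """Look up a metric by name, failing loudly on unknown names."""
    if name not in METRIC_MAPPING:
        raise ValueError(f"Unknown metric {name!r}; choose from {AVAILABLE_METRICS}")
    return METRIC_MAPPING[name]
```

Deriving the available-metric list from the mapping keeps the two in sync automatically when a metric is added or removed.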
4 changes: 3 additions & 1 deletion src/llama_stack_provider_ragas/inline/provider.py
@@ -1,10 +1,12 @@
from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec

+from ..constants import PROVIDER_ID_INLINE


def get_provider_spec() -> ProviderSpec:
return InlineProviderSpec(
api=Api.eval,
-        provider_type="inline::trustyai_ragas",
+        provider_type=f"inline::{PROVIDER_ID_INLINE}",
pip_packages=["ragas==0.3.0"],
config_class="llama_stack_provider_ragas.config.RagasProviderInlineConfig",
module="llama_stack_provider_ragas.inline",
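The provider.py change above derives the namespaced `provider_type` from a single shared constant, so the spec and the run.yaml provider IDs cannot drift apart. A standalone sketch of that pattern (the `namespaced_type` helper is illustrative, not part of the repository):

```python
# Single source of truth for provider IDs, mirroring constants.py in this PR.
PROVIDER_ID_INLINE = "trustyai_ragas_inline"
PROVIDER_ID_REMOTE = "trustyai_ragas_remote"


def namespaced_type(kind: str, provider_id: str) -> str:
    """Build the 'kind::id' provider_type string used in the provider specs."""
    return f"{kind}::{provider_id}"


inline_type = namespaced_type("inline", PROVIDER_ID_INLINE)
remote_type = namespaced_type("remote", PROVIDER_ID_REMOTE)
```

With this in place, renaming a provider means editing one constant rather than hunting for string literals across modules.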