Merged
15 changes: 2 additions & 13 deletions .vscode/launch.json
@@ -4,23 +4,12 @@
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [

         {
-            "name": "Debug Ragas Distribution -- Remote",
+            "name": "Debug Ragas Distribution",
             "type": "debugpy",
             "request": "launch",
             "module": "llama_stack.cli.llama",
-            "args": ["stack", "run", "distribution/run-remote.yaml"],
-            "cwd": "${workspaceFolder}",
-            "envFile": "${workspaceFolder}/.env",
-            "justMyCode": false
-        },
-        {
-            "name": "Debug Ragas Distribution -- Inline",
-            "type": "debugpy",
-            "request": "launch",
-            "module": "llama_stack.cli.llama",
-            "args": ["stack", "run", "distribution/run-inline.yaml"],
+            "args": ["stack", "run", "distribution/run.yaml"],
             "cwd": "${workspaceFolder}",
             "envFile": "${workspaceFolder}/.env",
             "justMyCode": false
40 changes: 22 additions & 18 deletions README.md
@@ -14,8 +14,8 @@ This repository implements [Ragas](https://github.com/explodinggradients/ragas)
The goal is to provide all of Ragas' evaluation functionality over Llama Stack's eval API, while leveraging the Llama Stack's built-in APIs for inference (llms and embeddings), datasets, and benchmarks.

There are two versions of the provider:
-- `inline`: runs the Ragas evaluation in the same process as the Llama Stack server.
-- `remote`: runs the Ragas evaluation in a remote process, using Kubeflow Pipelines.
+- `inline`: runs the Ragas evaluation in the same process as the Llama Stack server. This is always available with the base installation.
+- `remote`: runs the Ragas evaluation in a remote process, using Kubeflow Pipelines. Only available when remote dependencies are installed with `pip install llama-stack-provider-ragas[remote]`.

## Prerequisites
- Python 3.12
@@ -41,12 +41,29 @@ There are two versions of the provider:
```
- The sample LS distributions (one for inline and one for remote provider) is a simple LS distribution that uses Ollama for inference and embeddings. See the provider-specific sections below for setup and run commands.
Contributor comment:

issue (typo): Grammatical error: 'is' should be 'are' when referring to multiple distributions.

The sentence should use 'are' instead of 'is' to match the plural subject.

Suggested change:
-- The sample LS distributions (one for inline and one for remote provider) is a simple LS distribution that uses Ollama for inference and embeddings. See the provider-specific sections below for setup and run commands.
+- The sample LS distributions (one for inline and one for remote provider) are simple LS distributions that use Ollama for inference and embeddings. See the provider-specific sections below for setup and run commands.
-### Remote provider (default)
+### Inline provider (default with base installation)

Create a `.env` file with the required environment variable:
```bash
EMBEDDING_MODEL=ollama/all-minilm:l6-v2
```

Run the server:
```bash
dotenv run uv run llama stack run distribution/run.yaml
```

### Remote provider (requires optional dependencies)

First install the remote dependencies:
```bash
uv pip install -e ".[remote]"
```

Create a `.env` file with the following:
```bash
# Required for both inline and remote
-EMBEDDING_MODEL=all-MiniLM-L6-v2
+EMBEDDING_MODEL=ollama/all-minilm:l6-v2

# Required for remote provider
KUBEFLOW_LLAMA_STACK_URL=<your-llama-stack-url>
@@ -75,22 +92,9 @@ Where:

Run the server:
```bash
-dotenv run uv run llama stack run distribution/run-remote.yaml
-```
-
-### Inline provider (need to specify `.inline` in the module name)
-
-Create a `.env` file with the required environment variable:
-```bash
-EMBEDDING_MODEL=all-MiniLM-L6-v2
-```
-
-Run the server:
-```bash
-dotenv run uv run llama stack run distribution/run-inline.yaml
+dotenv run uv run llama stack run distribution/run.yaml
```

-You will notice that `run-inline.yaml` file has the module name as `llama_stack_provider_ragas.inline`, in order to specify the inline provider.

## Usage
See the demos in the `demos` directory.
826 changes: 550 additions & 276 deletions demos/remote_demo.ipynb → demos/basic_demo.ipynb

Large diffs are not rendered by default.

880 changes: 0 additions & 880 deletions demos/inline_demo.ipynb

This file was deleted.

61 changes: 0 additions & 61 deletions distribution/run-inline.yaml

This file was deleted.

9 changes: 7 additions & 2 deletions distribution/run-remote.yaml → distribution/run.yaml
@@ -9,9 +9,9 @@ apis:
 - datasetio
 providers:
   eval:
-  - provider_id: trustyai_ragas
+  - provider_id: ${env.KUBEFLOW_LLAMA_STACK_URL:+trustyai_ragas_remote}
     provider_type: remote::trustyai_ragas
-    module: llama_stack_provider_ragas
+    module: llama_stack_provider_ragas.remote
     config:
       embedding_model: ${env.EMBEDDING_MODEL}
       kubeflow_config:
@@ -22,6 +22,11 @@ providers:
         llama_stack_url: ${env.KUBEFLOW_LLAMA_STACK_URL}
         base_image: ${env.KUBEFLOW_BASE_IMAGE}
         pipelines_api_token: ${env.KUBEFLOW_PIPELINES_TOKEN:=}
+  - provider_id: ${env.EMBEDDING_MODEL:+trustyai_ragas_inline}
+    provider_type: inline::trustyai_ragas
+    module: llama_stack_provider_ragas.inline
+    config:
+      embedding_model: ${env.EMBEDDING_MODEL}
   datasetio:
   - provider_id: localfs
     provider_type: inline::localfs
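The `${env.VAR:+value}` and `${env.VAR:=default}` substitutions in `run.yaml` above follow bash-style parameter expansion: a provider block's `provider_id` resolves to a name only when the gating variable is set, which is how this config enables the remote provider only when `KUBEFLOW_LLAMA_STACK_URL` is present. A rough sketch of the semantics, using a hypothetical `expand` helper (not Llama Stack's actual resolver):

```python
import os
import re


def expand(template: str, env=os.environ) -> str:
    """Minimal sketch of ${env.VAR:+alt} / ${env.VAR:=default} expansion.

    Hypothetical helper for illustration only -- Llama Stack's real
    resolver may differ in details.
    """

    def sub(match: re.Match) -> str:
        var, op, word = match.group(1), match.group(2), match.group(3)
        if op == ":+":  # alternate value: use `word` only if VAR is set and non-empty
            return word if env.get(var) else ""
        if op == ":=":  # default value: use VAR if set, else fall back to `word`
            return env.get(var) or word
        return env.get(var, "")  # plain ${env.VAR}

    return re.sub(r"\$\{env\.(\w+)(:[+=])?([^}]*)\}", sub, template)


env = {"EMBEDDING_MODEL": "ollama/all-minilm:l6-v2"}
print(expand("${env.EMBEDDING_MODEL:+trustyai_ragas_inline}", env))        # trustyai_ragas_inline
print(expand("${env.KUBEFLOW_LLAMA_STACK_URL:+trustyai_ragas_remote}", env))  # "" -> block disabled
```

With this scheme, a single `run.yaml` can carry both provider blocks and let the environment decide which ones are active.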
4 changes: 2 additions & 2 deletions docs/modules/ROOT/pages/index.adoc
@@ -15,8 +15,8 @@ The goal is to provide all of Ragas' evaluation functionality over Llama Stack's

There are two versions of the provider:

-* `remote`: runs the Ragas evaluation in a remote process, using Kubeflow Pipelines. This is the *default* when using the module-based import.
-* `inline`: runs the Ragas evaluation in the same process as the Llama Stack server.
+* `inline`: runs the Ragas evaluation in the same process as the Llama Stack server. This is always available with the base installation.
+* `remote`: runs the Ragas evaluation in a remote process, using Kubeflow Pipelines. Only available when remote dependencies are installed with `pip install llama-stack-provider-ragas[remote]`.

== Getting Started

2 changes: 1 addition & 1 deletion docs/modules/ROOT/pages/inline-provider.adoc
@@ -139,7 +139,7 @@ The inline provider is setup in the following lines of the `run-inline.yaml`:
[,yaml]
----
   eval:
-  - provider_id: trustyai_ragas
+  - provider_id: trustyai_ragas_inline
     provider_type: inline::trustyai_ragas
     module: llama_stack_provider_ragas.inline
     config:
4 changes: 2 additions & 2 deletions docs/modules/ROOT/pages/remote-provider.adoc
@@ -201,9 +201,9 @@ The remote provider is setup in the following lines of the `run-remote.yaml`:
[,yaml]
----
   eval:
-  - provider_id: trustyai_ragas
+  - provider_id: trustyai_ragas_remote
     provider_type: remote::trustyai_ragas
-    module: llama_stack_provider_ragas.remote # can also just be llama_stack_provider_ragas and it will default to remote
+    module: llama_stack_provider_ragas.remote
     config:
       embedding_model: ${env.EMBEDDING_MODEL}
       kubeflow_config:
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"

[project]
name = "llama-stack-provider-ragas"
-version = "0.4.0"
+version = "0.4.1"
description = "Ragas evaluation as an out-of-tree Llama Stack provider"
readme = "README.md"
requires-python = ">=3.12"
5 changes: 4 additions & 1 deletion src/llama_stack_provider_ragas/constants.py
@@ -5,6 +5,10 @@
     faithfulness,
 )

+PROVIDER_TYPE = "trustyai_ragas"
+PROVIDER_ID_INLINE = "trustyai_ragas_inline"
+PROVIDER_ID_REMOTE = "trustyai_ragas_remote"
+
 METRIC_MAPPING = {
     metric_func.name: metric_func
     for metric_func in [
@@ -20,7 +24,6 @@
         # "rouge_score": RougeScore(),
     ]
 }
-
 AVAILABLE_METRICS = list(METRIC_MAPPING.keys())

# Kubeflow ConfigMap keys and defaults for base image resolution
4 changes: 3 additions & 1 deletion src/llama_stack_provider_ragas/inline/provider.py
@@ -1,10 +1,12 @@
 from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec

+from ..constants import PROVIDER_TYPE
+

 def get_provider_spec() -> ProviderSpec:
     return InlineProviderSpec(
         api=Api.eval,
-        provider_type="inline::trustyai_ragas",
+        provider_type=f"inline::{PROVIDER_TYPE}",
         pip_packages=["ragas==0.3.0"],
         config_class="llama_stack_provider_ragas.config.RagasProviderInlineConfig",
         module="llama_stack_provider_ragas.inline",
40 changes: 36 additions & 4 deletions src/llama_stack_provider_ragas/provider.py
@@ -1,5 +1,37 @@
-# remote is the default provider
-from .remote import get_adapter_impl
-from .remote.provider import get_provider_spec
+import logging

-__all__ = ["get_provider_spec", "get_adapter_impl"]
+from .inline.provider import get_provider_spec as get_inline_provider_spec
+
+logger = logging.getLogger(__name__)
+
+
+def _has_remote_dependencies() -> bool:
+    """Check if remote dependencies are available."""
+    try:
+        import kfp  # noqa: F401
+        import kubernetes  # noqa: F401
+        import s3fs  # noqa: F401
+        from kfp import kubernetes as kfp_kubernetes  # noqa: F401
+
+        return True
+    except ImportError:
+        return False
+
+
+def get_provider_spec():
+    providers = [get_inline_provider_spec()]
+
+    if _has_remote_dependencies():
+        from .remote.provider import get_provider_spec as get_remote_provider_spec
+
+        providers.append(get_remote_provider_spec())
+    else:
+        logger.info(
+            "Remote provider dependencies not found, returning inline provider only. "
+            "Enable remote evaluation with 'pip install llama-stack-provider-ragas[remote]'."
+        )
+
+    return providers
+
+
+__all__ = ["get_provider_spec"]
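The try/except feature detection added above is a common way to gate functionality on optional extras. An alternative, sketched here with hypothetical module names, is `importlib.util.find_spec`, which checks importability without actually executing the modules' import-time code:

```python
import importlib.util


def has_extras(*modules: str) -> bool:
    """Return True only if every named optional dependency is importable.

    Same idea as _has_remote_dependencies in the diff above, but using
    find_spec so the modules are only located, not imported.
    """
    return all(importlib.util.find_spec(m) is not None for m in modules)


# e.g. gate a remote provider on its optional dependencies:
if has_extras("kfp", "kubernetes", "s3fs"):
    print("remote extras installed")
else:
    print("inline provider only")
```

The trade-off: `find_spec` avoids import side effects during the check, while the PR's try/except actually imports the packages and therefore also catches installs that are present but broken.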
6 changes: 4 additions & 2 deletions src/llama_stack_provider_ragas/remote/provider.py
@@ -4,12 +4,14 @@
     RemoteProviderSpec,
 )

+from ..constants import PROVIDER_TYPE
+

 def get_provider_spec() -> ProviderSpec:
     return RemoteProviderSpec(
         api=Api.eval,
-        provider_type="remote::trustyai_ragas",
-        adapter_type="trustyai_ragas",
+        provider_type=f"remote::{PROVIDER_TYPE}",
+        adapter_type=PROVIDER_TYPE,
         module="llama_stack_provider_ragas.remote",
         pip_packages=[
             "ragas==0.3.0",
4 changes: 3 additions & 1 deletion tests/test_inline_evaluation.py
@@ -3,6 +3,8 @@
 import pytest
 from ragas.metrics import answer_relevancy

+from llama_stack_provider_ragas.constants import PROVIDER_ID_INLINE
+
 # mark as integration, see tool.pytest.ini_options in pyproject.toml
 pytestmark = pytest.mark.integration_test

@@ -34,7 +36,7 @@ def test_single_metric_evaluation(
         benchmark_id=benchmark_id,
         dataset_id=dataset_id,
         scoring_functions=[metric_to_test.name],
-        provider_id="trustyai_ragas",
+        provider_id=PROVIDER_ID_INLINE,
     )

     job = lls_client.eval.run_eval(
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default.