From a1cb76902860cf4047927fd56dbec2303ee1ed93 Mon Sep 17 00:00:00 2001 From: Diego Maniloff Date: Fri, 10 Oct 2025 11:59:44 -0400 Subject: [PATCH 01/22] upgrade to llama-stack 0.3.0. - Changed llama-stack and llama-stack-client dependencies to point to GitHub repositories. - Unified demo notebooks into one, and run.yaml into one as well. - Refactored provider specifications to return a list. - Updated calls to embeddings and completions. --- .vscode/launch.json | 15 +- demos/{remote_demo.ipynb => basic_demo.ipynb} | 614 ++++++------ demos/inline_demo.ipynb | 880 ------------------ distribution/run-inline.yaml | 61 -- distribution/{run-remote.yaml => run.yaml} | 11 +- pyproject.toml | 7 +- .../inline/provider.py | 2 +- .../inline/wrappers_inline.py | 77 +- src/llama_stack_provider_ragas/provider.py | 15 +- .../remote/provider.py | 4 +- tests/test_inline_evaluation.py | 2 +- uv.lock | 106 +-- 12 files changed, 363 insertions(+), 1431 deletions(-) rename demos/{remote_demo.ipynb => basic_demo.ipynb} (60%) delete mode 100644 demos/inline_demo.ipynb delete mode 100644 distribution/run-inline.yaml rename distribution/{run-remote.yaml => run.yaml} (86%) diff --git a/.vscode/launch.json b/.vscode/launch.json index 99013fd3..018ef524 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -4,23 +4,12 @@ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 "version": "0.2.0", "configurations": [ - { - "name": "Debug Ragas Distribution -- Remote", + "name": "Debug Ragas Distribution", "type": "debugpy", "request": "launch", "module": "llama_stack.cli.llama", - "args": ["stack", "run", "distribution/run-remote.yaml"], - "cwd": "${workspaceFolder}", - "envFile": "${workspaceFolder}/.env", - "justMyCode": false - }, - { - "name": "Debug Ragas Distribution -- Inline", - "type": "debugpy", - "request": "launch", - "module": "llama_stack.cli.llama", - "args": ["stack", "run", "distribution/run-inline.yaml"], + "args": ["stack", "run", 
"distribution/run.yaml"], "cwd": "${workspaceFolder}", "envFile": "${workspaceFolder}/.env", "justMyCode": false diff --git a/demos/remote_demo.ipynb b/demos/basic_demo.ipynb similarity index 60% rename from demos/remote_demo.ipynb rename to demos/basic_demo.ipynb index b88d67cf..4192b65d 100644 --- a/demos/remote_demo.ipynb +++ b/demos/basic_demo.ipynb @@ -18,7 +18,7 @@ "Nuke any old distro config files you might have lying around (I find these get in the way whenever I change my `.env` variables): \n", "```bash\n", "ls ~/.llama/distributions/\n", - "rm -r ~/.llama/distributions/trustyai_ragas_distro\n", + "rm -r ~/.llama/distributions/\n", "```\n", "\n", "Then, run your llama stack server with:\n", @@ -71,14 +71,17 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: GET https://3e112b480059.ngrok-free.app/v1/models \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: GET https://f19083a01dc3.ngrok-free.app/v1/models \"HTTP/1.1 200 OK\"\n" ] } ], "source": [ - "# You will need ngrok to enable remote access to your Llama Stack server\n", + "# If using the remote provider, you will need ngrok to enable remote access to your Llama Stack server\n", + "# Otherwise, the base_url is just http://localhost:8321\n", "client = LlamaStackClient(base_url=os.getenv(\"KUBEFLOW_LLAMA_STACK_URL\"))\n", - "assert client.models.list()" + "available_models = client.models.list()\n", + "assert any(model.model_type == \"llm\" for model in available_models)\n", + "assert any(model.model_type == \"embedding\" for model in available_models)\n" ] }, { @@ -143,20 +146,42 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: POST https://3e112b480059.ngrok-free.app/v1/datasets \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: DELETE https://f19083a01dc3.ngrok-free.app/v1beta/datasets/ragas_demo_dataset \"HTTP/1.1 404 Not Found\"\n" + ] + } + ], + "source": [ + "# De-register the dataset if it already exists\n", + "dataset_id = 
\"ragas_demo_dataset\"\n", + "try:\n", + " client.datasets.unregister(dataset_id)\n", + "except Exception:\n", + " pass\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:httpx:HTTP Request: POST https://f19083a01dc3.ngrok-free.app/v1beta/datasets \"HTTP/1.1 200 OK\"\n" ] }, { "data": { "text/html": [ "
DatasetRegisterResponse(\n",
-       "identifier='ragas_demo_dataset_remote',\n",
+       "identifier='ragas_demo_dataset',\n",
        "metadata={\n",
        "│   │   'provider_id': 'localfs',\n",
        "│   │   'description': 'Sample RAG evaluation dataset for Ragas demo',\n",
        "│   │   'size': 3.0,\n",
        "│   │   'format': 'ragas',\n",
-       "│   │   'created_at': '2025-09-25T19:26:49.309288'\n",
+       "│   │   'created_at': '2025-10-10T11:55:31.946291'\n",
        "},\n",
        "provider_id='localfs',\n",
        "purpose='eval/question-answer',\n",
@@ -188,20 +213,20 @@
        "│   │   type='rows'\n",
        "),\n",
        "type='dataset',\n",
-       "provider_resource_id='ragas_demo_dataset_remote',\n",
+       "provider_resource_id='ragas_demo_dataset',\n",
        "owner=None\n",
        ")\n",
        "
\n" ], "text/plain": [ "\u001b[1;35mDatasetRegisterResponse\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'ragas_demo_dataset_remote'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'ragas_demo_dataset'\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[32m'provider_id'\u001b[0m: \u001b[32m'localfs'\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[32m'description'\u001b[0m: \u001b[32m'Sample RAG evaluation dataset for Ragas demo'\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[32m'size'\u001b[0m: \u001b[1;36m3.0\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[32m'format'\u001b[0m: \u001b[32m'ragas'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[32m'created_at'\u001b[0m: \u001b[32m'2025-09-25T19:26:49.309288'\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'created_at'\u001b[0m: \u001b[32m'2025-10-10T11:55:31.946291'\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'localfs'\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mpurpose\u001b[0m=\u001b[32m'eval/question-answer'\u001b[0m,\n", @@ -233,7 +258,7 @@ "\u001b[2;32m│ │ \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'rows'\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'dataset'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'ragas_demo_dataset_remote'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'ragas_demo_dataset'\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mowner\u001b[0m=\u001b[3;35mNone\u001b[0m\n", "\u001b[1m)\u001b[0m\n" ] @@ -243,9 +268,6 @@ } ], "source": [ - "# Register the dataset\n", - "dataset_id = \"ragas_demo_dataset_remote\"\n", - "\n", "dataset_response = client.datasets.register(\n", " 
dataset_id=dataset_id,\n", " purpose=\"eval/question-answer\", # RAG evaluation purpose\n", @@ -272,14 +294,15 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: POST https://3e112b480059.ngrok-free.app/v1/eval/benchmarks \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: POST https://f19083a01dc3.ngrok-free.app/v1alpha/eval/benchmarks \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://f19083a01dc3.ngrok-free.app/v1alpha/eval/benchmarks \"HTTP/1.1 200 OK\"\n" ] }, { @@ -297,40 +320,45 @@ } ], "source": [ - "benchmark_id = \"ragas_demo_benchmark_remote\"\n", + "# comment out the provider you don't want to run\n", + "benchmarks_providers = [\n", + " (\"ragas_demo_benchmark__inline\", \"trustyai_ragas_inline\"),\n", + " (\"ragas_demo_benchmark__remote\", \"trustyai_ragas_remote\"),\n", + "]\n", "\n", - "benchmark_response = client.benchmarks.register(\n", - " benchmark_id=benchmark_id,\n", - " dataset_id=dataset_id,\n", - " scoring_functions=[\n", - " \"answer_relevancy\", # How relevant is the answer to the question?\n", - " # \"context_precision\", # How precise are the retrieved contexts?\n", - " # \"faithfulness\", # How faithful is the answer to the contexts?\n", - " # \"context_recall\", # How much of the ground truth is covered by contexts?\n", - " # \"answer_correctness\" # How correct is the answer compared to ground truth?\n", - " ],\n", - " provider_id=\"trustyai_ragas\",\n", - " # metadata={\n", - " # \"provider\": \"ragas\",\n", - " # \"version\": \"1.0\",\n", - " # \"metrics_count\": len(ragas_metrics),\n", - " # \"created_at\": datetime.now().isoformat()\n", - " # }\n", - ")\n", + "for benchmark_id, provider_id in benchmarks_providers:\n", + " benchmark_response = client.benchmarks.register(\n", + " benchmark_id=benchmark_id,\n", + " dataset_id=dataset_id,\n", + " scoring_functions=[\n", + " 
\"answer_relevancy\", # How relevant is the answer to the question?\n", + " # \"context_precision\", # How precise are the retrieved contexts?\n", + " # \"faithfulness\", # How faithful is the answer to the contexts?\n", + " # \"context_recall\", # How much of the ground truth is covered by contexts?\n", + " # \"answer_correctness\" # How correct is the answer compared to ground truth?\n", + " ],\n", + " provider_id=provider_id,\n", + " # metadata={\n", + " # \"provider\": \"ragas\",\n", + " # \"version\": \"1.0\",\n", + " # \"metrics_count\": len(ragas_metrics),\n", + " # \"created_at\": datetime.now().isoformat()\n", + " # }\n", + " )\n", "\n", "pprint(benchmark_response)" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: GET https://3e112b480059.ngrok-free.app/v1/eval/benchmarks \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: GET https://f19083a01dc3.ngrok-free.app/v1alpha/eval/benchmarks \"HTTP/1.1 200 OK\"\n" ] }, { @@ -338,13 +366,22 @@ "text/html": [ "
[\n",
        "Benchmark(\n",
-       "│   │   dataset_id='ragas_demo_dataset_remote',\n",
-       "│   │   identifier='ragas_demo_benchmark_remote',\n",
+       "│   │   dataset_id='ragas_demo_dataset',\n",
+       "│   │   identifier='ragas_demo_benchmark__inline',\n",
+       "│   │   metadata={},\n",
+       "│   │   provider_id='trustyai_ragas_inline',\n",
+       "│   │   scoring_functions=['answer_relevancy'],\n",
+       "│   │   type='benchmark',\n",
+       "│   │   provider_resource_id='ragas_demo_benchmark__inline'\n",
+       "),\n",
+       "Benchmark(\n",
+       "│   │   dataset_id='ragas_demo_dataset',\n",
+       "│   │   identifier='ragas_demo_benchmark__remote',\n",
        "│   │   metadata={},\n",
-       "│   │   provider_id='trustyai_ragas',\n",
+       "│   │   provider_id='trustyai_ragas_remote',\n",
        "│   │   scoring_functions=['answer_relevancy'],\n",
        "│   │   type='benchmark',\n",
-       "│   │   provider_resource_id='ragas_demo_benchmark_remote'\n",
+       "│   │   provider_resource_id='ragas_demo_benchmark__remote'\n",
        ")\n",
        "]\n",
        "
\n" @@ -352,13 +389,22 @@ "text/plain": [ "\u001b[1m[\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[1;35mBenchmark\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mdataset_id\u001b[0m=\u001b[32m'ragas_demo_dataset_remote'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'ragas_demo_benchmark_remote'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mdataset_id\u001b[0m=\u001b[32m'ragas_demo_dataset'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'ragas_demo_benchmark__inline'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'trustyai_ragas_inline'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mscoring_functions\u001b[0m=\u001b[1m[\u001b[0m\u001b[32m'answer_relevancy'\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'benchmark'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'ragas_demo_benchmark__inline'\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[1;35mBenchmark\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mdataset_id\u001b[0m=\u001b[32m'ragas_demo_dataset'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'ragas_demo_benchmark__remote'\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'trustyai_ragas'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'trustyai_ragas_remote'\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[33mscoring_functions\u001b[0m=\u001b[1m[\u001b[0m\u001b[32m'answer_relevancy'\u001b[0m\u001b[1m]\u001b[0m,\n", "\u001b[2;32m│ │ 
\u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'benchmark'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'ragas_demo_benchmark_remote'\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'ragas_demo_benchmark__remote'\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m\n", "\u001b[1m]\u001b[0m\n" ] @@ -369,7 +415,7 @@ ], "source": [ "benchmarks = client.benchmarks.list()\n", - "pprint(benchmarks[-1:])" + "pprint(benchmarks)" ] }, { @@ -383,21 +429,21 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: POST https://3e112b480059.ngrok-free.app/v1/eval/benchmarks/ragas_demo_benchmark_remote/jobs \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: POST https://f19083a01dc3.ngrok-free.app/v1alpha/eval/benchmarks/ragas_demo_benchmark__remote/jobs \"HTTP/1.1 200 OK\"\n" ] }, { "data": { "text/html": [ "
Job(\n",
-       "job_id='afc31c92-5887-4b84-933c-009e8ae0594e',\n",
+       "job_id='3345bb0f-23e7-46f8-81ca-3f9469de3928',\n",
        "status='in_progress',\n",
        "runtime_config={\n",
        "│   │   'benchmark_config': {\n",
@@ -415,13 +461,13 @@
        "│   │   │   'scoring_params': {},\n",
        "│   │   │   'num_examples': None\n",
        "│   │   },\n",
-       "│   │   'embedding_model': 'all-MiniLM-L6-v2',\n",
+       "│   │   'embedding_model': 'ollama/all-minilm:l6-v2',\n",
        "│   │   'benchmark': {\n",
-       "│   │   │   'identifier': 'ragas_demo_benchmark_remote',\n",
-       "│   │   │   'provider_resource_id': 'ragas_demo_benchmark_remote',\n",
-       "│   │   │   'provider_id': 'trustyai_ragas',\n",
+       "│   │   │   'identifier': 'ragas_demo_benchmark__remote',\n",
+       "│   │   │   'provider_resource_id': 'ragas_demo_benchmark__remote',\n",
+       "│   │   │   'provider_id': 'trustyai_ragas_remote',\n",
        "│   │   │   'type': 'benchmark',\n",
-       "│   │   │   'dataset_id': 'ragas_demo_dataset_remote',\n",
+       "│   │   │   'dataset_id': 'ragas_demo_dataset',\n",
        "│   │   │   'scoring_functions': ['answer_relevancy'],\n",
        "│   │   │   'metadata': {}\n",
        "│   │   },\n",
@@ -437,18 +483,18 @@
        "│   │   │   's3_credentials_secret_name': 'aws-credentials',\n",
        "│   │   │   'pipelines_endpoint': 'https://ds-pipeline-dspa-ragas-eval-v3.apps.rosa.diego-ragas.zch5.p3.openshiftapps.com',\n",
        "│   │   │   'namespace': 'ragas-eval-v3',\n",
-       "│   │   │   'llama_stack_url': 'https://3e112b480059.ngrok-free.app',\n",
-       "│   │   │   'base_image': 'quay.io/diegosquayorg/my-ragas-provider-image:latest'\n",
+       "│   │   │   'llama_stack_url': 'https://f19083a01dc3.ngrok-free.app',\n",
+       "│   │   │   'base_image': None\n",
        "│   │   }\n",
        "},\n",
-       "kubeflow_run_id='738a0fcf-8780-4a9d-b661-0874de18b96c',\n",
+       "kubeflow_run_id='d1d10645-c339-4276-962c-bb006fee15e7',\n",
        "result=None\n",
        ")\n",
        "
\n" ], "text/plain": [ "\u001b[1;35mJob\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mjob_id\u001b[0m=\u001b[32m'afc31c92-5887-4b84-933c-009e8ae0594e'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mjob_id\u001b[0m=\u001b[32m'3345bb0f-23e7-46f8-81ca-3f9469de3928'\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mstatus\u001b[0m=\u001b[32m'in_progress'\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mruntime_config\u001b[0m=\u001b[1m{\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[32m'benchmark_config'\u001b[0m: \u001b[1m{\u001b[0m\n", @@ -466,13 +512,13 @@ "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'scoring_params'\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'num_examples'\u001b[0m: \u001b[3;35mNone\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[32m'embedding_model'\u001b[0m: \u001b[32m'all-MiniLM-L6-v2'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'embedding_model'\u001b[0m: \u001b[32m'ollama/all-minilm:l6-v2'\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[32m'benchmark'\u001b[0m: \u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'identifier'\u001b[0m: \u001b[32m'ragas_demo_benchmark_remote'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'provider_resource_id'\u001b[0m: \u001b[32m'ragas_demo_benchmark_remote'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'provider_id'\u001b[0m: \u001b[32m'trustyai_ragas'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'identifier'\u001b[0m: \u001b[32m'ragas_demo_benchmark__remote'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'provider_resource_id'\u001b[0m: \u001b[32m'ragas_demo_benchmark__remote'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'provider_id'\u001b[0m: \u001b[32m'trustyai_ragas_remote'\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'benchmark'\u001b[0m,\n", - "\u001b[2;32m│ │ │ 
\u001b[0m\u001b[32m'dataset_id'\u001b[0m: \u001b[32m'ragas_demo_dataset_remote'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'dataset_id'\u001b[0m: \u001b[32m'ragas_demo_dataset'\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'scoring_functions'\u001b[0m: \u001b[1m[\u001b[0m\u001b[32m'answer_relevancy'\u001b[0m\u001b[1m]\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'metadata'\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", @@ -488,11 +534,11 @@ "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m's3_credentials_secret_name'\u001b[0m: \u001b[32m'aws-credentials'\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'pipelines_endpoint'\u001b[0m: \u001b[32m'https://ds-pipeline-dspa-ragas-eval-v3.apps.rosa.diego-ragas.zch5.p3.openshiftapps.com'\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'namespace'\u001b[0m: \u001b[32m'ragas-eval-v3'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'llama_stack_url'\u001b[0m: \u001b[32m'https://3e112b480059.ngrok-free.app'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'base_image'\u001b[0m: \u001b[32m'quay.io/diegosquayorg/my-ragas-provider-image:latest'\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'llama_stack_url'\u001b[0m: \u001b[32m'https://f19083a01dc3.ngrok-free.app'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'base_image'\u001b[0m: \u001b[3;35mNone\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mkubeflow_run_id\u001b[0m=\u001b[32m'738a0fcf-8780-4a9d-b661-0874de18b96c'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mkubeflow_run_id\u001b[0m=\u001b[32m'd1d10645-c339-4276-962c-bb006fee15e7'\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mresult\u001b[0m=\u001b[3;35mNone\u001b[0m\n", "\u001b[1m)\u001b[0m\n" ] @@ -506,8 +552,82 @@ "# since we can't set the embedding model in the benchmark config,\n", "# the embedding model is set in 
the distribution run.yaml file(all-MiniLM-L6-v2)\n", "\n", - "job = client.eval.run_eval(\n", - " benchmark_id=benchmark_id,\n", + "remote_job = client.alpha.eval.run_eval(\n", + " benchmark_id=\"ragas_demo_benchmark__remote\",\n", + " benchmark_config={\n", + " \"eval_candidate\": {\n", + " \"type\": \"model\",\n", + " \"model\": \"ollama/granite3.3:2b\",\n", + " \"sampling_params\": {\"temperature\": 0.1, \"max_tokens\": 100},\n", + " },\n", + " \"scoring_params\": {},\n", + " # \"num_examples\": 1,\n", + " },\n", + ")\n", + "pprint(remote_job)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:httpx:HTTP Request: POST https://f19083a01dc3.ngrok-free.app/v1alpha/eval/benchmarks/ragas_demo_benchmark__inline/jobs \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "data": { + "text/html": [ + "
Job(\n",
+       "job_id='0',\n",
+       "status='in_progress',\n",
+       "result=None,\n",
+       "eval_config={\n",
+       "│   │   'embedding_model': 'ollama/all-minilm:l6-v2',\n",
+       "│   │   'ragas_config': {\n",
+       "│   │   │   'batch_size': None,\n",
+       "│   │   │   'show_progress': True,\n",
+       "│   │   │   'raise_exceptions': True,\n",
+       "│   │   │   'experiment_name': None,\n",
+       "│   │   │   'column_map': None\n",
+       "│   │   }\n",
+       "}\n",
+       ")\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mJob\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mjob_id\u001b[0m=\u001b[32m'0'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mstatus\u001b[0m=\u001b[32m'in_progress'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mresult\u001b[0m=\u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33meval_config\u001b[0m=\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'embedding_model'\u001b[0m: \u001b[32m'ollama/all-minilm:l6-v2'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'ragas_config'\u001b[0m: \u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'batch_size'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'show_progress'\u001b[0m: \u001b[3;92mTrue\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'raise_exceptions'\u001b[0m: \u001b[3;92mTrue\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'experiment_name'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'column_map'\u001b[0m: \u001b[3;35mNone\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[1m)\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Review settings in distribution/run.yaml, e.g., note that\n", + "# since we can't set the embedding model in the benchmark config,\n", + "# the embedding model is set in the distribution run.yaml file(all-MiniLM-L6-v2)\n", + "\n", + "inline_job = client.alpha.eval.run_eval(\n", + " benchmark_id=\"ragas_demo_benchmark__inline\",\n", + " benchmark_config={\n", + " \"eval_candidate\": {\n", + " \"type\": \"model\",\n", @@ -518,7 +638,7 @@ " # \"num_examples\": 1,\n", " },\n", ")\n", - "pprint(job)" + "pprint(inline_job)" ] }, { @@ -530,117 +650,51 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": 
"stream", "text": [ - "INFO:httpx:HTTP Request: GET https://3e112b480059.ngrok-free.app/v1/eval/benchmarks/ragas_demo_benchmark_remote/jobs/afc31c92-5887-4b84-933c-009e8ae0594e \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: GET https://f19083a01dc3.ngrok-free.app/v1alpha/eval/benchmarks/ragas_demo_benchmark__inline/jobs/0 \"HTTP/1.1 200 OK\"\n" ] }, { "data": { "text/html": [ "
Job(\n",
-       "job_id='afc31c92-5887-4b84-933c-009e8ae0594e',\n",
+       "job_id='0',\n",
        "status='in_progress',\n",
-       "runtime_config={\n",
-       "│   │   'benchmark_config': {\n",
-       "│   │   │   'eval_candidate': {\n",
-       "│   │   │   │   'type': 'model',\n",
-       "│   │   │   │   'model': 'ollama/granite3.3:2b',\n",
-       "│   │   │   │   'sampling_params': {\n",
-       "│   │   │   │   │   'strategy': {'type': 'greedy'},\n",
-       "│   │   │   │   │   'max_tokens': 100,\n",
-       "│   │   │   │   │   'repetition_penalty': 1.0,\n",
-       "│   │   │   │   │   'stop': None\n",
-       "│   │   │   │   },\n",
-       "│   │   │   │   'system_message': None\n",
-       "│   │   │   },\n",
-       "│   │   │   'scoring_params': {},\n",
-       "│   │   │   'num_examples': None\n",
-       "│   │   },\n",
-       "│   │   'embedding_model': 'all-MiniLM-L6-v2',\n",
-       "│   │   'benchmark': {\n",
-       "│   │   │   'identifier': 'ragas_demo_benchmark_remote',\n",
-       "│   │   │   'provider_resource_id': 'ragas_demo_benchmark_remote',\n",
-       "│   │   │   'provider_id': 'trustyai_ragas',\n",
-       "│   │   │   'type': 'benchmark',\n",
-       "│   │   │   'dataset_id': 'ragas_demo_dataset_remote',\n",
-       "│   │   │   'scoring_functions': ['answer_relevancy'],\n",
-       "│   │   │   'metadata': {}\n",
-       "│   │   },\n",
+       "result=None,\n",
+       "eval_config={\n",
+       "│   │   'embedding_model': 'ollama/all-minilm:l6-v2',\n",
        "│   │   'ragas_config': {\n",
        "│   │   │   'batch_size': None,\n",
        "│   │   │   'show_progress': True,\n",
        "│   │   │   'raise_exceptions': True,\n",
        "│   │   │   'experiment_name': None,\n",
        "│   │   │   'column_map': None\n",
-       "│   │   },\n",
-       "│   │   'kubeflow_config': {\n",
-       "│   │   │   'results_s3_prefix': 's3://public-rhods/ragas-evaluation-pipeline/test-two',\n",
-       "│   │   │   's3_credentials_secret_name': 'aws-credentials',\n",
-       "│   │   │   'pipelines_endpoint': 'https://ds-pipeline-dspa-ragas-eval-v3.apps.rosa.diego-ragas.zch5.p3.openshiftapps.com',\n",
-       "│   │   │   'namespace': 'ragas-eval-v3',\n",
-       "│   │   │   'llama_stack_url': 'https://3e112b480059.ngrok-free.app',\n",
-       "│   │   │   'base_image': 'quay.io/diegosquayorg/my-ragas-provider-image:latest'\n",
        "│   │   }\n",
-       "},\n",
-       "kubeflow_run_id='738a0fcf-8780-4a9d-b661-0874de18b96c',\n",
-       "result=None\n",
+       "}\n",
        ")\n",
        "
\n" ], "text/plain": [ "\u001b[1;35mJob\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mjob_id\u001b[0m=\u001b[32m'afc31c92-5887-4b84-933c-009e8ae0594e'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mjob_id\u001b[0m=\u001b[32m'0'\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mstatus\u001b[0m=\u001b[32m'in_progress'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mruntime_config\u001b[0m=\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[32m'benchmark_config'\u001b[0m: \u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'eval_candidate'\u001b[0m: \u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'model'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'model'\u001b[0m: \u001b[32m'ollama/granite3.3:2b'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'sampling_params'\u001b[0m: \u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'strategy'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'greedy'\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'max_tokens'\u001b[0m: \u001b[1;36m100\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'repetition_penalty'\u001b[0m: \u001b[1;36m1.0\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'stop'\u001b[0m: \u001b[3;35mNone\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'system_message'\u001b[0m: \u001b[3;35mNone\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'scoring_params'\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'num_examples'\u001b[0m: \u001b[3;35mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[32m'embedding_model'\u001b[0m: \u001b[32m'all-MiniLM-L6-v2'\u001b[0m,\n", - "\u001b[2;32m│ │ 
\u001b[0m\u001b[32m'benchmark'\u001b[0m: \u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'identifier'\u001b[0m: \u001b[32m'ragas_demo_benchmark_remote'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'provider_resource_id'\u001b[0m: \u001b[32m'ragas_demo_benchmark_remote'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'provider_id'\u001b[0m: \u001b[32m'trustyai_ragas'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'benchmark'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'dataset_id'\u001b[0m: \u001b[32m'ragas_demo_dataset_remote'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'scoring_functions'\u001b[0m: \u001b[1m[\u001b[0m\u001b[32m'answer_relevancy'\u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'metadata'\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mresult\u001b[0m=\u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33meval_config\u001b[0m=\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'embedding_model'\u001b[0m: \u001b[32m'ollama/all-minilm:l6-v2'\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[32m'ragas_config'\u001b[0m: \u001b[1m{\u001b[0m\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'batch_size'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'show_progress'\u001b[0m: \u001b[3;92mTrue\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'raise_exceptions'\u001b[0m: \u001b[3;92mTrue\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'experiment_name'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'column_map'\u001b[0m: \u001b[3;35mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[32m'kubeflow_config'\u001b[0m: \u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'results_s3_prefix'\u001b[0m: 
\u001b[32m's3://public-rhods/ragas-evaluation-pipeline/test-two'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m's3_credentials_secret_name'\u001b[0m: \u001b[32m'aws-credentials'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'pipelines_endpoint'\u001b[0m: \u001b[32m'https://ds-pipeline-dspa-ragas-eval-v3.apps.rosa.diego-ragas.zch5.p3.openshiftapps.com'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'namespace'\u001b[0m: \u001b[32m'ragas-eval-v3'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'llama_stack_url'\u001b[0m: \u001b[32m'https://3e112b480059.ngrok-free.app'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'base_image'\u001b[0m: \u001b[32m'quay.io/diegosquayorg/my-ragas-provider-image:latest'\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mkubeflow_run_id\u001b[0m=\u001b[32m'738a0fcf-8780-4a9d-b661-0874de18b96c'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mresult\u001b[0m=\u001b[3;35mNone\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m\n", "\u001b[1m)\u001b[0m\n" ] }, @@ -649,8 +703,12 @@ } ], "source": [ - "job = client.eval.jobs.status(benchmark_id=benchmark_id, job_id=job.job_id)\n", - "pprint(job)" + "# wait a bit for the job to complete\n", + "pprint(\n", + " client.alpha.eval.jobs.status(\n", + " benchmark_id=\"ragas_demo_benchmark__inline\", job_id=inline_job.job_id\n", + " )\n", + ")" ] }, { @@ -662,15 +720,15 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: GET https://3e112b480059.ngrok-free.app/v1/eval/benchmarks/ragas_demo_benchmark_remote/jobs/afc31c92-5887-4b84-933c-009e8ae0594e \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: GET https://f19083a01dc3.ngrok-free.app/v1alpha/eval/benchmarks/ragas_demo_benchmark__remote/jobs/3345bb0f-23e7-46f8-81ca-3f9469de3928 \"HTTP/1.1 200 OK\"\n" ] }, { "data": { "text/html": [ "
Job(\n",
-       "job_id='afc31c92-5887-4b84-933c-009e8ae0594e',\n",
-       "status='completed',\n",
+       "job_id='3345bb0f-23e7-46f8-81ca-3f9469de3928',\n",
+       "status='in_progress',\n",
        "runtime_config={\n",
        "│   │   'benchmark_config': {\n",
        "│   │   │   'eval_candidate': {\n",
@@ -687,13 +745,13 @@
        "│   │   │   'scoring_params': {},\n",
        "│   │   │   'num_examples': None\n",
        "│   │   },\n",
-       "│   │   'embedding_model': 'all-MiniLM-L6-v2',\n",
+       "│   │   'embedding_model': 'ollama/all-minilm:l6-v2',\n",
        "│   │   'benchmark': {\n",
-       "│   │   │   'identifier': 'ragas_demo_benchmark_remote',\n",
-       "│   │   │   'provider_resource_id': 'ragas_demo_benchmark_remote',\n",
-       "│   │   │   'provider_id': 'trustyai_ragas',\n",
+       "│   │   │   'identifier': 'ragas_demo_benchmark__remote',\n",
+       "│   │   │   'provider_resource_id': 'ragas_demo_benchmark__remote',\n",
+       "│   │   │   'provider_id': 'trustyai_ragas_remote',\n",
        "│   │   │   'type': 'benchmark',\n",
-       "│   │   │   'dataset_id': 'ragas_demo_dataset_remote',\n",
+       "│   │   │   'dataset_id': 'ragas_demo_dataset',\n",
        "│   │   │   'scoring_functions': ['answer_relevancy'],\n",
        "│   │   │   'metadata': {}\n",
        "│   │   },\n",
@@ -709,59 +767,19 @@
        "│   │   │   's3_credentials_secret_name': 'aws-credentials',\n",
        "│   │   │   'pipelines_endpoint': 'https://ds-pipeline-dspa-ragas-eval-v3.apps.rosa.diego-ragas.zch5.p3.openshiftapps.com',\n",
        "│   │   │   'namespace': 'ragas-eval-v3',\n",
-       "│   │   │   'llama_stack_url': 'https://3e112b480059.ngrok-free.app',\n",
-       "│   │   │   'base_image': 'quay.io/diegosquayorg/my-ragas-provider-image:latest'\n",
+       "│   │   │   'llama_stack_url': 'https://f19083a01dc3.ngrok-free.app',\n",
+       "│   │   │   'base_image': None\n",
        "│   │   }\n",
        "},\n",
-       "kubeflow_run_id='738a0fcf-8780-4a9d-b661-0874de18b96c',\n",
-       "result={\n",
-       "│   │   'generations': [\n",
-       "│   │   │   {\n",
-       "│   │   │   │   'user_input': 'What is the capital of France?',\n",
-       "│   │   │   │   'response': 'The capital of France is Paris.',\n",
-       "│   │   │   │   'retrieved_contexts': ['Paris is the capital and most populous city of France.'],\n",
-       "│   │   │   │   'reference': 'Paris'\n",
-       "│   │   │   },\n",
-       "│   │   │   {\n",
-       "│   │   │   │   'user_input': 'Who invented the telephone?',\n",
-       "│   │   │   │   'response': 'Alexander Graham Bell invented the telephone in 1876.',\n",
-       "│   │   │   │   'retrieved_contexts': [\n",
-       "│   │   │   │   │   'Alexander Graham Bell was a Scottish-American inventor who patented the first practical telephone.'\n",
-       "│   │   │   │   ],\n",
-       "│   │   │   │   'reference': 'Alexander Graham Bell'\n",
-       "│   │   │   },\n",
-       "│   │   │   {\n",
-       "│   │   │   │   'user_input': 'What is photosynthesis?',\n",
-       "│   │   │   │   'response': 'Photosynthesis is the process by which plants convert sunlight into energy.',\n",
-       "│   │   │   │   'retrieved_contexts': [\n",
-       "│   │   │   │   │   'Photosynthesis is a process used by plants to convert light energy into chemical energy.'\n",
-       "│   │   │   │   ],\n",
-       "│   │   │   │   'reference': 'Photosynthesis is the process by which plants and other organisms convert light energy into chemical energy.'\n",
-       "│   │   │   }\n",
-       "│   │   ],\n",
-       "│   │   'scores': {\n",
-       "│   │   │   'answer_relevancy': {\n",
-       "│   │   │   │   'score_rows': [\n",
-       "│   │   │   │   │   {'score': 0.9567410688},\n",
-       "│   │   │   │   │   {'score': 0.9262221944000001},\n",
-       "│   │   │   │   │   {'score': 0.8774825363000001}\n",
-       "│   │   │   │   ],\n",
-       "│   │   │   │   'aggregated_results': {\n",
-       "│   │   │   │   │   'average': 0.9201485998333334,\n",
-       "│   │   │   │   │   'count': 3,\n",
-       "│   │   │   │   │   'min': 0.8774825363000001,\n",
-       "│   │   │   │   │   'max': 0.9567410688\n",
-       "│   │   │   │   }\n",
-       "│   │   │   }\n",
-       "│   │   }\n",
-       "}\n",
+       "kubeflow_run_id='d1d10645-c339-4276-962c-bb006fee15e7',\n",
+       "result=None\n",
        ")\n",
        "
\n" ], "text/plain": [ "\u001b[1;35mJob\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mjob_id\u001b[0m=\u001b[32m'afc31c92-5887-4b84-933c-009e8ae0594e'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mstatus\u001b[0m=\u001b[32m'completed'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mjob_id\u001b[0m=\u001b[32m'3345bb0f-23e7-46f8-81ca-3f9469de3928'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mstatus\u001b[0m=\u001b[32m'in_progress'\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mruntime_config\u001b[0m=\u001b[1m{\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[32m'benchmark_config'\u001b[0m: \u001b[1m{\u001b[0m\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'eval_candidate'\u001b[0m: \u001b[1m{\u001b[0m\n", @@ -778,13 +796,13 @@ "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'scoring_params'\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'num_examples'\u001b[0m: \u001b[3;35mNone\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[32m'embedding_model'\u001b[0m: \u001b[32m'all-MiniLM-L6-v2'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'embedding_model'\u001b[0m: \u001b[32m'ollama/all-minilm:l6-v2'\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[32m'benchmark'\u001b[0m: \u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'identifier'\u001b[0m: \u001b[32m'ragas_demo_benchmark_remote'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'provider_resource_id'\u001b[0m: \u001b[32m'ragas_demo_benchmark_remote'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'provider_id'\u001b[0m: \u001b[32m'trustyai_ragas'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'identifier'\u001b[0m: \u001b[32m'ragas_demo_benchmark__remote'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'provider_resource_id'\u001b[0m: \u001b[32m'ragas_demo_benchmark__remote'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'provider_id'\u001b[0m: 
\u001b[32m'trustyai_ragas_remote'\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'benchmark'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'dataset_id'\u001b[0m: \u001b[32m'ragas_demo_dataset_remote'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'dataset_id'\u001b[0m: \u001b[32m'ragas_demo_dataset'\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'scoring_functions'\u001b[0m: \u001b[1m[\u001b[0m\u001b[32m'answer_relevancy'\u001b[0m\u001b[1m]\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'metadata'\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", @@ -800,52 +818,12 @@ "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m's3_credentials_secret_name'\u001b[0m: \u001b[32m'aws-credentials'\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'pipelines_endpoint'\u001b[0m: \u001b[32m'https://ds-pipeline-dspa-ragas-eval-v3.apps.rosa.diego-ragas.zch5.p3.openshiftapps.com'\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'namespace'\u001b[0m: \u001b[32m'ragas-eval-v3'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'llama_stack_url'\u001b[0m: \u001b[32m'https://3e112b480059.ngrok-free.app'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'base_image'\u001b[0m: \u001b[32m'quay.io/diegosquayorg/my-ragas-provider-image:latest'\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'llama_stack_url'\u001b[0m: \u001b[32m'https://f19083a01dc3.ngrok-free.app'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'base_image'\u001b[0m: \u001b[3;35mNone\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mkubeflow_run_id\u001b[0m=\u001b[32m'738a0fcf-8780-4a9d-b661-0874de18b96c'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mresult\u001b[0m=\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[32m'generations'\u001b[0m: \u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ │ 
\u001b[0m\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: \u001b[32m'What is the capital of France?'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'The capital of France is Paris.'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\u001b[32m'Paris is the capital and most populous city of France.'\u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Paris'\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: \u001b[32m'Who invented the telephone?'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'Alexander Graham Bell invented the telephone in 1876.'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'Alexander Graham Bell was a Scottish-American inventor who patented the first practical telephone.'\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Alexander Graham Bell'\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: \u001b[32m'What is photosynthesis?'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'Photosynthesis is the process by which plants convert sunlight into energy.'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'Photosynthesis is a process used by plants to convert light energy into chemical energy.'\u001b[0m\n", - 
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Photosynthesis is the process by which plants and other organisms convert light energy into chemical energy.'\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m}\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[32m'scores'\u001b[0m: \u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'answer_relevancy'\u001b[0m: \u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'score_rows'\u001b[0m: \u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.9567410688\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.9262221944000001\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.8774825363000001\u001b[0m\u001b[1m}\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'aggregated_results'\u001b[0m: \u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'average'\u001b[0m: \u001b[1;36m0.9201485998333334\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'count'\u001b[0m: \u001b[1;36m3\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'min'\u001b[0m: \u001b[1;36m0.8774825363000001\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'max'\u001b[0m: \u001b[1;36m0.9567410688\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m}\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m}\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mkubeflow_run_id\u001b[0m=\u001b[32m'd1d10645-c339-4276-962c-bb006fee15e7'\u001b[0m,\n", + "\u001b[2;32m│ 
\u001b[0m\u001b[33mresult\u001b[0m=\u001b[3;35mNone\u001b[0m\n", "\u001b[1m)\u001b[0m\n" ] }, @@ -855,7 +833,11 @@ ], "source": [ "# wait a bit for the job to complete\n", - "pprint(client.eval.jobs.status(benchmark_id=benchmark_id, job_id=job.job_id))" + "pprint(\n", + " client.alpha.eval.jobs.status(\n", + " benchmark_id=\"ragas_demo_benchmark__remote\", job_id=remote_job.job_id\n", + " )\n", + ")" ] }, { @@ -867,89 +849,17 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: GET https://3e112b480059.ngrok-free.app/v1/eval/benchmarks/ragas_demo_benchmark_remote/jobs/afc31c92-5887-4b84-933c-009e8ae0594e/result \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: GET https://f19083a01dc3.ngrok-free.app/v1alpha/eval/benchmarks/ragas_demo_benchmark__remote/jobs/3345bb0f-23e7-46f8-81ca-3f9469de3928/result \"HTTP/1.1 200 OK\"\n" ] }, { "data": { "text/html": [ - "
EvaluateResponse(\n",
-       "generations=[\n",
-       "│   │   {\n",
-       "│   │   │   'user_input': 'What is the capital of France?',\n",
-       "│   │   │   'response': 'The capital of France is Paris.',\n",
-       "│   │   │   'retrieved_contexts': ['Paris is the capital and most populous city of France.'],\n",
-       "│   │   │   'reference': 'Paris'\n",
-       "│   │   },\n",
-       "│   │   {\n",
-       "│   │   │   'user_input': 'Who invented the telephone?',\n",
-       "│   │   │   'response': 'Alexander Graham Bell invented the telephone in 1876.',\n",
-       "│   │   │   'retrieved_contexts': [\n",
-       "│   │   │   │   'Alexander Graham Bell was a Scottish-American inventor who patented the first practical telephone.'\n",
-       "│   │   │   ],\n",
-       "│   │   │   'reference': 'Alexander Graham Bell'\n",
-       "│   │   },\n",
-       "│   │   {\n",
-       "│   │   │   'user_input': 'What is photosynthesis?',\n",
-       "│   │   │   'response': 'Photosynthesis is the process by which plants convert sunlight into energy.',\n",
-       "│   │   │   'retrieved_contexts': [\n",
-       "│   │   │   │   'Photosynthesis is a process used by plants to convert light energy into chemical energy.'\n",
-       "│   │   │   ],\n",
-       "│   │   │   'reference': 'Photosynthesis is the process by which plants and other organisms convert light energy into chemical energy.'\n",
-       "│   │   }\n",
-       "],\n",
-       "scores={\n",
-       "│   │   'answer_relevancy': ScoringResult(\n",
-       "│   │   │   aggregated_results={\n",
-       "│   │   │   │   'average': 0.9201485998333334,\n",
-       "│   │   │   │   'count': 3.0,\n",
-       "│   │   │   │   'min': 0.8774825363000001,\n",
-       "│   │   │   │   'max': 0.9567410688\n",
-       "│   │   │   },\n",
-       "│   │   │   score_rows=[{'score': 0.9567410688}, {'score': 0.9262221944000001}, {'score': 0.8774825363000001}]\n",
-       "│   │   )\n",
-       "}\n",
-       ")\n",
+       "
EvaluateResponse(generations=[], scores={})\n",
        "
\n" ], "text/plain": [ - "\u001b[1;35mEvaluateResponse\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mgenerations\u001b[0m=\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: \u001b[32m'What is the capital of France?'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'The capital of France is Paris.'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\u001b[32m'Paris is the capital and most populous city of France.'\u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Paris'\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: \u001b[32m'Who invented the telephone?'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'Alexander Graham Bell invented the telephone in 1876.'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'Alexander Graham Bell was a Scottish-American inventor who patented the first practical telephone.'\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Alexander Graham Bell'\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: \u001b[32m'What is photosynthesis?'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'Photosynthesis is the process by which plants convert sunlight into energy.'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\n", - 
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'Photosynthesis is a process used by plants to convert light energy into chemical energy.'\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Photosynthesis is the process by which plants and other organisms convert light energy into chemical energy.'\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mscores\u001b[0m=\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[32m'answer_relevancy'\u001b[0m: \u001b[1;35mScoringResult\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[33maggregated_results\u001b[0m=\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'average'\u001b[0m: \u001b[1;36m0.9201485998333334\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'count'\u001b[0m: \u001b[1;36m3.0\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'min'\u001b[0m: \u001b[1;36m0.8774825363000001\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'max'\u001b[0m: \u001b[1;36m0.9567410688\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mscore_rows\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.9567410688\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.9262221944000001\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.8774825363000001\u001b[0m\u001b[1m}\u001b[0m\u001b[1m]\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" + "\u001b[1;35mEvaluateResponse\u001b[0m\u001b[1m(\u001b[0m\u001b[33mgenerations\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, 
\u001b[33mscores\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\u001b[1m)\u001b[0m\n" ] }, "metadata": {}, @@ -957,9 +867,51 @@ } ], "source": [ - "results = client.eval.jobs.retrieve(benchmark_id=benchmark_id, job_id=job.job_id)\n", - "pprint(results)" + "remote_results = client.alpha.eval.jobs.retrieve(\n", + " benchmark_id=\"ragas_demo_benchmark__remote\", job_id=remote_job.job_id\n", + ")\n", + "pprint(remote_results)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:httpx:HTTP Request: GET https://f19083a01dc3.ngrok-free.app/v1alpha/eval/benchmarks/ragas_demo_benchmark__inline/jobs/0/result \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "data": { + "text/html": [ + "
EvaluateResponse(generations=[], scores={})\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mEvaluateResponse\u001b[0m\u001b[1m(\u001b[0m\u001b[33mgenerations\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[33mscores\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\u001b[1m)\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "inline_results = client.alpha.eval.jobs.retrieve(\n", + " benchmark_id=\"ragas_demo_benchmark__inline\", job_id=inline_job.job_id\n", + ")\n", + "pprint(inline_results)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -978,7 +930,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.11" + "version": "3.13.7" } }, "nbformat": 4, diff --git a/demos/inline_demo.ipynb b/demos/inline_demo.ipynb deleted file mode 100644 index e24ba331..00000000 --- a/demos/inline_demo.ipynb +++ /dev/null @@ -1,880 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Ragas Evaluation with Llama Stack - Demo [inline execution]\n", - "\n", - "This notebook demonstrates how to use the Ragas out-of-tree provider.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. Setup and Imports\n" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "# Install dev packages if not already installed\n", - "# !uv pip install -e \".[dev]\"\n", - "\n", - "from datetime import datetime\n", - "\n", - "from llama_stack_client import LlamaStackClient\n", - "from rich.pretty import pprint" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. 
Llama Stack Client Setup\n", - "\n", - "- Make sure we have an inference model (model_type='llm')\n", - "- Make sure we have an embedding model (model_type='embedding')\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:httpx:HTTP Request: GET http://localhost:8321/v1/models \"HTTP/1.1 200 OK\"\n" - ] - }, - { - "data": { - "text/html": [ - "
[\n",
-       "Model(\n",
-       "│   │   identifier='all-MiniLM-L6-v2',\n",
-       "│   │   metadata={'embedding_dimension': 384.0},\n",
-       "│   │   api_model_type='embedding',\n",
-       "│   │   provider_id='ollama',\n",
-       "│   │   type='model',\n",
-       "│   │   provider_resource_id='all-minilm:latest',\n",
-       "│   │   model_type='embedding'\n",
-       "),\n",
-       "Model(\n",
-       "│   │   identifier='ollama/granite3.3:2b',\n",
-       "│   │   metadata={},\n",
-       "│   │   api_model_type='llm',\n",
-       "│   │   provider_id='ollama',\n",
-       "│   │   type='model',\n",
-       "│   │   provider_resource_id='granite3.3:2b',\n",
-       "│   │   model_type='llm'\n",
-       "),\n",
-       "Model(\n",
-       "│   │   identifier='ollama/all-minilm:l6-v2',\n",
-       "│   │   metadata={'embedding_dimension': 384.0, 'context_length': 512.0},\n",
-       "│   │   api_model_type='embedding',\n",
-       "│   │   provider_id='ollama',\n",
-       "│   │   type='model',\n",
-       "│   │   provider_resource_id='all-minilm:l6-v2',\n",
-       "│   │   model_type='embedding'\n",
-       "),\n",
-       "Model(\n",
-       "│   │   identifier='all-minilm',\n",
-       "│   │   metadata={'embedding_dimension': 384.0, 'context_length': 512.0},\n",
-       "│   │   api_model_type='embedding',\n",
-       "│   │   provider_id='ollama',\n",
-       "│   │   type='model',\n",
-       "│   │   provider_resource_id='all-minilm:l6-v2',\n",
-       "│   │   model_type='embedding'\n",
-       "),\n",
-       "Model(\n",
-       "│   │   identifier='nomic-embed-text',\n",
-       "│   │   metadata={'embedding_dimension': 768.0, 'context_length': 8192.0},\n",
-       "│   │   api_model_type='embedding',\n",
-       "│   │   provider_id='ollama',\n",
-       "│   │   type='model',\n",
-       "│   │   provider_resource_id='nomic-embed-text:latest',\n",
-       "│   │   model_type='embedding'\n",
-       ")\n",
-       "]\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1;35mModel\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'all-MiniLM-L6-v2'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'embedding_dimension'\u001b[0m: \u001b[1;36m384.0\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mapi_model_type\u001b[0m=\u001b[32m'embedding'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'ollama'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'model'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'all-minilm:latest'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mmodel_type\u001b[0m=\u001b[32m'embedding'\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[1;35mModel\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'ollama/granite3.3:2b'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mapi_model_type\u001b[0m=\u001b[32m'llm'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'ollama'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'model'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'granite3.3:2b'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mmodel_type\u001b[0m=\u001b[32m'llm'\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[1;35mModel\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'ollama/all-minilm:l6-v2'\u001b[0m,\n", - "\u001b[2;32m│ │ 
\u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'embedding_dimension'\u001b[0m: \u001b[1;36m384.0\u001b[0m, \u001b[32m'context_length'\u001b[0m: \u001b[1;36m512.0\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mapi_model_type\u001b[0m=\u001b[32m'embedding'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'ollama'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'model'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'all-minilm:l6-v2'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mmodel_type\u001b[0m=\u001b[32m'embedding'\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[1;35mModel\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'all-minilm'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'embedding_dimension'\u001b[0m: \u001b[1;36m384.0\u001b[0m, \u001b[32m'context_length'\u001b[0m: \u001b[1;36m512.0\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mapi_model_type\u001b[0m=\u001b[32m'embedding'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'ollama'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'model'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'all-minilm:l6-v2'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mmodel_type\u001b[0m=\u001b[32m'embedding'\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[1;35mModel\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'nomic-embed-text'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'embedding_dimension'\u001b[0m: \u001b[1;36m768.0\u001b[0m, 
\u001b[32m'context_length'\u001b[0m: \u001b[1;36m8192.0\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mapi_model_type\u001b[0m=\u001b[32m'embedding'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'ollama'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'model'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'nomic-embed-text:latest'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mmodel_type\u001b[0m=\u001b[32m'embedding'\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[1m]\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "client = LlamaStackClient(base_url=\"http://localhost:8321\")\n", - "\n", - "models = client.models.list()\n", - "pprint(models)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 4. Dataset Preparation\n", - "\n", - "Create a sample RAG evaluation dataset. 
In a real scenario, you would load your own dataset.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "# Sample Ragas evaluation dataset\n", - "evaluation_data = [\n", - " {\n", - " \"user_input\": \"What is the capital of France?\",\n", - " \"response\": \"The capital of France is Paris.\",\n", - " \"retrieved_contexts\": [\n", - " \"Paris is the capital and most populous city of France.\"\n", - " ],\n", - " \"reference\": \"Paris\",\n", - " },\n", - " {\n", - " \"user_input\": \"Who invented the telephone?\",\n", - " \"response\": \"Alexander Graham Bell invented the telephone in 1876.\",\n", - " \"retrieved_contexts\": [\n", - " \"Alexander Graham Bell was a Scottish-American inventor who patented the first practical telephone.\"\n", - " ],\n", - " \"reference\": \"Alexander Graham Bell\",\n", - " },\n", - " {\n", - " \"user_input\": \"What is photosynthesis?\",\n", - " \"response\": \"Photosynthesis is the process by which plants convert sunlight into energy.\",\n", - " \"retrieved_contexts\": [\n", - " \"Photosynthesis is a process used by plants to convert light energy into chemical energy.\"\n", - " ],\n", - " \"reference\": \"Photosynthesis is the process by which plants and other organisms convert light energy into chemical energy.\",\n", - " },\n", - "]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 5. Dataset Registration\n", - "\n", - "Register the dataset with Llama Stack's Datasets API using the direct rows approach.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:httpx:HTTP Request: POST http://localhost:8321/v1/datasets \"HTTP/1.1 200 OK\"\n" - ] - }, - { - "data": { - "text/html": [ - "
DatasetRegisterResponse(\n",
-       "identifier='ragas_demo_dataset_inline',\n",
-       "metadata={\n",
-       "│   │   'provider_id': 'localfs',\n",
-       "│   │   'description': 'Sample RAG evaluation dataset for Ragas demo',\n",
-       "│   │   'size': 3.0,\n",
-       "│   │   'format': 'ragas',\n",
-       "│   │   'created_at': '2025-09-25T19:25:53.739691'\n",
-       "},\n",
-       "provider_id='localfs',\n",
-       "purpose='eval/question-answer',\n",
-       "source=SourceRowsDataSource(\n",
-       "│   │   rows=[\n",
-       "│   │   │   {\n",
-       "│   │   │   │   'user_input': 'What is the capital of France?',\n",
-       "│   │   │   │   'response': 'The capital of France is Paris.',\n",
-       "│   │   │   │   'retrieved_contexts': ['Paris is the capital and most populous city of France.'],\n",
-       "│   │   │   │   'reference': 'Paris'\n",
-       "│   │   │   },\n",
-       "│   │   │   {\n",
-       "│   │   │   │   'user_input': 'Who invented the telephone?',\n",
-       "│   │   │   │   'response': 'Alexander Graham Bell invented the telephone in 1876.',\n",
-       "│   │   │   │   'retrieved_contexts': [\n",
-       "│   │   │   │   │   'Alexander Graham Bell was a Scottish-American inventor who patented the first practical telephone.'\n",
-       "│   │   │   │   ],\n",
-       "│   │   │   │   'reference': 'Alexander Graham Bell'\n",
-       "│   │   │   },\n",
-       "│   │   │   {\n",
-       "│   │   │   │   'user_input': 'What is photosynthesis?',\n",
-       "│   │   │   │   'response': 'Photosynthesis is the process by which plants convert sunlight into energy.',\n",
-       "│   │   │   │   'retrieved_contexts': [\n",
-       "│   │   │   │   │   'Photosynthesis is a process used by plants to convert light energy into chemical energy.'\n",
-       "│   │   │   │   ],\n",
-       "│   │   │   │   'reference': 'Photosynthesis is the process by which plants and other organisms convert light energy into chemical energy.'\n",
-       "│   │   │   }\n",
-       "│   │   ],\n",
-       "│   │   type='rows'\n",
-       "),\n",
-       "type='dataset',\n",
-       "provider_resource_id='ragas_demo_dataset_inline',\n",
-       "owner=None\n",
-       ")\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;35mDatasetRegisterResponse\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'ragas_demo_dataset_inline'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[32m'provider_id'\u001b[0m: \u001b[32m'localfs'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[32m'description'\u001b[0m: \u001b[32m'Sample RAG evaluation dataset for Ragas demo'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[32m'size'\u001b[0m: \u001b[1;36m3.0\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[32m'format'\u001b[0m: \u001b[32m'ragas'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[32m'created_at'\u001b[0m: \u001b[32m'2025-09-25T19:25:53.739691'\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'localfs'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mpurpose\u001b[0m=\u001b[32m'eval/question-answer'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33msource\u001b[0m=\u001b[1;35mSourceRowsDataSource\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mrows\u001b[0m=\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: \u001b[32m'What is the capital of France?'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'The capital of France is Paris.'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\u001b[32m'Paris is the capital and most populous city of France.'\u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Paris'\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: 
\u001b[32m'Who invented the telephone?'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'Alexander Graham Bell invented the telephone in 1876.'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'Alexander Graham Bell was a Scottish-American inventor who patented the first practical telephone.'\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Alexander Graham Bell'\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: \u001b[32m'What is photosynthesis?'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'Photosynthesis is the process by which plants convert sunlight into energy.'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'Photosynthesis is a process used by plants to convert light energy into chemical energy.'\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Photosynthesis is the process by which plants and other organisms convert light energy into chemical energy.'\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m}\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'rows'\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'dataset'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'ragas_demo_dataset_inline'\u001b[0m,\n", - "\u001b[2;32m│ 
\u001b[0m\u001b[33mowner\u001b[0m=\u001b[3;35mNone\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Register the dataset\n", - "dataset_id = \"ragas_demo_dataset_inline\"\n", - "\n", - "dataset_response = client.datasets.register(\n", - " dataset_id=dataset_id,\n", - " purpose=\"eval/question-answer\", # RAG evaluation purpose\n", - " source={\"type\": \"rows\", \"rows\": evaluation_data},\n", - " metadata={\n", - " \"provider_id\": \"localfs\", # seems there's a bug in datasets\n", - " \"description\": \"Sample RAG evaluation dataset for Ragas demo\",\n", - " \"size\": len(evaluation_data),\n", - " \"format\": \"ragas\",\n", - " \"created_at\": datetime.now().isoformat(),\n", - " },\n", - ")\n", - "pprint(dataset_response)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 6. Benchmark Registration\n", - "\n", - "Register a benchmark that defines what metrics to use for evaluation.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:httpx:HTTP Request: POST http://localhost:8321/v1/eval/benchmarks \"HTTP/1.1 200 OK\"\n" - ] - }, - { - "data": { - "text/html": [ - "
None\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[3;35mNone\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "benchmark_id = f\"ragas_demo_benchmark_{datetime.now().strftime('%Y%m%d_%H%M%S')}\"\n", - "\n", - "ragas_metrics = [\n", - " \"answer_relevancy\", # How relevant is the answer to the question?\n", - " # \"context_precision\", # How precise are the retrieved contexts?\n", - " # \"faithfulness\", # How faithful is the answer to the contexts?\n", - " # \"context_recall\", # How much of the ground truth is covered by contexts?\n", - " # \"answer_correctness\" # How correct is the answer compared to ground truth?\n", - "]\n", - "\n", - "benchmark_response = client.benchmarks.register(\n", - " benchmark_id=benchmark_id,\n", - " dataset_id=dataset_id,\n", - " scoring_functions=ragas_metrics,\n", - " provider_id=\"trustyai_ragas\",\n", - " # metadata={\n", - " # \"provider\": \"ragas\",\n", - " # \"version\": \"1.0\",\n", - " # \"metrics_count\": len(ragas_metrics),\n", - " # \"created_at\": datetime.now().isoformat()\n", - " # }\n", - ")\n", - "\n", - "pprint(benchmark_response)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:httpx:HTTP Request: GET http://localhost:8321/v1/eval/benchmarks \"HTTP/1.1 200 OK\"\n" - ] - }, - { - "data": { - "text/html": [ - "
[\n",
-       "Benchmark(\n",
-       "│   │   dataset_id='ragas_demo_dataset_inline',\n",
-       "│   │   identifier='ragas_demo_benchmark_20250925_192402',\n",
-       "│   │   metadata={},\n",
-       "│   │   provider_id='trustyai_ragas',\n",
-       "│   │   scoring_functions=['answer_relevancy'],\n",
-       "│   │   type='benchmark',\n",
-       "│   │   provider_resource_id='ragas_demo_benchmark_20250925_192402'\n",
-       "),\n",
-       "Benchmark(\n",
-       "│   │   dataset_id='ragas_demo_dataset_inline',\n",
-       "│   │   identifier='ragas_demo_benchmark_20250925_192553',\n",
-       "│   │   metadata={},\n",
-       "│   │   provider_id='trustyai_ragas',\n",
-       "│   │   scoring_functions=['answer_relevancy'],\n",
-       "│   │   type='benchmark',\n",
-       "│   │   provider_resource_id='ragas_demo_benchmark_20250925_192553'\n",
-       ")\n",
-       "]\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1;35mBenchmark\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mdataset_id\u001b[0m=\u001b[32m'ragas_demo_dataset_inline'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'ragas_demo_benchmark_20250925_192402'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'trustyai_ragas'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mscoring_functions\u001b[0m=\u001b[1m[\u001b[0m\u001b[32m'answer_relevancy'\u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'benchmark'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'ragas_demo_benchmark_20250925_192402'\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[1;35mBenchmark\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mdataset_id\u001b[0m=\u001b[32m'ragas_demo_dataset_inline'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'ragas_demo_benchmark_20250925_192553'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'trustyai_ragas'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mscoring_functions\u001b[0m=\u001b[1m[\u001b[0m\u001b[32m'answer_relevancy'\u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'benchmark'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'ragas_demo_benchmark_20250925_192553'\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[1m]\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], 
- "source": [ - "benchmarks = client.benchmarks.list()\n", - "pprint(benchmarks[-2:])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 7. Evaluation Execution\n", - "\n", - "Run the evaluation using our Ragas out-of-tree provider.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:httpx:HTTP Request: POST http://localhost:8321/v1/eval/benchmarks/ragas_demo_benchmark_20250925_192553/jobs \"HTTP/1.1 200 OK\"\n" - ] - }, - { - "data": { - "text/html": [ - "
Job(\n",
-       "job_id='1',\n",
-       "status='in_progress',\n",
-       "result=None,\n",
-       "eval_config={\n",
-       "│   │   'embedding_model': 'all-MiniLM-L6-v2',\n",
-       "│   │   'ragas_config': {\n",
-       "│   │   │   'batch_size': None,\n",
-       "│   │   │   'show_progress': True,\n",
-       "│   │   │   'raise_exceptions': True,\n",
-       "│   │   │   'experiment_name': None,\n",
-       "│   │   │   'column_map': None\n",
-       "│   │   }\n",
-       "}\n",
-       ")\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;35mJob\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mjob_id\u001b[0m=\u001b[32m'1'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mstatus\u001b[0m=\u001b[32m'in_progress'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mresult\u001b[0m=\u001b[3;35mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33meval_config\u001b[0m=\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[32m'embedding_model'\u001b[0m: \u001b[32m'all-MiniLM-L6-v2'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[32m'ragas_config'\u001b[0m: \u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'batch_size'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'show_progress'\u001b[0m: \u001b[3;92mTrue\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'raise_exceptions'\u001b[0m: \u001b[3;92mTrue\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'experiment_name'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'column_map'\u001b[0m: \u001b[3;35mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# since we can't set the embedding model in the benchmark config,\n", - "# the embedding model is set in the distribution run.yaml file (all-MiniLM-L6-v2)\n", - "\n", - "job = client.eval.run_eval(\n", - " benchmark_id=benchmark_id,\n", - " benchmark_config={\n", - " \"eval_candidate\": {\n", - " \"type\": \"model\",\n", - " # \"model\": \"meta-llama/Llama-3.2-3B-Instruct\",\n", - " \"model\": \"ollama/granite3.3:2b\",\n", - " \"sampling_params\": {\"temperature\": 0.1, \"max_tokens\": 100},\n", - " },\n", - " \"scoring_params\": {},\n", - " },\n", - ")\n", - "pprint(job)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 8. 
Results Display\n" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:httpx:HTTP Request: GET http://localhost:8321/v1/eval/benchmarks/ragas_demo_benchmark_20250925_192553/jobs/1 \"HTTP/1.1 200 OK\"\n" - ] - }, - { - "data": { - "text/html": [ - "
Job(\n",
-       "job_id='1',\n",
-       "status='in_progress',\n",
-       "result=None,\n",
-       "eval_config={\n",
-       "│   │   'embedding_model': 'all-MiniLM-L6-v2',\n",
-       "│   │   'ragas_config': {\n",
-       "│   │   │   'batch_size': None,\n",
-       "│   │   │   'show_progress': True,\n",
-       "│   │   │   'raise_exceptions': True,\n",
-       "│   │   │   'experiment_name': None,\n",
-       "│   │   │   'column_map': None\n",
-       "│   │   }\n",
-       "}\n",
-       ")\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;35mJob\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mjob_id\u001b[0m=\u001b[32m'1'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mstatus\u001b[0m=\u001b[32m'in_progress'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mresult\u001b[0m=\u001b[3;35mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33meval_config\u001b[0m=\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[32m'embedding_model'\u001b[0m: \u001b[32m'all-MiniLM-L6-v2'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[32m'ragas_config'\u001b[0m: \u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'batch_size'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'show_progress'\u001b[0m: \u001b[3;92mTrue\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'raise_exceptions'\u001b[0m: \u001b[3;92mTrue\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'experiment_name'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'column_map'\u001b[0m: \u001b[3;35mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "job = client.eval.jobs.status(benchmark_id=benchmark_id, job_id=job.job_id)\n", - "pprint(job)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:httpx:HTTP Request: GET http://localhost:8321/v1/eval/benchmarks/ragas_demo_benchmark_20250925_192553/jobs/1 \"HTTP/1.1 200 OK\"\n" - ] - }, - { - "data": { - "text/html": [ - "
Job(\n",
-       "job_id='1',\n",
-       "status='completed',\n",
-       "result={\n",
-       "│   │   'generations': [\n",
-       "│   │   │   {\n",
-       "│   │   │   │   'user_input': 'What is the capital of France?',\n",
-       "│   │   │   │   'retrieved_contexts': ['Paris is the capital and most populous city of France.'],\n",
-       "│   │   │   │   'response': 'The capital of France is Paris.',\n",
-       "│   │   │   │   'reference': 'Paris'\n",
-       "│   │   │   },\n",
-       "│   │   │   {\n",
-       "│   │   │   │   'user_input': 'Who invented the telephone?',\n",
-       "│   │   │   │   'retrieved_contexts': [\n",
-       "│   │   │   │   │   'Alexander Graham Bell was a Scottish-American inventor who patented the first practical telephone.'\n",
-       "│   │   │   │   ],\n",
-       "│   │   │   │   'response': 'Alexander Graham Bell invented the telephone in 1876.',\n",
-       "│   │   │   │   'reference': 'Alexander Graham Bell'\n",
-       "│   │   │   },\n",
-       "│   │   │   {\n",
-       "│   │   │   │   'user_input': 'What is photosynthesis?',\n",
-       "│   │   │   │   'retrieved_contexts': [\n",
-       "│   │   │   │   │   'Photosynthesis is a process used by plants to convert light energy into chemical energy.'\n",
-       "│   │   │   │   ],\n",
-       "│   │   │   │   'response': 'Photosynthesis is the process by which plants convert sunlight into energy.',\n",
-       "│   │   │   │   'reference': 'Photosynthesis is the process by which plants and other organisms convert light energy into chemical energy.'\n",
-       "│   │   │   }\n",
-       "│   │   ],\n",
-       "│   │   'scores': {\n",
-       "│   │   │   'answer_relevancy': {\n",
-       "│   │   │   │   'score_rows': [\n",
-       "│   │   │   │   │   {'score': 0.9567410688422774},\n",
-       "│   │   │   │   │   {'score': 0.9262221944126402},\n",
-       "│   │   │   │   │   {'score': 0.8774825363469155}\n",
-       "│   │   │   │   ],\n",
-       "│   │   │   │   'aggregated_results': {'answer_relevancy': 0.9201485998672777}\n",
-       "│   │   │   }\n",
-       "│   │   }\n",
-       "},\n",
-       "eval_config={\n",
-       "│   │   'embedding_model': 'all-MiniLM-L6-v2',\n",
-       "│   │   'ragas_config': {\n",
-       "│   │   │   'batch_size': None,\n",
-       "│   │   │   'show_progress': True,\n",
-       "│   │   │   'raise_exceptions': True,\n",
-       "│   │   │   'experiment_name': None,\n",
-       "│   │   │   'column_map': None\n",
-       "│   │   }\n",
-       "}\n",
-       ")\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;35mJob\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mjob_id\u001b[0m=\u001b[32m'1'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mstatus\u001b[0m=\u001b[32m'completed'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mresult\u001b[0m=\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[32m'generations'\u001b[0m: \u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: \u001b[32m'What is the capital of France?'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\u001b[32m'Paris is the capital and most populous city of France.'\u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'The capital of France is Paris.'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Paris'\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: \u001b[32m'Who invented the telephone?'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'Alexander Graham Bell was a Scottish-American inventor who patented the first practical telephone.'\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'Alexander Graham Bell invented the telephone in 1876.'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Alexander Graham Bell'\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: \u001b[32m'What is 
photosynthesis?'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'Photosynthesis is a process used by plants to convert light energy into chemical energy.'\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'Photosynthesis is the process by which plants convert sunlight into energy.'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Photosynthesis is the process by which plants and other organisms convert light energy into chemical energy.'\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m}\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[32m'scores'\u001b[0m: \u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'answer_relevancy'\u001b[0m: \u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'score_rows'\u001b[0m: \u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.9567410688422774\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.9262221944126402\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.8774825363469155\u001b[0m\u001b[1m}\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'aggregated_results'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'answer_relevancy'\u001b[0m: \u001b[1;36m0.9201485998672777\u001b[0m\u001b[1m}\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m}\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33meval_config\u001b[0m=\u001b[1m{\u001b[0m\n", - 
"\u001b[2;32m│ │ \u001b[0m\u001b[32m'embedding_model'\u001b[0m: \u001b[32m'all-MiniLM-L6-v2'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[32m'ragas_config'\u001b[0m: \u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'batch_size'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'show_progress'\u001b[0m: \u001b[3;92mTrue\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'raise_exceptions'\u001b[0m: \u001b[3;92mTrue\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'experiment_name'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'column_map'\u001b[0m: \u001b[3;35mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# wait a bit for the job to complete\n", - "pprint(client.eval.jobs.status(benchmark_id=benchmark_id, job_id=job.job_id))" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:httpx:HTTP Request: GET http://localhost:8321/v1/eval/benchmarks/ragas_demo_benchmark_20250925_192553/jobs/1/result \"HTTP/1.1 200 OK\"\n" - ] - }, - { - "data": { - "text/html": [ - "
EvaluateResponse(\n",
-       "generations=[\n",
-       "│   │   {\n",
-       "│   │   │   'user_input': 'What is the capital of France?',\n",
-       "│   │   │   'retrieved_contexts': ['Paris is the capital and most populous city of France.'],\n",
-       "│   │   │   'response': 'The capital of France is Paris.',\n",
-       "│   │   │   'reference': 'Paris'\n",
-       "│   │   },\n",
-       "│   │   {\n",
-       "│   │   │   'user_input': 'Who invented the telephone?',\n",
-       "│   │   │   'retrieved_contexts': [\n",
-       "│   │   │   │   'Alexander Graham Bell was a Scottish-American inventor who patented the first practical telephone.'\n",
-       "│   │   │   ],\n",
-       "│   │   │   'response': 'Alexander Graham Bell invented the telephone in 1876.',\n",
-       "│   │   │   'reference': 'Alexander Graham Bell'\n",
-       "│   │   },\n",
-       "│   │   {\n",
-       "│   │   │   'user_input': 'What is photosynthesis?',\n",
-       "│   │   │   'retrieved_contexts': [\n",
-       "│   │   │   │   'Photosynthesis is a process used by plants to convert light energy into chemical energy.'\n",
-       "│   │   │   ],\n",
-       "│   │   │   'response': 'Photosynthesis is the process by which plants convert sunlight into energy.',\n",
-       "│   │   │   'reference': 'Photosynthesis is the process by which plants and other organisms convert light energy into chemical energy.'\n",
-       "│   │   }\n",
-       "],\n",
-       "scores={\n",
-       "│   │   'answer_relevancy': ScoringResult(\n",
-       "│   │   │   aggregated_results={'answer_relevancy': 0.9201485998672777},\n",
-       "│   │   │   score_rows=[\n",
-       "│   │   │   │   {'score': 0.9567410688422774},\n",
-       "│   │   │   │   {'score': 0.9262221944126402},\n",
-       "│   │   │   │   {'score': 0.8774825363469155}\n",
-       "│   │   │   ]\n",
-       "│   │   )\n",
-       "}\n",
-       ")\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;35mEvaluateResponse\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mgenerations\u001b[0m=\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: \u001b[32m'What is the capital of France?'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\u001b[32m'Paris is the capital and most populous city of France.'\u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'The capital of France is Paris.'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Paris'\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: \u001b[32m'Who invented the telephone?'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'Alexander Graham Bell was a Scottish-American inventor who patented the first practical telephone.'\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'Alexander Graham Bell invented the telephone in 1876.'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Alexander Graham Bell'\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: \u001b[32m'What is photosynthesis?'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'Photosynthesis is a process used by plants to convert light energy into chemical energy.'\u001b[0m\n", - "\u001b[2;32m│ 
│ │ \u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'Photosynthesis is the process by which plants convert sunlight into energy.'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Photosynthesis is the process by which plants and other organisms convert light energy into chemical energy.'\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mscores\u001b[0m=\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[32m'answer_relevancy'\u001b[0m: \u001b[1;35mScoringResult\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[33maggregated_results\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'answer_relevancy'\u001b[0m: \u001b[1;36m0.9201485998672777\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mscore_rows\u001b[0m=\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.9567410688422774\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.9262221944126402\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.8774825363469155\u001b[0m\u001b[1m}\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m]\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "results = client.eval.jobs.retrieve(benchmark_id=benchmark_id, job_id=job.job_id)\n", - "pprint(results)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - 
"file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.11" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/distribution/run-inline.yaml b/distribution/run-inline.yaml deleted file mode 100644 index 7a40e2de..00000000 --- a/distribution/run-inline.yaml +++ /dev/null @@ -1,61 +0,0 @@ -version: "2" -image_name: trustyai_ragas_distro_inline -apis: - - eval - - inference - - telemetry - - datasetio - - files - - benchmarks -providers: - eval: - - provider_id: trustyai_ragas - provider_type: inline::trustyai_ragas - module: llama_stack_provider_ragas.inline - config: - embedding_model: ${env.EMBEDDING_MODEL} - datasetio: - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/trustyai_ragas_distro_inline}/localfs_datasetio.db - inference: - - provider_id: ollama - provider_type: remote::ollama - config: - url: ${env.OLLAMA_URL:=http://localhost:11434} - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/trustyai_ragas_distro_inline}/trace_store.db - otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=} - files: - - provider_id: meta-reference-files - provider_type: inline::localfs - config: - storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/trustyai_ragas_distro_inline/files} - metadata_store: - type: sqlite - db_path: ${env.METADATA_STORE_DB_PATH:=~/.llama/distributions/trustyai_ragas_distro_inline}/registry.db} - -models: - - metadata: - embedding_dimension: 384 - model_id: all-MiniLM-L6-v2 - provider_id: ollama - provider_model_id: all-minilm:latest - model_type: embedding - - metadata: {} - model_id: 
granite3.3:2b - provider_id: ollama - provider_model_id: granite3.3:2b - model_type: llm -server: - host: localhost - port: 8321 diff --git a/distribution/run-remote.yaml b/distribution/run.yaml similarity index 86% rename from distribution/run-remote.yaml rename to distribution/run.yaml index bbdaaaa3..d13d4ce9 100644 --- a/distribution/run-remote.yaml +++ b/distribution/run.yaml @@ -9,8 +9,8 @@ apis: - datasetio providers: eval: - - provider_id: trustyai_ragas - provider_type: remote::trustyai_ragas + - provider_id: trustyai_ragas_remote + provider_type: remote::trustyai_ragas_remote module: llama_stack_provider_ragas config: embedding_model: ${env.EMBEDDING_MODEL} @@ -20,7 +20,12 @@ providers: pipelines_endpoint: ${env.KUBEFLOW_PIPELINES_ENDPOINT} namespace: ${env.KUBEFLOW_NAMESPACE} llama_stack_url: ${env.KUBEFLOW_LLAMA_STACK_URL} - base_image: ${env.KUBEFLOW_BASE_IMAGE} + base_image: ${env.KUBEFLOW_BASE_IMAGE:=} + - provider_id: trustyai_ragas_inline + provider_type: inline::trustyai_ragas_inline + module: llama_stack_provider_ragas.inline + config: + embedding_model: ${env.EMBEDDING_MODEL} datasetio: - provider_id: localfs provider_type: inline::localfs diff --git a/pyproject.toml b/pyproject.toml index 1e3b83ba..670310aa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,8 @@ authors = [ keywords = ["llama-stack", "ragas", "evaluation"] dependencies = [ "setuptools-scm", - "llama-stack==0.2.23", + "llama-stack @ git+https://github.com/llamastack/llama-stack.git", + "llama-stack-client @ git+https://github.com/llamastack/llama-stack-client-python.git", "greenlet==3.2.4", # inline/files/localfs errors saying greenlet not found "ragas==0.3.0", "pandas==2.3.0", @@ -84,8 +85,10 @@ ignore = [ "C901", # too complex ] +[tool.hatch.metadata] +allow-direct-references = true + [tool.mypy] -python_version = "3.12" warn_return_any = true warn_unused_configs = true disallow_untyped_defs = false diff --git a/src/llama_stack_provider_ragas/inline/provider.py 
b/src/llama_stack_provider_ragas/inline/provider.py index c45f81ae..c289097e 100644 --- a/src/llama_stack_provider_ragas/inline/provider.py +++ b/src/llama_stack_provider_ragas/inline/provider.py @@ -4,7 +4,7 @@ def get_provider_spec() -> ProviderSpec: return InlineProviderSpec( api=Api.eval, - provider_type="inline::trustyai_ragas", + provider_type="inline::trustyai_ragas_inline", pip_packages=["ragas==0.3.0"], config_class="llama_stack_provider_ragas.config.RagasProviderInlineConfig", module="llama_stack_provider_ragas.inline", diff --git a/src/llama_stack_provider_ragas/inline/wrappers_inline.py b/src/llama_stack_provider_ragas/inline/wrappers_inline.py index d0ffb13c..26da4797 100644 --- a/src/llama_stack_provider_ragas/inline/wrappers_inline.py +++ b/src/llama_stack_provider_ragas/inline/wrappers_inline.py @@ -3,7 +3,6 @@ from langchain_core.language_models.llms import Generation, LLMResult from langchain_core.prompt_values import PromptValue -from llama_stack.apis.inference import EmbeddingTaskType from ragas.embeddings.base import BaseRagasEmbeddings from ragas.llms.base import BaseRagasLLM from ragas.run_config import RunConfig @@ -39,12 +38,11 @@ def embed_documents(self, texts: list[str]) -> list[list[float]]: async def aembed_documents(self, texts: list[str]) -> list[list[float]]: """Embed documents using Llama Stack inference API.""" try: - response = await self.inference_api.embeddings( - model_id=self.embedding_model_id, - contents=texts, - task_type=EmbeddingTaskType.document, + response = await self.inference_api.openai_embeddings( + model=self.embedding_model_id, + input=texts, ) - return response.embeddings # type: ignore + return [data.embedding for data in response.data] # type: ignore except Exception as e: logger.error(f"Document embedding failed: {str(e)}") raise @@ -52,12 +50,11 @@ async def aembed_documents(self, texts: list[str]) -> list[list[float]]: async def aembed_query(self, text: str) -> list[float]: """Embed query using Llama Stack 
inference API.""" try: - response = await self.inference_api.embeddings( - model_id=self.embedding_model_id, - contents=[text], - task_type=EmbeddingTaskType.query, + response = await self.inference_api.openai_embeddings( + model=self.embedding_model_id, + input=text, ) - return response.embeddings[0] # type: ignore + return response.data[0].embedding # type: ignore except Exception as e: logger.error(f"Query embedding failed: {str(e)}") raise @@ -132,18 +129,6 @@ async def agenerate_text( # Log the prompt if enabled self._log_prompt(prompt_text) - # Create sampling params for this generation - gen_sampling_params = self.sampling_params - if temperature is not None: - # Update temperature if provided - gen_sampling_params = ( - gen_sampling_params.copy() - if hasattr(gen_sampling_params, "copy") - else gen_sampling_params - ) - if hasattr(gen_sampling_params, "temperature"): - gen_sampling_params.temperature = temperature - # Generate responses (handle multiple completions if n > 1) generations = [] llm_output = { @@ -152,27 +137,47 @@ async def agenerate_text( "provider": "llama_stack", } + # sampling params for this generation should be set via the benchmark config + # we will ignore the temperature and stop params passed in here for _ in range(n): - response = await self.inference_api.completion( - model_id=self.model_id, - content=prompt_text, - sampling_params=gen_sampling_params, + response = await self.inference_api.openai_completion( + model=self.model_id, + prompt=prompt_text, + max_tokens=self.sampling_params.max_tokens + if self.sampling_params and self.sampling_params.max_tokens + else None, + temperature=( + self.sampling_params.strategy.temperature + if self.sampling_params + and hasattr(self.sampling_params.strategy, "temperature") + and self.sampling_params.strategy.temperature + else None + ), + top_p=( + self.sampling_params.strategy.top_p + if self.sampling_params + and hasattr(self.sampling_params.strategy, "top_p") + and 
self.sampling_params.strategy.top_p + else None + ), + stop=self.sampling_params.stop + if self.sampling_params and self.sampling_params.stop + else None, ) + # Extract text from OpenAI completion response + choice = response.choices[0] if response.choices else None + text = choice.text if choice else "" + # Store Llama Stack response info in llm_output llama_stack_info = { - "stop_reason": ( - response.stop_reason.value if response.stop_reason else None - ), - "content_length": len(response.content), - "has_logprobs": response.logprobs is not None, - "logprobs_count": ( - len(response.logprobs) if response.logprobs else 0 - ), + "stop_reason": (choice.finish_reason if choice else None), + "content_length": len(text), + "has_logprobs": choice.logprobs is not None if choice else False, } llm_output["llama_stack_responses"].append(llama_stack_info) # type: ignore - generations.append(Generation(text=response.content)) + generations.append(Generation(text=text)) return LLMResult(generations=[generations], llm_output=llm_output) diff --git a/src/llama_stack_provider_ragas/provider.py b/src/llama_stack_provider_ragas/provider.py index ad28301f..9507a577 100644 --- a/src/llama_stack_provider_ragas/provider.py +++ b/src/llama_stack_provider_ragas/provider.py @@ -1,5 +1,12 @@ -# remote is the default provider -from .remote import get_adapter_impl -from .remote.provider import get_provider_spec +from .inline.provider import get_provider_spec as get_inline_provider_spec +from .remote.provider import get_provider_spec as get_remote_provider_spec -__all__ = ["get_provider_spec", "get_adapter_impl"] + +def get_provider_spec(): + return [ + get_inline_provider_spec(), + get_remote_provider_spec(), + ] + + +__all__ = ["get_provider_spec"] diff --git a/src/llama_stack_provider_ragas/remote/provider.py b/src/llama_stack_provider_ragas/remote/provider.py index 0572916d..63c8e5d1 100644 --- a/src/llama_stack_provider_ragas/remote/provider.py +++ 
b/src/llama_stack_provider_ragas/remote/provider.py @@ -8,8 +8,8 @@ def get_provider_spec() -> ProviderSpec: return RemoteProviderSpec( api=Api.eval, - provider_type="remote::trustyai_ragas", - adapter_type="trustyai_ragas", + provider_type="remote::trustyai_ragas_remote", + adapter_type="trustyai_ragas_remote", module="llama_stack_provider_ragas.remote", pip_packages=[ "ragas==0.3.0", diff --git a/tests/test_inline_evaluation.py b/tests/test_inline_evaluation.py index f615f88b..95fc0103 100644 --- a/tests/test_inline_evaluation.py +++ b/tests/test_inline_evaluation.py @@ -44,7 +44,7 @@ def test_single_metric_evaluation( provider_id="trustyai_ragas", ) - job = lls_client.eval.run_eval( + job = lls_client.alpha.eval.run_eval( benchmark_id=benchmark_id, benchmark_config={ "eval_candidate": { diff --git a/uv.lock b/uv.lock index f45bc390..491ca3b5 100644 --- a/uv.lock +++ b/uv.lock @@ -849,34 +849,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e3/a5/6ddab2b4c112be95601c13428db1d8b6608a8b6039816f2ba09c346c08fc/greenlet-3.2.4-cp314-cp314-win_amd64.whl", hash = "sha256:e37ab26028f12dbb0ff65f29a8d3d44a765c61e729647bf2ddfbbed621726f01", size = 303425, upload-time = "2025-08-07T13:32:27.59Z" }, ] -[[package]] -name = "grpcio" -version = "1.73.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/79/e8/b43b851537da2e2f03fa8be1aef207e5cbfb1a2e014fbb6b40d24c177cd3/grpcio-1.73.1.tar.gz", hash = "sha256:7fce2cd1c0c1116cf3850564ebfc3264fba75d3c74a7414373f1238ea365ef87", size = 12730355, upload-time = "2025-06-26T01:53:24.622Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b8/41/456caf570c55d5ac26f4c1f2db1f2ac1467d5bf3bcd660cba3e0a25b195f/grpcio-1.73.1-cp312-cp312-linux_armv7l.whl", hash = "sha256:921b25618b084e75d424a9f8e6403bfeb7abef074bb6c3174701e0f2542debcf", size = 5334621, upload-time = "2025-06-26T01:52:23.602Z" }, - { url = 
"https://files.pythonhosted.org/packages/2a/c2/9a15e179e49f235bb5e63b01590658c03747a43c9775e20c4e13ca04f4c4/grpcio-1.73.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:277b426a0ed341e8447fbf6c1d6b68c952adddf585ea4685aa563de0f03df887", size = 10601131, upload-time = "2025-06-26T01:52:25.691Z" }, - { url = "https://files.pythonhosted.org/packages/0c/1d/1d39e90ef6348a0964caa7c5c4d05f3bae2c51ab429eb7d2e21198ac9b6d/grpcio-1.73.1-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:96c112333309493c10e118d92f04594f9055774757f5d101b39f8150f8c25582", size = 5759268, upload-time = "2025-06-26T01:52:27.631Z" }, - { url = "https://files.pythonhosted.org/packages/8a/2b/2dfe9ae43de75616177bc576df4c36d6401e0959833b2e5b2d58d50c1f6b/grpcio-1.73.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f48e862aed925ae987eb7084409a80985de75243389dc9d9c271dd711e589918", size = 6409791, upload-time = "2025-06-26T01:52:29.711Z" }, - { url = "https://files.pythonhosted.org/packages/6e/66/e8fe779b23b5a26d1b6949e5c70bc0a5fd08f61a6ec5ac7760d589229511/grpcio-1.73.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:83a6c2cce218e28f5040429835fa34a29319071079e3169f9543c3fbeff166d2", size = 6003728, upload-time = "2025-06-26T01:52:31.352Z" }, - { url = "https://files.pythonhosted.org/packages/a9/39/57a18fcef567784108c4fc3f5441cb9938ae5a51378505aafe81e8e15ecc/grpcio-1.73.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:65b0458a10b100d815a8426b1442bd17001fdb77ea13665b2f7dc9e8587fdc6b", size = 6103364, upload-time = "2025-06-26T01:52:33.028Z" }, - { url = "https://files.pythonhosted.org/packages/c5/46/28919d2aa038712fc399d02fa83e998abd8c1f46c2680c5689deca06d1b2/grpcio-1.73.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:0a9f3ea8dce9eae9d7cb36827200133a72b37a63896e0e61a9d5ec7d61a59ab1", size = 6749194, upload-time = "2025-06-26T01:52:34.734Z" }, - { url = 
"https://files.pythonhosted.org/packages/3d/56/3898526f1fad588c5d19a29ea0a3a4996fb4fa7d7c02dc1be0c9fd188b62/grpcio-1.73.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:de18769aea47f18e782bf6819a37c1c528914bfd5683b8782b9da356506190c8", size = 6283902, upload-time = "2025-06-26T01:52:36.503Z" }, - { url = "https://files.pythonhosted.org/packages/dc/64/18b77b89c5870d8ea91818feb0c3ffb5b31b48d1b0ee3e0f0d539730fea3/grpcio-1.73.1-cp312-cp312-win32.whl", hash = "sha256:24e06a5319e33041e322d32c62b1e728f18ab8c9dbc91729a3d9f9e3ed336642", size = 3668687, upload-time = "2025-06-26T01:52:38.678Z" }, - { url = "https://files.pythonhosted.org/packages/3c/52/302448ca6e52f2a77166b2e2ed75f5d08feca4f2145faf75cb768cccb25b/grpcio-1.73.1-cp312-cp312-win_amd64.whl", hash = "sha256:303c8135d8ab176f8038c14cc10d698ae1db9c480f2b2823f7a987aa2a4c5646", size = 4334887, upload-time = "2025-06-26T01:52:40.743Z" }, - { url = "https://files.pythonhosted.org/packages/37/bf/4ca20d1acbefabcaba633ab17f4244cbbe8eca877df01517207bd6655914/grpcio-1.73.1-cp313-cp313-linux_armv7l.whl", hash = "sha256:b310824ab5092cf74750ebd8a8a8981c1810cb2b363210e70d06ef37ad80d4f9", size = 5335615, upload-time = "2025-06-26T01:52:42.896Z" }, - { url = "https://files.pythonhosted.org/packages/75/ed/45c345f284abec5d4f6d77cbca9c52c39b554397eb7de7d2fcf440bcd049/grpcio-1.73.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:8f5a6df3fba31a3485096ac85b2e34b9666ffb0590df0cd044f58694e6a1f6b5", size = 10595497, upload-time = "2025-06-26T01:52:44.695Z" }, - { url = "https://files.pythonhosted.org/packages/a4/75/bff2c2728018f546d812b755455014bc718f8cdcbf5c84f1f6e5494443a8/grpcio-1.73.1-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:052e28fe9c41357da42250a91926a3e2f74c046575c070b69659467ca5aa976b", size = 5765321, upload-time = "2025-06-26T01:52:46.871Z" }, - { url = 
"https://files.pythonhosted.org/packages/70/3b/14e43158d3b81a38251b1d231dfb45a9b492d872102a919fbf7ba4ac20cd/grpcio-1.73.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c0bf15f629b1497436596b1cbddddfa3234273490229ca29561209778ebe182", size = 6415436, upload-time = "2025-06-26T01:52:49.134Z" }, - { url = "https://files.pythonhosted.org/packages/e5/3f/81d9650ca40b54338336fd360f36773be8cb6c07c036e751d8996eb96598/grpcio-1.73.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ab860d5bfa788c5a021fba264802e2593688cd965d1374d31d2b1a34cacd854", size = 6007012, upload-time = "2025-06-26T01:52:51.076Z" }, - { url = "https://files.pythonhosted.org/packages/55/f4/59edf5af68d684d0f4f7ad9462a418ac517201c238551529098c9aa28cb0/grpcio-1.73.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:ad1d958c31cc91ab050bd8a91355480b8e0683e21176522bacea225ce51163f2", size = 6105209, upload-time = "2025-06-26T01:52:52.773Z" }, - { url = "https://files.pythonhosted.org/packages/e4/a8/700d034d5d0786a5ba14bfa9ce974ed4c976936c2748c2bd87aa50f69b36/grpcio-1.73.1-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:f43ffb3bd415c57224c7427bfb9e6c46a0b6e998754bfa0d00f408e1873dcbb5", size = 6753655, upload-time = "2025-06-26T01:52:55.064Z" }, - { url = "https://files.pythonhosted.org/packages/1f/29/efbd4ac837c23bc48e34bbaf32bd429f0dc9ad7f80721cdb4622144c118c/grpcio-1.73.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:686231cdd03a8a8055f798b2b54b19428cdf18fa1549bee92249b43607c42668", size = 6287288, upload-time = "2025-06-26T01:52:57.33Z" }, - { url = "https://files.pythonhosted.org/packages/d8/61/c6045d2ce16624bbe18b5d169c1a5ce4d6c3a47bc9d0e5c4fa6a50ed1239/grpcio-1.73.1-cp313-cp313-win32.whl", hash = "sha256:89018866a096e2ce21e05eabed1567479713ebe57b1db7cbb0f1e3b896793ba4", size = 3668151, upload-time = "2025-06-26T01:52:59.405Z" }, - { url = 
"https://files.pythonhosted.org/packages/c2/d7/77ac689216daee10de318db5aa1b88d159432dc76a130948a56b3aa671a2/grpcio-1.73.1-cp313-cp313-win_amd64.whl", hash = "sha256:4a68f8c9966b94dff693670a5cf2b54888a48a5011c5d9ce2295a1a1465ee84f", size = 4335747, upload-time = "2025-06-26T01:53:01.233Z" }, -] - [[package]] name = "h11" version = "0.16.0" @@ -1401,7 +1373,7 @@ wheels = [ [[package]] name = "llama-stack" version = "0.2.23" -source = { registry = "https://pypi.org/simple" } +source = { git = "https://github.com/llamastack/llama-stack.git#26fd5dbd34ec3cea1ca3bc0da6f30513a8c55ac9" } dependencies = [ { name = "aiohttp" }, { name = "aiosqlite" }, @@ -1424,15 +1396,12 @@ dependencies = [ { name = "python-jose", extra = ["cryptography"] }, { name = "python-multipart" }, { name = "rich" }, + { name = "sqlalchemy", extra = ["asyncio"] }, { name = "starlette" }, { name = "termcolor" }, { name = "tiktoken" }, { name = "uvicorn" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b1/d1/39838d16c3f1fa459f3fb2e171908082be2dcf885405c93c23d3b7fd8a83/llama_stack-0.2.23.tar.gz", hash = "sha256:464d29c1ae3989fce2226513c9c18648f6236cbf5a399d3cf3a6b3dc4435be8c", size = 3331049, upload-time = "2025-09-26T21:11:16.86Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/6d/b6/ad644dcd505c6e9bcc1dd256a08fca7bcd11c87684d68bf45f2a2f0028b9/llama_stack-0.2.23-py3-none-any.whl", hash = "sha256:43be14d97ca2fff77ca1e42241acb7945a67b7d183e9938bdb7159ee9610982a", size = 3662192, upload-time = "2025-09-26T21:11:15.291Z" }, -] [[package]] name = "llama-stack-client" @@ -1476,15 +1445,11 @@ dependencies = [ [package.optional-dependencies] dev = [ - { name = "aiosqlite" }, { name = "ipykernel" }, { name = "kfp" }, { name = "kfp-kubernetes" }, { name = "kubernetes" }, { name = "mypy" }, - { name = "ollama" }, - { name = "opentelemetry-api" }, - { name = "opentelemetry-exporter-otlp" }, { name = "pre-commit" }, { name = "pytest" }, { name = "pytest-asyncio" }, @@ -1492,14 +1457,6 
@@ dev = [ { name = "rich" }, { name = "ruff" }, { name = "s3fs" }, - { name = "uvicorn" }, -] -distro = [ - { name = "aiosqlite" }, - { name = "ollama" }, - { name = "opentelemetry-api" }, - { name = "opentelemetry-exporter-otlp" }, - { name = "uvicorn" }, ] remote = [ { name = "kfp" }, @@ -1510,19 +1467,14 @@ remote = [ [package.metadata] requires-dist = [ - { name = "aiosqlite", marker = "extra == 'distro'" }, { name = "greenlet", specifier = "==3.2.4" }, { name = "ipykernel", marker = "extra == 'dev'" }, { name = "kfp", marker = "extra == 'remote'", specifier = ">=2.5.0" }, { name = "kfp-kubernetes", marker = "extra == 'remote'", specifier = ">=2.0.0" }, { name = "kubernetes", marker = "extra == 'remote'", specifier = ">=30.0.0" }, - { name = "llama-stack", specifier = "==0.2.23" }, - { name = "llama-stack-provider-ragas", extras = ["distro"], marker = "extra == 'dev'" }, + { name = "llama-stack", git = "https://github.com/llamastack/llama-stack.git" }, { name = "llama-stack-provider-ragas", extras = ["remote"], marker = "extra == 'dev'" }, { name = "mypy", marker = "extra == 'dev'" }, - { name = "ollama", marker = "extra == 'distro'" }, - { name = "opentelemetry-api", marker = "extra == 'distro'" }, - { name = "opentelemetry-exporter-otlp", marker = "extra == 'distro'" }, { name = "pandas", specifier = "==2.3.0" }, { name = "pre-commit", marker = "extra == 'dev'" }, { name = "pyarrow", specifier = "==20.0.0" }, @@ -1535,9 +1487,8 @@ requires-dist = [ { name = "ruff", marker = "extra == 'dev'" }, { name = "s3fs", marker = "extra == 'remote'", specifier = ">=2024.12.0" }, { name = "setuptools-scm" }, - { name = "uvicorn", marker = "extra == 'distro'" }, ] -provides-extras = ["remote", "distro", "dev"] +provides-extras = ["remote", "dev"] [[package]] name = "markdown-it-py" @@ -1810,19 +1761,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df838baa98fa71844d84283302f7604ed565dafe5a6b5041a/oauthlib-3.3.1-py3-none-any.whl", hash = 
"sha256:88119c938d2b8fb88561af5f6ee0eec8cc8d552b7bb1f712743136eb7523b7a1", size = 160065, upload-time = "2025-06-19T22:48:06.508Z" }, ] -[[package]] -name = "ollama" -version = "0.5.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "httpx" }, - { name = "pydantic" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/8d/96/c7fe0d2d1b3053be614822a7b722c7465161b3672ce90df71515137580a0/ollama-0.5.1.tar.gz", hash = "sha256:5a799e4dc4e7af638b11e3ae588ab17623ee019e496caaf4323efbaa8feeff93", size = 41112, upload-time = "2025-05-30T21:32:48.679Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d6/76/3f96c8cdbf3955d7a73ee94ce3e0db0755d6de1e0098a70275940d1aff2f/ollama-0.5.1-py3-none-any.whl", hash = "sha256:4c8839f35bc173c7057b1eb2cbe7f498c1a7e134eafc9192824c8aecb3617506", size = 13369, upload-time = "2025-05-30T21:32:47.429Z" }, -] - [[package]] name = "openai" version = "1.109.1" @@ -1855,19 +1793,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1d/5a/3f8d078dbf55d18442f6a2ecedf6786d81d7245844b2b20ce2b8ad6f0307/opentelemetry_api-1.35.0-py3-none-any.whl", hash = "sha256:c4ea7e258a244858daf18474625e9cc0149b8ee354f37843415771a40c25ee06", size = 65566, upload-time = "2025-07-11T12:23:07.944Z" }, ] -[[package]] -name = "opentelemetry-exporter-otlp" -version = "1.35.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "opentelemetry-exporter-otlp-proto-grpc" }, - { name = "opentelemetry-exporter-otlp-proto-http" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/2e/2e/63718faa67b17f449a7fb7efdc7125a408cbe5d8c0bb35f423f2776d60b5/opentelemetry_exporter_otlp-1.35.0.tar.gz", hash = "sha256:f94feff09b3524df867c7876b79c96cef20068106cb5efe55340e8d08192c8a4", size = 6142, upload-time = "2025-07-11T12:23:30.128Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/cf/db/2da28358d3101ca936c1643becbb4ebd69e9e48acf27f153d735a4813c6b/opentelemetry_exporter_otlp-1.35.0-py3-none-any.whl", hash = "sha256:8e6bb9025f6238db7d69bba7ee37c77e4858d0a1ff22a9e126f7c9e017e83afe", size = 7016, upload-time = "2025-07-11T12:23:10.679Z" }, -] - [[package]] name = "opentelemetry-exporter-otlp-proto-common" version = "1.35.0" @@ -1880,24 +1805,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5a/2c/e31dd3c719bff87fa77391eb7f38b1430d22868c52312cba8aad60f280e5/opentelemetry_exporter_otlp_proto_common-1.35.0-py3-none-any.whl", hash = "sha256:863465de697ae81279ede660f3918680b4480ef5f69dcdac04f30722ed7b74cc", size = 18349, upload-time = "2025-07-11T12:23:11.713Z" }, ] -[[package]] -name = "opentelemetry-exporter-otlp-proto-grpc" -version = "1.35.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "googleapis-common-protos" }, - { name = "grpcio" }, - { name = "opentelemetry-api" }, - { name = "opentelemetry-exporter-otlp-proto-common" }, - { name = "opentelemetry-proto" }, - { name = "opentelemetry-sdk" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/20/de/222e4f2f8cd39250991f84d76b661534aef457cafc6a3eb3fcd513627698/opentelemetry_exporter_otlp_proto_grpc-1.35.0.tar.gz", hash = "sha256:ac4c2c3aa5674642db0df0091ab43ec08bbd91a9be469c8d9b18923eb742b9cc", size = 23794, upload-time = "2025-07-11T12:23:31.662Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f4/a6/3f60a77279e6a3dc21fc076dcb51be159a633b0bba5cba9fb804062a9332/opentelemetry_exporter_otlp_proto_grpc-1.35.0-py3-none-any.whl", hash = "sha256:ee31203eb3e50c7967b8fa71db366cc355099aca4e3726e489b248cdb2fd5a62", size = 18846, upload-time = "2025-07-11T12:23:12.957Z" }, -] - [[package]] name = "opentelemetry-exporter-otlp-proto-http" version = "1.35.0" @@ -2896,6 +2803,11 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/1c/fc/9ba22f01b5cdacc8f5ed0d22304718d2c758fce3fd49a5372b886a86f37c/sqlalchemy-2.0.41-py3-none-any.whl", hash = "sha256:57df5dc6fdb5ed1a88a1ed2195fd31927e705cad62dedd86b46972752a80f576", size = 1911224, upload-time = "2025-05-14T17:39:42.154Z" }, ] +[package.optional-dependencies] +asyncio = [ + { name = "greenlet" }, +] + [[package]] name = "stack-data" version = "0.6.3" From abf61a37387b7d6cb0c4de078c04e9846ff0accc Mon Sep 17 00:00:00 2001 From: Diego Maniloff Date: Mon, 13 Oct 2025 12:11:47 -0400 Subject: [PATCH 02/22] Use SamplingParams in wrappers. - Updated LlamaStackInlineLLM and LlamaStackRemoteLLM to accept SamplingParams directly. - Removed unused prompt logging and token estimation methods. - Enhanced error handling for completion responses. - Adjusted tests to reflect changes in sampling parameters structure and usage. - Improved integration with Kubeflow by ensuring proper sampling parameters are passed. --- .../inline/wrappers_inline.py | 72 +++------- .../remote/kubeflow/components.py | 7 +- .../remote/ragas_remote_eval.py | 16 +-- .../remote/wrappers_remote.py | 130 ++++++++++-------- tests/conftest.py | 13 +- tests/test_inline_evaluation.py | 13 +- tests/test_kubeflow_integration.py | 25 +++- 7 files changed, 131 insertions(+), 145 deletions(-) diff --git a/src/llama_stack_provider_ragas/inline/wrappers_inline.py b/src/llama_stack_provider_ragas/inline/wrappers_inline.py index 26da4797..f1313574 100644 --- a/src/llama_stack_provider_ragas/inline/wrappers_inline.py +++ b/src/llama_stack_provider_ragas/inline/wrappers_inline.py @@ -3,6 +3,7 @@ from langchain_core.language_models.llms import Generation, LLMResult from langchain_core.prompt_values import PromptValue +from llama_stack.apis.inference import SamplingParams, TopPSamplingStrategy from ragas.embeddings.base import BaseRagasEmbeddings from ragas.llms.base import BaseRagasLLM from ragas.run_config import RunConfig @@ -42,7 +43,7 @@ async def 
aembed_documents(self, texts: list[str]) -> list[list[float]]: model=self.embedding_model_id, input=texts, ) - return [data.embedding for data in response.data] # type: ignore + return [data.embedding for data in response.data] except Exception as e: logger.error(f"Document embedding failed: {str(e)}") raise @@ -67,7 +68,7 @@ def __init__( self, inference_api, model_id: str, - sampling_params, + sampling_params: SamplingParams | None = None, run_config: RunConfig = RunConfig(), multiple_completion_supported: bool = True, ): @@ -75,31 +76,6 @@ def __init__( self.inference_api = inference_api self.model_id = model_id self.sampling_params = sampling_params - self.enable_prompt_logging = True - self.prompt_counter = 0 - - def _estimate_tokens(self, text: str) -> int: - """Estimate token count for a given text. - - This is a rough estimation - for accurate counts, you'd need the actual tokenizer. - """ - # Rough estimation: ~4 characters per token for English text - return len(text) // 4 - - def _log_prompt(self, prompt_text: str, prompt_type: str = "evaluation") -> None: - """Log prompt details if enabled.""" - if not self.enable_prompt_logging: - return - - self.prompt_counter += 1 - estimated_tokens = self._estimate_tokens(prompt_text) - - logger.info(f"=== RAGAS PROMPT #{self.prompt_counter} ({prompt_type}) ===") - logger.info(f"Estimated tokens: {estimated_tokens}") - logger.info(f"Character count: {len(prompt_text)}") - logger.info(f"Prompt preview: {prompt_text[:200]}...") - logger.info(f"Full prompt:\n{prompt_text}") - logger.info("=" * 50) def generate_text( self, @@ -123,13 +99,6 @@ async def agenerate_text( ) -> LLMResult: """Asynchronous text generation using Llama Stack inference API.""" try: - # Convert PromptValue to string - prompt_text = prompt.to_string() - - # Log the prompt if enabled - self._log_prompt(prompt_text) - - # Generate responses (handle multiple completions if n > 1) generations = [] llm_output = { "llama_stack_responses": [], @@ -142,29 
+111,24 @@ async def agenerate_text( for _ in range(n): response = await self.inference_api.openai_completion( model=self.model_id, - prompt=prompt_text, + prompt=prompt.to_string(), max_tokens=self.sampling_params.max_tokens - if self.sampling_params and self.sampling_params.max_tokens + if self.sampling_params + else None, + temperature=self.sampling_params.strategy.temperature + if self.sampling_params + and isinstance(self.sampling_params.strategy, TopPSamplingStrategy) else None, - temperature=( - self.sampling_params.strategy.temperature - if self.sampling_params - and hasattr(self.sampling_params.strategy, "temperature") - and self.sampling_params.strategy.temperature - else None - ), - top_p=( - self.sampling_params.strategy.top_p - if self.sampling_params - and hasattr(self.sampling_params.strategy, "top_p") - and self.sampling_params.strategy.top_p - else None - ), - stop=self.sampling_params.stop - if self.sampling_params and self.sampling_params.stop + top_p=self.sampling_params.strategy.top_p + if self.sampling_params + and isinstance(self.sampling_params.strategy, TopPSamplingStrategy) else None, + stop=self.sampling_params.stop if self.sampling_params else None, ) + if not response.choices: + logger.warning("Completion response returned no choices") + # Extract text from OpenAI completion response choice = response.choices[0] if response.choices else None text = choice.text if choice else "" @@ -185,10 +149,6 @@ async def agenerate_text( logger.error(f"LLM generation failed: {str(e)}") raise - def get_temperature(self, n: int) -> float: - """Get temperature based on number of completions.""" - return 0.3 if n > 1 else 1e-8 - # TODO: revisit this # def is_finished(self, response: LLMResult) -> bool: # """ diff --git a/src/llama_stack_provider_ragas/remote/kubeflow/components.py b/src/llama_stack_provider_ragas/remote/kubeflow/components.py index f09eb071..e6539bab 100644 --- a/src/llama_stack_provider_ragas/remote/kubeflow/components.py +++ 
b/src/llama_stack_provider_ragas/remote/kubeflow/components.py @@ -91,6 +91,7 @@ def run_ragas_evaluation( import logging import pandas as pd + from llama_stack.apis.inference import SamplingParams from ragas import EvaluationDataset, evaluate from ragas.dataset_schema import EvaluationResult from ragas.run_config import RunConfig @@ -105,10 +106,14 @@ def run_ragas_evaluation( logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) + # sampling_params is passed in from the benchmark config as model_dump() + # we need to convert it back to a SamplingParams object + sampling_params_obj = SamplingParams.model_validate(sampling_params) + llm = LlamaStackRemoteLLM( base_url=llama_stack_base_url, model_id=model, - sampling_params=sampling_params, + sampling_params=sampling_params_obj, ) embeddings = LlamaStackRemoteEmbeddings( base_url=llama_stack_base_url, diff --git a/src/llama_stack_provider_ragas/remote/ragas_remote_eval.py b/src/llama_stack_provider_ragas/remote/ragas_remote_eval.py index a07d434f..ce75a51c 100644 --- a/src/llama_stack_provider_ragas/remote/ragas_remote_eval.py +++ b/src/llama_stack_provider_ragas/remote/ragas_remote_eval.py @@ -169,18 +169,6 @@ async def run_eval( async def _submit_to_kubeflow(self, job: RagasEvaluationJob) -> str: from .kubeflow.pipeline import ragas_evaluation_pipeline - # temperature = ( - # job.runtime_config.benchmark_config.sampling_params.temperature - # if job.runtime_config.benchmark_config.sampling_params.strategy.type - # == "top_p" - # else None - # ) - - # sampling_params = { - # "temperature": temperature, - # "max_tokens": job.runtime_config.benchmark_config.sampling_params.max_tokens, - # } - pipeline_args = { "dataset_id": job.runtime_config.benchmark.dataset_id, "llama_stack_base_url": job.runtime_config.kubeflow_config.llama_stack_url, @@ -190,7 +178,9 @@ async def _submit_to_kubeflow(self, job: RagasEvaluationJob) -> str: else -1 ), "model": job.runtime_config.benchmark_config.eval_candidate.model, - 
"sampling_params": job.runtime_config.benchmark_config.eval_candidate.sampling_params.model_dump(), + "sampling_params": job.runtime_config.benchmark_config.eval_candidate.sampling_params.model_dump( + exclude_none=True + ), "embedding_model": self.config.embedding_model, "metrics": job.runtime_config.benchmark.scoring_functions, "result_s3_location": job.result_s3_location, diff --git a/src/llama_stack_provider_ragas/remote/wrappers_remote.py b/src/llama_stack_provider_ragas/remote/wrappers_remote.py index c8323b80..7f4059e4 100644 --- a/src/llama_stack_provider_ragas/remote/wrappers_remote.py +++ b/src/llama_stack_provider_ragas/remote/wrappers_remote.py @@ -2,8 +2,9 @@ from langchain_core.language_models.llms import Generation, LLMResult from langchain_core.prompt_values import PromptValue -from llama_stack_client import AsyncLlamaStackClient, LlamaStackClient -from llama_stack_client.types import CompletionResponse +from llama_stack.apis.inference import SamplingParams, TopPSamplingStrategy +from llama_stack_client import AsyncLlamaStackClient, LlamaStackClient, omit +from llama_stack_client.types.completion_create_response import CompletionCreateResponse from llama_stack_client.types.create_embeddings_response import CreateEmbeddingsResponse from ragas.embeddings.base import BaseRagasEmbeddings from ragas.llms.base import BaseRagasLLM @@ -95,7 +96,7 @@ def __init__( self, base_url: str, model_id: str, - sampling_params: dict | None = None, + sampling_params: SamplingParams | None = None, run_config: RunConfig | None = None, multiple_completion_supported: bool = True, ): @@ -106,42 +107,7 @@ def __init__( self.sync_client = LlamaStackClient(base_url=base_url) self.async_client = AsyncLlamaStackClient(base_url=base_url) self.model_id = model_id - self.sampling_params = sampling_params or {} - self.enable_prompt_logging = True - self.prompt_counter = 0 - - def _estimate_tokens(self, text: str) -> int: - """Estimate token count for a given text.""" - # Rough 
estimation: ~4 characters per token for English text - return len(text) // 4 - - def _log_prompt(self, prompt_text: str, prompt_type: str = "evaluation") -> None: - """Log prompt details if enabled.""" - if not self.enable_prompt_logging: - return - - self.prompt_counter += 1 - estimated_tokens = self._estimate_tokens(prompt_text) - - logger.info(f"=== RAGAS PROMPT #{self.prompt_counter} ({prompt_type}) ===") - logger.info(f"Estimated tokens: {estimated_tokens}") - logger.info(f"Character count: {len(prompt_text)}") - logger.info(f"Prompt preview: {prompt_text[:200]}...") - logger.info(f"Full prompt:\n{prompt_text}") - logger.info("=" * 50) - - def _prepare_generation_params( - self, prompt: PromptValue, temperature: float | None = None - ) -> tuple[str, dict]: - """Prepare prompt text and sampling parameters for generation.""" - prompt_text = prompt.to_string() - self._log_prompt(prompt_text) - - sampling_params = self.sampling_params.copy() - if temperature is not None: - sampling_params["temperature"] = temperature - - return prompt_text, sampling_params + self.sampling_params = sampling_params def _initialize_llm_output(self) -> dict: """Create initial LLM output structure.""" @@ -152,12 +118,14 @@ def _initialize_llm_output(self) -> dict: } def _update_llm_output( - self, response: CompletionResponse, llm_output: dict + self, response: CompletionCreateResponse, llm_output: dict ) -> None: """Process completion response and update llm_output.""" + choice = response.choices[0] if response.choices else None llama_stack_info = { - "stop_reason": response.stop_reason, - "content_length": len(response.content), + "stop_reason": choice.finish_reason if choice else None, + "content_length": len(choice.text) if choice else 0, + "has_logprobs": choice.logprobs is not None if choice else False, } llm_output["llama_stack_responses"].append(llama_stack_info) @@ -171,21 +139,44 @@ def generate_text( ) -> LLMResult: """Synchronous text generation using Llama Stack client.""" 
try: - prompt_text, sampling_params = self._prepare_generation_params( - prompt, temperature - ) generations = [] llm_output = self._initialize_llm_output() + # sampling params for this generation should be set via the benchmark config + # we will ignore the temperature and stop params passed in here for _ in range(n): - response: CompletionResponse = self.sync_client.inference.completion( - content=prompt_text, - model_id=self.model_id, - sampling_params=sampling_params if sampling_params else None, + response: CompletionCreateResponse = ( + self.sync_client.completions.create( + model=self.model_id, + prompt=prompt.to_string(), + max_tokens=self.sampling_params.max_tokens + if self.sampling_params + else omit, + temperature=self.sampling_params.strategy.temperature + if self.sampling_params + and isinstance( + self.sampling_params.strategy, TopPSamplingStrategy + ) + else omit, + top_p=self.sampling_params.strategy.top_p + if self.sampling_params + and isinstance( + self.sampling_params.strategy, TopPSamplingStrategy + ) + else omit, + stop=self.sampling_params.stop + if self.sampling_params + else omit, + ) ) + if not response.choices: + logger.warning("Completion response returned no choices") + self._update_llm_output(response, llm_output) - generations.append(Generation(text=response.content)) + choice = response.choices[0] if response.choices else None + text = choice.text if choice else "" + generations.append(Generation(text=text)) return LLMResult(generations=[generations], llm_output=llm_output) @@ -203,23 +194,44 @@ async def agenerate_text( ) -> LLMResult: """Asynchronous text generation using Llama Stack client.""" try: - prompt_text, sampling_params = self._prepare_generation_params( - prompt, temperature - ) generations = [] llm_output = self._initialize_llm_output() + # sampling params for this generation should be set via the benchmark config + # we will ignore the temperature and stop params passed in here for _ in range(n): - response: 
CompletionResponse = ( - await self.async_client.inference.completion( - content=prompt_text, - model_id=self.model_id, - sampling_params=sampling_params if sampling_params else None, + response: CompletionCreateResponse = ( + await self.async_client.completions.create( + model=self.model_id, + prompt=prompt.to_string(), + max_tokens=self.sampling_params.max_tokens + if self.sampling_params + else omit, + temperature=self.sampling_params.strategy.temperature + if self.sampling_params + and isinstance( + self.sampling_params.strategy, TopPSamplingStrategy + ) + else omit, + top_p=self.sampling_params.strategy.top_p + if self.sampling_params + and isinstance( + self.sampling_params.strategy, TopPSamplingStrategy + ) + else omit, + stop=self.sampling_params.stop + if self.sampling_params + else omit, ) ) + if not response.choices: + logger.warning("Completion response returned no choices") + self._update_llm_output(response, llm_output) - generations.append(Generation(text=response.content)) + choice = response.choices[0] if response.choices else None + text = choice.text if choice else "" + generations.append(Generation(text=text)) return LLMResult(generations=[generations], llm_output=llm_output) diff --git a/tests/conftest.py b/tests/conftest.py index 79326331..e841e814 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,7 +1,9 @@ import os +from datetime import datetime import pytest from dotenv import load_dotenv +from llama_stack.apis.inference import SamplingParams, TopPSamplingStrategy from llama_stack_client import LlamaStackClient from ragas import EvaluationDataset @@ -14,6 +16,11 @@ load_dotenv() +@pytest.fixture +def unique_timestamp(): + return datetime.now().strftime("%Y%m%d_%H%M%S") + + @pytest.fixture def lls_client(): return LlamaStackClient( @@ -33,7 +40,11 @@ def embedding_model(): @pytest.fixture def sampling_params(): - return {"temperature": 0.1, "max_tokens": 100} + return SamplingParams( + 
strategy=TopPSamplingStrategy(temperature=0.1, top_p=0.95), + max_tokens=100, + stop=None, + ) @pytest.fixture diff --git a/tests/test_inline_evaluation.py b/tests/test_inline_evaluation.py index 95fc0103..bb0fe234 100644 --- a/tests/test_inline_evaluation.py +++ b/tests/test_inline_evaluation.py @@ -1,19 +1,14 @@ """Integration tests for Ragas evaluation using Llama Stack eval API (inline).""" -from datetime import datetime - import pytest from ragas.metrics import answer_relevancy +from llama_stack_provider_ragas.constants import PROVIDER_ID_INLINE + # mark as integration, see tool.pytest.ini_options in pyproject.toml pytestmark = pytest.mark.integration_test -@pytest.fixture -def unique_timestamp(): - return datetime.now().strftime("%Y%m%d_%H%M%S") - - @pytest.mark.parametrize( "metric_to_test", [ @@ -41,7 +36,7 @@ def test_single_metric_evaluation( benchmark_id=benchmark_id, dataset_id=dataset_id, scoring_functions=[metric_to_test.name], - provider_id="trustyai_ragas", + provider_id=PROVIDER_ID_INLINE, ) job = lls_client.alpha.eval.run_eval( @@ -50,7 +45,7 @@ def test_single_metric_evaluation( "eval_candidate": { "type": "model", "model": model, - "sampling_params": sampling_params, + "sampling_params": sampling_params.model_dump(exclude_none=True), }, "scoring_params": {}, }, diff --git a/tests/test_kubeflow_integration.py b/tests/test_kubeflow_integration.py index 15d15da3..62da8c37 100644 --- a/tests/test_kubeflow_integration.py +++ b/tests/test_kubeflow_integration.py @@ -172,7 +172,7 @@ def pipeline_ragas_evaluation(): run_fake_ragas_evaluation( input_dataset=test_dataset.output, model=model, - sampling_params=sampling_params, + sampling_params=sampling_params.model_dump(exclude_none=True), embedding_model=remote_eval_config.embedding_model, metrics=[metric_to_test.name], llama_stack_base_url=remote_eval_config.kubeflow_config.llama_stack_url, @@ -195,18 +195,31 @@ def pipeline_ragas_evaluation(): ], # , context_precision, faithfulness, context_recall] ) 
def test_full_pipeline( - kf_client, remote_eval_config, metric_to_test, model, sampling_params + lls_client, + kf_client, + raw_evaluation_data, + remote_eval_config, + metric_to_test, + model, + sampling_params, + unique_timestamp, ): - embedding_model = remote_eval_config.embedding_model + dataset_id = f"test_ragas_dataset_remote_{unique_timestamp}" + lls_client.datasets.register( + dataset_id=dataset_id, + purpose="eval/question-answer", + source={"type": "rows", "rows": raw_evaluation_data}, + metadata={"provider_id": "localfs"}, + ) run_result = kf_client.create_run_from_pipeline_func( pipeline_func=ragas_evaluation_pipeline, namespace=remote_eval_config.kubeflow_config.namespace, arguments={ "model": model, - "dataset_id": "ragas_demo_dataset_remote", # TODO: this will fail if the dataset does not exist - "sampling_params": sampling_params, - "embedding_model": embedding_model, + "dataset_id": dataset_id, + "sampling_params": sampling_params.model_dump(exclude_none=True), + "embedding_model": remote_eval_config.embedding_model, "metrics": [metric_to_test.name], "llama_stack_base_url": remote_eval_config.kubeflow_config.llama_stack_url, "s3_credentials_secret_name": remote_eval_config.kubeflow_config.s3_credentials_secret_name, From b8cfeae1d68b21a71c1b00245621fac40c3b55f6 Mon Sep 17 00:00:00 2001 From: Diego Maniloff Date: Mon, 13 Oct 2025 12:12:38 -0400 Subject: [PATCH 03/22] Conditionally include remote if dependencies are installed. 
--- src/llama_stack_provider_ragas/provider.py | 35 ++++++++++++++++++---- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/src/llama_stack_provider_ragas/provider.py b/src/llama_stack_provider_ragas/provider.py index 9507a577..98a5ef43 100644 --- a/src/llama_stack_provider_ragas/provider.py +++ b/src/llama_stack_provider_ragas/provider.py @@ -1,12 +1,37 @@ +import logging + from .inline.provider import get_provider_spec as get_inline_provider_spec -from .remote.provider import get_provider_spec as get_remote_provider_spec + +logger = logging.getLogger(__name__) + + +def _has_remote_dependencies() -> bool: + """Check if remote dependencies are available.""" + try: + import kfp # noqa: F401 + import kubernetes # noqa: F401 + import s3fs # noqa: F401 + from kfp import kubernetes as kfp_kubernetes # noqa: F401 + + return True + except ImportError: + return False def get_provider_spec(): - return [ - get_inline_provider_spec(), - get_remote_provider_spec(), - ] + providers = [get_inline_provider_spec()] + + if _has_remote_dependencies(): + from .remote.provider import get_provider_spec as get_remote_provider_spec + + providers.append(get_remote_provider_spec()) + else: + logger.info( + "Remote provider dependencies not found, returning inline provider only. " + "Enable remote evaluation with 'pip install llama-stack-provider-ragas[remote]'." + ) + + return providers __all__ = ["get_provider_spec"] From d6e39eda76ecb04d75979e17d9239d9c561356fe Mon Sep 17 00:00:00 2001 From: Diego Maniloff Date: Mon, 13 Oct 2025 12:14:27 -0400 Subject: [PATCH 04/22] Update basic_demo notebook and constants for Ragas evaluation. 
--- demos/basic_demo.ipynb | 315 +++++++++++++++++--- src/llama_stack_provider_ragas/constants.py | 4 +- 2 files changed, 276 insertions(+), 43 deletions(-) diff --git a/demos/basic_demo.ipynb b/demos/basic_demo.ipynb index 4192b65d..2c312647 100644 --- a/demos/basic_demo.ipynb +++ b/demos/basic_demo.ipynb @@ -38,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ @@ -48,8 +48,11 @@ "import os\n", "from datetime import datetime\n", "\n", + "import pandas as pd\n", "from llama_stack_client import LlamaStackClient\n", - "from rich.pretty import pprint" + "from rich.pretty import pprint\n", + "\n", + "from llama_stack_provider_ragas.constants import PROVIDER_ID_INLINE, PROVIDER_ID_REMOTE" ] }, { @@ -71,7 +74,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: GET https://f19083a01dc3.ngrok-free.app/v1/models \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: GET https://7554874a6543.ngrok-free.app/v1/models \"HTTP/1.1 200 OK\"\n" ] } ], @@ -81,7 +84,7 @@ "client = LlamaStackClient(base_url=os.getenv(\"KUBEFLOW_LLAMA_STACK_URL\"))\n", "available_models = client.models.list()\n", "assert any(model.model_type == \"llm\" for model in available_models)\n", - "assert any(model.model_type == \"embedding\" for model in available_models)\n" + "assert any(model.model_type == \"embedding\" for model in available_models)" ] }, { @@ -146,7 +149,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: DELETE https://f19083a01dc3.ngrok-free.app/v1beta/datasets/ragas_demo_dataset \"HTTP/1.1 404 Not Found\"\n" + "INFO:httpx:HTTP Request: DELETE https://7554874a6543.ngrok-free.app/v1beta/datasets/ragas_demo_dataset \"HTTP/1.1 204 No Content\"\n" ] } ], @@ -156,7 +159,7 @@ "try:\n", " client.datasets.unregister(dataset_id)\n", "except Exception:\n", - " pass\n" + " pass" ] }, { @@ -168,7 +171,7 @@ "name": "stderr", "output_type": "stream", "text": [ 
- "INFO:httpx:HTTP Request: POST https://f19083a01dc3.ngrok-free.app/v1beta/datasets \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: POST https://7554874a6543.ngrok-free.app/v1beta/datasets \"HTTP/1.1 200 OK\"\n" ] }, { @@ -181,7 +184,7 @@ "│ │ 'description': 'Sample RAG evaluation dataset for Ragas demo',\n", "│ │ 'size': 3.0,\n", "│ │ 'format': 'ragas',\n", - "│ │ 'created_at': '2025-10-10T11:55:31.946291'\n", + "│ │ 'created_at': '2025-10-13T11:01:35.695103'\n", "},\n", "provider_id='localfs',\n", "purpose='eval/question-answer',\n", @@ -226,7 +229,7 @@ "\u001b[2;32m│ │ \u001b[0m\u001b[32m'description'\u001b[0m: \u001b[32m'Sample RAG evaluation dataset for Ragas demo'\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[32m'size'\u001b[0m: \u001b[1;36m3.0\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[32m'format'\u001b[0m: \u001b[32m'ragas'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[32m'created_at'\u001b[0m: \u001b[32m'2025-10-10T11:55:31.946291'\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'created_at'\u001b[0m: \u001b[32m'2025-10-13T11:01:35.695103'\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'localfs'\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mpurpose\u001b[0m=\u001b[32m'eval/question-answer'\u001b[0m,\n", @@ -301,8 +304,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: POST https://f19083a01dc3.ngrok-free.app/v1alpha/eval/benchmarks \"HTTP/1.1 200 OK\"\n", - "INFO:httpx:HTTP Request: POST https://f19083a01dc3.ngrok-free.app/v1alpha/eval/benchmarks \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: POST https://7554874a6543.ngrok-free.app/v1alpha/eval/benchmarks \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://7554874a6543.ngrok-free.app/v1alpha/eval/benchmarks \"HTTP/1.1 200 OK\"\n" ] }, { @@ -322,8 +325,8 @@ "source": [ "# comment out the provider you don't want to run\n", "benchmarks_providers = [\n", - " 
(\"ragas_demo_benchmark__inline\", \"trustyai_ragas_inline\"),\n", - " (\"ragas_demo_benchmark__remote\", \"trustyai_ragas_remote\"),\n", + " (\"ragas_demo_benchmark__inline\", PROVIDER_ID_INLINE),\n", + " (\"ragas_demo_benchmark__remote\", PROVIDER_ID_REMOTE),\n", "]\n", "\n", "for benchmark_id, provider_id in benchmarks_providers:\n", @@ -358,7 +361,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: GET https://f19083a01dc3.ngrok-free.app/v1alpha/eval/benchmarks \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: GET https://7554874a6543.ngrok-free.app/v1alpha/eval/benchmarks \"HTTP/1.1 200 OK\"\n" ] }, { @@ -436,14 +439,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: POST https://f19083a01dc3.ngrok-free.app/v1alpha/eval/benchmarks/ragas_demo_benchmark__remote/jobs \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: POST https://7554874a6543.ngrok-free.app/v1alpha/eval/benchmarks/ragas_demo_benchmark__remote/jobs \"HTTP/1.1 200 OK\"\n" ] }, { "data": { "text/html": [ "
Job(\n",
-       "job_id='3345bb0f-23e7-46f8-81ca-3f9469de3928',\n",
+       "job_id='765563f1-e68a-4122-b50a-d643c071c98f',\n",
        "status='in_progress',\n",
        "runtime_config={\n",
        "│   │   'benchmark_config': {\n",
@@ -483,18 +486,18 @@
        "│   │   │   's3_credentials_secret_name': 'aws-credentials',\n",
        "│   │   │   'pipelines_endpoint': 'https://ds-pipeline-dspa-ragas-eval-v3.apps.rosa.diego-ragas.zch5.p3.openshiftapps.com',\n",
        "│   │   │   'namespace': 'ragas-eval-v3',\n",
-       "│   │   │   'llama_stack_url': 'https://f19083a01dc3.ngrok-free.app',\n",
-       "│   │   │   'base_image': None\n",
+       "│   │   │   'llama_stack_url': 'https://7554874a6543.ngrok-free.app',\n",
+       "│   │   │   'base_image': 'quay.io/diegosquayorg/my-ragas-provider-image:latest'\n",
        "│   │   }\n",
        "},\n",
-       "kubeflow_run_id='d1d10645-c339-4276-962c-bb006fee15e7',\n",
+       "kubeflow_run_id='670d2656-aafe-4ac2-a277-be1324733fe8',\n",
        "result=None\n",
        ")\n",
        "
\n" ], "text/plain": [ "\u001b[1;35mJob\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mjob_id\u001b[0m=\u001b[32m'3345bb0f-23e7-46f8-81ca-3f9469de3928'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mjob_id\u001b[0m=\u001b[32m'765563f1-e68a-4122-b50a-d643c071c98f'\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mstatus\u001b[0m=\u001b[32m'in_progress'\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mruntime_config\u001b[0m=\u001b[1m{\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[32m'benchmark_config'\u001b[0m: \u001b[1m{\u001b[0m\n", @@ -534,11 +537,11 @@ "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m's3_credentials_secret_name'\u001b[0m: \u001b[32m'aws-credentials'\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'pipelines_endpoint'\u001b[0m: \u001b[32m'https://ds-pipeline-dspa-ragas-eval-v3.apps.rosa.diego-ragas.zch5.p3.openshiftapps.com'\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'namespace'\u001b[0m: \u001b[32m'ragas-eval-v3'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'llama_stack_url'\u001b[0m: \u001b[32m'https://f19083a01dc3.ngrok-free.app'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'base_image'\u001b[0m: \u001b[3;35mNone\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'llama_stack_url'\u001b[0m: \u001b[32m'https://7554874a6543.ngrok-free.app'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'base_image'\u001b[0m: \u001b[32m'quay.io/diegosquayorg/my-ragas-provider-image:latest'\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mkubeflow_run_id\u001b[0m=\u001b[32m'd1d10645-c339-4276-962c-bb006fee15e7'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mkubeflow_run_id\u001b[0m=\u001b[32m'670d2656-aafe-4ac2-a277-be1324733fe8'\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mresult\u001b[0m=\u001b[3;35mNone\u001b[0m\n", "\u001b[1m)\u001b[0m\n" ] @@ -576,7 +579,7 @@ "name": "stderr", "output_type": "stream", 
"text": [ - "INFO:httpx:HTTP Request: POST https://f19083a01dc3.ngrok-free.app/v1alpha/eval/benchmarks/ragas_demo_benchmark__inline/jobs \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: POST https://7554874a6543.ngrok-free.app/v1alpha/eval/benchmarks/ragas_demo_benchmark__inline/jobs \"HTTP/1.1 200 OK\"\n" ] }, { @@ -657,7 +660,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: GET https://f19083a01dc3.ngrok-free.app/v1alpha/eval/benchmarks/ragas_demo_benchmark__inline/jobs/0 \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: GET https://7554874a6543.ngrok-free.app/v1alpha/eval/benchmarks/ragas_demo_benchmark__inline/jobs/0 \"HTTP/1.1 200 OK\"\n" ] }, { @@ -720,14 +723,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: GET https://f19083a01dc3.ngrok-free.app/v1alpha/eval/benchmarks/ragas_demo_benchmark__remote/jobs/3345bb0f-23e7-46f8-81ca-3f9469de3928 \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: GET https://7554874a6543.ngrok-free.app/v1alpha/eval/benchmarks/ragas_demo_benchmark__remote/jobs/765563f1-e68a-4122-b50a-d643c071c98f \"HTTP/1.1 200 OK\"\n" ] }, { "data": { "text/html": [ "
Job(\n",
-       "job_id='3345bb0f-23e7-46f8-81ca-3f9469de3928',\n",
+       "job_id='765563f1-e68a-4122-b50a-d643c071c98f',\n",
        "status='in_progress',\n",
        "runtime_config={\n",
        "│   │   'benchmark_config': {\n",
@@ -767,18 +770,18 @@
        "│   │   │   's3_credentials_secret_name': 'aws-credentials',\n",
        "│   │   │   'pipelines_endpoint': 'https://ds-pipeline-dspa-ragas-eval-v3.apps.rosa.diego-ragas.zch5.p3.openshiftapps.com',\n",
        "│   │   │   'namespace': 'ragas-eval-v3',\n",
-       "│   │   │   'llama_stack_url': 'https://f19083a01dc3.ngrok-free.app',\n",
-       "│   │   │   'base_image': None\n",
+       "│   │   │   'llama_stack_url': 'https://7554874a6543.ngrok-free.app',\n",
+       "│   │   │   'base_image': 'quay.io/diegosquayorg/my-ragas-provider-image:latest'\n",
        "│   │   }\n",
        "},\n",
-       "kubeflow_run_id='d1d10645-c339-4276-962c-bb006fee15e7',\n",
+       "kubeflow_run_id='670d2656-aafe-4ac2-a277-be1324733fe8',\n",
        "result=None\n",
        ")\n",
        "
\n" ], "text/plain": [ "\u001b[1;35mJob\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mjob_id\u001b[0m=\u001b[32m'3345bb0f-23e7-46f8-81ca-3f9469de3928'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mjob_id\u001b[0m=\u001b[32m'765563f1-e68a-4122-b50a-d643c071c98f'\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mstatus\u001b[0m=\u001b[32m'in_progress'\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mruntime_config\u001b[0m=\u001b[1m{\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[32m'benchmark_config'\u001b[0m: \u001b[1m{\u001b[0m\n", @@ -818,11 +821,11 @@ "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m's3_credentials_secret_name'\u001b[0m: \u001b[32m'aws-credentials'\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'pipelines_endpoint'\u001b[0m: \u001b[32m'https://ds-pipeline-dspa-ragas-eval-v3.apps.rosa.diego-ragas.zch5.p3.openshiftapps.com'\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'namespace'\u001b[0m: \u001b[32m'ragas-eval-v3'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'llama_stack_url'\u001b[0m: \u001b[32m'https://f19083a01dc3.ngrok-free.app'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'base_image'\u001b[0m: \u001b[3;35mNone\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'llama_stack_url'\u001b[0m: \u001b[32m'https://7554874a6543.ngrok-free.app'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'base_image'\u001b[0m: \u001b[32m'quay.io/diegosquayorg/my-ragas-provider-image:latest'\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mkubeflow_run_id\u001b[0m=\u001b[32m'd1d10645-c339-4276-962c-bb006fee15e7'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mkubeflow_run_id\u001b[0m=\u001b[32m'670d2656-aafe-4ac2-a277-be1324733fe8'\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mresult\u001b[0m=\u001b[3;35mNone\u001b[0m\n", "\u001b[1m)\u001b[0m\n" ] @@ -842,24 +845,96 @@ }, { "cell_type": "code", - "execution_count": 
12, + "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: GET https://f19083a01dc3.ngrok-free.app/v1alpha/eval/benchmarks/ragas_demo_benchmark__remote/jobs/3345bb0f-23e7-46f8-81ca-3f9469de3928/result \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: GET https://7554874a6543.ngrok-free.app/v1alpha/eval/benchmarks/ragas_demo_benchmark__remote/jobs/765563f1-e68a-4122-b50a-d643c071c98f/result \"HTTP/1.1 200 OK\"\n" ] }, { "data": { "text/html": [ - "
EvaluateResponse(generations=[], scores={})\n",
+       "
EvaluateResponse(\n",
+       "generations=[\n",
+       "│   │   {\n",
+       "│   │   │   'user_input': 'What is the capital of France?',\n",
+       "│   │   │   'response': 'The capital of France is Paris.',\n",
+       "│   │   │   'retrieved_contexts': ['Paris is the capital and most populous city of France.'],\n",
+       "│   │   │   'reference': 'Paris'\n",
+       "│   │   },\n",
+       "│   │   {\n",
+       "│   │   │   'user_input': 'Who invented the telephone?',\n",
+       "│   │   │   'response': 'Alexander Graham Bell invented the telephone in 1876.',\n",
+       "│   │   │   'retrieved_contexts': [\n",
+       "│   │   │   │   'Alexander Graham Bell was a Scottish-American inventor who patented the first practical telephone.'\n",
+       "│   │   │   ],\n",
+       "│   │   │   'reference': 'Alexander Graham Bell'\n",
+       "│   │   },\n",
+       "│   │   {\n",
+       "│   │   │   'user_input': 'What is photosynthesis?',\n",
+       "│   │   │   'response': 'Photosynthesis is the process by which plants convert sunlight into energy.',\n",
+       "│   │   │   'retrieved_contexts': [\n",
+       "│   │   │   │   'Photosynthesis is a process used by plants to convert light energy into chemical energy.'\n",
+       "│   │   │   ],\n",
+       "│   │   │   'reference': 'Photosynthesis is the process by which plants and other organisms convert light energy into chemical energy.'\n",
+       "│   │   }\n",
+       "],\n",
+       "scores={\n",
+       "│   │   'answer_relevancy': ScoringResult(\n",
+       "│   │   │   aggregated_results={\n",
+       "│   │   │   │   'average': 0.9028964605666667,\n",
+       "│   │   │   │   'count': 3.0,\n",
+       "│   │   │   │   'min': 0.7873005565000001,\n",
+       "│   │   │   │   'max': 0.9610799629000001\n",
+       "│   │   │   },\n",
+       "│   │   │   score_rows=[{'score': 0.9603088623}, {'score': 0.9610799629000001}, {'score': 0.7873005565000001}]\n",
+       "│   │   )\n",
+       "}\n",
+       ")\n",
        "
\n" ], "text/plain": [ - "\u001b[1;35mEvaluateResponse\u001b[0m\u001b[1m(\u001b[0m\u001b[33mgenerations\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[33mscores\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\u001b[1m)\u001b[0m\n" + "\u001b[1;35mEvaluateResponse\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mgenerations\u001b[0m=\u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: \u001b[32m'What is the capital of France?'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'The capital of France is Paris.'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\u001b[32m'Paris is the capital and most populous city of France.'\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Paris'\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: \u001b[32m'Who invented the telephone?'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'Alexander Graham Bell invented the telephone in 1876.'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'Alexander Graham Bell was a Scottish-American inventor who patented the first practical telephone.'\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Alexander Graham Bell'\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: \u001b[32m'What is photosynthesis?'\u001b[0m,\n", + "\u001b[2;32m│ │ │ 
\u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'Photosynthesis is the process by which plants convert sunlight into energy.'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'Photosynthesis is a process used by plants to convert light energy into chemical energy.'\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Photosynthesis is the process by which plants and other organisms convert light energy into chemical energy.'\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mscores\u001b[0m=\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'answer_relevancy'\u001b[0m: \u001b[1;35mScoringResult\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[33maggregated_results\u001b[0m=\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'average'\u001b[0m: \u001b[1;36m0.9028964605666667\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'count'\u001b[0m: \u001b[1;36m3.0\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'min'\u001b[0m: \u001b[1;36m0.7873005565000001\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'max'\u001b[0m: \u001b[1;36m0.9610799629000001\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mscore_rows\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.9603088623\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.9610799629000001\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.7873005565000001\u001b[0m\u001b[1m}\u001b[0m\u001b[1m]\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2;32m│ 
\u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[1m)\u001b[0m\n" ] }, "metadata": {}, @@ -875,24 +950,94 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: GET https://f19083a01dc3.ngrok-free.app/v1alpha/eval/benchmarks/ragas_demo_benchmark__inline/jobs/0/result \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: GET https://7554874a6543.ngrok-free.app/v1alpha/eval/benchmarks/ragas_demo_benchmark__inline/jobs/0/result \"HTTP/1.1 200 OK\"\n" ] }, { "data": { "text/html": [ - "
EvaluateResponse(generations=[], scores={})\n",
+       "
EvaluateResponse(\n",
+       "generations=[\n",
+       "│   │   {\n",
+       "│   │   │   'user_input': 'What is the capital of France?',\n",
+       "│   │   │   'retrieved_contexts': ['Paris is the capital and most populous city of France.'],\n",
+       "│   │   │   'response': 'The capital of France is Paris.',\n",
+       "│   │   │   'reference': 'Paris'\n",
+       "│   │   },\n",
+       "│   │   {\n",
+       "│   │   │   'user_input': 'Who invented the telephone?',\n",
+       "│   │   │   'retrieved_contexts': [\n",
+       "│   │   │   │   'Alexander Graham Bell was a Scottish-American inventor who patented the first practical telephone.'\n",
+       "│   │   │   ],\n",
+       "│   │   │   'response': 'Alexander Graham Bell invented the telephone in 1876.',\n",
+       "│   │   │   'reference': 'Alexander Graham Bell'\n",
+       "│   │   },\n",
+       "│   │   {\n",
+       "│   │   │   'user_input': 'What is photosynthesis?',\n",
+       "│   │   │   'retrieved_contexts': [\n",
+       "│   │   │   │   'Photosynthesis is a process used by plants to convert light energy into chemical energy.'\n",
+       "│   │   │   ],\n",
+       "│   │   │   'response': 'Photosynthesis is the process by which plants convert sunlight into energy.',\n",
+       "│   │   │   'reference': 'Photosynthesis is the process by which plants and other organisms convert light energy into chemical energy.'\n",
+       "│   │   }\n",
+       "],\n",
+       "scores={\n",
+       "│   │   'answer_relevancy': ScoringResult(\n",
+       "│   │   │   aggregated_results={'answer_relevancy': 0.8827011153807643},\n",
+       "│   │   │   score_rows=[\n",
+       "│   │   │   │   {'score': 0.9603088622890463},\n",
+       "│   │   │   │   {'score': 0.944474574022757},\n",
+       "│   │   │   │   {'score': 0.7433199098304898}\n",
+       "│   │   │   ]\n",
+       "│   │   )\n",
+       "}\n",
+       ")\n",
        "
\n" ], "text/plain": [ - "\u001b[1;35mEvaluateResponse\u001b[0m\u001b[1m(\u001b[0m\u001b[33mgenerations\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[33mscores\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\u001b[1m)\u001b[0m\n" + "\u001b[1;35mEvaluateResponse\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mgenerations\u001b[0m=\u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: \u001b[32m'What is the capital of France?'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\u001b[32m'Paris is the capital and most populous city of France.'\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'The capital of France is Paris.'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Paris'\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: \u001b[32m'Who invented the telephone?'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'Alexander Graham Bell was a Scottish-American inventor who patented the first practical telephone.'\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'Alexander Graham Bell invented the telephone in 1876.'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Alexander Graham Bell'\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: \u001b[32m'What is photosynthesis?'\u001b[0m,\n", + "\u001b[2;32m│ │ │ 
\u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'Photosynthesis is a process used by plants to convert light energy into chemical energy.'\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'Photosynthesis is the process by which plants convert sunlight into energy.'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Photosynthesis is the process by which plants and other organisms convert light energy into chemical energy.'\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mscores\u001b[0m=\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'answer_relevancy'\u001b[0m: \u001b[1;35mScoringResult\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[33maggregated_results\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'answer_relevancy'\u001b[0m: \u001b[1;36m0.8827011153807643\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mscore_rows\u001b[0m=\u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.9603088622890463\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.944474574022757\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.7433199098304898\u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m]\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[1m)\u001b[0m\n" ] }, "metadata": {}, @@ -906,6 +1051,92 @@ "pprint(inline_results)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Inline vs Remote Side-by-side" + ] + 
}, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
inlineremotediff
00.9603090.9603091.095368e-11
10.9444750.9610801.660539e-02
20.7433200.7873014.398065e-02
\n", + "
" + ], + "text/plain": [ + " inline remote diff\n", + "0 0.960309 0.960309 1.095368e-11\n", + "1 0.944475 0.961080 1.660539e-02\n", + "2 0.743320 0.787301 4.398065e-02" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame.from_dict(\n", + " {\n", + " \"inline\": [\n", + " r[\"score\"] for r in inline_results.scores[\"answer_relevancy\"].score_rows\n", + " ],\n", + " \"remote\": [\n", + " r[\"score\"] for r in remote_results.scores[\"answer_relevancy\"].score_rows\n", + " ],\n", + " },\n", + ").assign(diff=lambda df: df[\"remote\"] - df[\"inline\"])" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/src/llama_stack_provider_ragas/constants.py b/src/llama_stack_provider_ragas/constants.py index 800166d2..35b9ea23 100644 --- a/src/llama_stack_provider_ragas/constants.py +++ b/src/llama_stack_provider_ragas/constants.py @@ -5,6 +5,9 @@ faithfulness, ) +PROVIDER_ID_INLINE = "trustyai_ragas_inline" +PROVIDER_ID_REMOTE = "trustyai_ragas_remote" + METRIC_MAPPING = { metric_func.name: metric_func for metric_func in [ @@ -20,7 +23,6 @@ # "rouge_score": RougeScore(), ] } - AVAILABLE_METRICS = list(METRIC_MAPPING.keys()) # Kubeflow ConfigMap keys and defaults for base image resolution From 7c39ed9915c341f4ffe63cce176cfec342f3e6e0 Mon Sep 17 00:00:00 2001 From: Diego Maniloff Date: Mon, 13 Oct 2025 12:14:59 -0400 Subject: [PATCH 05/22] uv sync. 
--- uv.lock | 115 ++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 108 insertions(+), 7 deletions(-) diff --git a/uv.lock b/uv.lock index 491ca3b5..25dbbaf6 100644 --- a/uv.lock +++ b/uv.lock @@ -849,6 +849,47 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e3/a5/6ddab2b4c112be95601c13428db1d8b6608a8b6039816f2ba09c346c08fc/greenlet-3.2.4-cp314-cp314-win_amd64.whl", hash = "sha256:e37ab26028f12dbb0ff65f29a8d3d44a765c61e729647bf2ddfbbed621726f01", size = 303425, upload-time = "2025-08-07T13:32:27.59Z" }, ] +[[package]] +name = "grpcio" +version = "1.75.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9d/f7/8963848164c7604efb3a3e6ee457fdb3a469653e19002bd24742473254f8/grpcio-1.75.1.tar.gz", hash = "sha256:3e81d89ece99b9ace23a6916880baca613c03a799925afb2857887efa8b1b3d2", size = 12731327, upload-time = "2025-09-26T09:03:36.887Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3a/81/42be79e73a50aaa20af66731c2defeb0e8c9008d9935a64dd8ea8e8c44eb/grpcio-1.75.1-cp312-cp312-linux_armv7l.whl", hash = "sha256:7b888b33cd14085d86176b1628ad2fcbff94cfbbe7809465097aa0132e58b018", size = 5668314, upload-time = "2025-09-26T09:01:55.424Z" }, + { url = "https://files.pythonhosted.org/packages/c5/a7/3686ed15822fedc58c22f82b3a7403d9faf38d7c33de46d4de6f06e49426/grpcio-1.75.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:8775036efe4ad2085975531d221535329f5dac99b6c2a854a995456098f99546", size = 11476125, upload-time = "2025-09-26T09:01:57.927Z" }, + { url = "https://files.pythonhosted.org/packages/14/85/21c71d674f03345ab183c634ecd889d3330177e27baea8d5d247a89b6442/grpcio-1.75.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bb658f703468d7fbb5dcc4037c65391b7dc34f808ac46ed9136c24fc5eeb041d", size = 6246335, upload-time = "2025-09-26T09:02:00.76Z" }, + { url = 
"https://files.pythonhosted.org/packages/fd/db/3beb661bc56a385ae4fa6b0e70f6b91ac99d47afb726fe76aaff87ebb116/grpcio-1.75.1-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:4b7177a1cdb3c51b02b0c0a256b0a72fdab719600a693e0e9037949efffb200b", size = 6916309, upload-time = "2025-09-26T09:02:02.894Z" }, + { url = "https://files.pythonhosted.org/packages/1e/9c/eda9fe57f2b84343d44c1b66cf3831c973ba29b078b16a27d4587a1fdd47/grpcio-1.75.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7d4fa6ccc3ec2e68a04f7b883d354d7fea22a34c44ce535a2f0c0049cf626ddf", size = 6435419, upload-time = "2025-09-26T09:02:05.055Z" }, + { url = "https://files.pythonhosted.org/packages/c3/b8/090c98983e0a9d602e3f919a6e2d4e470a8b489452905f9a0fa472cac059/grpcio-1.75.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3d86880ecaeb5b2f0a8afa63824de93adb8ebe4e49d0e51442532f4e08add7d6", size = 7064893, upload-time = "2025-09-26T09:02:07.275Z" }, + { url = "https://files.pythonhosted.org/packages/ec/c0/6d53d4dbbd00f8bd81571f5478d8a95528b716e0eddb4217cc7cb45aae5f/grpcio-1.75.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a8041d2f9e8a742aeae96f4b047ee44e73619f4f9d24565e84d5446c623673b6", size = 8011922, upload-time = "2025-09-26T09:02:09.527Z" }, + { url = "https://files.pythonhosted.org/packages/f2/7c/48455b2d0c5949678d6982c3e31ea4d89df4e16131b03f7d5c590811cbe9/grpcio-1.75.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3652516048bf4c314ce12be37423c79829f46efffb390ad64149a10c6071e8de", size = 7466181, upload-time = "2025-09-26T09:02:12.279Z" }, + { url = "https://files.pythonhosted.org/packages/fd/12/04a0e79081e3170b6124f8cba9b6275871276be06c156ef981033f691880/grpcio-1.75.1-cp312-cp312-win32.whl", hash = "sha256:44b62345d8403975513af88da2f3d5cc76f73ca538ba46596f92a127c2aea945", size = 3938543, upload-time = "2025-09-26T09:02:14.77Z" }, + { url = 
"https://files.pythonhosted.org/packages/5f/d7/11350d9d7fb5adc73d2b0ebf6ac1cc70135577701e607407fe6739a90021/grpcio-1.75.1-cp312-cp312-win_amd64.whl", hash = "sha256:b1e191c5c465fa777d4cafbaacf0c01e0d5278022082c0abbd2ee1d6454ed94d", size = 4641938, upload-time = "2025-09-26T09:02:16.927Z" }, + { url = "https://files.pythonhosted.org/packages/46/74/bac4ab9f7722164afdf263ae31ba97b8174c667153510322a5eba4194c32/grpcio-1.75.1-cp313-cp313-linux_armv7l.whl", hash = "sha256:3bed22e750d91d53d9e31e0af35a7b0b51367e974e14a4ff229db5b207647884", size = 5672779, upload-time = "2025-09-26T09:02:19.11Z" }, + { url = "https://files.pythonhosted.org/packages/a6/52/d0483cfa667cddaa294e3ab88fd2c2a6e9dc1a1928c0e5911e2e54bd5b50/grpcio-1.75.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:5b8f381eadcd6ecaa143a21e9e80a26424c76a0a9b3d546febe6648f3a36a5ac", size = 11470623, upload-time = "2025-09-26T09:02:22.117Z" }, + { url = "https://files.pythonhosted.org/packages/cf/e4/d1954dce2972e32384db6a30273275e8c8ea5a44b80347f9055589333b3f/grpcio-1.75.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5bf4001d3293e3414d0cf99ff9b1139106e57c3a66dfff0c5f60b2a6286ec133", size = 6248838, upload-time = "2025-09-26T09:02:26.426Z" }, + { url = "https://files.pythonhosted.org/packages/06/43/073363bf63826ba8077c335d797a8d026f129dc0912b69c42feaf8f0cd26/grpcio-1.75.1-cp313-cp313-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:9f82ff474103e26351dacfe8d50214e7c9322960d8d07ba7fa1d05ff981c8b2d", size = 6922663, upload-time = "2025-09-26T09:02:28.724Z" }, + { url = "https://files.pythonhosted.org/packages/c2/6f/076ac0df6c359117676cacfa8a377e2abcecec6a6599a15a672d331f6680/grpcio-1.75.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0ee119f4f88d9f75414217823d21d75bfe0e6ed40135b0cbbfc6376bc9f7757d", size = 6436149, upload-time = "2025-09-26T09:02:30.971Z" }, + { url = 
"https://files.pythonhosted.org/packages/6b/27/1d08824f1d573fcb1fa35ede40d6020e68a04391709939e1c6f4193b445f/grpcio-1.75.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:664eecc3abe6d916fa6cf8dd6b778e62fb264a70f3430a3180995bf2da935446", size = 7067989, upload-time = "2025-09-26T09:02:33.233Z" }, + { url = "https://files.pythonhosted.org/packages/c6/98/98594cf97b8713feb06a8cb04eeef60b4757e3e2fb91aa0d9161da769843/grpcio-1.75.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:c32193fa08b2fbebf08fe08e84f8a0aad32d87c3ad42999c65e9449871b1c66e", size = 8010717, upload-time = "2025-09-26T09:02:36.011Z" }, + { url = "https://files.pythonhosted.org/packages/8c/7e/bb80b1bba03c12158f9254762cdf5cced4a9bc2e8ed51ed335915a5a06ef/grpcio-1.75.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5cebe13088b9254f6e615bcf1da9131d46cfa4e88039454aca9cb65f639bd3bc", size = 7463822, upload-time = "2025-09-26T09:02:38.26Z" }, + { url = "https://files.pythonhosted.org/packages/23/1c/1ea57fdc06927eb5640f6750c697f596f26183573069189eeaf6ef86ba2d/grpcio-1.75.1-cp313-cp313-win32.whl", hash = "sha256:4b4c678e7ed50f8ae8b8dbad15a865ee73ce12668b6aaf411bf3258b5bc3f970", size = 3938490, upload-time = "2025-09-26T09:02:40.268Z" }, + { url = "https://files.pythonhosted.org/packages/4b/24/fbb8ff1ccadfbf78ad2401c41aceaf02b0d782c084530d8871ddd69a2d49/grpcio-1.75.1-cp313-cp313-win_amd64.whl", hash = "sha256:5573f51e3f296a1bcf71e7a690c092845fb223072120f4bdb7a5b48e111def66", size = 4642538, upload-time = "2025-09-26T09:02:42.519Z" }, + { url = "https://files.pythonhosted.org/packages/f2/1b/9a0a5cecd24302b9fdbcd55d15ed6267e5f3d5b898ff9ac8cbe17ee76129/grpcio-1.75.1-cp314-cp314-linux_armv7l.whl", hash = "sha256:c05da79068dd96723793bffc8d0e64c45f316248417515f28d22204d9dae51c7", size = 5673319, upload-time = "2025-09-26T09:02:44.742Z" }, + { url = 
"https://files.pythonhosted.org/packages/c6/ec/9d6959429a83fbf5df8549c591a8a52bb313976f6646b79852c4884e3225/grpcio-1.75.1-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:06373a94fd16ec287116a825161dca179a0402d0c60674ceeec8c9fba344fe66", size = 11480347, upload-time = "2025-09-26T09:02:47.539Z" }, + { url = "https://files.pythonhosted.org/packages/09/7a/26da709e42c4565c3d7bf999a9569da96243ce34a8271a968dee810a7cf1/grpcio-1.75.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4484f4b7287bdaa7a5b3980f3c7224c3c622669405d20f69549f5fb956ad0421", size = 6254706, upload-time = "2025-09-26T09:02:50.4Z" }, + { url = "https://files.pythonhosted.org/packages/f1/08/dcb26a319d3725f199c97e671d904d84ee5680de57d74c566a991cfab632/grpcio-1.75.1-cp314-cp314-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:2720c239c1180eee69f7883c1d4c83fc1a495a2535b5fa322887c70bf02b16e8", size = 6922501, upload-time = "2025-09-26T09:02:52.711Z" }, + { url = "https://files.pythonhosted.org/packages/78/66/044d412c98408a5e23cb348845979a2d17a2e2b6c3c34c1ec91b920f49d0/grpcio-1.75.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:07a554fa31c668cf0e7a188678ceeca3cb8fead29bbe455352e712ec33ca701c", size = 6437492, upload-time = "2025-09-26T09:02:55.542Z" }, + { url = "https://files.pythonhosted.org/packages/4e/9d/5e3e362815152aa1afd8b26ea613effa005962f9da0eec6e0e4527e7a7d1/grpcio-1.75.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:3e71a2105210366bfc398eef7f57a664df99194f3520edb88b9c3a7e46ee0d64", size = 7081061, upload-time = "2025-09-26T09:02:58.261Z" }, + { url = "https://files.pythonhosted.org/packages/1e/1a/46615682a19e100f46e31ddba9ebc297c5a5ab9ddb47b35443ffadb8776c/grpcio-1.75.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:8679aa8a5b67976776d3c6b0521e99d1c34db8a312a12bcfd78a7085cb9b604e", size = 8010849, upload-time = "2025-09-26T09:03:00.548Z" }, + { url = 
"https://files.pythonhosted.org/packages/67/8e/3204b94ac30b0f675ab1c06540ab5578660dc8b690db71854d3116f20d00/grpcio-1.75.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:aad1c774f4ebf0696a7f148a56d39a3432550612597331792528895258966dc0", size = 7464478, upload-time = "2025-09-26T09:03:03.096Z" }, + { url = "https://files.pythonhosted.org/packages/b7/97/2d90652b213863b2cf466d9c1260ca7e7b67a16780431b3eb1d0420e3d5b/grpcio-1.75.1-cp314-cp314-win32.whl", hash = "sha256:62ce42d9994446b307649cb2a23335fa8e927f7ab2cbf5fcb844d6acb4d85f9c", size = 4012672, upload-time = "2025-09-26T09:03:05.477Z" }, + { url = "https://files.pythonhosted.org/packages/f9/df/e2e6e9fc1c985cd1a59e6996a05647c720fe8a03b92f5ec2d60d366c531e/grpcio-1.75.1-cp314-cp314-win_amd64.whl", hash = "sha256:f86e92275710bea3000cb79feca1762dc0ad3b27830dd1a74e82ab321d4ee464", size = 4772475, upload-time = "2025-09-26T09:03:07.661Z" }, +] + [[package]] name = "h11" version = "0.16.0" @@ -1405,8 +1446,8 @@ dependencies = [ [[package]] name = "llama-stack-client" -version = "0.2.23" -source = { registry = "https://pypi.org/simple" } +version = "0.3.0a4" +source = { git = "https://github.com/llamastack/llama-stack-client-python.git#43443fe86d23a2a7e7d4e753c2fecc364dea2f7c" } dependencies = [ { name = "anyio" }, { name = "click" }, @@ -1424,10 +1465,6 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/9f/8f/306d5fcf2f97b3a6251219b03c194836a2ff4e0fcc8146c9970e50a72cd3/llama_stack_client-0.2.23.tar.gz", hash = "sha256:68f34e8ac8eea6a73ed9d4977d849992b2d8bd835804d770a11843431cd5bf74", size = 322288, upload-time = "2025-09-26T21:11:08.342Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/fa/75/3eb58e092a681804013dbec7b7f549d18f55acf6fd6e6b27de7e249766d8/llama_stack_client-0.2.23-py3-none-any.whl", hash = "sha256:eee42c74eee8f218f9455e5a06d5d4be43f8a8c82a7937ef51ce367f916df847", size = 379809, upload-time = 
"2025-09-26T21:11:06.856Z" }, -] [[package]] name = "llama-stack-provider-ragas" @@ -1436,6 +1473,7 @@ source = { editable = "." } dependencies = [ { name = "greenlet" }, { name = "llama-stack" }, + { name = "llama-stack-client" }, { name = "pandas" }, { name = "pyarrow" }, { name = "ragas" }, @@ -1445,11 +1483,15 @@ dependencies = [ [package.optional-dependencies] dev = [ + { name = "aiosqlite" }, { name = "ipykernel" }, { name = "kfp" }, { name = "kfp-kubernetes" }, { name = "kubernetes" }, { name = "mypy" }, + { name = "ollama" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-exporter-otlp" }, { name = "pre-commit" }, { name = "pytest" }, { name = "pytest-asyncio" }, @@ -1457,6 +1499,14 @@ dev = [ { name = "rich" }, { name = "ruff" }, { name = "s3fs" }, + { name = "uvicorn" }, +] +distro = [ + { name = "aiosqlite" }, + { name = "ollama" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-exporter-otlp" }, + { name = "uvicorn" }, ] remote = [ { name = "kfp" }, @@ -1467,14 +1517,20 @@ remote = [ [package.metadata] requires-dist = [ + { name = "aiosqlite", marker = "extra == 'distro'" }, { name = "greenlet", specifier = "==3.2.4" }, { name = "ipykernel", marker = "extra == 'dev'" }, { name = "kfp", marker = "extra == 'remote'", specifier = ">=2.5.0" }, { name = "kfp-kubernetes", marker = "extra == 'remote'", specifier = ">=2.0.0" }, { name = "kubernetes", marker = "extra == 'remote'", specifier = ">=30.0.0" }, { name = "llama-stack", git = "https://github.com/llamastack/llama-stack.git" }, + { name = "llama-stack-client", git = "https://github.com/llamastack/llama-stack-client-python.git" }, + { name = "llama-stack-provider-ragas", extras = ["distro"], marker = "extra == 'dev'" }, { name = "llama-stack-provider-ragas", extras = ["remote"], marker = "extra == 'dev'" }, { name = "mypy", marker = "extra == 'dev'" }, + { name = "ollama", marker = "extra == 'distro'" }, + { name = "opentelemetry-api", marker = "extra == 'distro'" }, + { name = 
"opentelemetry-exporter-otlp", marker = "extra == 'distro'" }, { name = "pandas", specifier = "==2.3.0" }, { name = "pre-commit", marker = "extra == 'dev'" }, { name = "pyarrow", specifier = "==20.0.0" }, @@ -1487,8 +1543,9 @@ requires-dist = [ { name = "ruff", marker = "extra == 'dev'" }, { name = "s3fs", marker = "extra == 'remote'", specifier = ">=2024.12.0" }, { name = "setuptools-scm" }, + { name = "uvicorn", marker = "extra == 'distro'" }, ] -provides-extras = ["remote", "dev"] +provides-extras = ["remote", "distro", "dev"] [[package]] name = "markdown-it-py" @@ -1761,6 +1818,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df838baa98fa71844d84283302f7604ed565dafe5a6b5041a/oauthlib-3.3.1-py3-none-any.whl", hash = "sha256:88119c938d2b8fb88561af5f6ee0eec8cc8d552b7bb1f712743136eb7523b7a1", size = 160065, upload-time = "2025-06-19T22:48:06.508Z" }, ] +[[package]] +name = "ollama" +version = "0.6.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "httpx" }, + { name = "pydantic" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d6/47/f9ee32467fe92744474a8c72e138113f3b529fc266eea76abfdec9a33f3b/ollama-0.6.0.tar.gz", hash = "sha256:da2b2d846b5944cfbcee1ca1e6ee0585f6c9d45a2fe9467cbcd096a37383da2f", size = 50811, upload-time = "2025-09-24T22:46:02.417Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b5/c1/edc9f41b425ca40b26b7c104c5f6841a4537bb2552bfa6ca66e81405bb95/ollama-0.6.0-py3-none-any.whl", hash = "sha256:534511b3ccea2dff419ae06c3b58d7f217c55be7897c8ce5868dfb6b219cf7a0", size = 14130, upload-time = "2025-09-24T22:46:01.19Z" }, +] + [[package]] name = "openai" version = "1.109.1" @@ -1793,6 +1863,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1d/5a/3f8d078dbf55d18442f6a2ecedf6786d81d7245844b2b20ce2b8ad6f0307/opentelemetry_api-1.35.0-py3-none-any.whl", hash = "sha256:c4ea7e258a244858daf18474625e9cc0149b8ee354f37843415771a40c25ee06", size = 65566, 
upload-time = "2025-07-11T12:23:07.944Z" }, ] +[[package]] +name = "opentelemetry-exporter-otlp" +version = "1.35.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-exporter-otlp-proto-grpc" }, + { name = "opentelemetry-exporter-otlp-proto-http" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2e/2e/63718faa67b17f449a7fb7efdc7125a408cbe5d8c0bb35f423f2776d60b5/opentelemetry_exporter_otlp-1.35.0.tar.gz", hash = "sha256:f94feff09b3524df867c7876b79c96cef20068106cb5efe55340e8d08192c8a4", size = 6142, upload-time = "2025-07-11T12:23:30.128Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cf/db/2da28358d3101ca936c1643becbb4ebd69e9e48acf27f153d735a4813c6b/opentelemetry_exporter_otlp-1.35.0-py3-none-any.whl", hash = "sha256:8e6bb9025f6238db7d69bba7ee37c77e4858d0a1ff22a9e126f7c9e017e83afe", size = 7016, upload-time = "2025-07-11T12:23:10.679Z" }, +] + [[package]] name = "opentelemetry-exporter-otlp-proto-common" version = "1.35.0" @@ -1805,6 +1888,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5a/2c/e31dd3c719bff87fa77391eb7f38b1430d22868c52312cba8aad60f280e5/opentelemetry_exporter_otlp_proto_common-1.35.0-py3-none-any.whl", hash = "sha256:863465de697ae81279ede660f3918680b4480ef5f69dcdac04f30722ed7b74cc", size = 18349, upload-time = "2025-07-11T12:23:11.713Z" }, ] +[[package]] +name = "opentelemetry-exporter-otlp-proto-grpc" +version = "1.35.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "googleapis-common-protos" }, + { name = "grpcio" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-exporter-otlp-proto-common" }, + { name = "opentelemetry-proto" }, + { name = "opentelemetry-sdk" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/20/de/222e4f2f8cd39250991f84d76b661534aef457cafc6a3eb3fcd513627698/opentelemetry_exporter_otlp_proto_grpc-1.35.0.tar.gz", hash = 
"sha256:ac4c2c3aa5674642db0df0091ab43ec08bbd91a9be469c8d9b18923eb742b9cc", size = 23794, upload-time = "2025-07-11T12:23:31.662Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f4/a6/3f60a77279e6a3dc21fc076dcb51be159a633b0bba5cba9fb804062a9332/opentelemetry_exporter_otlp_proto_grpc-1.35.0-py3-none-any.whl", hash = "sha256:ee31203eb3e50c7967b8fa71db366cc355099aca4e3726e489b248cdb2fd5a62", size = 18846, upload-time = "2025-07-11T12:23:12.957Z" }, +] + [[package]] name = "opentelemetry-exporter-otlp-proto-http" version = "1.35.0" From ee7c63eca9db117e17134072ec912fd00f4b5c52 Mon Sep 17 00:00:00 2001 From: Diego Maniloff Date: Mon, 13 Oct 2025 12:45:28 -0400 Subject: [PATCH 06/22] Use constants for provider types in inline and remote providers. --- src/llama_stack_provider_ragas/inline/provider.py | 4 +++- src/llama_stack_provider_ragas/remote/provider.py | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/llama_stack_provider_ragas/inline/provider.py b/src/llama_stack_provider_ragas/inline/provider.py index c289097e..d48a4518 100644 --- a/src/llama_stack_provider_ragas/inline/provider.py +++ b/src/llama_stack_provider_ragas/inline/provider.py @@ -1,10 +1,12 @@ from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec +from ..constants import PROVIDER_ID_INLINE + def get_provider_spec() -> ProviderSpec: return InlineProviderSpec( api=Api.eval, - provider_type="inline::trustyai_ragas_inline", + provider_type=f"inline::{PROVIDER_ID_INLINE}", pip_packages=["ragas==0.3.0"], config_class="llama_stack_provider_ragas.config.RagasProviderInlineConfig", module="llama_stack_provider_ragas.inline", diff --git a/src/llama_stack_provider_ragas/remote/provider.py b/src/llama_stack_provider_ragas/remote/provider.py index 63c8e5d1..4d3eda04 100644 --- a/src/llama_stack_provider_ragas/remote/provider.py +++ b/src/llama_stack_provider_ragas/remote/provider.py @@ -4,12 +4,14 @@ RemoteProviderSpec, ) +from ..constants 
import PROVIDER_ID_REMOTE + def get_provider_spec() -> ProviderSpec: return RemoteProviderSpec( api=Api.eval, - provider_type="remote::trustyai_ragas_remote", - adapter_type="trustyai_ragas_remote", + provider_type=f"remote::{PROVIDER_ID_REMOTE}", + adapter_type=PROVIDER_ID_REMOTE, module="llama_stack_provider_ragas.remote", pip_packages=[ "ragas==0.3.0", From 8314f45f29d4faa964c63d4caaa607bcde8413ae Mon Sep 17 00:00:00 2001 From: Diego Maniloff Date: Mon, 13 Oct 2025 13:34:38 -0400 Subject: [PATCH 07/22] Update README & docs. --- README.md | 40 +++++++++++--------- docs/modules/ROOT/pages/index.adoc | 4 +- docs/modules/ROOT/pages/inline-provider.adoc | 4 +- docs/modules/ROOT/pages/remote-provider.adoc | 6 +-- 4 files changed, 29 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 9ba17cb6..2642691e 100644 --- a/README.md +++ b/README.md @@ -14,8 +14,8 @@ This repository implements [Ragas](https://github.com/explodinggradients/ragas) The goal is to provide all of Ragas' evaluation functionality over Llama Stack's eval API, while leveraging the Llama Stack's built-in APIs for inference (llms and embeddings), datasets, and benchmarks. There are two versions of the provider: -- `inline`: runs the Ragas evaluation in the same process as the Llama Stack server. -- `remote`: runs the Ragas evaluation in a remote process, using Kubeflow Pipelines. +- `inline`: runs the Ragas evaluation in the same process as the Llama Stack server. This is always available with the base installation. +- `remote`: runs the Ragas evaluation in a remote process, using Kubeflow Pipelines. Only available when remote dependencies are installed with `pip install llama-stack-provider-ragas[remote]`. ## Prerequisites - Python 3.12 @@ -41,12 +41,29 @@ There are two versions of the provider: ``` - The sample LS distributions (one for inline and one for remote provider) is a simple LS distribution that uses Ollama for inference and embeddings. 
See the provider-specific sections below for setup and run commands. -### Remote provider (default) +### Inline provider (default with base installation) + +Create a `.env` file with the required environment variable: +```bash +EMBEDDING_MODEL=ollama/all-minilm:l6-v2 +``` + +Run the server: +```bash +dotenv run uv run llama stack run distribution/run.yaml +``` + +### Remote provider (requires optional dependencies) + +First install the remote dependencies: +```bash +uv pip install -e ".[remote]" +``` Create a `.env` file with the following: ```bash # Required for both inline and remote -EMBEDDING_MODEL=all-MiniLM-L6-v2 +EMBEDDING_MODEL=ollama/all-minilm:l6-v2 # Required for remote provider KUBEFLOW_LLAMA_STACK_URL= @@ -73,22 +90,9 @@ Where: Run the server: ```bash -dotenv run uv run llama stack run distribution/run-remote.yaml -``` - -### Inline provider (need to specify `.inline` in the module name) - -Create a `.env` file with the required environment variable: -```bash -EMBEDDING_MODEL=all-MiniLM-L6-v2 -``` - -Run the server: -```bash -dotenv run uv run llama stack run distribution/run-inline.yaml +dotenv run uv run llama stack run distribution/run.yaml ``` -You will notice that `run-inline.yaml` file has the module name as `llama_stack_provider_ragas.inline`, in order to specify the inline provider. ## Usage See the demos in the `demos` directory. diff --git a/docs/modules/ROOT/pages/index.adoc b/docs/modules/ROOT/pages/index.adoc index 32baa94a..ffbe99e1 100644 --- a/docs/modules/ROOT/pages/index.adoc +++ b/docs/modules/ROOT/pages/index.adoc @@ -15,8 +15,8 @@ The goal is to provide all of Ragas' evaluation functionality over Llama Stack's There are two versions of the provider: -* `remote`: runs the Ragas evaluation in a remote process, using Kubeflow Pipelines. This is the *default* when using the module-based import. -* `inline`: runs the Ragas evaluation in the same process as the Llama Stack server. 
+* `inline`: runs the Ragas evaluation in the same process as the Llama Stack server. This is always available with the base installation. +* `remote`: runs the Ragas evaluation in a remote process, using Kubeflow Pipelines. Only available when remote dependencies are installed with `pip install llama-stack-provider-ragas[remote]`. == Getting Started diff --git a/docs/modules/ROOT/pages/inline-provider.adoc b/docs/modules/ROOT/pages/inline-provider.adoc index 6ed27b16..49a8bcb8 100644 --- a/docs/modules/ROOT/pages/inline-provider.adoc +++ b/docs/modules/ROOT/pages/inline-provider.adoc @@ -139,8 +139,8 @@ The inline provider is setup in the following lines of the `run-inline.yaml`: [,yaml] ---- eval: - - provider_id: trustyai_ragas - provider_type: inline::trustyai_ragas + - provider_id: trustyai_ragas_inline + provider_type: inline::trustyai_ragas_inline module: llama_stack_provider_ragas.inline config: embedding_model: ${env.EMBEDDING_MODEL} diff --git a/docs/modules/ROOT/pages/remote-provider.adoc b/docs/modules/ROOT/pages/remote-provider.adoc index f9f98cd8..93e1d56e 100644 --- a/docs/modules/ROOT/pages/remote-provider.adoc +++ b/docs/modules/ROOT/pages/remote-provider.adoc @@ -195,9 +195,9 @@ The remote provider is setup in the following lines of the `run-remote.yaml`: [,yaml] ---- eval: - - provider_id: trustyai_ragas - provider_type: remote::trustyai_ragas - module: llama_stack_provider_ragas.remote # can also just be llama_stack_provider_ragas and it will default to remote + - provider_id: trustyai_ragas_remote + provider_type: remote::trustyai_ragas_remote + module: llama_stack_provider_ragas.remote config: embedding_model: ${env.EMBEDDING_MODEL} kubeflow_config: From 7f5a56c1486e3491ce03dd96b6850fa2066d164b Mon Sep 17 00:00:00 2001 From: Diego Maniloff Date: Tue, 14 Oct 2025 09:07:11 -0400 Subject: [PATCH 08/22] Update pandas dependency to version <2.3.0 in pyproject.toml and uv.lock files. 
--- pyproject.toml | 2 +- uv.lock | 48 ++++++++++++++++++++++++------------------------ 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 670310aa..33e954bd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ dependencies = [ "llama-stack-client @ git+https://github.com/llamastack/llama-stack-client-python.git", "greenlet==3.2.4", # inline/files/localfs errors saying greenlet not found "ragas==0.3.0", - "pandas==2.3.0", + "pandas<2.3.0", "pyarrow==20.0.0", "requests==2.32.4", ] diff --git a/uv.lock b/uv.lock index 25dbbaf6..ffbb00de 100644 --- a/uv.lock +++ b/uv.lock @@ -1531,7 +1531,7 @@ requires-dist = [ { name = "ollama", marker = "extra == 'distro'" }, { name = "opentelemetry-api", marker = "extra == 'distro'" }, { name = "opentelemetry-exporter-otlp", marker = "extra == 'distro'" }, - { name = "pandas", specifier = "==2.3.0" }, + { name = "pandas", specifier = "<2.3.0" }, { name = "pre-commit", marker = "extra == 'dev'" }, { name = "pyarrow", specifier = "==20.0.0" }, { name = "pytest", marker = "extra == 'dev'" }, @@ -2012,7 +2012,7 @@ wheels = [ [[package]] name = "pandas" -version = "2.3.0" +version = "2.2.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy" }, @@ -2020,28 +2020,28 @@ dependencies = [ { name = "pytz" }, { name = "tzdata" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/72/51/48f713c4c728d7c55ef7444ba5ea027c26998d96d1a40953b346438602fc/pandas-2.3.0.tar.gz", hash = "sha256:34600ab34ebf1131a7613a260a61dbe8b62c188ec0ea4c296da7c9a06b004133", size = 4484490, upload-time = "2025-06-05T03:27:54.133Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/94/46/24192607058dd607dbfacdd060a2370f6afb19c2ccb617406469b9aeb8e7/pandas-2.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2eb4728a18dcd2908c7fccf74a982e241b467d178724545a48d0caf534b38ebf", size = 11573865, upload-time = "2025-06-05T03:26:46.774Z" }, - { url = 
"https://files.pythonhosted.org/packages/9f/cc/ae8ea3b800757a70c9fdccc68b67dc0280a6e814efcf74e4211fd5dea1ca/pandas-2.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b9d8c3187be7479ea5c3d30c32a5d73d62a621166675063b2edd21bc47614027", size = 10702154, upload-time = "2025-06-05T16:50:14.439Z" }, - { url = "https://files.pythonhosted.org/packages/d8/ba/a7883d7aab3d24c6540a2768f679e7414582cc389876d469b40ec749d78b/pandas-2.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9ff730713d4c4f2f1c860e36c005c7cefc1c7c80c21c0688fd605aa43c9fcf09", size = 11262180, upload-time = "2025-06-05T16:50:17.453Z" }, - { url = "https://files.pythonhosted.org/packages/01/a5/931fc3ad333d9d87b10107d948d757d67ebcfc33b1988d5faccc39c6845c/pandas-2.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba24af48643b12ffe49b27065d3babd52702d95ab70f50e1b34f71ca703e2c0d", size = 11991493, upload-time = "2025-06-05T03:26:51.813Z" }, - { url = "https://files.pythonhosted.org/packages/d7/bf/0213986830a92d44d55153c1d69b509431a972eb73f204242988c4e66e86/pandas-2.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:404d681c698e3c8a40a61d0cd9412cc7364ab9a9cc6e144ae2992e11a2e77a20", size = 12470733, upload-time = "2025-06-06T00:00:18.651Z" }, - { url = "https://files.pythonhosted.org/packages/a4/0e/21eb48a3a34a7d4bac982afc2c4eb5ab09f2d988bdf29d92ba9ae8e90a79/pandas-2.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6021910b086b3ca756755e86ddc64e0ddafd5e58e076c72cb1585162e5ad259b", size = 13212406, upload-time = "2025-06-05T03:26:55.992Z" }, - { url = "https://files.pythonhosted.org/packages/1f/d9/74017c4eec7a28892d8d6e31ae9de3baef71f5a5286e74e6b7aad7f8c837/pandas-2.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:094e271a15b579650ebf4c5155c05dcd2a14fd4fdd72cf4854b2f7ad31ea30be", size = 10976199, upload-time = "2025-06-05T03:26:59.594Z" }, - { url = 
"https://files.pythonhosted.org/packages/d3/57/5cb75a56a4842bbd0511c3d1c79186d8315b82dac802118322b2de1194fe/pandas-2.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2c7e2fc25f89a49a11599ec1e76821322439d90820108309bf42130d2f36c983", size = 11518913, upload-time = "2025-06-05T03:27:02.757Z" }, - { url = "https://files.pythonhosted.org/packages/05/01/0c8785610e465e4948a01a059562176e4c8088aa257e2e074db868f86d4e/pandas-2.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c6da97aeb6a6d233fb6b17986234cc723b396b50a3c6804776351994f2a658fd", size = 10655249, upload-time = "2025-06-05T16:50:20.17Z" }, - { url = "https://files.pythonhosted.org/packages/e8/6a/47fd7517cd8abe72a58706aab2b99e9438360d36dcdb052cf917b7bf3bdc/pandas-2.3.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb32dc743b52467d488e7a7c8039b821da2826a9ba4f85b89ea95274f863280f", size = 11328359, upload-time = "2025-06-05T03:27:06.431Z" }, - { url = "https://files.pythonhosted.org/packages/2a/b3/463bfe819ed60fb7e7ddffb4ae2ee04b887b3444feee6c19437b8f834837/pandas-2.3.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:213cd63c43263dbb522c1f8a7c9d072e25900f6975596f883f4bebd77295d4f3", size = 12024789, upload-time = "2025-06-05T03:27:09.875Z" }, - { url = "https://files.pythonhosted.org/packages/04/0c/e0704ccdb0ac40aeb3434d1c641c43d05f75c92e67525df39575ace35468/pandas-2.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1d2b33e68d0ce64e26a4acc2e72d747292084f4e8db4c847c6f5f6cbe56ed6d8", size = 12480734, upload-time = "2025-06-06T00:00:22.246Z" }, - { url = "https://files.pythonhosted.org/packages/e9/df/815d6583967001153bb27f5cf075653d69d51ad887ebbf4cfe1173a1ac58/pandas-2.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:430a63bae10b5086995db1b02694996336e5a8ac9a96b4200572b413dfdfccb9", size = 13223381, upload-time = "2025-06-05T03:27:15.641Z" }, - { url = 
"https://files.pythonhosted.org/packages/79/88/ca5973ed07b7f484c493e941dbff990861ca55291ff7ac67c815ce347395/pandas-2.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:4930255e28ff5545e2ca404637bcc56f031893142773b3468dc021c6c32a1390", size = 10970135, upload-time = "2025-06-05T03:27:24.131Z" }, - { url = "https://files.pythonhosted.org/packages/24/fb/0994c14d1f7909ce83f0b1fb27958135513c4f3f2528bde216180aa73bfc/pandas-2.3.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:f925f1ef673b4bd0271b1809b72b3270384f2b7d9d14a189b12b7fc02574d575", size = 12141356, upload-time = "2025-06-05T03:27:34.547Z" }, - { url = "https://files.pythonhosted.org/packages/9d/a2/9b903e5962134497ac4f8a96f862ee3081cb2506f69f8e4778ce3d9c9d82/pandas-2.3.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e78ad363ddb873a631e92a3c063ade1ecfb34cae71e9a2be6ad100f875ac1042", size = 11474674, upload-time = "2025-06-05T03:27:39.448Z" }, - { url = "https://files.pythonhosted.org/packages/81/3a/3806d041bce032f8de44380f866059437fb79e36d6b22c82c187e65f765b/pandas-2.3.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:951805d146922aed8357e4cc5671b8b0b9be1027f0619cea132a9f3f65f2f09c", size = 11439876, upload-time = "2025-06-05T03:27:43.652Z" }, - { url = "https://files.pythonhosted.org/packages/15/aa/3fc3181d12b95da71f5c2537c3e3b3af6ab3a8c392ab41ebb766e0929bc6/pandas-2.3.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a881bc1309f3fce34696d07b00f13335c41f5f5a8770a33b09ebe23261cfc67", size = 11966182, upload-time = "2025-06-05T03:27:47.652Z" }, - { url = "https://files.pythonhosted.org/packages/37/e7/e12f2d9b0a2c4a2cc86e2aabff7ccfd24f03e597d770abfa2acd313ee46b/pandas-2.3.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e1991bbb96f4050b09b5f811253c4f3cf05ee89a589379aa36cd623f21a31d6f", size = 12547686, upload-time = "2025-06-06T00:00:26.142Z" }, - { url = 
"https://files.pythonhosted.org/packages/39/c2/646d2e93e0af70f4e5359d870a63584dacbc324b54d73e6b3267920ff117/pandas-2.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:bb3be958022198531eb7ec2008cfc78c5b1eed51af8600c6c5d9160d89d8d249", size = 13231847, upload-time = "2025-06-05T03:27:51.465Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/9c/d6/9f8431bacc2e19dca897724cd097b1bb224a6ad5433784a44b587c7c13af/pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667", size = 4399213, upload-time = "2024-09-20T13:10:04.827Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/17/a3/fb2734118db0af37ea7433f57f722c0a56687e14b14690edff0cdb4b7e58/pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9", size = 12529893, upload-time = "2024-09-20T13:09:09.655Z" }, + { url = "https://files.pythonhosted.org/packages/e1/0c/ad295fd74bfac85358fd579e271cded3ac969de81f62dd0142c426b9da91/pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4", size = 11363475, upload-time = "2024-09-20T13:09:14.718Z" }, + { url = "https://files.pythonhosted.org/packages/c6/2a/4bba3f03f7d07207481fed47f5b35f556c7441acddc368ec43d6643c5777/pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3", size = 15188645, upload-time = "2024-09-20T19:02:03.88Z" }, + { url = "https://files.pythonhosted.org/packages/38/f8/d8fddee9ed0d0c0f4a2132c1dfcf0e3e53265055da8df952a53e7eaf178c/pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319", size = 12739445, upload-time = "2024-09-20T13:09:17.621Z" }, + { url = 
"https://files.pythonhosted.org/packages/20/e8/45a05d9c39d2cea61ab175dbe6a2de1d05b679e8de2011da4ee190d7e748/pandas-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8", size = 16359235, upload-time = "2024-09-20T19:02:07.094Z" }, + { url = "https://files.pythonhosted.org/packages/1d/99/617d07a6a5e429ff90c90da64d428516605a1ec7d7bea494235e1c3882de/pandas-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a", size = 14056756, upload-time = "2024-09-20T13:09:20.474Z" }, + { url = "https://files.pythonhosted.org/packages/29/d4/1244ab8edf173a10fd601f7e13b9566c1b525c4f365d6bee918e68381889/pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13", size = 11504248, upload-time = "2024-09-20T13:09:23.137Z" }, + { url = "https://files.pythonhosted.org/packages/64/22/3b8f4e0ed70644e85cfdcd57454686b9057c6c38d2f74fe4b8bc2527214a/pandas-2.2.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015", size = 12477643, upload-time = "2024-09-20T13:09:25.522Z" }, + { url = "https://files.pythonhosted.org/packages/e4/93/b3f5d1838500e22c8d793625da672f3eec046b1a99257666c94446969282/pandas-2.2.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3508d914817e153ad359d7e069d752cdd736a247c322d932eb89e6bc84217f28", size = 11281573, upload-time = "2024-09-20T13:09:28.012Z" }, + { url = "https://files.pythonhosted.org/packages/f5/94/6c79b07f0e5aab1dcfa35a75f4817f5c4f677931d4234afcd75f0e6a66ca/pandas-2.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22a9d949bfc9a502d320aa04e5d02feab689d61da4e7764b62c30b991c42c5f0", size = 15196085, upload-time = "2024-09-20T19:02:10.451Z" }, + { url = 
"https://files.pythonhosted.org/packages/e8/31/aa8da88ca0eadbabd0a639788a6da13bb2ff6edbbb9f29aa786450a30a91/pandas-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24", size = 12711809, upload-time = "2024-09-20T13:09:30.814Z" }, + { url = "https://files.pythonhosted.org/packages/ee/7c/c6dbdb0cb2a4344cacfb8de1c5808ca885b2e4dcfde8008266608f9372af/pandas-2.2.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:800250ecdadb6d9c78eae4990da62743b857b470883fa27f652db8bdde7f6659", size = 16356316, upload-time = "2024-09-20T19:02:13.825Z" }, + { url = "https://files.pythonhosted.org/packages/57/b7/8b757e7d92023b832869fa8881a992696a0bfe2e26f72c9ae9f255988d42/pandas-2.2.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6374c452ff3ec675a8f46fd9ab25c4ad0ba590b71cf0656f8b6daa5202bca3fb", size = 14022055, upload-time = "2024-09-20T13:09:33.462Z" }, + { url = "https://files.pythonhosted.org/packages/3b/bc/4b18e2b8c002572c5a441a64826252ce5da2aa738855747247a971988043/pandas-2.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:61c5ad4043f791b61dd4752191d9f07f0ae412515d59ba8f005832a532f8736d", size = 11481175, upload-time = "2024-09-20T13:09:35.871Z" }, + { url = "https://files.pythonhosted.org/packages/76/a3/a5d88146815e972d40d19247b2c162e88213ef51c7c25993942c39dbf41d/pandas-2.2.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3b71f27954685ee685317063bf13c7709a7ba74fc996b84fc6821c59b0f06468", size = 12615650, upload-time = "2024-09-20T13:09:38.685Z" }, + { url = "https://files.pythonhosted.org/packages/9c/8c/f0fd18f6140ddafc0c24122c8a964e48294acc579d47def376fef12bcb4a/pandas-2.2.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:38cf8125c40dae9d5acc10fa66af8ea6fdf760b2714ee482ca691fc66e6fcb18", size = 11290177, upload-time = "2024-09-20T13:09:41.141Z" }, + { url = 
"https://files.pythonhosted.org/packages/ed/f9/e995754eab9c0f14c6777401f7eece0943840b7a9fc932221c19d1abee9f/pandas-2.2.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba96630bc17c875161df3818780af30e43be9b166ce51c9a18c1feae342906c2", size = 14651526, upload-time = "2024-09-20T19:02:16.905Z" }, + { url = "https://files.pythonhosted.org/packages/25/b0/98d6ae2e1abac4f35230aa756005e8654649d305df9a28b16b9ae4353bff/pandas-2.2.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db71525a1538b30142094edb9adc10be3f3e176748cd7acc2240c2f2e5aa3a4", size = 11871013, upload-time = "2024-09-20T13:09:44.39Z" }, + { url = "https://files.pythonhosted.org/packages/cc/57/0f72a10f9db6a4628744c8e8f0df4e6e21de01212c7c981d31e50ffc8328/pandas-2.2.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d", size = 15711620, upload-time = "2024-09-20T19:02:20.639Z" }, + { url = "https://files.pythonhosted.org/packages/ab/5f/b38085618b950b79d2d9164a711c52b10aefc0ae6833b96f626b7021b2ed/pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a", size = 13098436, upload-time = "2024-09-20T13:09:48.112Z" }, ] [[package]] From 73e04967b414f1e0b44ecbf4274f826d9a7f5cd8 Mon Sep 17 00:00:00 2001 From: Diego Maniloff Date: Tue, 14 Oct 2025 10:23:55 -0400 Subject: [PATCH 09/22] Pin to LLS 0.2.23. 
--- demos/basic_demo.ipynb | 223 ++++++++++++++++++++++---------- pyproject.toml | 4 +- tests/test_inline_evaluation.py | 2 +- uv.lock | 24 ++-- 4 files changed, 168 insertions(+), 85 deletions(-) diff --git a/demos/basic_demo.ipynb b/demos/basic_demo.ipynb index 2c312647..3669c7db 100644 --- a/demos/basic_demo.ipynb +++ b/demos/basic_demo.ipynb @@ -38,9 +38,18 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/dmanilof/src/llama-stack-provider-ragas/.venv/lib/python3.13/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], "source": [ "# Install dev packages if not already installed\n", "# !uv pip install -e \".[dev]\"\n", @@ -74,7 +83,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: GET https://7554874a6543.ngrok-free.app/v1/models \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: GET https://3cf60cfd902b.ngrok-free.app/v1/models \"HTTP/1.1 200 OK\"\n" ] } ], @@ -149,7 +158,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: DELETE https://7554874a6543.ngrok-free.app/v1beta/datasets/ragas_demo_dataset \"HTTP/1.1 204 No Content\"\n" + "INFO:httpx:HTTP Request: DELETE https://3cf60cfd902b.ngrok-free.app/v1/datasets/ragas_demo_dataset \"HTTP/1.1 204 No Content\"\n" ] } ], @@ -171,7 +180,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: POST https://7554874a6543.ngrok-free.app/v1beta/datasets \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: POST https://3cf60cfd902b.ngrok-free.app/v1/datasets \"HTTP/1.1 200 OK\"\n" ] }, { @@ -184,7 +193,7 @@ "│ │ 'description': 'Sample RAG evaluation dataset for Ragas demo',\n", "│ │ 'size': 
3.0,\n", "│ │ 'format': 'ragas',\n", - "│ │ 'created_at': '2025-10-13T11:01:35.695103'\n", + "│ │ 'created_at': '2025-10-14T10:19:34.479954'\n", "},\n", "provider_id='localfs',\n", "purpose='eval/question-answer',\n", @@ -229,7 +238,7 @@ "\u001b[2;32m│ │ \u001b[0m\u001b[32m'description'\u001b[0m: \u001b[32m'Sample RAG evaluation dataset for Ragas demo'\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[32m'size'\u001b[0m: \u001b[1;36m3.0\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[32m'format'\u001b[0m: \u001b[32m'ragas'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[32m'created_at'\u001b[0m: \u001b[32m'2025-10-13T11:01:35.695103'\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'created_at'\u001b[0m: \u001b[32m'2025-10-14T10:19:34.479954'\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'localfs'\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mpurpose\u001b[0m=\u001b[32m'eval/question-answer'\u001b[0m,\n", @@ -304,8 +313,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: POST https://7554874a6543.ngrok-free.app/v1alpha/eval/benchmarks \"HTTP/1.1 200 OK\"\n", - "INFO:httpx:HTTP Request: POST https://7554874a6543.ngrok-free.app/v1alpha/eval/benchmarks \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: POST https://3cf60cfd902b.ngrok-free.app/v1/eval/benchmarks \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://3cf60cfd902b.ngrok-free.app/v1/eval/benchmarks \"HTTP/1.1 200 OK\"\n" ] }, { @@ -361,7 +370,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: GET https://7554874a6543.ngrok-free.app/v1alpha/eval/benchmarks \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: GET https://3cf60cfd902b.ngrok-free.app/v1/eval/benchmarks \"HTTP/1.1 200 OK\"\n" ] }, { @@ -369,6 +378,33 @@ "text/html": [ "
[\n",
        "Benchmark(\n",
+       "│   │   dataset_id='test_ragas_dataset_20251014_101301',\n",
+       "│   │   identifier='test_ragas_benchmark_20251014_101301',\n",
+       "│   │   metadata={},\n",
+       "│   │   provider_id='trustyai_ragas_inline',\n",
+       "│   │   scoring_functions=['answer_relevancy'],\n",
+       "│   │   type='benchmark',\n",
+       "│   │   provider_resource_id='test_ragas_benchmark_20251014_101301'\n",
+       "),\n",
+       "Benchmark(\n",
+       "│   │   dataset_id='test_ragas_dataset_20251014_101326',\n",
+       "│   │   identifier='test_ragas_benchmark_20251014_101326',\n",
+       "│   │   metadata={},\n",
+       "│   │   provider_id='trustyai_ragas_inline',\n",
+       "│   │   scoring_functions=['answer_relevancy'],\n",
+       "│   │   type='benchmark',\n",
+       "│   │   provider_resource_id='test_ragas_benchmark_20251014_101326'\n",
+       "),\n",
+       "Benchmark(\n",
+       "│   │   dataset_id='test_ragas_dataset_20251014_101345',\n",
+       "│   │   identifier='test_ragas_benchmark_20251014_101345',\n",
+       "│   │   metadata={},\n",
+       "│   │   provider_id='trustyai_ragas_inline',\n",
+       "│   │   scoring_functions=['answer_relevancy'],\n",
+       "│   │   type='benchmark',\n",
+       "│   │   provider_resource_id='test_ragas_benchmark_20251014_101345'\n",
+       "),\n",
+       "Benchmark(\n",
        "│   │   dataset_id='ragas_demo_dataset',\n",
        "│   │   identifier='ragas_demo_benchmark__inline',\n",
        "│   │   metadata={},\n",
@@ -385,6 +421,15 @@
        "│   │   scoring_functions=['answer_relevancy'],\n",
        "│   │   type='benchmark',\n",
        "│   │   provider_resource_id='ragas_demo_benchmark__remote'\n",
+       "),\n",
+       "Benchmark(\n",
+       "│   │   dataset_id='test_ragas_dataset_20251014_101830',\n",
+       "│   │   identifier='test_ragas_benchmark_20251014_101830',\n",
+       "│   │   metadata={},\n",
+       "│   │   provider_id='trustyai_ragas_inline',\n",
+       "│   │   scoring_functions=['answer_relevancy'],\n",
+       "│   │   type='benchmark',\n",
+       "│   │   provider_resource_id='test_ragas_benchmark_20251014_101830'\n",
        ")\n",
        "]\n",
        "
\n" @@ -392,6 +437,33 @@ "text/plain": [ "\u001b[1m[\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[1;35mBenchmark\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mdataset_id\u001b[0m=\u001b[32m'test_ragas_dataset_20251014_101301'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'test_ragas_benchmark_20251014_101301'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'trustyai_ragas_inline'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mscoring_functions\u001b[0m=\u001b[1m[\u001b[0m\u001b[32m'answer_relevancy'\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'benchmark'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'test_ragas_benchmark_20251014_101301'\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[1;35mBenchmark\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mdataset_id\u001b[0m=\u001b[32m'test_ragas_dataset_20251014_101326'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'test_ragas_benchmark_20251014_101326'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'trustyai_ragas_inline'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mscoring_functions\u001b[0m=\u001b[1m[\u001b[0m\u001b[32m'answer_relevancy'\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'benchmark'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'test_ragas_benchmark_20251014_101326'\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ 
\u001b[0m\u001b[1;35mBenchmark\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mdataset_id\u001b[0m=\u001b[32m'test_ragas_dataset_20251014_101345'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'test_ragas_benchmark_20251014_101345'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'trustyai_ragas_inline'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mscoring_functions\u001b[0m=\u001b[1m[\u001b[0m\u001b[32m'answer_relevancy'\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'benchmark'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'test_ragas_benchmark_20251014_101345'\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[1;35mBenchmark\u001b[0m\u001b[1m(\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[33mdataset_id\u001b[0m=\u001b[32m'ragas_demo_dataset'\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'ragas_demo_benchmark__inline'\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n", @@ -408,6 +480,15 @@ "\u001b[2;32m│ │ \u001b[0m\u001b[33mscoring_functions\u001b[0m=\u001b[1m[\u001b[0m\u001b[32m'answer_relevancy'\u001b[0m\u001b[1m]\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'benchmark'\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'ragas_demo_benchmark__remote'\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[1;35mBenchmark\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mdataset_id\u001b[0m=\u001b[32m'test_ragas_dataset_20251014_101830'\u001b[0m,\n", + "\u001b[2;32m│ │ 
\u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'test_ragas_benchmark_20251014_101830'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'trustyai_ragas_inline'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mscoring_functions\u001b[0m=\u001b[1m[\u001b[0m\u001b[32m'answer_relevancy'\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'benchmark'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'test_ragas_benchmark_20251014_101830'\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m\n", "\u001b[1m]\u001b[0m\n" ] @@ -439,14 +520,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: POST https://7554874a6543.ngrok-free.app/v1alpha/eval/benchmarks/ragas_demo_benchmark__remote/jobs \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: POST https://3cf60cfd902b.ngrok-free.app/v1/eval/benchmarks/ragas_demo_benchmark__remote/jobs \"HTTP/1.1 200 OK\"\n" ] }, { "data": { "text/html": [ "
Job(\n",
-       "job_id='765563f1-e68a-4122-b50a-d643c071c98f',\n",
+       "job_id='b1e4951b-342c-4fb2-9e52-26d3b3df4b77',\n",
        "status='in_progress',\n",
        "runtime_config={\n",
        "│   │   'benchmark_config': {\n",
@@ -486,18 +567,18 @@
        "│   │   │   's3_credentials_secret_name': 'aws-credentials',\n",
        "│   │   │   'pipelines_endpoint': 'https://ds-pipeline-dspa-ragas-eval-v3.apps.rosa.diego-ragas.zch5.p3.openshiftapps.com',\n",
        "│   │   │   'namespace': 'ragas-eval-v3',\n",
-       "│   │   │   'llama_stack_url': 'https://7554874a6543.ngrok-free.app',\n",
+       "│   │   │   'llama_stack_url': 'https://3cf60cfd902b.ngrok-free.app',\n",
        "│   │   │   'base_image': 'quay.io/diegosquayorg/my-ragas-provider-image:latest'\n",
        "│   │   }\n",
        "},\n",
-       "kubeflow_run_id='670d2656-aafe-4ac2-a277-be1324733fe8',\n",
+       "kubeflow_run_id='25f40e3e-1ac2-424f-ab76-58e8fa124c1a',\n",
        "result=None\n",
        ")\n",
        "
\n" ], "text/plain": [ "\u001b[1;35mJob\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mjob_id\u001b[0m=\u001b[32m'765563f1-e68a-4122-b50a-d643c071c98f'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mjob_id\u001b[0m=\u001b[32m'b1e4951b-342c-4fb2-9e52-26d3b3df4b77'\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mstatus\u001b[0m=\u001b[32m'in_progress'\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mruntime_config\u001b[0m=\u001b[1m{\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[32m'benchmark_config'\u001b[0m: \u001b[1m{\u001b[0m\n", @@ -537,11 +618,11 @@ "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m's3_credentials_secret_name'\u001b[0m: \u001b[32m'aws-credentials'\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'pipelines_endpoint'\u001b[0m: \u001b[32m'https://ds-pipeline-dspa-ragas-eval-v3.apps.rosa.diego-ragas.zch5.p3.openshiftapps.com'\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'namespace'\u001b[0m: \u001b[32m'ragas-eval-v3'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'llama_stack_url'\u001b[0m: \u001b[32m'https://7554874a6543.ngrok-free.app'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'llama_stack_url'\u001b[0m: \u001b[32m'https://3cf60cfd902b.ngrok-free.app'\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'base_image'\u001b[0m: \u001b[32m'quay.io/diegosquayorg/my-ragas-provider-image:latest'\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mkubeflow_run_id\u001b[0m=\u001b[32m'670d2656-aafe-4ac2-a277-be1324733fe8'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mkubeflow_run_id\u001b[0m=\u001b[32m'25f40e3e-1ac2-424f-ab76-58e8fa124c1a'\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mresult\u001b[0m=\u001b[3;35mNone\u001b[0m\n", "\u001b[1m)\u001b[0m\n" ] @@ -555,7 +636,7 @@ "# since we can't set the embedding model in the benchmark config,\n", "# the embedding model is set in the distribution run.yaml 
file(all-MiniLM-L6-v2)\n", "\n", - "remote_job = client.alpha.eval.run_eval(\n", + "remote_job = client.eval.run_eval(\n", " benchmark_id=\"ragas_demo_benchmark__remote\",\n", " benchmark_config={\n", " \"eval_candidate\": {\n", @@ -579,14 +660,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: POST https://7554874a6543.ngrok-free.app/v1alpha/eval/benchmarks/ragas_demo_benchmark__inline/jobs \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: POST https://3cf60cfd902b.ngrok-free.app/v1/eval/benchmarks/ragas_demo_benchmark__inline/jobs \"HTTP/1.1 200 OK\"\n" ] }, { "data": { "text/html": [ "
Job(\n",
-       "job_id='0',\n",
+       "job_id='5',\n",
        "status='in_progress',\n",
        "result=None,\n",
        "eval_config={\n",
@@ -604,7 +685,7 @@
       ],
       "text/plain": [
        "\u001b[1;35mJob\u001b[0m\u001b[1m(\u001b[0m\n",
-       "\u001b[2;32m│   \u001b[0m\u001b[33mjob_id\u001b[0m=\u001b[32m'0'\u001b[0m,\n",
+       "\u001b[2;32m│   \u001b[0m\u001b[33mjob_id\u001b[0m=\u001b[32m'5'\u001b[0m,\n",
        "\u001b[2;32m│   \u001b[0m\u001b[33mstatus\u001b[0m=\u001b[32m'in_progress'\u001b[0m,\n",
        "\u001b[2;32m│   \u001b[0m\u001b[33mresult\u001b[0m=\u001b[3;35mNone\u001b[0m,\n",
        "\u001b[2;32m│   \u001b[0m\u001b[33meval_config\u001b[0m=\u001b[1m{\u001b[0m\n",
@@ -629,7 +710,7 @@
     "# since we can't set the embedding model in the benchmark config,\n",
     "# the embedding model is set in the distribution run.yaml file(all-MiniLM-L6-v2)\n",
     "\n",
-    "inline_job = client.alpha.eval.run_eval(\n",
+    "inline_job = client.eval.run_eval(\n",
     "    benchmark_id=\"ragas_demo_benchmark__inline\",\n",
     "    benchmark_config={\n",
     "        \"eval_candidate\": {\n",
@@ -660,14 +741,14 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "INFO:httpx:HTTP Request: GET https://7554874a6543.ngrok-free.app/v1alpha/eval/benchmarks/ragas_demo_benchmark__inline/jobs/0 \"HTTP/1.1 200 OK\"\n"
+      "INFO:httpx:HTTP Request: GET https://3cf60cfd902b.ngrok-free.app/v1/eval/benchmarks/ragas_demo_benchmark__inline/jobs/5 \"HTTP/1.1 200 OK\"\n"
      ]
     },
     {
      "data": {
       "text/html": [
        "
Job(\n",
-       "job_id='0',\n",
+       "job_id='5',\n",
        "status='in_progress',\n",
        "result=None,\n",
        "eval_config={\n",
@@ -685,7 +766,7 @@
       ],
       "text/plain": [
        "\u001b[1;35mJob\u001b[0m\u001b[1m(\u001b[0m\n",
-       "\u001b[2;32m│   \u001b[0m\u001b[33mjob_id\u001b[0m=\u001b[32m'0'\u001b[0m,\n",
+       "\u001b[2;32m│   \u001b[0m\u001b[33mjob_id\u001b[0m=\u001b[32m'5'\u001b[0m,\n",
        "\u001b[2;32m│   \u001b[0m\u001b[33mstatus\u001b[0m=\u001b[32m'in_progress'\u001b[0m,\n",
        "\u001b[2;32m│   \u001b[0m\u001b[33mresult\u001b[0m=\u001b[3;35mNone\u001b[0m,\n",
        "\u001b[2;32m│   \u001b[0m\u001b[33meval_config\u001b[0m=\u001b[1m{\u001b[0m\n",
@@ -708,7 +789,7 @@
    "source": [
     "# wait a bit for the job to complete\n",
     "pprint(\n",
-    "    client.alpha.eval.jobs.status(\n",
+    "    client.eval.jobs.status(\n",
     "        benchmark_id=\"ragas_demo_benchmark__inline\", job_id=inline_job.job_id\n",
     "    )\n",
     ")"
@@ -723,14 +804,14 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "INFO:httpx:HTTP Request: GET https://7554874a6543.ngrok-free.app/v1alpha/eval/benchmarks/ragas_demo_benchmark__remote/jobs/765563f1-e68a-4122-b50a-d643c071c98f \"HTTP/1.1 200 OK\"\n"
+      "INFO:httpx:HTTP Request: GET https://3cf60cfd902b.ngrok-free.app/v1/eval/benchmarks/ragas_demo_benchmark__remote/jobs/b1e4951b-342c-4fb2-9e52-26d3b3df4b77 \"HTTP/1.1 200 OK\"\n"
      ]
     },
     {
      "data": {
       "text/html": [
        "
Job(\n",
-       "job_id='765563f1-e68a-4122-b50a-d643c071c98f',\n",
+       "job_id='b1e4951b-342c-4fb2-9e52-26d3b3df4b77',\n",
        "status='in_progress',\n",
        "runtime_config={\n",
        "│   │   'benchmark_config': {\n",
@@ -770,18 +851,18 @@
        "│   │   │   's3_credentials_secret_name': 'aws-credentials',\n",
        "│   │   │   'pipelines_endpoint': 'https://ds-pipeline-dspa-ragas-eval-v3.apps.rosa.diego-ragas.zch5.p3.openshiftapps.com',\n",
        "│   │   │   'namespace': 'ragas-eval-v3',\n",
-       "│   │   │   'llama_stack_url': 'https://7554874a6543.ngrok-free.app',\n",
+       "│   │   │   'llama_stack_url': 'https://3cf60cfd902b.ngrok-free.app',\n",
        "│   │   │   'base_image': 'quay.io/diegosquayorg/my-ragas-provider-image:latest'\n",
        "│   │   }\n",
        "},\n",
-       "kubeflow_run_id='670d2656-aafe-4ac2-a277-be1324733fe8',\n",
+       "kubeflow_run_id='25f40e3e-1ac2-424f-ab76-58e8fa124c1a',\n",
        "result=None\n",
        ")\n",
        "
\n" ], "text/plain": [ "\u001b[1;35mJob\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mjob_id\u001b[0m=\u001b[32m'765563f1-e68a-4122-b50a-d643c071c98f'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mjob_id\u001b[0m=\u001b[32m'b1e4951b-342c-4fb2-9e52-26d3b3df4b77'\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mstatus\u001b[0m=\u001b[32m'in_progress'\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mruntime_config\u001b[0m=\u001b[1m{\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[32m'benchmark_config'\u001b[0m: \u001b[1m{\u001b[0m\n", @@ -821,11 +902,11 @@ "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m's3_credentials_secret_name'\u001b[0m: \u001b[32m'aws-credentials'\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'pipelines_endpoint'\u001b[0m: \u001b[32m'https://ds-pipeline-dspa-ragas-eval-v3.apps.rosa.diego-ragas.zch5.p3.openshiftapps.com'\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'namespace'\u001b[0m: \u001b[32m'ragas-eval-v3'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'llama_stack_url'\u001b[0m: \u001b[32m'https://7554874a6543.ngrok-free.app'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'llama_stack_url'\u001b[0m: \u001b[32m'https://3cf60cfd902b.ngrok-free.app'\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'base_image'\u001b[0m: \u001b[32m'quay.io/diegosquayorg/my-ragas-provider-image:latest'\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mkubeflow_run_id\u001b[0m=\u001b[32m'670d2656-aafe-4ac2-a277-be1324733fe8'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mkubeflow_run_id\u001b[0m=\u001b[32m'25f40e3e-1ac2-424f-ab76-58e8fa124c1a'\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mresult\u001b[0m=\u001b[3;35mNone\u001b[0m\n", "\u001b[1m)\u001b[0m\n" ] @@ -837,7 +918,7 @@ "source": [ "# wait a bit for the job to complete\n", "pprint(\n", - " client.alpha.eval.jobs.status(\n", + " client.eval.jobs.status(\n", " 
benchmark_id=\"ragas_demo_benchmark__remote\", job_id=remote_job.job_id\n", " )\n", ")" @@ -845,14 +926,14 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: GET https://7554874a6543.ngrok-free.app/v1alpha/eval/benchmarks/ragas_demo_benchmark__remote/jobs/765563f1-e68a-4122-b50a-d643c071c98f/result \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: GET https://3cf60cfd902b.ngrok-free.app/v1/eval/benchmarks/ragas_demo_benchmark__remote/jobs/b1e4951b-342c-4fb2-9e52-26d3b3df4b77/result \"HTTP/1.1 200 OK\"\n" ] }, { @@ -886,12 +967,12 @@ "scores={\n", "│ │ 'answer_relevancy': ScoringResult(\n", "│ │ │ aggregated_results={\n", - "│ │ │ │ 'average': 0.9028964605666667,\n", + "│ │ │ │ 'average': 0.9424462732333333,\n", "│ │ │ │ 'count': 3.0,\n", - "│ │ │ │ 'min': 0.7873005565000001,\n", - "│ │ │ │ 'max': 0.9610799629000001\n", + "│ │ │ │ 'min': 0.9239711678,\n", + "│ │ │ │ 'max': 0.962092759\n", "│ │ │ },\n", - "│ │ │ score_rows=[{'score': 0.9603088623}, {'score': 0.9610799629000001}, {'score': 0.7873005565000001}]\n", + "│ │ │ score_rows=[{'score': 0.962092759}, {'score': 0.9412748929}, {'score': 0.9239711678}]\n", "│ │ )\n", "}\n", ")\n", @@ -926,12 +1007,12 @@ "\u001b[2;32m│ \u001b[0m\u001b[33mscores\u001b[0m=\u001b[1m{\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[32m'answer_relevancy'\u001b[0m: \u001b[1;35mScoringResult\u001b[0m\u001b[1m(\u001b[0m\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[33maggregated_results\u001b[0m=\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'average'\u001b[0m: \u001b[1;36m0.9028964605666667\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'average'\u001b[0m: \u001b[1;36m0.9424462732333333\u001b[0m,\n", "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'count'\u001b[0m: \u001b[1;36m3.0\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'min'\u001b[0m: 
\u001b[1;36m0.7873005565000001\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'max'\u001b[0m: \u001b[1;36m0.9610799629000001\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'min'\u001b[0m: \u001b[1;36m0.9239711678\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'max'\u001b[0m: \u001b[1;36m0.962092759\u001b[0m\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mscore_rows\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.9603088623\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.9610799629000001\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.7873005565000001\u001b[0m\u001b[1m}\u001b[0m\u001b[1m]\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mscore_rows\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.962092759\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.9412748929\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.9239711678\u001b[0m\u001b[1m}\u001b[0m\u001b[1m]\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m\n", "\u001b[1m)\u001b[0m\n" @@ -942,7 +1023,7 @@ } ], "source": [ - "remote_results = client.alpha.eval.jobs.retrieve(\n", + "remote_results = client.eval.jobs.retrieve(\n", " benchmark_id=\"ragas_demo_benchmark__remote\", job_id=remote_job.job_id\n", ")\n", "pprint(remote_results)" @@ -950,14 +1031,14 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: GET https://7554874a6543.ngrok-free.app/v1alpha/eval/benchmarks/ragas_demo_benchmark__inline/jobs/0/result \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: GET 
https://3cf60cfd902b.ngrok-free.app/v1/eval/benchmarks/ragas_demo_benchmark__inline/jobs/5/result \"HTTP/1.1 200 OK\"\n" ] }, { @@ -990,11 +1071,11 @@ "],\n", "scores={\n", "│ │ 'answer_relevancy': ScoringResult(\n", - "│ │ │ aggregated_results={'answer_relevancy': 0.8827011153807643},\n", + "│ │ │ aggregated_results={'answer_relevancy': 0.9401391281654831},\n", "│ │ │ score_rows=[\n", - "│ │ │ │ {'score': 0.9603088622890463},\n", - "│ │ │ │ {'score': 0.944474574022757},\n", - "│ │ │ │ {'score': 0.7433199098304898}\n", + "│ │ │ │ {'score': 0.9873642530041437},\n", + "│ │ │ │ {'score': 0.913605429314886},\n", + "│ │ │ │ {'score': 0.9194477021774197}\n", "│ │ │ ]\n", "│ │ )\n", "}\n", @@ -1029,11 +1110,11 @@ "\u001b[2;32m│ \u001b[0m\u001b[1m]\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mscores\u001b[0m=\u001b[1m{\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[32m'answer_relevancy'\u001b[0m: \u001b[1;35mScoringResult\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[33maggregated_results\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'answer_relevancy'\u001b[0m: \u001b[1;36m0.8827011153807643\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[33maggregated_results\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'answer_relevancy'\u001b[0m: \u001b[1;36m0.9401391281654831\u001b[0m\u001b[1m}\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mscore_rows\u001b[0m=\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.9603088622890463\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.944474574022757\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.7433199098304898\u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.9873642530041437\u001b[0m\u001b[1m}\u001b[0m,\n", + 
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.913605429314886\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.9194477021774197\u001b[0m\u001b[1m}\u001b[0m\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m]\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m\n", @@ -1045,7 +1126,7 @@ } ], "source": [ - "inline_results = client.alpha.eval.jobs.retrieve(\n", + "inline_results = client.eval.jobs.retrieve(\n", " benchmark_id=\"ragas_demo_benchmark__inline\", job_id=inline_job.job_id\n", ")\n", "pprint(inline_results)" @@ -1060,7 +1141,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -1092,34 +1173,34 @@ " \n", " \n", " 0\n", - " 0.960309\n", - " 0.960309\n", - " 1.095368e-11\n", + " 0.987364\n", + " 0.962093\n", + " -0.025271\n", " \n", " \n", " 1\n", - " 0.944475\n", - " 0.961080\n", - " 1.660539e-02\n", + " 0.913605\n", + " 0.941275\n", + " 0.027669\n", " \n", " \n", " 2\n", - " 0.743320\n", - " 0.787301\n", - " 4.398065e-02\n", + " 0.919448\n", + " 0.923971\n", + " 0.004523\n", " \n", " \n", "\n", "" ], "text/plain": [ - " inline remote diff\n", - "0 0.960309 0.960309 1.095368e-11\n", - "1 0.944475 0.961080 1.660539e-02\n", - "2 0.743320 0.787301 4.398065e-02" + " inline remote diff\n", + "0 0.987364 0.962093 -0.025271\n", + "1 0.913605 0.941275 0.027669\n", + "2 0.919448 0.923971 0.004523" ] }, - "execution_count": 40, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } diff --git a/pyproject.toml b/pyproject.toml index 33e954bd..70991e65 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,8 +25,8 @@ authors = [ keywords = ["llama-stack", "ragas", "evaluation"] dependencies = [ "setuptools-scm", - "llama-stack @ git+https://github.com/llamastack/llama-stack.git", - "llama-stack-client @ 
git+https://github.com/llamastack/llama-stack-client-python.git", + "llama-stack==0.2.23", + "llama-stack-client==0.2.23", "greenlet==3.2.4", # inline/files/localfs errors saying greenlet not found "ragas==0.3.0", "pandas<2.3.0", diff --git a/tests/test_inline_evaluation.py b/tests/test_inline_evaluation.py index bb0fe234..1aea4580 100644 --- a/tests/test_inline_evaluation.py +++ b/tests/test_inline_evaluation.py @@ -39,7 +39,7 @@ def test_single_metric_evaluation( provider_id=PROVIDER_ID_INLINE, ) - job = lls_client.alpha.eval.run_eval( + job = lls_client.eval.run_eval( benchmark_id=benchmark_id, benchmark_config={ "eval_candidate": { diff --git a/uv.lock b/uv.lock index ffbb00de..9a92d303 100644 --- a/uv.lock +++ b/uv.lock @@ -1414,7 +1414,7 @@ wheels = [ [[package]] name = "llama-stack" version = "0.2.23" -source = { git = "https://github.com/llamastack/llama-stack.git#26fd5dbd34ec3cea1ca3bc0da6f30513a8c55ac9" } +source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, { name = "aiosqlite" }, @@ -1437,17 +1437,20 @@ dependencies = [ { name = "python-jose", extra = ["cryptography"] }, { name = "python-multipart" }, { name = "rich" }, - { name = "sqlalchemy", extra = ["asyncio"] }, { name = "starlette" }, { name = "termcolor" }, { name = "tiktoken" }, { name = "uvicorn" }, ] +sdist = { url = "https://files.pythonhosted.org/packages/b1/d1/39838d16c3f1fa459f3fb2e171908082be2dcf885405c93c23d3b7fd8a83/llama_stack-0.2.23.tar.gz", hash = "sha256:464d29c1ae3989fce2226513c9c18648f6236cbf5a399d3cf3a6b3dc4435be8c", size = 3331049, upload-time = "2025-09-26T21:11:16.86Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6d/b6/ad644dcd505c6e9bcc1dd256a08fca7bcd11c87684d68bf45f2a2f0028b9/llama_stack-0.2.23-py3-none-any.whl", hash = "sha256:43be14d97ca2fff77ca1e42241acb7945a67b7d183e9938bdb7159ee9610982a", size = 3662192, upload-time = "2025-09-26T21:11:15.291Z" }, +] [[package]] name = "llama-stack-client" -version = "0.3.0a4" 
-source = { git = "https://github.com/llamastack/llama-stack-client-python.git#43443fe86d23a2a7e7d4e753c2fecc364dea2f7c" } +version = "0.2.23" +source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, { name = "click" }, @@ -1465,6 +1468,10 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] +sdist = { url = "https://files.pythonhosted.org/packages/9f/8f/306d5fcf2f97b3a6251219b03c194836a2ff4e0fcc8146c9970e50a72cd3/llama_stack_client-0.2.23.tar.gz", hash = "sha256:68f34e8ac8eea6a73ed9d4977d849992b2d8bd835804d770a11843431cd5bf74", size = 322288, upload-time = "2025-09-26T21:11:08.342Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fa/75/3eb58e092a681804013dbec7b7f549d18f55acf6fd6e6b27de7e249766d8/llama_stack_client-0.2.23-py3-none-any.whl", hash = "sha256:eee42c74eee8f218f9455e5a06d5d4be43f8a8c82a7937ef51ce367f916df847", size = 379809, upload-time = "2025-09-26T21:11:06.856Z" }, +] [[package]] name = "llama-stack-provider-ragas" @@ -1523,8 +1530,8 @@ requires-dist = [ { name = "kfp", marker = "extra == 'remote'", specifier = ">=2.5.0" }, { name = "kfp-kubernetes", marker = "extra == 'remote'", specifier = ">=2.0.0" }, { name = "kubernetes", marker = "extra == 'remote'", specifier = ">=30.0.0" }, - { name = "llama-stack", git = "https://github.com/llamastack/llama-stack.git" }, - { name = "llama-stack-client", git = "https://github.com/llamastack/llama-stack-client-python.git" }, + { name = "llama-stack", specifier = "==0.2.23" }, + { name = "llama-stack-client", specifier = "==0.2.23" }, { name = "llama-stack-provider-ragas", extras = ["distro"], marker = "extra == 'dev'" }, { name = "llama-stack-provider-ragas", extras = ["remote"], marker = "extra == 'dev'" }, { name = "mypy", marker = "extra == 'dev'" }, @@ -2904,11 +2911,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1c/fc/9ba22f01b5cdacc8f5ed0d22304718d2c758fce3fd49a5372b886a86f37c/sqlalchemy-2.0.41-py3-none-any.whl", hash = 
"sha256:57df5dc6fdb5ed1a88a1ed2195fd31927e705cad62dedd86b46972752a80f576", size = 1911224, upload-time = "2025-05-14T17:39:42.154Z" }, ] -[package.optional-dependencies] -asyncio = [ - { name = "greenlet" }, -] - [[package]] name = "stack-data" version = "0.6.3" From 36e613c3658b2f0d6ce59bcfb8983e1c2c97193f Mon Sep 17 00:00:00 2001 From: Diego Maniloff Date: Tue, 14 Oct 2025 10:26:54 -0400 Subject: [PATCH 10/22] Version bump. --- pyproject.toml | 2 +- uv.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 70991e65..db60e7a2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "llama-stack-provider-ragas" -version = "0.3.1" +version = "0.3.2" description = "Ragas evaluation as an out-of-tree Llama Stack provider" readme = "README.md" requires-python = ">=3.12" diff --git a/uv.lock b/uv.lock index 9a92d303..5bdd2367 100644 --- a/uv.lock +++ b/uv.lock @@ -1475,7 +1475,7 @@ wheels = [ [[package]] name = "llama-stack-provider-ragas" -version = "0.3.1" +version = "0.3.2" source = { editable = "." 
} dependencies = [ { name = "greenlet" }, From af80f2f7633ff3cb23ac66c74131994998f4beb5 Mon Sep 17 00:00:00 2001 From: Diego Maniloff Date: Tue, 14 Oct 2025 23:26:37 -0400 Subject: [PATCH 11/22] revert changes to provider.py --- src/llama_stack_provider_ragas/provider.py | 40 +++------------------- 1 file changed, 4 insertions(+), 36 deletions(-) diff --git a/src/llama_stack_provider_ragas/provider.py b/src/llama_stack_provider_ragas/provider.py index 98a5ef43..ad28301f 100644 --- a/src/llama_stack_provider_ragas/provider.py +++ b/src/llama_stack_provider_ragas/provider.py @@ -1,37 +1,5 @@ -import logging +# remote is the default provider +from .remote import get_adapter_impl +from .remote.provider import get_provider_spec -from .inline.provider import get_provider_spec as get_inline_provider_spec - -logger = logging.getLogger(__name__) - - -def _has_remote_dependencies() -> bool: - """Check if remote dependencies are available.""" - try: - import kfp # noqa: F401 - import kubernetes # noqa: F401 - import s3fs # noqa: F401 - from kfp import kubernetes as kfp_kubernetes # noqa: F401 - - return True - except ImportError: - return False - - -def get_provider_spec(): - providers = [get_inline_provider_spec()] - - if _has_remote_dependencies(): - from .remote.provider import get_provider_spec as get_remote_provider_spec - - providers.append(get_remote_provider_spec()) - else: - logger.info( - "Remote provider dependencies not found, returning inline provider only. " - "Enable remote evaluation with 'pip install llama-stack-provider-ragas[remote]'." - ) - - return providers - - -__all__ = ["get_provider_spec"] +__all__ = ["get_provider_spec", "get_adapter_impl"] From 3b9ec66853db5199ac50308d0338297b88aa2def Mon Sep 17 00:00:00 2001 From: Diego Maniloff Date: Tue, 14 Oct 2025 23:26:58 -0400 Subject: [PATCH 12/22] Update dependencies in pyproject.toml: remove llama-stack-client, upgrade pyarrow to >=21.0.0, and add datasets dependency. 
--- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index db60e7a2..04ab703f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,12 +26,12 @@ keywords = ["llama-stack", "ragas", "evaluation"] dependencies = [ "setuptools-scm", "llama-stack==0.2.23", - "llama-stack-client==0.2.23", "greenlet==3.2.4", # inline/files/localfs errors saying greenlet not found "ragas==0.3.0", "pandas<2.3.0", - "pyarrow==20.0.0", + "pyarrow>=21.0.0", "requests==2.32.4", + "datasets>=2.16.0", ] [project.urls] From e1b82aaeec94fd7c0d7878b58351cdcc0a90a10a Mon Sep 17 00:00:00 2001 From: Diego Maniloff Date: Tue, 14 Oct 2025 23:28:20 -0400 Subject: [PATCH 13/22] revert changes to pyproject. --- pyproject.toml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 04ab703f..92c382e6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "llama-stack-provider-ragas" -version = "0.3.2" +version = "0.3.3" description = "Ragas evaluation as an out-of-tree Llama Stack provider" readme = "README.md" requires-python = ">=3.12" @@ -85,10 +85,8 @@ ignore = [ "C901", # too complex ] -[tool.hatch.metadata] -allow-direct-references = true - [tool.mypy] +python_version = "3.12" warn_return_any = true warn_unused_configs = true disallow_untyped_defs = false From a95e028a2c474775d4878f52dee0b0478d64cf6c Mon Sep 17 00:00:00 2001 From: Diego Maniloff Date: Tue, 14 Oct 2025 23:30:20 -0400 Subject: [PATCH 14/22] revert changes to uv.lock --- uv.lock | 140 ++++++++++++++++++++++++-------------------------------- 1 file changed, 61 insertions(+), 79 deletions(-) diff --git a/uv.lock b/uv.lock index 5bdd2367..fc527cd7 100644 --- a/uv.lock +++ b/uv.lock @@ -492,12 +492,13 @@ wheels = [ [[package]] name = "datasets" -version = "2.14.4" +version = "4.2.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = 
"aiohttp" }, { name = "dill" }, + { name = "filelock" }, { name = "fsspec", extra = ["http"] }, + { name = "httpx" }, { name = "huggingface-hub" }, { name = "multiprocess" }, { name = "numpy" }, @@ -509,9 +510,9 @@ dependencies = [ { name = "tqdm" }, { name = "xxhash" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/1d/69/8cc725b5d38968fd118e4ce56a483b16e75b7793854c1a392ec4a34eeb31/datasets-2.14.4.tar.gz", hash = "sha256:ef29c2b5841de488cd343cfc26ab979bff77efa4d2285af51f1ad7db5c46a83b", size = 2178719, upload-time = "2023-08-08T15:45:43.015Z" } +sdist = { url = "https://files.pythonhosted.org/packages/70/48/0186fbc4b86a4f9ecaf04eb01e877e78b53bfa0b03be9c84b2298431ba33/datasets-4.2.0.tar.gz", hash = "sha256:8333a7db9f3bb8044c1b819a35d4e3e2809596c837793b0921382efffdc36e78", size = 582256, upload-time = "2025-10-09T16:10:15.534Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/66/f8/38298237d18d4b6a8ee5dfe390e97bed5adb8e01ec6f9680c0ddf3066728/datasets-2.14.4-py3-none-any.whl", hash = "sha256:29336bd316a7d827ccd4da2236596279b20ca2ac78f64c04c9483da7cbc2459b", size = 519335, upload-time = "2023-08-08T15:45:38.837Z" }, + { url = "https://files.pythonhosted.org/packages/91/9e/0bbbd09b116fd8ee2d3617e28e6598551d2f0f24d3a2ce99cc87ec85aeb0/datasets-4.2.0-py3-none-any.whl", hash = "sha256:fdc43aaf4a73b31f64f80f72f195ab413a1141ed15555d675b2fd17926f8b026", size = 506316, upload-time = "2025-10-09T16:10:13.375Z" }, ] [[package]] @@ -851,43 +852,30 @@ wheels = [ [[package]] name = "grpcio" -version = "1.75.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/9d/f7/8963848164c7604efb3a3e6ee457fdb3a469653e19002bd24742473254f8/grpcio-1.75.1.tar.gz", hash = "sha256:3e81d89ece99b9ace23a6916880baca613c03a799925afb2857887efa8b1b3d2", size = 12731327, upload-time = "2025-09-26T09:03:36.887Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/3a/81/42be79e73a50aaa20af66731c2defeb0e8c9008d9935a64dd8ea8e8c44eb/grpcio-1.75.1-cp312-cp312-linux_armv7l.whl", hash = "sha256:7b888b33cd14085d86176b1628ad2fcbff94cfbbe7809465097aa0132e58b018", size = 5668314, upload-time = "2025-09-26T09:01:55.424Z" }, - { url = "https://files.pythonhosted.org/packages/c5/a7/3686ed15822fedc58c22f82b3a7403d9faf38d7c33de46d4de6f06e49426/grpcio-1.75.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:8775036efe4ad2085975531d221535329f5dac99b6c2a854a995456098f99546", size = 11476125, upload-time = "2025-09-26T09:01:57.927Z" }, - { url = "https://files.pythonhosted.org/packages/14/85/21c71d674f03345ab183c634ecd889d3330177e27baea8d5d247a89b6442/grpcio-1.75.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bb658f703468d7fbb5dcc4037c65391b7dc34f808ac46ed9136c24fc5eeb041d", size = 6246335, upload-time = "2025-09-26T09:02:00.76Z" }, - { url = "https://files.pythonhosted.org/packages/fd/db/3beb661bc56a385ae4fa6b0e70f6b91ac99d47afb726fe76aaff87ebb116/grpcio-1.75.1-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:4b7177a1cdb3c51b02b0c0a256b0a72fdab719600a693e0e9037949efffb200b", size = 6916309, upload-time = "2025-09-26T09:02:02.894Z" }, - { url = "https://files.pythonhosted.org/packages/1e/9c/eda9fe57f2b84343d44c1b66cf3831c973ba29b078b16a27d4587a1fdd47/grpcio-1.75.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7d4fa6ccc3ec2e68a04f7b883d354d7fea22a34c44ce535a2f0c0049cf626ddf", size = 6435419, upload-time = "2025-09-26T09:02:05.055Z" }, - { url = "https://files.pythonhosted.org/packages/c3/b8/090c98983e0a9d602e3f919a6e2d4e470a8b489452905f9a0fa472cac059/grpcio-1.75.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3d86880ecaeb5b2f0a8afa63824de93adb8ebe4e49d0e51442532f4e08add7d6", size = 7064893, upload-time = "2025-09-26T09:02:07.275Z" }, - { url = 
"https://files.pythonhosted.org/packages/ec/c0/6d53d4dbbd00f8bd81571f5478d8a95528b716e0eddb4217cc7cb45aae5f/grpcio-1.75.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a8041d2f9e8a742aeae96f4b047ee44e73619f4f9d24565e84d5446c623673b6", size = 8011922, upload-time = "2025-09-26T09:02:09.527Z" }, - { url = "https://files.pythonhosted.org/packages/f2/7c/48455b2d0c5949678d6982c3e31ea4d89df4e16131b03f7d5c590811cbe9/grpcio-1.75.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3652516048bf4c314ce12be37423c79829f46efffb390ad64149a10c6071e8de", size = 7466181, upload-time = "2025-09-26T09:02:12.279Z" }, - { url = "https://files.pythonhosted.org/packages/fd/12/04a0e79081e3170b6124f8cba9b6275871276be06c156ef981033f691880/grpcio-1.75.1-cp312-cp312-win32.whl", hash = "sha256:44b62345d8403975513af88da2f3d5cc76f73ca538ba46596f92a127c2aea945", size = 3938543, upload-time = "2025-09-26T09:02:14.77Z" }, - { url = "https://files.pythonhosted.org/packages/5f/d7/11350d9d7fb5adc73d2b0ebf6ac1cc70135577701e607407fe6739a90021/grpcio-1.75.1-cp312-cp312-win_amd64.whl", hash = "sha256:b1e191c5c465fa777d4cafbaacf0c01e0d5278022082c0abbd2ee1d6454ed94d", size = 4641938, upload-time = "2025-09-26T09:02:16.927Z" }, - { url = "https://files.pythonhosted.org/packages/46/74/bac4ab9f7722164afdf263ae31ba97b8174c667153510322a5eba4194c32/grpcio-1.75.1-cp313-cp313-linux_armv7l.whl", hash = "sha256:3bed22e750d91d53d9e31e0af35a7b0b51367e974e14a4ff229db5b207647884", size = 5672779, upload-time = "2025-09-26T09:02:19.11Z" }, - { url = "https://files.pythonhosted.org/packages/a6/52/d0483cfa667cddaa294e3ab88fd2c2a6e9dc1a1928c0e5911e2e54bd5b50/grpcio-1.75.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:5b8f381eadcd6ecaa143a21e9e80a26424c76a0a9b3d546febe6648f3a36a5ac", size = 11470623, upload-time = "2025-09-26T09:02:22.117Z" }, - { url = 
"https://files.pythonhosted.org/packages/cf/e4/d1954dce2972e32384db6a30273275e8c8ea5a44b80347f9055589333b3f/grpcio-1.75.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5bf4001d3293e3414d0cf99ff9b1139106e57c3a66dfff0c5f60b2a6286ec133", size = 6248838, upload-time = "2025-09-26T09:02:26.426Z" }, - { url = "https://files.pythonhosted.org/packages/06/43/073363bf63826ba8077c335d797a8d026f129dc0912b69c42feaf8f0cd26/grpcio-1.75.1-cp313-cp313-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:9f82ff474103e26351dacfe8d50214e7c9322960d8d07ba7fa1d05ff981c8b2d", size = 6922663, upload-time = "2025-09-26T09:02:28.724Z" }, - { url = "https://files.pythonhosted.org/packages/c2/6f/076ac0df6c359117676cacfa8a377e2abcecec6a6599a15a672d331f6680/grpcio-1.75.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0ee119f4f88d9f75414217823d21d75bfe0e6ed40135b0cbbfc6376bc9f7757d", size = 6436149, upload-time = "2025-09-26T09:02:30.971Z" }, - { url = "https://files.pythonhosted.org/packages/6b/27/1d08824f1d573fcb1fa35ede40d6020e68a04391709939e1c6f4193b445f/grpcio-1.75.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:664eecc3abe6d916fa6cf8dd6b778e62fb264a70f3430a3180995bf2da935446", size = 7067989, upload-time = "2025-09-26T09:02:33.233Z" }, - { url = "https://files.pythonhosted.org/packages/c6/98/98594cf97b8713feb06a8cb04eeef60b4757e3e2fb91aa0d9161da769843/grpcio-1.75.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:c32193fa08b2fbebf08fe08e84f8a0aad32d87c3ad42999c65e9449871b1c66e", size = 8010717, upload-time = "2025-09-26T09:02:36.011Z" }, - { url = "https://files.pythonhosted.org/packages/8c/7e/bb80b1bba03c12158f9254762cdf5cced4a9bc2e8ed51ed335915a5a06ef/grpcio-1.75.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5cebe13088b9254f6e615bcf1da9131d46cfa4e88039454aca9cb65f639bd3bc", size = 7463822, upload-time = "2025-09-26T09:02:38.26Z" }, - { url = 
"https://files.pythonhosted.org/packages/23/1c/1ea57fdc06927eb5640f6750c697f596f26183573069189eeaf6ef86ba2d/grpcio-1.75.1-cp313-cp313-win32.whl", hash = "sha256:4b4c678e7ed50f8ae8b8dbad15a865ee73ce12668b6aaf411bf3258b5bc3f970", size = 3938490, upload-time = "2025-09-26T09:02:40.268Z" }, - { url = "https://files.pythonhosted.org/packages/4b/24/fbb8ff1ccadfbf78ad2401c41aceaf02b0d782c084530d8871ddd69a2d49/grpcio-1.75.1-cp313-cp313-win_amd64.whl", hash = "sha256:5573f51e3f296a1bcf71e7a690c092845fb223072120f4bdb7a5b48e111def66", size = 4642538, upload-time = "2025-09-26T09:02:42.519Z" }, - { url = "https://files.pythonhosted.org/packages/f2/1b/9a0a5cecd24302b9fdbcd55d15ed6267e5f3d5b898ff9ac8cbe17ee76129/grpcio-1.75.1-cp314-cp314-linux_armv7l.whl", hash = "sha256:c05da79068dd96723793bffc8d0e64c45f316248417515f28d22204d9dae51c7", size = 5673319, upload-time = "2025-09-26T09:02:44.742Z" }, - { url = "https://files.pythonhosted.org/packages/c6/ec/9d6959429a83fbf5df8549c591a8a52bb313976f6646b79852c4884e3225/grpcio-1.75.1-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:06373a94fd16ec287116a825161dca179a0402d0c60674ceeec8c9fba344fe66", size = 11480347, upload-time = "2025-09-26T09:02:47.539Z" }, - { url = "https://files.pythonhosted.org/packages/09/7a/26da709e42c4565c3d7bf999a9569da96243ce34a8271a968dee810a7cf1/grpcio-1.75.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4484f4b7287bdaa7a5b3980f3c7224c3c622669405d20f69549f5fb956ad0421", size = 6254706, upload-time = "2025-09-26T09:02:50.4Z" }, - { url = "https://files.pythonhosted.org/packages/f1/08/dcb26a319d3725f199c97e671d904d84ee5680de57d74c566a991cfab632/grpcio-1.75.1-cp314-cp314-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:2720c239c1180eee69f7883c1d4c83fc1a495a2535b5fa322887c70bf02b16e8", size = 6922501, upload-time = "2025-09-26T09:02:52.711Z" }, - { url = 
"https://files.pythonhosted.org/packages/78/66/044d412c98408a5e23cb348845979a2d17a2e2b6c3c34c1ec91b920f49d0/grpcio-1.75.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:07a554fa31c668cf0e7a188678ceeca3cb8fead29bbe455352e712ec33ca701c", size = 6437492, upload-time = "2025-09-26T09:02:55.542Z" }, - { url = "https://files.pythonhosted.org/packages/4e/9d/5e3e362815152aa1afd8b26ea613effa005962f9da0eec6e0e4527e7a7d1/grpcio-1.75.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:3e71a2105210366bfc398eef7f57a664df99194f3520edb88b9c3a7e46ee0d64", size = 7081061, upload-time = "2025-09-26T09:02:58.261Z" }, - { url = "https://files.pythonhosted.org/packages/1e/1a/46615682a19e100f46e31ddba9ebc297c5a5ab9ddb47b35443ffadb8776c/grpcio-1.75.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:8679aa8a5b67976776d3c6b0521e99d1c34db8a312a12bcfd78a7085cb9b604e", size = 8010849, upload-time = "2025-09-26T09:03:00.548Z" }, - { url = "https://files.pythonhosted.org/packages/67/8e/3204b94ac30b0f675ab1c06540ab5578660dc8b690db71854d3116f20d00/grpcio-1.75.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:aad1c774f4ebf0696a7f148a56d39a3432550612597331792528895258966dc0", size = 7464478, upload-time = "2025-09-26T09:03:03.096Z" }, - { url = "https://files.pythonhosted.org/packages/b7/97/2d90652b213863b2cf466d9c1260ca7e7b67a16780431b3eb1d0420e3d5b/grpcio-1.75.1-cp314-cp314-win32.whl", hash = "sha256:62ce42d9994446b307649cb2a23335fa8e927f7ab2cbf5fcb844d6acb4d85f9c", size = 4012672, upload-time = "2025-09-26T09:03:05.477Z" }, - { url = "https://files.pythonhosted.org/packages/f9/df/e2e6e9fc1c985cd1a59e6996a05647c720fe8a03b92f5ec2d60d366c531e/grpcio-1.75.1-cp314-cp314-win_amd64.whl", hash = "sha256:f86e92275710bea3000cb79feca1762dc0ad3b27830dd1a74e82ab321d4ee464", size = 4772475, upload-time = "2025-09-26T09:03:07.661Z" }, +version = "1.73.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/79/e8/b43b851537da2e2f03fa8be1aef207e5cbfb1a2e014fbb6b40d24c177cd3/grpcio-1.73.1.tar.gz", hash = "sha256:7fce2cd1c0c1116cf3850564ebfc3264fba75d3c74a7414373f1238ea365ef87", size = 12730355, upload-time = "2025-06-26T01:53:24.622Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b8/41/456caf570c55d5ac26f4c1f2db1f2ac1467d5bf3bcd660cba3e0a25b195f/grpcio-1.73.1-cp312-cp312-linux_armv7l.whl", hash = "sha256:921b25618b084e75d424a9f8e6403bfeb7abef074bb6c3174701e0f2542debcf", size = 5334621, upload-time = "2025-06-26T01:52:23.602Z" }, + { url = "https://files.pythonhosted.org/packages/2a/c2/9a15e179e49f235bb5e63b01590658c03747a43c9775e20c4e13ca04f4c4/grpcio-1.73.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:277b426a0ed341e8447fbf6c1d6b68c952adddf585ea4685aa563de0f03df887", size = 10601131, upload-time = "2025-06-26T01:52:25.691Z" }, + { url = "https://files.pythonhosted.org/packages/0c/1d/1d39e90ef6348a0964caa7c5c4d05f3bae2c51ab429eb7d2e21198ac9b6d/grpcio-1.73.1-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:96c112333309493c10e118d92f04594f9055774757f5d101b39f8150f8c25582", size = 5759268, upload-time = "2025-06-26T01:52:27.631Z" }, + { url = "https://files.pythonhosted.org/packages/8a/2b/2dfe9ae43de75616177bc576df4c36d6401e0959833b2e5b2d58d50c1f6b/grpcio-1.73.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f48e862aed925ae987eb7084409a80985de75243389dc9d9c271dd711e589918", size = 6409791, upload-time = "2025-06-26T01:52:29.711Z" }, + { url = "https://files.pythonhosted.org/packages/6e/66/e8fe779b23b5a26d1b6949e5c70bc0a5fd08f61a6ec5ac7760d589229511/grpcio-1.73.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:83a6c2cce218e28f5040429835fa34a29319071079e3169f9543c3fbeff166d2", size = 6003728, upload-time = "2025-06-26T01:52:31.352Z" }, + { url = 
"https://files.pythonhosted.org/packages/a9/39/57a18fcef567784108c4fc3f5441cb9938ae5a51378505aafe81e8e15ecc/grpcio-1.73.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:65b0458a10b100d815a8426b1442bd17001fdb77ea13665b2f7dc9e8587fdc6b", size = 6103364, upload-time = "2025-06-26T01:52:33.028Z" }, + { url = "https://files.pythonhosted.org/packages/c5/46/28919d2aa038712fc399d02fa83e998abd8c1f46c2680c5689deca06d1b2/grpcio-1.73.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:0a9f3ea8dce9eae9d7cb36827200133a72b37a63896e0e61a9d5ec7d61a59ab1", size = 6749194, upload-time = "2025-06-26T01:52:34.734Z" }, + { url = "https://files.pythonhosted.org/packages/3d/56/3898526f1fad588c5d19a29ea0a3a4996fb4fa7d7c02dc1be0c9fd188b62/grpcio-1.73.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:de18769aea47f18e782bf6819a37c1c528914bfd5683b8782b9da356506190c8", size = 6283902, upload-time = "2025-06-26T01:52:36.503Z" }, + { url = "https://files.pythonhosted.org/packages/dc/64/18b77b89c5870d8ea91818feb0c3ffb5b31b48d1b0ee3e0f0d539730fea3/grpcio-1.73.1-cp312-cp312-win32.whl", hash = "sha256:24e06a5319e33041e322d32c62b1e728f18ab8c9dbc91729a3d9f9e3ed336642", size = 3668687, upload-time = "2025-06-26T01:52:38.678Z" }, + { url = "https://files.pythonhosted.org/packages/3c/52/302448ca6e52f2a77166b2e2ed75f5d08feca4f2145faf75cb768cccb25b/grpcio-1.73.1-cp312-cp312-win_amd64.whl", hash = "sha256:303c8135d8ab176f8038c14cc10d698ae1db9c480f2b2823f7a987aa2a4c5646", size = 4334887, upload-time = "2025-06-26T01:52:40.743Z" }, + { url = "https://files.pythonhosted.org/packages/37/bf/4ca20d1acbefabcaba633ab17f4244cbbe8eca877df01517207bd6655914/grpcio-1.73.1-cp313-cp313-linux_armv7l.whl", hash = "sha256:b310824ab5092cf74750ebd8a8a8981c1810cb2b363210e70d06ef37ad80d4f9", size = 5335615, upload-time = "2025-06-26T01:52:42.896Z" }, + { url = 
"https://files.pythonhosted.org/packages/75/ed/45c345f284abec5d4f6d77cbca9c52c39b554397eb7de7d2fcf440bcd049/grpcio-1.73.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:8f5a6df3fba31a3485096ac85b2e34b9666ffb0590df0cd044f58694e6a1f6b5", size = 10595497, upload-time = "2025-06-26T01:52:44.695Z" }, + { url = "https://files.pythonhosted.org/packages/a4/75/bff2c2728018f546d812b755455014bc718f8cdcbf5c84f1f6e5494443a8/grpcio-1.73.1-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:052e28fe9c41357da42250a91926a3e2f74c046575c070b69659467ca5aa976b", size = 5765321, upload-time = "2025-06-26T01:52:46.871Z" }, + { url = "https://files.pythonhosted.org/packages/70/3b/14e43158d3b81a38251b1d231dfb45a9b492d872102a919fbf7ba4ac20cd/grpcio-1.73.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c0bf15f629b1497436596b1cbddddfa3234273490229ca29561209778ebe182", size = 6415436, upload-time = "2025-06-26T01:52:49.134Z" }, + { url = "https://files.pythonhosted.org/packages/e5/3f/81d9650ca40b54338336fd360f36773be8cb6c07c036e751d8996eb96598/grpcio-1.73.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ab860d5bfa788c5a021fba264802e2593688cd965d1374d31d2b1a34cacd854", size = 6007012, upload-time = "2025-06-26T01:52:51.076Z" }, + { url = "https://files.pythonhosted.org/packages/55/f4/59edf5af68d684d0f4f7ad9462a418ac517201c238551529098c9aa28cb0/grpcio-1.73.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:ad1d958c31cc91ab050bd8a91355480b8e0683e21176522bacea225ce51163f2", size = 6105209, upload-time = "2025-06-26T01:52:52.773Z" }, + { url = "https://files.pythonhosted.org/packages/e4/a8/700d034d5d0786a5ba14bfa9ce974ed4c976936c2748c2bd87aa50f69b36/grpcio-1.73.1-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:f43ffb3bd415c57224c7427bfb9e6c46a0b6e998754bfa0d00f408e1873dcbb5", size = 6753655, upload-time = "2025-06-26T01:52:55.064Z" }, + { url = 
"https://files.pythonhosted.org/packages/1f/29/efbd4ac837c23bc48e34bbaf32bd429f0dc9ad7f80721cdb4622144c118c/grpcio-1.73.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:686231cdd03a8a8055f798b2b54b19428cdf18fa1549bee92249b43607c42668", size = 6287288, upload-time = "2025-06-26T01:52:57.33Z" }, + { url = "https://files.pythonhosted.org/packages/d8/61/c6045d2ce16624bbe18b5d169c1a5ce4d6c3a47bc9d0e5c4fa6a50ed1239/grpcio-1.73.1-cp313-cp313-win32.whl", hash = "sha256:89018866a096e2ce21e05eabed1567479713ebe57b1db7cbb0f1e3b896793ba4", size = 3668151, upload-time = "2025-06-26T01:52:59.405Z" }, + { url = "https://files.pythonhosted.org/packages/c2/d7/77ac689216daee10de318db5aa1b88d159432dc76a130948a56b3aa671a2/grpcio-1.73.1-cp313-cp313-win_amd64.whl", hash = "sha256:4a68f8c9966b94dff693670a5cf2b54888a48a5011c5d9ce2295a1a1465ee84f", size = 4335747, upload-time = "2025-06-26T01:53:01.233Z" }, ] [[package]] @@ -1475,12 +1463,12 @@ wheels = [ [[package]] name = "llama-stack-provider-ragas" -version = "0.3.2" +version = "0.3.3" source = { editable = "." 
} dependencies = [ + { name = "datasets" }, { name = "greenlet" }, { name = "llama-stack" }, - { name = "llama-stack-client" }, { name = "pandas" }, { name = "pyarrow" }, { name = "ragas" }, @@ -1525,13 +1513,13 @@ remote = [ [package.metadata] requires-dist = [ { name = "aiosqlite", marker = "extra == 'distro'" }, + { name = "datasets", specifier = ">=2.16.0" }, { name = "greenlet", specifier = "==3.2.4" }, { name = "ipykernel", marker = "extra == 'dev'" }, { name = "kfp", marker = "extra == 'remote'", specifier = ">=2.5.0" }, { name = "kfp-kubernetes", marker = "extra == 'remote'", specifier = ">=2.0.0" }, { name = "kubernetes", marker = "extra == 'remote'", specifier = ">=30.0.0" }, { name = "llama-stack", specifier = "==0.2.23" }, - { name = "llama-stack-client", specifier = "==0.2.23" }, { name = "llama-stack-provider-ragas", extras = ["distro"], marker = "extra == 'dev'" }, { name = "llama-stack-provider-ragas", extras = ["remote"], marker = "extra == 'dev'" }, { name = "mypy", marker = "extra == 'dev'" }, @@ -1540,7 +1528,7 @@ requires-dist = [ { name = "opentelemetry-exporter-otlp", marker = "extra == 'distro'" }, { name = "pandas", specifier = "<2.3.0" }, { name = "pre-commit", marker = "extra == 'dev'" }, - { name = "pyarrow", specifier = "==20.0.0" }, + { name = "pyarrow", specifier = ">=21.0.0" }, { name = "pytest", marker = "extra == 'dev'" }, { name = "pytest-asyncio", marker = "extra == 'dev'" }, { name = "pytest-cov", marker = "extra == 'dev'" }, @@ -1827,15 +1815,15 @@ wheels = [ [[package]] name = "ollama" -version = "0.6.0" +version = "0.5.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "httpx" }, { name = "pydantic" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d6/47/f9ee32467fe92744474a8c72e138113f3b529fc266eea76abfdec9a33f3b/ollama-0.6.0.tar.gz", hash = "sha256:da2b2d846b5944cfbcee1ca1e6ee0585f6c9d45a2fe9467cbcd096a37383da2f", size = 50811, upload-time = "2025-09-24T22:46:02.417Z" } +sdist = { 
url = "https://files.pythonhosted.org/packages/8d/96/c7fe0d2d1b3053be614822a7b722c7465161b3672ce90df71515137580a0/ollama-0.5.1.tar.gz", hash = "sha256:5a799e4dc4e7af638b11e3ae588ab17623ee019e496caaf4323efbaa8feeff93", size = 41112, upload-time = "2025-05-30T21:32:48.679Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b5/c1/edc9f41b425ca40b26b7c104c5f6841a4537bb2552bfa6ca66e81405bb95/ollama-0.6.0-py3-none-any.whl", hash = "sha256:534511b3ccea2dff419ae06c3b58d7f217c55be7897c8ce5868dfb6b219cf7a0", size = 14130, upload-time = "2025-09-24T22:46:01.19Z" }, + { url = "https://files.pythonhosted.org/packages/d6/76/3f96c8cdbf3955d7a73ee94ce3e0db0755d6de1e0098a70275940d1aff2f/ollama-0.5.1-py3-none-any.whl", hash = "sha256:4c8839f35bc173c7057b1eb2cbe7f498c1a7e134eafc9192824c8aecb3617506", size = 13369, upload-time = "2025-05-30T21:32:47.429Z" }, ] [[package]] @@ -2298,37 +2286,31 @@ wheels = [ [[package]] name = "pyarrow" -version = "20.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/ee/a7810cb9f3d6e9238e61d312076a9859bf3668fd21c69744de9532383912/pyarrow-20.0.0.tar.gz", hash = "sha256:febc4a913592573c8d5805091a6c2b5064c8bd6e002131f01061797d91c783c1", size = 1125187, upload-time = "2025-04-27T12:34:23.264Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a1/d6/0c10e0d54f6c13eb464ee9b67a68b8c71bcf2f67760ef5b6fbcddd2ab05f/pyarrow-20.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:75a51a5b0eef32727a247707d4755322cb970be7e935172b6a3a9f9ae98404ba", size = 30815067, upload-time = "2025-04-27T12:29:44.384Z" }, - { url = "https://files.pythonhosted.org/packages/7e/e2/04e9874abe4094a06fd8b0cbb0f1312d8dd7d707f144c2ec1e5e8f452ffa/pyarrow-20.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:211d5e84cecc640c7a3ab900f930aaff5cd2702177e0d562d426fb7c4f737781", size = 32297128, upload-time = "2025-04-27T12:29:52.038Z" }, - { url = 
"https://files.pythonhosted.org/packages/31/fd/c565e5dcc906a3b471a83273039cb75cb79aad4a2d4a12f76cc5ae90a4b8/pyarrow-20.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ba3cf4182828be7a896cbd232aa8dd6a31bd1f9e32776cc3796c012855e1199", size = 41334890, upload-time = "2025-04-27T12:29:59.452Z" }, - { url = "https://files.pythonhosted.org/packages/af/a9/3bdd799e2c9b20c1ea6dc6fa8e83f29480a97711cf806e823f808c2316ac/pyarrow-20.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c3a01f313ffe27ac4126f4c2e5ea0f36a5fc6ab51f8726cf41fee4b256680bd", size = 42421775, upload-time = "2025-04-27T12:30:06.875Z" }, - { url = "https://files.pythonhosted.org/packages/10/f7/da98ccd86354c332f593218101ae56568d5dcedb460e342000bd89c49cc1/pyarrow-20.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:a2791f69ad72addd33510fec7bb14ee06c2a448e06b649e264c094c5b5f7ce28", size = 40687231, upload-time = "2025-04-27T12:30:13.954Z" }, - { url = "https://files.pythonhosted.org/packages/bb/1b/2168d6050e52ff1e6cefc61d600723870bf569cbf41d13db939c8cf97a16/pyarrow-20.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:4250e28a22302ce8692d3a0e8ec9d9dde54ec00d237cff4dfa9c1fbf79e472a8", size = 42295639, upload-time = "2025-04-27T12:30:21.949Z" }, - { url = "https://files.pythonhosted.org/packages/b2/66/2d976c0c7158fd25591c8ca55aee026e6d5745a021915a1835578707feb3/pyarrow-20.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:89e030dc58fc760e4010148e6ff164d2f44441490280ef1e97a542375e41058e", size = 42908549, upload-time = "2025-04-27T12:30:29.551Z" }, - { url = "https://files.pythonhosted.org/packages/31/a9/dfb999c2fc6911201dcbf348247f9cc382a8990f9ab45c12eabfd7243a38/pyarrow-20.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6102b4864d77102dbbb72965618e204e550135a940c2534711d5ffa787df2a5a", size = 44557216, upload-time = "2025-04-27T12:30:36.977Z" }, - { url = 
"https://files.pythonhosted.org/packages/a0/8e/9adee63dfa3911be2382fb4d92e4b2e7d82610f9d9f668493bebaa2af50f/pyarrow-20.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:96d6a0a37d9c98be08f5ed6a10831d88d52cac7b13f5287f1e0f625a0de8062b", size = 25660496, upload-time = "2025-04-27T12:30:42.809Z" }, - { url = "https://files.pythonhosted.org/packages/9b/aa/daa413b81446d20d4dad2944110dcf4cf4f4179ef7f685dd5a6d7570dc8e/pyarrow-20.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:a15532e77b94c61efadde86d10957950392999503b3616b2ffcef7621a002893", size = 30798501, upload-time = "2025-04-27T12:30:48.351Z" }, - { url = "https://files.pythonhosted.org/packages/ff/75/2303d1caa410925de902d32ac215dc80a7ce7dd8dfe95358c165f2adf107/pyarrow-20.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:dd43f58037443af715f34f1322c782ec463a3c8a94a85fdb2d987ceb5658e061", size = 32277895, upload-time = "2025-04-27T12:30:55.238Z" }, - { url = "https://files.pythonhosted.org/packages/92/41/fe18c7c0b38b20811b73d1bdd54b1fccba0dab0e51d2048878042d84afa8/pyarrow-20.0.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa0d288143a8585806e3cc7c39566407aab646fb9ece164609dac1cfff45f6ae", size = 41327322, upload-time = "2025-04-27T12:31:05.587Z" }, - { url = "https://files.pythonhosted.org/packages/da/ab/7dbf3d11db67c72dbf36ae63dcbc9f30b866c153b3a22ef728523943eee6/pyarrow-20.0.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6953f0114f8d6f3d905d98e987d0924dabce59c3cda380bdfaa25a6201563b4", size = 42411441, upload-time = "2025-04-27T12:31:15.675Z" }, - { url = "https://files.pythonhosted.org/packages/90/c3/0c7da7b6dac863af75b64e2f827e4742161128c350bfe7955b426484e226/pyarrow-20.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:991f85b48a8a5e839b2128590ce07611fae48a904cae6cab1f089c5955b57eb5", size = 40677027, upload-time = "2025-04-27T12:31:24.631Z" }, - { url = 
"https://files.pythonhosted.org/packages/be/27/43a47fa0ff9053ab5203bb3faeec435d43c0d8bfa40179bfd076cdbd4e1c/pyarrow-20.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:97c8dc984ed09cb07d618d57d8d4b67a5100a30c3818c2fb0b04599f0da2de7b", size = 42281473, upload-time = "2025-04-27T12:31:31.311Z" }, - { url = "https://files.pythonhosted.org/packages/bc/0b/d56c63b078876da81bbb9ba695a596eabee9b085555ed12bf6eb3b7cab0e/pyarrow-20.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9b71daf534f4745818f96c214dbc1e6124d7daf059167330b610fc69b6f3d3e3", size = 42893897, upload-time = "2025-04-27T12:31:39.406Z" }, - { url = "https://files.pythonhosted.org/packages/92/ac/7d4bd020ba9145f354012838692d48300c1b8fe5634bfda886abcada67ed/pyarrow-20.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e8b88758f9303fa5a83d6c90e176714b2fd3852e776fc2d7e42a22dd6c2fb368", size = 44543847, upload-time = "2025-04-27T12:31:45.997Z" }, - { url = "https://files.pythonhosted.org/packages/9d/07/290f4abf9ca702c5df7b47739c1b2c83588641ddfa2cc75e34a301d42e55/pyarrow-20.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:30b3051b7975801c1e1d387e17c588d8ab05ced9b1e14eec57915f79869b5031", size = 25653219, upload-time = "2025-04-27T12:31:54.11Z" }, - { url = "https://files.pythonhosted.org/packages/95/df/720bb17704b10bd69dde086e1400b8eefb8f58df3f8ac9cff6c425bf57f1/pyarrow-20.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:ca151afa4f9b7bc45bcc791eb9a89e90a9eb2772767d0b1e5389609c7d03db63", size = 30853957, upload-time = "2025-04-27T12:31:59.215Z" }, - { url = "https://files.pythonhosted.org/packages/d9/72/0d5f875efc31baef742ba55a00a25213a19ea64d7176e0fe001c5d8b6e9a/pyarrow-20.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:4680f01ecd86e0dd63e39eb5cd59ef9ff24a9d166db328679e36c108dc993d4c", size = 32247972, upload-time = "2025-04-27T12:32:05.369Z" }, - { url = 
"https://files.pythonhosted.org/packages/d5/bc/e48b4fa544d2eea72f7844180eb77f83f2030b84c8dad860f199f94307ed/pyarrow-20.0.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7f4c8534e2ff059765647aa69b75d6543f9fef59e2cd4c6d18015192565d2b70", size = 41256434, upload-time = "2025-04-27T12:32:11.814Z" }, - { url = "https://files.pythonhosted.org/packages/c3/01/974043a29874aa2cf4f87fb07fd108828fc7362300265a2a64a94965e35b/pyarrow-20.0.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e1f8a47f4b4ae4c69c4d702cfbdfe4d41e18e5c7ef6f1bb1c50918c1e81c57b", size = 42353648, upload-time = "2025-04-27T12:32:20.766Z" }, - { url = "https://files.pythonhosted.org/packages/68/95/cc0d3634cde9ca69b0e51cbe830d8915ea32dda2157560dda27ff3b3337b/pyarrow-20.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:a1f60dc14658efaa927f8214734f6a01a806d7690be4b3232ba526836d216122", size = 40619853, upload-time = "2025-04-27T12:32:28.1Z" }, - { url = "https://files.pythonhosted.org/packages/29/c2/3ad40e07e96a3e74e7ed7cc8285aadfa84eb848a798c98ec0ad009eb6bcc/pyarrow-20.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:204a846dca751428991346976b914d6d2a82ae5b8316a6ed99789ebf976551e6", size = 42241743, upload-time = "2025-04-27T12:32:35.792Z" }, - { url = "https://files.pythonhosted.org/packages/eb/cb/65fa110b483339add6a9bc7b6373614166b14e20375d4daa73483755f830/pyarrow-20.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:f3b117b922af5e4c6b9a9115825726cac7d8b1421c37c2b5e24fbacc8930612c", size = 42839441, upload-time = "2025-04-27T12:32:46.64Z" }, - { url = "https://files.pythonhosted.org/packages/98/7b/f30b1954589243207d7a0fbc9997401044bf9a033eec78f6cb50da3f304a/pyarrow-20.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e724a3fd23ae5b9c010e7be857f4405ed5e679db5c93e66204db1a69f733936a", size = 44503279, upload-time = "2025-04-27T12:32:56.503Z" }, - { url = 
"https://files.pythonhosted.org/packages/37/40/ad395740cd641869a13bcf60851296c89624662575621968dcfafabaa7f6/pyarrow-20.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:82f1ee5133bd8f49d31be1299dc07f585136679666b502540db854968576faf9", size = 25944982, upload-time = "2025-04-27T12:33:04.72Z" }, +version = "21.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ef/c2/ea068b8f00905c06329a3dfcd40d0fcc2b7d0f2e355bdb25b65e0a0e4cd4/pyarrow-21.0.0.tar.gz", hash = "sha256:5051f2dccf0e283ff56335760cbc8622cf52264d67e359d5569541ac11b6d5bc", size = 1133487, upload-time = "2025-07-18T00:57:31.761Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ca/d4/d4f817b21aacc30195cf6a46ba041dd1be827efa4a623cc8bf39a1c2a0c0/pyarrow-21.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:3a302f0e0963db37e0a24a70c56cf91a4faa0bca51c23812279ca2e23481fccd", size = 31160305, upload-time = "2025-07-18T00:55:35.373Z" }, + { url = "https://files.pythonhosted.org/packages/a2/9c/dcd38ce6e4b4d9a19e1d36914cb8e2b1da4e6003dd075474c4cfcdfe0601/pyarrow-21.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:b6b27cf01e243871390474a211a7922bfbe3bda21e39bc9160daf0da3fe48876", size = 32684264, upload-time = "2025-07-18T00:55:39.303Z" }, + { url = "https://files.pythonhosted.org/packages/4f/74/2a2d9f8d7a59b639523454bec12dba35ae3d0a07d8ab529dc0809f74b23c/pyarrow-21.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e72a8ec6b868e258a2cd2672d91f2860ad532d590ce94cdf7d5e7ec674ccf03d", size = 41108099, upload-time = "2025-07-18T00:55:42.889Z" }, + { url = "https://files.pythonhosted.org/packages/ad/90/2660332eeb31303c13b653ea566a9918484b6e4d6b9d2d46879a33ab0622/pyarrow-21.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b7ae0bbdc8c6674259b25bef5d2a1d6af5d39d7200c819cf99e07f7dfef1c51e", size = 42829529, upload-time = "2025-07-18T00:55:47.069Z" }, + { url = 
"https://files.pythonhosted.org/packages/33/27/1a93a25c92717f6aa0fca06eb4700860577d016cd3ae51aad0e0488ac899/pyarrow-21.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:58c30a1729f82d201627c173d91bd431db88ea74dcaa3885855bc6203e433b82", size = 43367883, upload-time = "2025-07-18T00:55:53.069Z" }, + { url = "https://files.pythonhosted.org/packages/05/d9/4d09d919f35d599bc05c6950095e358c3e15148ead26292dfca1fb659b0c/pyarrow-21.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:072116f65604b822a7f22945a7a6e581cfa28e3454fdcc6939d4ff6090126623", size = 45133802, upload-time = "2025-07-18T00:55:57.714Z" }, + { url = "https://files.pythonhosted.org/packages/71/30/f3795b6e192c3ab881325ffe172e526499eb3780e306a15103a2764916a2/pyarrow-21.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:cf56ec8b0a5c8c9d7021d6fd754e688104f9ebebf1bf4449613c9531f5346a18", size = 26203175, upload-time = "2025-07-18T00:56:01.364Z" }, + { url = "https://files.pythonhosted.org/packages/16/ca/c7eaa8e62db8fb37ce942b1ea0c6d7abfe3786ca193957afa25e71b81b66/pyarrow-21.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:e99310a4ebd4479bcd1964dff9e14af33746300cb014aa4a3781738ac63baf4a", size = 31154306, upload-time = "2025-07-18T00:56:04.42Z" }, + { url = "https://files.pythonhosted.org/packages/ce/e8/e87d9e3b2489302b3a1aea709aaca4b781c5252fcb812a17ab6275a9a484/pyarrow-21.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:d2fe8e7f3ce329a71b7ddd7498b3cfac0eeb200c2789bd840234f0dc271a8efe", size = 32680622, upload-time = "2025-07-18T00:56:07.505Z" }, + { url = "https://files.pythonhosted.org/packages/84/52/79095d73a742aa0aba370c7942b1b655f598069489ab387fe47261a849e1/pyarrow-21.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:f522e5709379d72fb3da7785aa489ff0bb87448a9dc5a75f45763a795a089ebd", size = 41104094, upload-time = "2025-07-18T00:56:10.994Z" }, + { url = 
"https://files.pythonhosted.org/packages/89/4b/7782438b551dbb0468892a276b8c789b8bbdb25ea5c5eb27faadd753e037/pyarrow-21.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:69cbbdf0631396e9925e048cfa5bce4e8c3d3b41562bbd70c685a8eb53a91e61", size = 42825576, upload-time = "2025-07-18T00:56:15.569Z" }, + { url = "https://files.pythonhosted.org/packages/b3/62/0f29de6e0a1e33518dec92c65be0351d32d7ca351e51ec5f4f837a9aab91/pyarrow-21.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:731c7022587006b755d0bdb27626a1a3bb004bb56b11fb30d98b6c1b4718579d", size = 43368342, upload-time = "2025-07-18T00:56:19.531Z" }, + { url = "https://files.pythonhosted.org/packages/90/c7/0fa1f3f29cf75f339768cc698c8ad4ddd2481c1742e9741459911c9ac477/pyarrow-21.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dc56bc708f2d8ac71bd1dcb927e458c93cec10b98eb4120206a4091db7b67b99", size = 45131218, upload-time = "2025-07-18T00:56:23.347Z" }, + { url = "https://files.pythonhosted.org/packages/01/63/581f2076465e67b23bc5a37d4a2abff8362d389d29d8105832e82c9c811c/pyarrow-21.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:186aa00bca62139f75b7de8420f745f2af12941595bbbfa7ed3870ff63e25636", size = 26087551, upload-time = "2025-07-18T00:56:26.758Z" }, + { url = "https://files.pythonhosted.org/packages/c9/ab/357d0d9648bb8241ee7348e564f2479d206ebe6e1c47ac5027c2e31ecd39/pyarrow-21.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:a7a102574faa3f421141a64c10216e078df467ab9576684d5cd696952546e2da", size = 31290064, upload-time = "2025-07-18T00:56:30.214Z" }, + { url = "https://files.pythonhosted.org/packages/3f/8a/5685d62a990e4cac2043fc76b4661bf38d06efed55cf45a334b455bd2759/pyarrow-21.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:1e005378c4a2c6db3ada3ad4c217b381f6c886f0a80d6a316fe586b90f77efd7", size = 32727837, upload-time = "2025-07-18T00:56:33.935Z" }, + { url = 
"https://files.pythonhosted.org/packages/fc/de/c0828ee09525c2bafefd3e736a248ebe764d07d0fd762d4f0929dbc516c9/pyarrow-21.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:65f8e85f79031449ec8706b74504a316805217b35b6099155dd7e227eef0d4b6", size = 41014158, upload-time = "2025-07-18T00:56:37.528Z" }, + { url = "https://files.pythonhosted.org/packages/6e/26/a2865c420c50b7a3748320b614f3484bfcde8347b2639b2b903b21ce6a72/pyarrow-21.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:3a81486adc665c7eb1a2bde0224cfca6ceaba344a82a971ef059678417880eb8", size = 42667885, upload-time = "2025-07-18T00:56:41.483Z" }, + { url = "https://files.pythonhosted.org/packages/0a/f9/4ee798dc902533159250fb4321267730bc0a107d8c6889e07c3add4fe3a5/pyarrow-21.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:fc0d2f88b81dcf3ccf9a6ae17f89183762c8a94a5bdcfa09e05cfe413acf0503", size = 43276625, upload-time = "2025-07-18T00:56:48.002Z" }, + { url = "https://files.pythonhosted.org/packages/5a/da/e02544d6997037a4b0d22d8e5f66bc9315c3671371a8b18c79ade1cefe14/pyarrow-21.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6299449adf89df38537837487a4f8d3bd91ec94354fdd2a7d30bc11c48ef6e79", size = 44951890, upload-time = "2025-07-18T00:56:52.568Z" }, + { url = "https://files.pythonhosted.org/packages/e5/4e/519c1bc1876625fe6b71e9a28287c43ec2f20f73c658b9ae1d485c0c206e/pyarrow-21.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:222c39e2c70113543982c6b34f3077962b44fca38c0bd9e68bb6781534425c10", size = 26371006, upload-time = "2025-07-18T00:56:56.379Z" }, ] [[package]] From cb233cbce09adedb11b956410c6ad6a90d6c1954 Mon Sep 17 00:00:00 2001 From: Diego Maniloff Date: Wed, 15 Oct 2025 11:20:30 -0400 Subject: [PATCH 15/22] revert changes to provider.py modules. 
--- src/llama_stack_provider_ragas/inline/provider.py | 4 +--- src/llama_stack_provider_ragas/remote/provider.py | 6 ++---- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/llama_stack_provider_ragas/inline/provider.py b/src/llama_stack_provider_ragas/inline/provider.py index d48a4518..c45f81ae 100644 --- a/src/llama_stack_provider_ragas/inline/provider.py +++ b/src/llama_stack_provider_ragas/inline/provider.py @@ -1,12 +1,10 @@ from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec -from ..constants import PROVIDER_ID_INLINE - def get_provider_spec() -> ProviderSpec: return InlineProviderSpec( api=Api.eval, - provider_type=f"inline::{PROVIDER_ID_INLINE}", + provider_type="inline::trustyai_ragas", pip_packages=["ragas==0.3.0"], config_class="llama_stack_provider_ragas.config.RagasProviderInlineConfig", module="llama_stack_provider_ragas.inline", diff --git a/src/llama_stack_provider_ragas/remote/provider.py b/src/llama_stack_provider_ragas/remote/provider.py index 4d3eda04..0572916d 100644 --- a/src/llama_stack_provider_ragas/remote/provider.py +++ b/src/llama_stack_provider_ragas/remote/provider.py @@ -4,14 +4,12 @@ RemoteProviderSpec, ) -from ..constants import PROVIDER_ID_REMOTE - def get_provider_spec() -> ProviderSpec: return RemoteProviderSpec( api=Api.eval, - provider_type=f"remote::{PROVIDER_ID_REMOTE}", - adapter_type=PROVIDER_ID_REMOTE, + provider_type="remote::trustyai_ragas", + adapter_type="trustyai_ragas", module="llama_stack_provider_ragas.remote", pip_packages=[ "ragas==0.3.0", From 79453f459e9fa80a11bcefcc3eb6d5f00e795b87 Mon Sep 17 00:00:00 2001 From: Diego Maniloff Date: Wed, 15 Oct 2025 11:21:12 -0400 Subject: [PATCH 16/22] revert changes to vscode config. 
--- .vscode/launch.json | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 018ef524..99013fd3 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -4,12 +4,23 @@ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 "version": "0.2.0", "configurations": [ + { - "name": "Debug Ragas Distribution", + "name": "Debug Ragas Distribution -- Remote", "type": "debugpy", "request": "launch", "module": "llama_stack.cli.llama", - "args": ["stack", "run", "distribution/run.yaml"], + "args": ["stack", "run", "distribution/run-remote.yaml"], + "cwd": "${workspaceFolder}", + "envFile": "${workspaceFolder}/.env", + "justMyCode": false + }, + { + "name": "Debug Ragas Distribution -- Inline", + "type": "debugpy", + "request": "launch", + "module": "llama_stack.cli.llama", + "args": ["stack", "run", "distribution/run-inline.yaml"], "cwd": "${workspaceFolder}", "envFile": "${workspaceFolder}/.env", "justMyCode": false From c4879481d91737654e9a5001d526a52c7a9717c3 Mon Sep 17 00:00:00 2001 From: Diego Maniloff Date: Wed, 15 Oct 2025 11:23:27 -0400 Subject: [PATCH 17/22] revert distro & demo changes. 
--- demos/inline_demo.ipynb | 880 ++++++++++++++++++ demos/{basic_demo.ipynb => remote_demo.ipynb} | 792 ++++++---------- distribution/run-inline.yaml | 61 ++ distribution/{run.yaml => run-remote.yaml} | 11 +- 4 files changed, 1208 insertions(+), 536 deletions(-) create mode 100644 demos/inline_demo.ipynb rename demos/{basic_demo.ipynb => remote_demo.ipynb} (67%) create mode 100644 distribution/run-inline.yaml rename distribution/{run.yaml => run-remote.yaml} (86%) diff --git a/demos/inline_demo.ipynb b/demos/inline_demo.ipynb new file mode 100644 index 00000000..e24ba331 --- /dev/null +++ b/demos/inline_demo.ipynb @@ -0,0 +1,880 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Ragas Evaluation with Llama Stack - Demo [inline execution]\n", + "\n", + "This notebook demonstrates how to use the Ragas out-of-tree provider.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Setup and Imports\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# Install dev packages if not already installed\n", + "# !uv pip install -e \".[dev]\"\n", + "\n", + "from datetime import datetime\n", + "\n", + "from llama_stack_client import LlamaStackClient\n", + "from rich.pretty import pprint" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Llama Stack Client Setup\n", + "\n", + "- Make sure we have an inference model (model_type='llm')\n", + "- Make sure we have an embedding model (model_type='embedding')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:httpx:HTTP Request: GET http://localhost:8321/v1/models \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "data": { + "text/html": [ + "
[\n",
+       "Model(\n",
+       "│   │   identifier='all-MiniLM-L6-v2',\n",
+       "│   │   metadata={'embedding_dimension': 384.0},\n",
+       "│   │   api_model_type='embedding',\n",
+       "│   │   provider_id='ollama',\n",
+       "│   │   type='model',\n",
+       "│   │   provider_resource_id='all-minilm:latest',\n",
+       "│   │   model_type='embedding'\n",
+       "),\n",
+       "Model(\n",
+       "│   │   identifier='ollama/granite3.3:2b',\n",
+       "│   │   metadata={},\n",
+       "│   │   api_model_type='llm',\n",
+       "│   │   provider_id='ollama',\n",
+       "│   │   type='model',\n",
+       "│   │   provider_resource_id='granite3.3:2b',\n",
+       "│   │   model_type='llm'\n",
+       "),\n",
+       "Model(\n",
+       "│   │   identifier='ollama/all-minilm:l6-v2',\n",
+       "│   │   metadata={'embedding_dimension': 384.0, 'context_length': 512.0},\n",
+       "│   │   api_model_type='embedding',\n",
+       "│   │   provider_id='ollama',\n",
+       "│   │   type='model',\n",
+       "│   │   provider_resource_id='all-minilm:l6-v2',\n",
+       "│   │   model_type='embedding'\n",
+       "),\n",
+       "Model(\n",
+       "│   │   identifier='all-minilm',\n",
+       "│   │   metadata={'embedding_dimension': 384.0, 'context_length': 512.0},\n",
+       "│   │   api_model_type='embedding',\n",
+       "│   │   provider_id='ollama',\n",
+       "│   │   type='model',\n",
+       "│   │   provider_resource_id='all-minilm:l6-v2',\n",
+       "│   │   model_type='embedding'\n",
+       "),\n",
+       "Model(\n",
+       "│   │   identifier='nomic-embed-text',\n",
+       "│   │   metadata={'embedding_dimension': 768.0, 'context_length': 8192.0},\n",
+       "│   │   api_model_type='embedding',\n",
+       "│   │   provider_id='ollama',\n",
+       "│   │   type='model',\n",
+       "│   │   provider_resource_id='nomic-embed-text:latest',\n",
+       "│   │   model_type='embedding'\n",
+       ")\n",
+       "]\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1;35mModel\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'all-MiniLM-L6-v2'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'embedding_dimension'\u001b[0m: \u001b[1;36m384.0\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mapi_model_type\u001b[0m=\u001b[32m'embedding'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'ollama'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'model'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'all-minilm:latest'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmodel_type\u001b[0m=\u001b[32m'embedding'\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[1;35mModel\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'ollama/granite3.3:2b'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mapi_model_type\u001b[0m=\u001b[32m'llm'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'ollama'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'model'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'granite3.3:2b'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmodel_type\u001b[0m=\u001b[32m'llm'\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[1;35mModel\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'ollama/all-minilm:l6-v2'\u001b[0m,\n", + "\u001b[2;32m│ │ 
\u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'embedding_dimension'\u001b[0m: \u001b[1;36m384.0\u001b[0m, \u001b[32m'context_length'\u001b[0m: \u001b[1;36m512.0\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mapi_model_type\u001b[0m=\u001b[32m'embedding'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'ollama'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'model'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'all-minilm:l6-v2'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmodel_type\u001b[0m=\u001b[32m'embedding'\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[1;35mModel\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'all-minilm'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'embedding_dimension'\u001b[0m: \u001b[1;36m384.0\u001b[0m, \u001b[32m'context_length'\u001b[0m: \u001b[1;36m512.0\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mapi_model_type\u001b[0m=\u001b[32m'embedding'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'ollama'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'model'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'all-minilm:l6-v2'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmodel_type\u001b[0m=\u001b[32m'embedding'\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[1;35mModel\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'nomic-embed-text'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'embedding_dimension'\u001b[0m: \u001b[1;36m768.0\u001b[0m, 
\u001b[32m'context_length'\u001b[0m: \u001b[1;36m8192.0\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mapi_model_type\u001b[0m=\u001b[32m'embedding'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'ollama'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'model'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'nomic-embed-text:latest'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmodel_type\u001b[0m=\u001b[32m'embedding'\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[1m]\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "client = LlamaStackClient(base_url=\"http://localhost:8321\")\n", + "\n", + "models = client.models.list()\n", + "pprint(models)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Dataset Preparation\n", + "\n", + "Create a sample RAG evaluation dataset. 
In a real scenario, you would load your own dataset.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Sample Ragas evaluation dataset\n", + "evaluation_data = [\n", + " {\n", + " \"user_input\": \"What is the capital of France?\",\n", + " \"response\": \"The capital of France is Paris.\",\n", + " \"retrieved_contexts\": [\n", + " \"Paris is the capital and most populous city of France.\"\n", + " ],\n", + " \"reference\": \"Paris\",\n", + " },\n", + " {\n", + " \"user_input\": \"Who invented the telephone?\",\n", + " \"response\": \"Alexander Graham Bell invented the telephone in 1876.\",\n", + " \"retrieved_contexts\": [\n", + " \"Alexander Graham Bell was a Scottish-American inventor who patented the first practical telephone.\"\n", + " ],\n", + " \"reference\": \"Alexander Graham Bell\",\n", + " },\n", + " {\n", + " \"user_input\": \"What is photosynthesis?\",\n", + " \"response\": \"Photosynthesis is the process by which plants convert sunlight into energy.\",\n", + " \"retrieved_contexts\": [\n", + " \"Photosynthesis is a process used by plants to convert light energy into chemical energy.\"\n", + " ],\n", + " \"reference\": \"Photosynthesis is the process by which plants and other organisms convert light energy into chemical energy.\",\n", + " },\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Dataset Registration\n", + "\n", + "Register the dataset with Llama Stack's Datasets API using the direct rows approach.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:httpx:HTTP Request: POST http://localhost:8321/v1/datasets \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "data": { + "text/html": [ + "
DatasetRegisterResponse(\n",
+       "identifier='ragas_demo_dataset_inline',\n",
+       "metadata={\n",
+       "│   │   'provider_id': 'localfs',\n",
+       "│   │   'description': 'Sample RAG evaluation dataset for Ragas demo',\n",
+       "│   │   'size': 3.0,\n",
+       "│   │   'format': 'ragas',\n",
+       "│   │   'created_at': '2025-09-25T19:25:53.739691'\n",
+       "},\n",
+       "provider_id='localfs',\n",
+       "purpose='eval/question-answer',\n",
+       "source=SourceRowsDataSource(\n",
+       "│   │   rows=[\n",
+       "│   │   │   {\n",
+       "│   │   │   │   'user_input': 'What is the capital of France?',\n",
+       "│   │   │   │   'response': 'The capital of France is Paris.',\n",
+       "│   │   │   │   'retrieved_contexts': ['Paris is the capital and most populous city of France.'],\n",
+       "│   │   │   │   'reference': 'Paris'\n",
+       "│   │   │   },\n",
+       "│   │   │   {\n",
+       "│   │   │   │   'user_input': 'Who invented the telephone?',\n",
+       "│   │   │   │   'response': 'Alexander Graham Bell invented the telephone in 1876.',\n",
+       "│   │   │   │   'retrieved_contexts': [\n",
+       "│   │   │   │   │   'Alexander Graham Bell was a Scottish-American inventor who patented the first practical telephone.'\n",
+       "│   │   │   │   ],\n",
+       "│   │   │   │   'reference': 'Alexander Graham Bell'\n",
+       "│   │   │   },\n",
+       "│   │   │   {\n",
+       "│   │   │   │   'user_input': 'What is photosynthesis?',\n",
+       "│   │   │   │   'response': 'Photosynthesis is the process by which plants convert sunlight into energy.',\n",
+       "│   │   │   │   'retrieved_contexts': [\n",
+       "│   │   │   │   │   'Photosynthesis is a process used by plants to convert light energy into chemical energy.'\n",
+       "│   │   │   │   ],\n",
+       "│   │   │   │   'reference': 'Photosynthesis is the process by which plants and other organisms convert light energy into chemical energy.'\n",
+       "│   │   │   }\n",
+       "│   │   ],\n",
+       "│   │   type='rows'\n",
+       "),\n",
+       "type='dataset',\n",
+       "provider_resource_id='ragas_demo_dataset_inline',\n",
+       "owner=None\n",
+       ")\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mDatasetRegisterResponse\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'ragas_demo_dataset_inline'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'provider_id'\u001b[0m: \u001b[32m'localfs'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'description'\u001b[0m: \u001b[32m'Sample RAG evaluation dataset for Ragas demo'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'size'\u001b[0m: \u001b[1;36m3.0\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'format'\u001b[0m: \u001b[32m'ragas'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'created_at'\u001b[0m: \u001b[32m'2025-09-25T19:25:53.739691'\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'localfs'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mpurpose\u001b[0m=\u001b[32m'eval/question-answer'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33msource\u001b[0m=\u001b[1;35mSourceRowsDataSource\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mrows\u001b[0m=\u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: \u001b[32m'What is the capital of France?'\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'The capital of France is Paris.'\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\u001b[32m'Paris is the capital and most populous city of France.'\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Paris'\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: 
\u001b[32m'Who invented the telephone?'\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'Alexander Graham Bell invented the telephone in 1876.'\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'Alexander Graham Bell was a Scottish-American inventor who patented the first practical telephone.'\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Alexander Graham Bell'\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: \u001b[32m'What is photosynthesis?'\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'Photosynthesis is the process by which plants convert sunlight into energy.'\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'Photosynthesis is a process used by plants to convert light energy into chemical energy.'\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Photosynthesis is the process by which plants and other organisms convert light energy into chemical energy.'\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'rows'\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'dataset'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'ragas_demo_dataset_inline'\u001b[0m,\n", + "\u001b[2;32m│ 
\u001b[0m\u001b[33mowner\u001b[0m=\u001b[3;35mNone\u001b[0m\n", + "\u001b[1m)\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Register the dataset\n", + "dataset_id = \"ragas_demo_dataset_inline\"\n", + "\n", + "dataset_response = client.datasets.register(\n", + " dataset_id=dataset_id,\n", + " purpose=\"eval/question-answer\", # RAG evaluation purpose\n", + " source={\"type\": \"rows\", \"rows\": evaluation_data},\n", + " metadata={\n", + " \"provider_id\": \"localfs\", # seems there's a bug in datasets\n", + " \"description\": \"Sample RAG evaluation dataset for Ragas demo\",\n", + " \"size\": len(evaluation_data),\n", + " \"format\": \"ragas\",\n", + " \"created_at\": datetime.now().isoformat(),\n", + " },\n", + ")\n", + "pprint(dataset_response)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. Benchmark Registration\n", + "\n", + "Register a benchmark that defines what metrics to use for evaluation.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:httpx:HTTP Request: POST http://localhost:8321/v1/eval/benchmarks \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "data": { + "text/html": [ + "
None\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[3;35mNone\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "benchmark_id = f\"ragas_demo_benchmark_{datetime.now().strftime('%Y%m%d_%H%M%S')}\"\n", + "\n", + "ragas_metrics = [\n", + " \"answer_relevancy\", # How relevant is the answer to the question?\n", + " # \"context_precision\", # How precise are the retrieved contexts?\n", + " # \"faithfulness\", # How faithful is the answer to the contexts?\n", + " # \"context_recall\", # How much of the ground truth is covered by contexts?\n", + " # \"answer_correctness\" # How correct is the answer compared to ground truth?\n", + "]\n", + "\n", + "benchmark_response = client.benchmarks.register(\n", + " benchmark_id=benchmark_id,\n", + " dataset_id=dataset_id,\n", + " scoring_functions=ragas_metrics,\n", + " provider_id=\"trustyai_ragas\",\n", + " # metadata={\n", + " # \"provider\": \"ragas\",\n", + " # \"version\": \"1.0\",\n", + " # \"metrics_count\": len(ragas_metrics),\n", + " # \"created_at\": datetime.now().isoformat()\n", + " # }\n", + ")\n", + "\n", + "pprint(benchmark_response)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:httpx:HTTP Request: GET http://localhost:8321/v1/eval/benchmarks \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "data": { + "text/html": [ + "
[\n",
+       "Benchmark(\n",
+       "│   │   dataset_id='ragas_demo_dataset_inline',\n",
+       "│   │   identifier='ragas_demo_benchmark_20250925_192402',\n",
+       "│   │   metadata={},\n",
+       "│   │   provider_id='trustyai_ragas',\n",
+       "│   │   scoring_functions=['answer_relevancy'],\n",
+       "│   │   type='benchmark',\n",
+       "│   │   provider_resource_id='ragas_demo_benchmark_20250925_192402'\n",
+       "),\n",
+       "Benchmark(\n",
+       "│   │   dataset_id='ragas_demo_dataset_inline',\n",
+       "│   │   identifier='ragas_demo_benchmark_20250925_192553',\n",
+       "│   │   metadata={},\n",
+       "│   │   provider_id='trustyai_ragas',\n",
+       "│   │   scoring_functions=['answer_relevancy'],\n",
+       "│   │   type='benchmark',\n",
+       "│   │   provider_resource_id='ragas_demo_benchmark_20250925_192553'\n",
+       ")\n",
+       "]\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1;35mBenchmark\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mdataset_id\u001b[0m=\u001b[32m'ragas_demo_dataset_inline'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'ragas_demo_benchmark_20250925_192402'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'trustyai_ragas'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mscoring_functions\u001b[0m=\u001b[1m[\u001b[0m\u001b[32m'answer_relevancy'\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'benchmark'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'ragas_demo_benchmark_20250925_192402'\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[1;35mBenchmark\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mdataset_id\u001b[0m=\u001b[32m'ragas_demo_dataset_inline'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'ragas_demo_benchmark_20250925_192553'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'trustyai_ragas'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mscoring_functions\u001b[0m=\u001b[1m[\u001b[0m\u001b[32m'answer_relevancy'\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'benchmark'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'ragas_demo_benchmark_20250925_192553'\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[1m]\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], 
+ "source": [ + "benchmarks = client.benchmarks.list()\n", + "pprint(benchmarks[-2:])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 7. Evaluation Execution\n", + "\n", + "Run the evaluation using our Ragas out-of-tree provider.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:httpx:HTTP Request: POST http://localhost:8321/v1/eval/benchmarks/ragas_demo_benchmark_20250925_192553/jobs \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "data": { + "text/html": [ + "
Job(\n",
+       "job_id='1',\n",
+       "status='in_progress',\n",
+       "result=None,\n",
+       "eval_config={\n",
+       "│   │   'embedding_model': 'all-MiniLM-L6-v2',\n",
+       "│   │   'ragas_config': {\n",
+       "│   │   │   'batch_size': None,\n",
+       "│   │   │   'show_progress': True,\n",
+       "│   │   │   'raise_exceptions': True,\n",
+       "│   │   │   'experiment_name': None,\n",
+       "│   │   │   'column_map': None\n",
+       "│   │   }\n",
+       "}\n",
+       ")\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mJob\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mjob_id\u001b[0m=\u001b[32m'1'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mstatus\u001b[0m=\u001b[32m'in_progress'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mresult\u001b[0m=\u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33meval_config\u001b[0m=\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'embedding_model'\u001b[0m: \u001b[32m'all-MiniLM-L6-v2'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'ragas_config'\u001b[0m: \u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'batch_size'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'show_progress'\u001b[0m: \u001b[3;92mTrue\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'raise_exceptions'\u001b[0m: \u001b[3;92mTrue\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'experiment_name'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'column_map'\u001b[0m: \u001b[3;35mNone\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[1m)\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# since we can't set the embedding model in the benchmark config,\n", + "# the embedding model is set in the distribution run.yaml file (all-MiniLM-L6-v2)\n", + "\n", + "job = client.eval.run_eval(\n", + " benchmark_id=benchmark_id,\n", + " benchmark_config={\n", + " \"eval_candidate\": {\n", + " \"type\": \"model\",\n", + " # \"model\": \"meta-llama/Llama-3.2-3B-Instruct\",\n", + " \"model\": \"ollama/granite3.3:2b\",\n", + " \"sampling_params\": {\"temperature\": 0.1, \"max_tokens\": 100},\n", + " },\n", + " \"scoring_params\": {},\n", + " },\n", + ")\n", + "pprint(job)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 8. 
Results Display\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:httpx:HTTP Request: GET http://localhost:8321/v1/eval/benchmarks/ragas_demo_benchmark_20250925_192553/jobs/1 \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "data": { + "text/html": [ + "
Job(\n",
+       "job_id='1',\n",
+       "status='in_progress',\n",
+       "result=None,\n",
+       "eval_config={\n",
+       "│   │   'embedding_model': 'all-MiniLM-L6-v2',\n",
+       "│   │   'ragas_config': {\n",
+       "│   │   │   'batch_size': None,\n",
+       "│   │   │   'show_progress': True,\n",
+       "│   │   │   'raise_exceptions': True,\n",
+       "│   │   │   'experiment_name': None,\n",
+       "│   │   │   'column_map': None\n",
+       "│   │   }\n",
+       "}\n",
+       ")\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mJob\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mjob_id\u001b[0m=\u001b[32m'1'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mstatus\u001b[0m=\u001b[32m'in_progress'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mresult\u001b[0m=\u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33meval_config\u001b[0m=\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'embedding_model'\u001b[0m: \u001b[32m'all-MiniLM-L6-v2'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'ragas_config'\u001b[0m: \u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'batch_size'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'show_progress'\u001b[0m: \u001b[3;92mTrue\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'raise_exceptions'\u001b[0m: \u001b[3;92mTrue\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'experiment_name'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'column_map'\u001b[0m: \u001b[3;35mNone\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[1m)\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "job = client.eval.jobs.status(benchmark_id=benchmark_id, job_id=job.job_id)\n", + "pprint(job)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:httpx:HTTP Request: GET http://localhost:8321/v1/eval/benchmarks/ragas_demo_benchmark_20250925_192553/jobs/1 \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "data": { + "text/html": [ + "
Job(\n",
+       "job_id='1',\n",
+       "status='completed',\n",
+       "result={\n",
+       "│   │   'generations': [\n",
+       "│   │   │   {\n",
+       "│   │   │   │   'user_input': 'What is the capital of France?',\n",
+       "│   │   │   │   'retrieved_contexts': ['Paris is the capital and most populous city of France.'],\n",
+       "│   │   │   │   'response': 'The capital of France is Paris.',\n",
+       "│   │   │   │   'reference': 'Paris'\n",
+       "│   │   │   },\n",
+       "│   │   │   {\n",
+       "│   │   │   │   'user_input': 'Who invented the telephone?',\n",
+       "│   │   │   │   'retrieved_contexts': [\n",
+       "│   │   │   │   │   'Alexander Graham Bell was a Scottish-American inventor who patented the first practical telephone.'\n",
+       "│   │   │   │   ],\n",
+       "│   │   │   │   'response': 'Alexander Graham Bell invented the telephone in 1876.',\n",
+       "│   │   │   │   'reference': 'Alexander Graham Bell'\n",
+       "│   │   │   },\n",
+       "│   │   │   {\n",
+       "│   │   │   │   'user_input': 'What is photosynthesis?',\n",
+       "│   │   │   │   'retrieved_contexts': [\n",
+       "│   │   │   │   │   'Photosynthesis is a process used by plants to convert light energy into chemical energy.'\n",
+       "│   │   │   │   ],\n",
+       "│   │   │   │   'response': 'Photosynthesis is the process by which plants convert sunlight into energy.',\n",
+       "│   │   │   │   'reference': 'Photosynthesis is the process by which plants and other organisms convert light energy into chemical energy.'\n",
+       "│   │   │   }\n",
+       "│   │   ],\n",
+       "│   │   'scores': {\n",
+       "│   │   │   'answer_relevancy': {\n",
+       "│   │   │   │   'score_rows': [\n",
+       "│   │   │   │   │   {'score': 0.9567410688422774},\n",
+       "│   │   │   │   │   {'score': 0.9262221944126402},\n",
+       "│   │   │   │   │   {'score': 0.8774825363469155}\n",
+       "│   │   │   │   ],\n",
+       "│   │   │   │   'aggregated_results': {'answer_relevancy': 0.9201485998672777}\n",
+       "│   │   │   }\n",
+       "│   │   }\n",
+       "},\n",
+       "eval_config={\n",
+       "│   │   'embedding_model': 'all-MiniLM-L6-v2',\n",
+       "│   │   'ragas_config': {\n",
+       "│   │   │   'batch_size': None,\n",
+       "│   │   │   'show_progress': True,\n",
+       "│   │   │   'raise_exceptions': True,\n",
+       "│   │   │   'experiment_name': None,\n",
+       "│   │   │   'column_map': None\n",
+       "│   │   }\n",
+       "}\n",
+       ")\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mJob\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mjob_id\u001b[0m=\u001b[32m'1'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mstatus\u001b[0m=\u001b[32m'completed'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mresult\u001b[0m=\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'generations'\u001b[0m: \u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: \u001b[32m'What is the capital of France?'\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\u001b[32m'Paris is the capital and most populous city of France.'\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'The capital of France is Paris.'\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Paris'\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: \u001b[32m'Who invented the telephone?'\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'Alexander Graham Bell was a Scottish-American inventor who patented the first practical telephone.'\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'Alexander Graham Bell invented the telephone in 1876.'\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Alexander Graham Bell'\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: \u001b[32m'What is 
photosynthesis?'\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'Photosynthesis is a process used by plants to convert light energy into chemical energy.'\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'Photosynthesis is the process by which plants convert sunlight into energy.'\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Photosynthesis is the process by which plants and other organisms convert light energy into chemical energy.'\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'scores'\u001b[0m: \u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'answer_relevancy'\u001b[0m: \u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'score_rows'\u001b[0m: \u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.9567410688422774\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.9262221944126402\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.8774825363469155\u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'aggregated_results'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'answer_relevancy'\u001b[0m: \u001b[1;36m0.9201485998672777\u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33meval_config\u001b[0m=\u001b[1m{\u001b[0m\n", + 
"\u001b[2;32m│ │ \u001b[0m\u001b[32m'embedding_model'\u001b[0m: \u001b[32m'all-MiniLM-L6-v2'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'ragas_config'\u001b[0m: \u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'batch_size'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'show_progress'\u001b[0m: \u001b[3;92mTrue\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'raise_exceptions'\u001b[0m: \u001b[3;92mTrue\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'experiment_name'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'column_map'\u001b[0m: \u001b[3;35mNone\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[1m)\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# wait a bit for the job to complete\n", + "pprint(client.eval.jobs.status(benchmark_id=benchmark_id, job_id=job.job_id))" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:httpx:HTTP Request: GET http://localhost:8321/v1/eval/benchmarks/ragas_demo_benchmark_20250925_192553/jobs/1/result \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "data": { + "text/html": [ + "
EvaluateResponse(\n",
+       "generations=[\n",
+       "│   │   {\n",
+       "│   │   │   'user_input': 'What is the capital of France?',\n",
+       "│   │   │   'retrieved_contexts': ['Paris is the capital and most populous city of France.'],\n",
+       "│   │   │   'response': 'The capital of France is Paris.',\n",
+       "│   │   │   'reference': 'Paris'\n",
+       "│   │   },\n",
+       "│   │   {\n",
+       "│   │   │   'user_input': 'Who invented the telephone?',\n",
+       "│   │   │   'retrieved_contexts': [\n",
+       "│   │   │   │   'Alexander Graham Bell was a Scottish-American inventor who patented the first practical telephone.'\n",
+       "│   │   │   ],\n",
+       "│   │   │   'response': 'Alexander Graham Bell invented the telephone in 1876.',\n",
+       "│   │   │   'reference': 'Alexander Graham Bell'\n",
+       "│   │   },\n",
+       "│   │   {\n",
+       "│   │   │   'user_input': 'What is photosynthesis?',\n",
+       "│   │   │   'retrieved_contexts': [\n",
+       "│   │   │   │   'Photosynthesis is a process used by plants to convert light energy into chemical energy.'\n",
+       "│   │   │   ],\n",
+       "│   │   │   'response': 'Photosynthesis is the process by which plants convert sunlight into energy.',\n",
+       "│   │   │   'reference': 'Photosynthesis is the process by which plants and other organisms convert light energy into chemical energy.'\n",
+       "│   │   }\n",
+       "],\n",
+       "scores={\n",
+       "│   │   'answer_relevancy': ScoringResult(\n",
+       "│   │   │   aggregated_results={'answer_relevancy': 0.9201485998672777},\n",
+       "│   │   │   score_rows=[\n",
+       "│   │   │   │   {'score': 0.9567410688422774},\n",
+       "│   │   │   │   {'score': 0.9262221944126402},\n",
+       "│   │   │   │   {'score': 0.8774825363469155}\n",
+       "│   │   │   ]\n",
+       "│   │   )\n",
+       "}\n",
+       ")\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;35mEvaluateResponse\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mgenerations\u001b[0m=\u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: \u001b[32m'What is the capital of France?'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\u001b[32m'Paris is the capital and most populous city of France.'\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'The capital of France is Paris.'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Paris'\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: \u001b[32m'Who invented the telephone?'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'Alexander Graham Bell was a Scottish-American inventor who patented the first practical telephone.'\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'Alexander Graham Bell invented the telephone in 1876.'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Alexander Graham Bell'\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: \u001b[32m'What is photosynthesis?'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'Photosynthesis is a process used by plants to convert light energy into chemical energy.'\u001b[0m\n", + "\u001b[2;32m│ 
│ │ \u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'Photosynthesis is the process by which plants convert sunlight into energy.'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Photosynthesis is the process by which plants and other organisms convert light energy into chemical energy.'\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mscores\u001b[0m=\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'answer_relevancy'\u001b[0m: \u001b[1;35mScoringResult\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[33maggregated_results\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'answer_relevancy'\u001b[0m: \u001b[1;36m0.9201485998672777\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mscore_rows\u001b[0m=\u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.9567410688422774\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.9262221944126402\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.8774825363469155\u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m]\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[1m)\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "results = client.eval.jobs.retrieve(benchmark_id=benchmark_id, job_id=job.job_id)\n", + "pprint(results)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + 
"file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.11" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/demos/basic_demo.ipynb b/demos/remote_demo.ipynb similarity index 67% rename from demos/basic_demo.ipynb rename to demos/remote_demo.ipynb index 3669c7db..b88d67cf 100644 --- a/demos/basic_demo.ipynb +++ b/demos/remote_demo.ipynb @@ -18,7 +18,7 @@ "Nuke any old distro config files you might have lying around (I find these get in the way whenever I change my `.env` variables): \n", "```bash\n", "ls ~/.llama/distributions/\n", - "rm -r ~/.llama/distributions/\n", + "rm -r ~/.llama/distributions/trustyai_ragas_distro\n", "```\n", "\n", "Then, run your llama stack server with:\n", @@ -40,16 +40,7 @@ "cell_type": "code", "execution_count": 1, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/dmanilof/src/llama-stack-provider-ragas/.venv/lib/python3.13/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - } - ], + "outputs": [], "source": [ "# Install dev packages if not already installed\n", "# !uv pip install -e \".[dev]\"\n", @@ -57,11 +48,8 @@ "import os\n", "from datetime import datetime\n", "\n", - "import pandas as pd\n", "from llama_stack_client import LlamaStackClient\n", - "from rich.pretty import pprint\n", - "\n", - "from llama_stack_provider_ragas.constants import PROVIDER_ID_INLINE, PROVIDER_ID_REMOTE" + "from rich.pretty import pprint" ] }, { @@ -83,17 +71,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: GET https://3cf60cfd902b.ngrok-free.app/v1/models \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: GET https://3e112b480059.ngrok-free.app/v1/models \"HTTP/1.1 200 OK\"\n" ] } ], "source": [ - "# If usingf the remote provider, you will need ngrok to enable remote access to your Llama Stack server\n", - "# Otherwise, the base_url is just http://localhost:8321\n", + "# You will need ngrok to enable remote access to your Llama Stack server\n", "client = LlamaStackClient(base_url=os.getenv(\"KUBEFLOW_LLAMA_STACK_URL\"))\n", - "available_models = client.models.list()\n", - "assert any(model.model_type == \"llm\" for model in available_models)\n", - "assert any(model.model_type == \"embedding\" for model in available_models)" + "assert client.models.list()" ] }, { @@ -158,42 +143,20 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: DELETE https://3cf60cfd902b.ngrok-free.app/v1/datasets/ragas_demo_dataset \"HTTP/1.1 204 No Content\"\n" - ] - } - ], - "source": [ - "# De-register the dataset if it already exists\n", - "dataset_id = \"ragas_demo_dataset\"\n", - "try:\n", - " client.datasets.unregister(dataset_id)\n", - "except Exception:\n", - " pass" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": 
"stderr", - "output_type": "stream", - "text": [ - "INFO:httpx:HTTP Request: POST https://3cf60cfd902b.ngrok-free.app/v1/datasets \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: POST https://3e112b480059.ngrok-free.app/v1/datasets \"HTTP/1.1 200 OK\"\n" ] }, { "data": { "text/html": [ "
DatasetRegisterResponse(\n",
-       "identifier='ragas_demo_dataset',\n",
+       "identifier='ragas_demo_dataset_remote',\n",
        "metadata={\n",
        "│   │   'provider_id': 'localfs',\n",
        "│   │   'description': 'Sample RAG evaluation dataset for Ragas demo',\n",
        "│   │   'size': 3.0,\n",
        "│   │   'format': 'ragas',\n",
-       "│   │   'created_at': '2025-10-14T10:19:34.479954'\n",
+       "│   │   'created_at': '2025-09-25T19:26:49.309288'\n",
        "},\n",
        "provider_id='localfs',\n",
        "purpose='eval/question-answer',\n",
@@ -225,20 +188,20 @@
        "│   │   type='rows'\n",
        "),\n",
        "type='dataset',\n",
-       "provider_resource_id='ragas_demo_dataset',\n",
+       "provider_resource_id='ragas_demo_dataset_remote',\n",
        "owner=None\n",
        ")\n",
        "
\n" ], "text/plain": [ "\u001b[1;35mDatasetRegisterResponse\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'ragas_demo_dataset'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'ragas_demo_dataset_remote'\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[32m'provider_id'\u001b[0m: \u001b[32m'localfs'\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[32m'description'\u001b[0m: \u001b[32m'Sample RAG evaluation dataset for Ragas demo'\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[32m'size'\u001b[0m: \u001b[1;36m3.0\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[32m'format'\u001b[0m: \u001b[32m'ragas'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[32m'created_at'\u001b[0m: \u001b[32m'2025-10-14T10:19:34.479954'\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'created_at'\u001b[0m: \u001b[32m'2025-09-25T19:26:49.309288'\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'localfs'\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mpurpose\u001b[0m=\u001b[32m'eval/question-answer'\u001b[0m,\n", @@ -270,7 +233,7 @@ "\u001b[2;32m│ │ \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'rows'\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'dataset'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'ragas_demo_dataset'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'ragas_demo_dataset_remote'\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mowner\u001b[0m=\u001b[3;35mNone\u001b[0m\n", "\u001b[1m)\u001b[0m\n" ] @@ -280,6 +243,9 @@ } ], "source": [ + "# Register the dataset\n", + "dataset_id = \"ragas_demo_dataset_remote\"\n", + "\n", "dataset_response = client.datasets.register(\n", " 
dataset_id=dataset_id,\n", " purpose=\"eval/question-answer\", # RAG evaluation purpose\n", @@ -306,15 +272,14 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: POST https://3cf60cfd902b.ngrok-free.app/v1/eval/benchmarks \"HTTP/1.1 200 OK\"\n", - "INFO:httpx:HTTP Request: POST https://3cf60cfd902b.ngrok-free.app/v1/eval/benchmarks \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: POST https://3e112b480059.ngrok-free.app/v1/eval/benchmarks \"HTTP/1.1 200 OK\"\n" ] }, { @@ -332,45 +297,40 @@ } ], "source": [ - "# comment out the provider you don't want to run\n", - "benchmarks_providers = [\n", - " (\"ragas_demo_benchmark__inline\", PROVIDER_ID_INLINE),\n", - " (\"ragas_demo_benchmark__remote\", PROVIDER_ID_REMOTE),\n", - "]\n", + "benchmark_id = \"ragas_demo_benchmark_remote\"\n", "\n", - "for benchmark_id, provider_id in benchmarks_providers:\n", - " benchmark_response = client.benchmarks.register(\n", - " benchmark_id=benchmark_id,\n", - " dataset_id=dataset_id,\n", - " scoring_functions=[\n", - " \"answer_relevancy\", # How relevant is the answer to the question?\n", - " # \"context_precision\", # How precise are the retrieved contexts?\n", - " # \"faithfulness\", # How faithful is the answer to the contexts?\n", - " # \"context_recall\", # How much of the ground truth is covered by contexts?\n", - " # \"answer_correctness\" # How correct is the answer compared to ground truth?\n", - " ],\n", - " provider_id=provider_id,\n", - " # metadata={\n", - " # \"provider\": \"ragas\",\n", - " # \"version\": \"1.0\",\n", - " # \"metrics_count\": len(ragas_metrics),\n", - " # \"created_at\": datetime.now().isoformat()\n", - " # }\n", - " )\n", + "benchmark_response = client.benchmarks.register(\n", + " benchmark_id=benchmark_id,\n", + " dataset_id=dataset_id,\n", + " scoring_functions=[\n", + " \"answer_relevancy\", # How relevant is 
the answer to the question?\n", + " # \"context_precision\", # How precise are the retrieved contexts?\n", + " # \"faithfulness\", # How faithful is the answer to the contexts?\n", + " # \"context_recall\", # How much of the ground truth is covered by contexts?\n", + " # \"answer_correctness\" # How correct is the answer compared to ground truth?\n", + " ],\n", + " provider_id=\"trustyai_ragas\",\n", + " # metadata={\n", + " # \"provider\": \"ragas\",\n", + " # \"version\": \"1.0\",\n", + " # \"metrics_count\": len(ragas_metrics),\n", + " # \"created_at\": datetime.now().isoformat()\n", + " # }\n", + ")\n", "\n", "pprint(benchmark_response)" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: GET https://3cf60cfd902b.ngrok-free.app/v1/eval/benchmarks \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: GET https://3e112b480059.ngrok-free.app/v1/eval/benchmarks \"HTTP/1.1 200 OK\"\n" ] }, { @@ -378,58 +338,13 @@ "text/html": [ "
[\n",
        "Benchmark(\n",
-       "│   │   dataset_id='test_ragas_dataset_20251014_101301',\n",
-       "│   │   identifier='test_ragas_benchmark_20251014_101301',\n",
-       "│   │   metadata={},\n",
-       "│   │   provider_id='trustyai_ragas_inline',\n",
-       "│   │   scoring_functions=['answer_relevancy'],\n",
-       "│   │   type='benchmark',\n",
-       "│   │   provider_resource_id='test_ragas_benchmark_20251014_101301'\n",
-       "),\n",
-       "Benchmark(\n",
-       "│   │   dataset_id='test_ragas_dataset_20251014_101326',\n",
-       "│   │   identifier='test_ragas_benchmark_20251014_101326',\n",
-       "│   │   metadata={},\n",
-       "│   │   provider_id='trustyai_ragas_inline',\n",
-       "│   │   scoring_functions=['answer_relevancy'],\n",
-       "│   │   type='benchmark',\n",
-       "│   │   provider_resource_id='test_ragas_benchmark_20251014_101326'\n",
-       "),\n",
-       "Benchmark(\n",
-       "│   │   dataset_id='test_ragas_dataset_20251014_101345',\n",
-       "│   │   identifier='test_ragas_benchmark_20251014_101345',\n",
+       "│   │   dataset_id='ragas_demo_dataset_remote',\n",
+       "│   │   identifier='ragas_demo_benchmark_remote',\n",
        "│   │   metadata={},\n",
-       "│   │   provider_id='trustyai_ragas_inline',\n",
+       "│   │   provider_id='trustyai_ragas',\n",
        "│   │   scoring_functions=['answer_relevancy'],\n",
        "│   │   type='benchmark',\n",
-       "│   │   provider_resource_id='test_ragas_benchmark_20251014_101345'\n",
-       "),\n",
-       "Benchmark(\n",
-       "│   │   dataset_id='ragas_demo_dataset',\n",
-       "│   │   identifier='ragas_demo_benchmark__inline',\n",
-       "│   │   metadata={},\n",
-       "│   │   provider_id='trustyai_ragas_inline',\n",
-       "│   │   scoring_functions=['answer_relevancy'],\n",
-       "│   │   type='benchmark',\n",
-       "│   │   provider_resource_id='ragas_demo_benchmark__inline'\n",
-       "),\n",
-       "Benchmark(\n",
-       "│   │   dataset_id='ragas_demo_dataset',\n",
-       "│   │   identifier='ragas_demo_benchmark__remote',\n",
-       "│   │   metadata={},\n",
-       "│   │   provider_id='trustyai_ragas_remote',\n",
-       "│   │   scoring_functions=['answer_relevancy'],\n",
-       "│   │   type='benchmark',\n",
-       "│   │   provider_resource_id='ragas_demo_benchmark__remote'\n",
-       "),\n",
-       "Benchmark(\n",
-       "│   │   dataset_id='test_ragas_dataset_20251014_101830',\n",
-       "│   │   identifier='test_ragas_benchmark_20251014_101830',\n",
-       "│   │   metadata={},\n",
-       "│   │   provider_id='trustyai_ragas_inline',\n",
-       "│   │   scoring_functions=['answer_relevancy'],\n",
-       "│   │   type='benchmark',\n",
-       "│   │   provider_resource_id='test_ragas_benchmark_20251014_101830'\n",
+       "│   │   provider_resource_id='ragas_demo_benchmark_remote'\n",
        ")\n",
        "]\n",
        "
\n" @@ -437,58 +352,13 @@ "text/plain": [ "\u001b[1m[\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[1;35mBenchmark\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mdataset_id\u001b[0m=\u001b[32m'test_ragas_dataset_20251014_101301'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'test_ragas_benchmark_20251014_101301'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'trustyai_ragas_inline'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mscoring_functions\u001b[0m=\u001b[1m[\u001b[0m\u001b[32m'answer_relevancy'\u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'benchmark'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'test_ragas_benchmark_20251014_101301'\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[1;35mBenchmark\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mdataset_id\u001b[0m=\u001b[32m'test_ragas_dataset_20251014_101326'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'test_ragas_benchmark_20251014_101326'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'trustyai_ragas_inline'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mscoring_functions\u001b[0m=\u001b[1m[\u001b[0m\u001b[32m'answer_relevancy'\u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'benchmark'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'test_ragas_benchmark_20251014_101326'\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ 
\u001b[0m\u001b[1;35mBenchmark\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mdataset_id\u001b[0m=\u001b[32m'test_ragas_dataset_20251014_101345'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'test_ragas_benchmark_20251014_101345'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'trustyai_ragas_inline'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mscoring_functions\u001b[0m=\u001b[1m[\u001b[0m\u001b[32m'answer_relevancy'\u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'benchmark'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'test_ragas_benchmark_20251014_101345'\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[1;35mBenchmark\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mdataset_id\u001b[0m=\u001b[32m'ragas_demo_dataset'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'ragas_demo_benchmark__inline'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mdataset_id\u001b[0m=\u001b[32m'ragas_demo_dataset_remote'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'ragas_demo_benchmark_remote'\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'trustyai_ragas_inline'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'trustyai_ragas'\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[33mscoring_functions\u001b[0m=\u001b[1m[\u001b[0m\u001b[32m'answer_relevancy'\u001b[0m\u001b[1m]\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'benchmark'\u001b[0m,\n", - "\u001b[2;32m│ │ 
\u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'ragas_demo_benchmark__inline'\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[1;35mBenchmark\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mdataset_id\u001b[0m=\u001b[32m'ragas_demo_dataset'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'ragas_demo_benchmark__remote'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'trustyai_ragas_remote'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mscoring_functions\u001b[0m=\u001b[1m[\u001b[0m\u001b[32m'answer_relevancy'\u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'benchmark'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'ragas_demo_benchmark__remote'\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[1;35mBenchmark\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mdataset_id\u001b[0m=\u001b[32m'test_ragas_dataset_20251014_101830'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33midentifier\u001b[0m=\u001b[32m'test_ragas_benchmark_20251014_101830'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_id\u001b[0m=\u001b[32m'trustyai_ragas_inline'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mscoring_functions\u001b[0m=\u001b[1m[\u001b[0m\u001b[32m'answer_relevancy'\u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mtype\u001b[0m=\u001b[32m'benchmark'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'test_ragas_benchmark_20251014_101830'\u001b[0m\n", + "\u001b[2;32m│ │ 
\u001b[0m\u001b[33mprovider_resource_id\u001b[0m=\u001b[32m'ragas_demo_benchmark_remote'\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m\n", "\u001b[1m]\u001b[0m\n" ] @@ -499,7 +369,7 @@ ], "source": [ "benchmarks = client.benchmarks.list()\n", - "pprint(benchmarks)" + "pprint(benchmarks[-1:])" ] }, { @@ -513,21 +383,21 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: POST https://3cf60cfd902b.ngrok-free.app/v1/eval/benchmarks/ragas_demo_benchmark__remote/jobs \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: POST https://3e112b480059.ngrok-free.app/v1/eval/benchmarks/ragas_demo_benchmark_remote/jobs \"HTTP/1.1 200 OK\"\n" ] }, { "data": { "text/html": [ "
Job(\n",
-       "job_id='b1e4951b-342c-4fb2-9e52-26d3b3df4b77',\n",
+       "job_id='afc31c92-5887-4b84-933c-009e8ae0594e',\n",
        "status='in_progress',\n",
        "runtime_config={\n",
        "│   │   'benchmark_config': {\n",
@@ -545,13 +415,13 @@
        "│   │   │   'scoring_params': {},\n",
        "│   │   │   'num_examples': None\n",
        "│   │   },\n",
-       "│   │   'embedding_model': 'ollama/all-minilm:l6-v2',\n",
+       "│   │   'embedding_model': 'all-MiniLM-L6-v2',\n",
        "│   │   'benchmark': {\n",
-       "│   │   │   'identifier': 'ragas_demo_benchmark__remote',\n",
-       "│   │   │   'provider_resource_id': 'ragas_demo_benchmark__remote',\n",
-       "│   │   │   'provider_id': 'trustyai_ragas_remote',\n",
+       "│   │   │   'identifier': 'ragas_demo_benchmark_remote',\n",
+       "│   │   │   'provider_resource_id': 'ragas_demo_benchmark_remote',\n",
+       "│   │   │   'provider_id': 'trustyai_ragas',\n",
        "│   │   │   'type': 'benchmark',\n",
-       "│   │   │   'dataset_id': 'ragas_demo_dataset',\n",
+       "│   │   │   'dataset_id': 'ragas_demo_dataset_remote',\n",
        "│   │   │   'scoring_functions': ['answer_relevancy'],\n",
        "│   │   │   'metadata': {}\n",
        "│   │   },\n",
@@ -567,18 +437,18 @@
        "│   │   │   's3_credentials_secret_name': 'aws-credentials',\n",
        "│   │   │   'pipelines_endpoint': 'https://ds-pipeline-dspa-ragas-eval-v3.apps.rosa.diego-ragas.zch5.p3.openshiftapps.com',\n",
        "│   │   │   'namespace': 'ragas-eval-v3',\n",
-       "│   │   │   'llama_stack_url': 'https://3cf60cfd902b.ngrok-free.app',\n",
+       "│   │   │   'llama_stack_url': 'https://3e112b480059.ngrok-free.app',\n",
        "│   │   │   'base_image': 'quay.io/diegosquayorg/my-ragas-provider-image:latest'\n",
        "│   │   }\n",
        "},\n",
-       "kubeflow_run_id='25f40e3e-1ac2-424f-ab76-58e8fa124c1a',\n",
+       "kubeflow_run_id='738a0fcf-8780-4a9d-b661-0874de18b96c',\n",
        "result=None\n",
        ")\n",
        "
\n" ], "text/plain": [ "\u001b[1;35mJob\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mjob_id\u001b[0m=\u001b[32m'b1e4951b-342c-4fb2-9e52-26d3b3df4b77'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mjob_id\u001b[0m=\u001b[32m'afc31c92-5887-4b84-933c-009e8ae0594e'\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mstatus\u001b[0m=\u001b[32m'in_progress'\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mruntime_config\u001b[0m=\u001b[1m{\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[32m'benchmark_config'\u001b[0m: \u001b[1m{\u001b[0m\n", @@ -596,13 +466,13 @@ "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'scoring_params'\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'num_examples'\u001b[0m: \u001b[3;35mNone\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[32m'embedding_model'\u001b[0m: \u001b[32m'ollama/all-minilm:l6-v2'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'embedding_model'\u001b[0m: \u001b[32m'all-MiniLM-L6-v2'\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[32m'benchmark'\u001b[0m: \u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'identifier'\u001b[0m: \u001b[32m'ragas_demo_benchmark__remote'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'provider_resource_id'\u001b[0m: \u001b[32m'ragas_demo_benchmark__remote'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'provider_id'\u001b[0m: \u001b[32m'trustyai_ragas_remote'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'identifier'\u001b[0m: \u001b[32m'ragas_demo_benchmark_remote'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'provider_resource_id'\u001b[0m: \u001b[32m'ragas_demo_benchmark_remote'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'provider_id'\u001b[0m: \u001b[32m'trustyai_ragas'\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'benchmark'\u001b[0m,\n", - "\u001b[2;32m│ │ │ 
\u001b[0m\u001b[32m'dataset_id'\u001b[0m: \u001b[32m'ragas_demo_dataset'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'dataset_id'\u001b[0m: \u001b[32m'ragas_demo_dataset_remote'\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'scoring_functions'\u001b[0m: \u001b[1m[\u001b[0m\u001b[32m'answer_relevancy'\u001b[0m\u001b[1m]\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'metadata'\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", @@ -618,11 +488,11 @@ "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m's3_credentials_secret_name'\u001b[0m: \u001b[32m'aws-credentials'\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'pipelines_endpoint'\u001b[0m: \u001b[32m'https://ds-pipeline-dspa-ragas-eval-v3.apps.rosa.diego-ragas.zch5.p3.openshiftapps.com'\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'namespace'\u001b[0m: \u001b[32m'ragas-eval-v3'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'llama_stack_url'\u001b[0m: \u001b[32m'https://3cf60cfd902b.ngrok-free.app'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'llama_stack_url'\u001b[0m: \u001b[32m'https://3e112b480059.ngrok-free.app'\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'base_image'\u001b[0m: \u001b[32m'quay.io/diegosquayorg/my-ragas-provider-image:latest'\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mkubeflow_run_id\u001b[0m=\u001b[32m'25f40e3e-1ac2-424f-ab76-58e8fa124c1a'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mkubeflow_run_id\u001b[0m=\u001b[32m'738a0fcf-8780-4a9d-b661-0874de18b96c'\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mresult\u001b[0m=\u001b[3;35mNone\u001b[0m\n", "\u001b[1m)\u001b[0m\n" ] @@ -636,82 +506,8 @@ "# since we can't set the embedding model in the benchmark config,\n", "# the embedding model is set in the distribution run.yaml file(all-MiniLM-L6-v2)\n", "\n", - "remote_job = 
client.eval.run_eval(\n", - " benchmark_id=\"ragas_demo_benchmark__remote\",\n", - " benchmark_config={\n", - " \"eval_candidate\": {\n", - " \"type\": \"model\",\n", - " \"model\": \"ollama/granite3.3:2b\",\n", - " \"sampling_params\": {\"temperature\": 0.1, \"max_tokens\": 100},\n", - " },\n", - " \"scoring_params\": {},\n", - " # \"num_examples\": 1,\n", - " },\n", - ")\n", - "pprint(remote_job)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:httpx:HTTP Request: POST https://3cf60cfd902b.ngrok-free.app/v1/eval/benchmarks/ragas_demo_benchmark__inline/jobs \"HTTP/1.1 200 OK\"\n" - ] - }, - { - "data": { - "text/html": [ - "
Job(\n",
-       "job_id='5',\n",
-       "status='in_progress',\n",
-       "result=None,\n",
-       "eval_config={\n",
-       "│   │   'embedding_model': 'ollama/all-minilm:l6-v2',\n",
-       "│   │   'ragas_config': {\n",
-       "│   │   │   'batch_size': None,\n",
-       "│   │   │   'show_progress': True,\n",
-       "│   │   │   'raise_exceptions': True,\n",
-       "│   │   │   'experiment_name': None,\n",
-       "│   │   │   'column_map': None\n",
-       "│   │   }\n",
-       "}\n",
-       ")\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;35mJob\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mjob_id\u001b[0m=\u001b[32m'5'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mstatus\u001b[0m=\u001b[32m'in_progress'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mresult\u001b[0m=\u001b[3;35mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33meval_config\u001b[0m=\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[32m'embedding_model'\u001b[0m: \u001b[32m'ollama/all-minilm:l6-v2'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[32m'ragas_config'\u001b[0m: \u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'batch_size'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'show_progress'\u001b[0m: \u001b[3;92mTrue\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'raise_exceptions'\u001b[0m: \u001b[3;92mTrue\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'experiment_name'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'column_map'\u001b[0m: \u001b[3;35mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Review settings in distributinon/run.yaml, eg., note that\n", - "# since we can't set the embedding model in the benchmark config,\n", - "# the embedding model is set in the distribution run.yaml file(all-MiniLM-L6-v2)\n", - "\n", - "inline_job = client.eval.run_eval(\n", - " benchmark_id=\"ragas_demo_benchmark__inline\",\n", + "job = client.eval.run_eval(\n", + " benchmark_id=benchmark_id,\n", " benchmark_config={\n", " \"eval_candidate\": {\n", " \"type\": \"model\",\n", @@ -722,7 +518,7 @@ " # \"num_examples\": 1,\n", " },\n", ")\n", - "pprint(inline_job)" + "pprint(job)" ] }, { @@ -734,51 +530,117 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 8, 
"metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: GET https://3cf60cfd902b.ngrok-free.app/v1/eval/benchmarks/ragas_demo_benchmark__inline/jobs/5 \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: GET https://3e112b480059.ngrok-free.app/v1/eval/benchmarks/ragas_demo_benchmark_remote/jobs/afc31c92-5887-4b84-933c-009e8ae0594e \"HTTP/1.1 200 OK\"\n" ] }, { "data": { "text/html": [ "
Job(\n",
-       "job_id='5',\n",
+       "job_id='afc31c92-5887-4b84-933c-009e8ae0594e',\n",
        "status='in_progress',\n",
-       "result=None,\n",
-       "eval_config={\n",
-       "│   │   'embedding_model': 'ollama/all-minilm:l6-v2',\n",
+       "runtime_config={\n",
+       "│   │   'benchmark_config': {\n",
+       "│   │   │   'eval_candidate': {\n",
+       "│   │   │   │   'type': 'model',\n",
+       "│   │   │   │   'model': 'ollama/granite3.3:2b',\n",
+       "│   │   │   │   'sampling_params': {\n",
+       "│   │   │   │   │   'strategy': {'type': 'greedy'},\n",
+       "│   │   │   │   │   'max_tokens': 100,\n",
+       "│   │   │   │   │   'repetition_penalty': 1.0,\n",
+       "│   │   │   │   │   'stop': None\n",
+       "│   │   │   │   },\n",
+       "│   │   │   │   'system_message': None\n",
+       "│   │   │   },\n",
+       "│   │   │   'scoring_params': {},\n",
+       "│   │   │   'num_examples': None\n",
+       "│   │   },\n",
+       "│   │   'embedding_model': 'all-MiniLM-L6-v2',\n",
+       "│   │   'benchmark': {\n",
+       "│   │   │   'identifier': 'ragas_demo_benchmark_remote',\n",
+       "│   │   │   'provider_resource_id': 'ragas_demo_benchmark_remote',\n",
+       "│   │   │   'provider_id': 'trustyai_ragas',\n",
+       "│   │   │   'type': 'benchmark',\n",
+       "│   │   │   'dataset_id': 'ragas_demo_dataset_remote',\n",
+       "│   │   │   'scoring_functions': ['answer_relevancy'],\n",
+       "│   │   │   'metadata': {}\n",
+       "│   │   },\n",
        "│   │   'ragas_config': {\n",
        "│   │   │   'batch_size': None,\n",
        "│   │   │   'show_progress': True,\n",
        "│   │   │   'raise_exceptions': True,\n",
        "│   │   │   'experiment_name': None,\n",
        "│   │   │   'column_map': None\n",
+       "│   │   },\n",
+       "│   │   'kubeflow_config': {\n",
+       "│   │   │   'results_s3_prefix': 's3://public-rhods/ragas-evaluation-pipeline/test-two',\n",
+       "│   │   │   's3_credentials_secret_name': 'aws-credentials',\n",
+       "│   │   │   'pipelines_endpoint': 'https://ds-pipeline-dspa-ragas-eval-v3.apps.rosa.diego-ragas.zch5.p3.openshiftapps.com',\n",
+       "│   │   │   'namespace': 'ragas-eval-v3',\n",
+       "│   │   │   'llama_stack_url': 'https://3e112b480059.ngrok-free.app',\n",
+       "│   │   │   'base_image': 'quay.io/diegosquayorg/my-ragas-provider-image:latest'\n",
        "│   │   }\n",
-       "}\n",
+       "},\n",
+       "kubeflow_run_id='738a0fcf-8780-4a9d-b661-0874de18b96c',\n",
+       "result=None\n",
        ")\n",
        "
\n" ], "text/plain": [ "\u001b[1;35mJob\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mjob_id\u001b[0m=\u001b[32m'5'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mjob_id\u001b[0m=\u001b[32m'afc31c92-5887-4b84-933c-009e8ae0594e'\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mstatus\u001b[0m=\u001b[32m'in_progress'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mresult\u001b[0m=\u001b[3;35mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33meval_config\u001b[0m=\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[32m'embedding_model'\u001b[0m: \u001b[32m'ollama/all-minilm:l6-v2'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mruntime_config\u001b[0m=\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'benchmark_config'\u001b[0m: \u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'eval_candidate'\u001b[0m: \u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'model'\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'model'\u001b[0m: \u001b[32m'ollama/granite3.3:2b'\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'sampling_params'\u001b[0m: \u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'strategy'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'greedy'\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'max_tokens'\u001b[0m: \u001b[1;36m100\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'repetition_penalty'\u001b[0m: \u001b[1;36m1.0\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'stop'\u001b[0m: \u001b[3;35mNone\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'system_message'\u001b[0m: \u001b[3;35mNone\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'scoring_params'\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ 
\u001b[0m\u001b[32m'num_examples'\u001b[0m: \u001b[3;35mNone\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'embedding_model'\u001b[0m: \u001b[32m'all-MiniLM-L6-v2'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'benchmark'\u001b[0m: \u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'identifier'\u001b[0m: \u001b[32m'ragas_demo_benchmark_remote'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'provider_resource_id'\u001b[0m: \u001b[32m'ragas_demo_benchmark_remote'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'provider_id'\u001b[0m: \u001b[32m'trustyai_ragas'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'benchmark'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'dataset_id'\u001b[0m: \u001b[32m'ragas_demo_dataset_remote'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'scoring_functions'\u001b[0m: \u001b[1m[\u001b[0m\u001b[32m'answer_relevancy'\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'metadata'\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[32m'ragas_config'\u001b[0m: \u001b[1m{\u001b[0m\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'batch_size'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'show_progress'\u001b[0m: \u001b[3;92mTrue\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'raise_exceptions'\u001b[0m: \u001b[3;92mTrue\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'experiment_name'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'column_map'\u001b[0m: \u001b[3;35mNone\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'kubeflow_config'\u001b[0m: \u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'results_s3_prefix'\u001b[0m: 
\u001b[32m's3://public-rhods/ragas-evaluation-pipeline/test-two'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m's3_credentials_secret_name'\u001b[0m: \u001b[32m'aws-credentials'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'pipelines_endpoint'\u001b[0m: \u001b[32m'https://ds-pipeline-dspa-ragas-eval-v3.apps.rosa.diego-ragas.zch5.p3.openshiftapps.com'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'namespace'\u001b[0m: \u001b[32m'ragas-eval-v3'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'llama_stack_url'\u001b[0m: \u001b[32m'https://3e112b480059.ngrok-free.app'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'base_image'\u001b[0m: \u001b[32m'quay.io/diegosquayorg/my-ragas-provider-image:latest'\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mkubeflow_run_id\u001b[0m=\u001b[32m'738a0fcf-8780-4a9d-b661-0874de18b96c'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mresult\u001b[0m=\u001b[3;35mNone\u001b[0m\n", "\u001b[1m)\u001b[0m\n" ] }, @@ -787,12 +649,8 @@ } ], "source": [ - "# wait a bit for the job to complete\n", - "pprint(\n", - " client.eval.jobs.status(\n", - " benchmark_id=\"ragas_demo_benchmark__inline\", job_id=inline_job.job_id\n", - " )\n", - ")" + "job = client.eval.jobs.status(benchmark_id=benchmark_id, job_id=job.job_id)\n", + "pprint(job)" ] }, { @@ -804,15 +662,15 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: GET https://3cf60cfd902b.ngrok-free.app/v1/eval/benchmarks/ragas_demo_benchmark__remote/jobs/b1e4951b-342c-4fb2-9e52-26d3b3df4b77 \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: GET https://3e112b480059.ngrok-free.app/v1/eval/benchmarks/ragas_demo_benchmark_remote/jobs/afc31c92-5887-4b84-933c-009e8ae0594e \"HTTP/1.1 200 OK\"\n" ] }, { "data": { "text/html": [ "
Job(\n",
-       "job_id='b1e4951b-342c-4fb2-9e52-26d3b3df4b77',\n",
-       "status='in_progress',\n",
+       "job_id='afc31c92-5887-4b84-933c-009e8ae0594e',\n",
+       "status='completed',\n",
        "runtime_config={\n",
        "│   │   'benchmark_config': {\n",
        "│   │   │   'eval_candidate': {\n",
@@ -829,13 +687,13 @@
        "│   │   │   'scoring_params': {},\n",
        "│   │   │   'num_examples': None\n",
        "│   │   },\n",
-       "│   │   'embedding_model': 'ollama/all-minilm:l6-v2',\n",
+       "│   │   'embedding_model': 'all-MiniLM-L6-v2',\n",
        "│   │   'benchmark': {\n",
-       "│   │   │   'identifier': 'ragas_demo_benchmark__remote',\n",
-       "│   │   │   'provider_resource_id': 'ragas_demo_benchmark__remote',\n",
-       "│   │   │   'provider_id': 'trustyai_ragas_remote',\n",
+       "│   │   │   'identifier': 'ragas_demo_benchmark_remote',\n",
+       "│   │   │   'provider_resource_id': 'ragas_demo_benchmark_remote',\n",
+       "│   │   │   'provider_id': 'trustyai_ragas',\n",
        "│   │   │   'type': 'benchmark',\n",
-       "│   │   │   'dataset_id': 'ragas_demo_dataset',\n",
+       "│   │   │   'dataset_id': 'ragas_demo_dataset_remote',\n",
        "│   │   │   'scoring_functions': ['answer_relevancy'],\n",
        "│   │   │   'metadata': {}\n",
        "│   │   },\n",
@@ -851,19 +709,59 @@
        "│   │   │   's3_credentials_secret_name': 'aws-credentials',\n",
        "│   │   │   'pipelines_endpoint': 'https://ds-pipeline-dspa-ragas-eval-v3.apps.rosa.diego-ragas.zch5.p3.openshiftapps.com',\n",
        "│   │   │   'namespace': 'ragas-eval-v3',\n",
-       "│   │   │   'llama_stack_url': 'https://3cf60cfd902b.ngrok-free.app',\n",
+       "│   │   │   'llama_stack_url': 'https://3e112b480059.ngrok-free.app',\n",
        "│   │   │   'base_image': 'quay.io/diegosquayorg/my-ragas-provider-image:latest'\n",
        "│   │   }\n",
        "},\n",
-       "kubeflow_run_id='25f40e3e-1ac2-424f-ab76-58e8fa124c1a',\n",
-       "result=None\n",
+       "kubeflow_run_id='738a0fcf-8780-4a9d-b661-0874de18b96c',\n",
+       "result={\n",
+       "│   │   'generations': [\n",
+       "│   │   │   {\n",
+       "│   │   │   │   'user_input': 'What is the capital of France?',\n",
+       "│   │   │   │   'response': 'The capital of France is Paris.',\n",
+       "│   │   │   │   'retrieved_contexts': ['Paris is the capital and most populous city of France.'],\n",
+       "│   │   │   │   'reference': 'Paris'\n",
+       "│   │   │   },\n",
+       "│   │   │   {\n",
+       "│   │   │   │   'user_input': 'Who invented the telephone?',\n",
+       "│   │   │   │   'response': 'Alexander Graham Bell invented the telephone in 1876.',\n",
+       "│   │   │   │   'retrieved_contexts': [\n",
+       "│   │   │   │   │   'Alexander Graham Bell was a Scottish-American inventor who patented the first practical telephone.'\n",
+       "│   │   │   │   ],\n",
+       "│   │   │   │   'reference': 'Alexander Graham Bell'\n",
+       "│   │   │   },\n",
+       "│   │   │   {\n",
+       "│   │   │   │   'user_input': 'What is photosynthesis?',\n",
+       "│   │   │   │   'response': 'Photosynthesis is the process by which plants convert sunlight into energy.',\n",
+       "│   │   │   │   'retrieved_contexts': [\n",
+       "│   │   │   │   │   'Photosynthesis is a process used by plants to convert light energy into chemical energy.'\n",
+       "│   │   │   │   ],\n",
+       "│   │   │   │   'reference': 'Photosynthesis is the process by which plants and other organisms convert light energy into chemical energy.'\n",
+       "│   │   │   }\n",
+       "│   │   ],\n",
+       "│   │   'scores': {\n",
+       "│   │   │   'answer_relevancy': {\n",
+       "│   │   │   │   'score_rows': [\n",
+       "│   │   │   │   │   {'score': 0.9567410688},\n",
+       "│   │   │   │   │   {'score': 0.9262221944000001},\n",
+       "│   │   │   │   │   {'score': 0.8774825363000001}\n",
+       "│   │   │   │   ],\n",
+       "│   │   │   │   'aggregated_results': {\n",
+       "│   │   │   │   │   'average': 0.9201485998333334,\n",
+       "│   │   │   │   │   'count': 3,\n",
+       "│   │   │   │   │   'min': 0.8774825363000001,\n",
+       "│   │   │   │   │   'max': 0.9567410688\n",
+       "│   │   │   │   }\n",
+       "│   │   │   }\n",
+       "│   │   }\n",
+       "}\n",
        ")\n",
        "
\n" ], "text/plain": [ "\u001b[1;35mJob\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mjob_id\u001b[0m=\u001b[32m'b1e4951b-342c-4fb2-9e52-26d3b3df4b77'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mstatus\u001b[0m=\u001b[32m'in_progress'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mjob_id\u001b[0m=\u001b[32m'afc31c92-5887-4b84-933c-009e8ae0594e'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mstatus\u001b[0m=\u001b[32m'completed'\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mruntime_config\u001b[0m=\u001b[1m{\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[32m'benchmark_config'\u001b[0m: \u001b[1m{\u001b[0m\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'eval_candidate'\u001b[0m: \u001b[1m{\u001b[0m\n", @@ -880,13 +778,13 @@ "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'scoring_params'\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'num_examples'\u001b[0m: \u001b[3;35mNone\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[32m'embedding_model'\u001b[0m: \u001b[32m'ollama/all-minilm:l6-v2'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'embedding_model'\u001b[0m: \u001b[32m'all-MiniLM-L6-v2'\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[32m'benchmark'\u001b[0m: \u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'identifier'\u001b[0m: \u001b[32m'ragas_demo_benchmark__remote'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'provider_resource_id'\u001b[0m: \u001b[32m'ragas_demo_benchmark__remote'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'provider_id'\u001b[0m: \u001b[32m'trustyai_ragas_remote'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'identifier'\u001b[0m: \u001b[32m'ragas_demo_benchmark_remote'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'provider_resource_id'\u001b[0m: \u001b[32m'ragas_demo_benchmark_remote'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'provider_id'\u001b[0m: 
\u001b[32m'trustyai_ragas'\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'benchmark'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'dataset_id'\u001b[0m: \u001b[32m'ragas_demo_dataset'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'dataset_id'\u001b[0m: \u001b[32m'ragas_demo_dataset_remote'\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'scoring_functions'\u001b[0m: \u001b[1m[\u001b[0m\u001b[32m'answer_relevancy'\u001b[0m\u001b[1m]\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'metadata'\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", @@ -902,12 +800,52 @@ "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m's3_credentials_secret_name'\u001b[0m: \u001b[32m'aws-credentials'\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'pipelines_endpoint'\u001b[0m: \u001b[32m'https://ds-pipeline-dspa-ragas-eval-v3.apps.rosa.diego-ragas.zch5.p3.openshiftapps.com'\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'namespace'\u001b[0m: \u001b[32m'ragas-eval-v3'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'llama_stack_url'\u001b[0m: \u001b[32m'https://3cf60cfd902b.ngrok-free.app'\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'llama_stack_url'\u001b[0m: \u001b[32m'https://3e112b480059.ngrok-free.app'\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'base_image'\u001b[0m: \u001b[32m'quay.io/diegosquayorg/my-ragas-provider-image:latest'\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mkubeflow_run_id\u001b[0m=\u001b[32m'25f40e3e-1ac2-424f-ab76-58e8fa124c1a'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mresult\u001b[0m=\u001b[3;35mNone\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mkubeflow_run_id\u001b[0m=\u001b[32m'738a0fcf-8780-4a9d-b661-0874de18b96c'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[33mresult\u001b[0m=\u001b[1m{\u001b[0m\n", + 
"\u001b[2;32m│ │ \u001b[0m\u001b[32m'generations'\u001b[0m: \u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: \u001b[32m'What is the capital of France?'\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'The capital of France is Paris.'\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\u001b[32m'Paris is the capital and most populous city of France.'\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Paris'\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: \u001b[32m'Who invented the telephone?'\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'Alexander Graham Bell invented the telephone in 1876.'\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'Alexander Graham Bell was a Scottish-American inventor who patented the first practical telephone.'\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Alexander Graham Bell'\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: \u001b[32m'What is photosynthesis?'\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'Photosynthesis is the process by which plants convert sunlight into energy.'\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ │ │ │ │ 
\u001b[0m\u001b[32m'Photosynthesis is a process used by plants to convert light energy into chemical energy.'\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Photosynthesis is the process by which plants and other organisms convert light energy into chemical energy.'\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'scores'\u001b[0m: \u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'answer_relevancy'\u001b[0m: \u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'score_rows'\u001b[0m: \u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.9567410688\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.9262221944000001\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.8774825363000001\u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'aggregated_results'\u001b[0m: \u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'average'\u001b[0m: \u001b[1;36m0.9201485998333334\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'count'\u001b[0m: \u001b[1;36m3\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'min'\u001b[0m: \u001b[1;36m0.8774825363000001\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'max'\u001b[0m: \u001b[1;36m0.9567410688\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m\n", "\u001b[1m)\u001b[0m\n" ] }, @@ -917,23 +855,19 @@ ], 
"source": [ "# wait a bit for the job to complete\n", - "pprint(\n", - " client.eval.jobs.status(\n", - " benchmark_id=\"ragas_demo_benchmark__remote\", job_id=remote_job.job_id\n", - " )\n", - ")" + "pprint(client.eval.jobs.status(benchmark_id=benchmark_id, job_id=job.job_id))" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: GET https://3cf60cfd902b.ngrok-free.app/v1/eval/benchmarks/ragas_demo_benchmark__remote/jobs/b1e4951b-342c-4fb2-9e52-26d3b3df4b77/result \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: GET https://3e112b480059.ngrok-free.app/v1/eval/benchmarks/ragas_demo_benchmark_remote/jobs/afc31c92-5887-4b84-933c-009e8ae0594e/result \"HTTP/1.1 200 OK\"\n" ] }, { @@ -967,12 +901,12 @@ "scores={\n", "│ │ 'answer_relevancy': ScoringResult(\n", "│ │ │ aggregated_results={\n", - "│ │ │ │ 'average': 0.9424462732333333,\n", + "│ │ │ │ 'average': 0.9201485998333334,\n", "│ │ │ │ 'count': 3.0,\n", - "│ │ │ │ 'min': 0.9239711678,\n", - "│ │ │ │ 'max': 0.962092759\n", + "│ │ │ │ 'min': 0.8774825363000001,\n", + "│ │ │ │ 'max': 0.9567410688\n", "│ │ │ },\n", - "│ │ │ score_rows=[{'score': 0.962092759}, {'score': 0.9412748929}, {'score': 0.9239711678}]\n", + "│ │ │ score_rows=[{'score': 0.9567410688}, {'score': 0.9262221944000001}, {'score': 0.8774825363000001}]\n", "│ │ )\n", "}\n", ")\n", @@ -1007,12 +941,12 @@ "\u001b[2;32m│ \u001b[0m\u001b[33mscores\u001b[0m=\u001b[1m{\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[32m'answer_relevancy'\u001b[0m: \u001b[1;35mScoringResult\u001b[0m\u001b[1m(\u001b[0m\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[33maggregated_results\u001b[0m=\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'average'\u001b[0m: \u001b[1;36m0.9424462732333333\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'average'\u001b[0m: \u001b[1;36m0.9201485998333334\u001b[0m,\n", "\u001b[2;32m│ │ │ │ 
\u001b[0m\u001b[32m'count'\u001b[0m: \u001b[1;36m3.0\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'min'\u001b[0m: \u001b[1;36m0.9239711678\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'max'\u001b[0m: \u001b[1;36m0.962092759\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'min'\u001b[0m: \u001b[1;36m0.8774825363000001\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'max'\u001b[0m: \u001b[1;36m0.9567410688\u001b[0m\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mscore_rows\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.962092759\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.9412748929\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.9239711678\u001b[0m\u001b[1m}\u001b[0m\u001b[1m]\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mscore_rows\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.9567410688\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.9262221944000001\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.8774825363000001\u001b[0m\u001b[1m}\u001b[0m\u001b[1m]\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m\n", "\u001b[1m)\u001b[0m\n" @@ -1023,207 +957,9 @@ } ], "source": [ - "remote_results = client.eval.jobs.retrieve(\n", - " benchmark_id=\"ragas_demo_benchmark__remote\", job_id=remote_job.job_id\n", - ")\n", - "pprint(remote_results)" + "results = client.eval.jobs.retrieve(benchmark_id=benchmark_id, job_id=job.job_id)\n", + "pprint(results)" ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:httpx:HTTP Request: GET 
https://3cf60cfd902b.ngrok-free.app/v1/eval/benchmarks/ragas_demo_benchmark__inline/jobs/5/result \"HTTP/1.1 200 OK\"\n" - ] - }, - { - "data": { - "text/html": [ - "
EvaluateResponse(\n",
-       "generations=[\n",
-       "│   │   {\n",
-       "│   │   │   'user_input': 'What is the capital of France?',\n",
-       "│   │   │   'retrieved_contexts': ['Paris is the capital and most populous city of France.'],\n",
-       "│   │   │   'response': 'The capital of France is Paris.',\n",
-       "│   │   │   'reference': 'Paris'\n",
-       "│   │   },\n",
-       "│   │   {\n",
-       "│   │   │   'user_input': 'Who invented the telephone?',\n",
-       "│   │   │   'retrieved_contexts': [\n",
-       "│   │   │   │   'Alexander Graham Bell was a Scottish-American inventor who patented the first practical telephone.'\n",
-       "│   │   │   ],\n",
-       "│   │   │   'response': 'Alexander Graham Bell invented the telephone in 1876.',\n",
-       "│   │   │   'reference': 'Alexander Graham Bell'\n",
-       "│   │   },\n",
-       "│   │   {\n",
-       "│   │   │   'user_input': 'What is photosynthesis?',\n",
-       "│   │   │   'retrieved_contexts': [\n",
-       "│   │   │   │   'Photosynthesis is a process used by plants to convert light energy into chemical energy.'\n",
-       "│   │   │   ],\n",
-       "│   │   │   'response': 'Photosynthesis is the process by which plants convert sunlight into energy.',\n",
-       "│   │   │   'reference': 'Photosynthesis is the process by which plants and other organisms convert light energy into chemical energy.'\n",
-       "│   │   }\n",
-       "],\n",
-       "scores={\n",
-       "│   │   'answer_relevancy': ScoringResult(\n",
-       "│   │   │   aggregated_results={'answer_relevancy': 0.9401391281654831},\n",
-       "│   │   │   score_rows=[\n",
-       "│   │   │   │   {'score': 0.9873642530041437},\n",
-       "│   │   │   │   {'score': 0.913605429314886},\n",
-       "│   │   │   │   {'score': 0.9194477021774197}\n",
-       "│   │   │   ]\n",
-       "│   │   )\n",
-       "}\n",
-       ")\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;35mEvaluateResponse\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mgenerations\u001b[0m=\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: \u001b[32m'What is the capital of France?'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\u001b[32m'Paris is the capital and most populous city of France.'\u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'The capital of France is Paris.'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Paris'\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: \u001b[32m'Who invented the telephone?'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'Alexander Graham Bell was a Scottish-American inventor who patented the first practical telephone.'\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'Alexander Graham Bell invented the telephone in 1876.'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Alexander Graham Bell'\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'user_input'\u001b[0m: \u001b[32m'What is photosynthesis?'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'retrieved_contexts'\u001b[0m: \u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[32m'Photosynthesis is a process used by plants to convert light energy into chemical energy.'\u001b[0m\n", - "\u001b[2;32m│ 
│ │ \u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'response'\u001b[0m: \u001b[32m'Photosynthesis is the process by which plants convert sunlight into energy.'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'reference'\u001b[0m: \u001b[32m'Photosynthesis is the process by which plants and other organisms convert light energy into chemical energy.'\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mscores\u001b[0m=\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[32m'answer_relevancy'\u001b[0m: \u001b[1;35mScoringResult\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[33maggregated_results\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'answer_relevancy'\u001b[0m: \u001b[1;36m0.9401391281654831\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mscore_rows\u001b[0m=\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.9873642530041437\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.913605429314886\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.9194477021774197\u001b[0m\u001b[1m}\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m]\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "inline_results = client.eval.jobs.retrieve(\n", - " benchmark_id=\"ragas_demo_benchmark__inline\", job_id=inline_job.job_id\n", - ")\n", - "pprint(inline_results)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Inline vs Remote Side-by-side" - ] - }, - { - "cell_type": "code", - "execution_count": 
17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
inlineremotediff
00.9873640.962093-0.025271
10.9136050.9412750.027669
20.9194480.9239710.004523
\n", - "
" - ], - "text/plain": [ - " inline remote diff\n", - "0 0.987364 0.962093 -0.025271\n", - "1 0.913605 0.941275 0.027669\n", - "2 0.919448 0.923971 0.004523" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd.DataFrame.from_dict(\n", - " {\n", - " \"inline\": [\n", - " r[\"score\"] for r in inline_results.scores[\"answer_relevancy\"].score_rows\n", - " ],\n", - " \"remote\": [\n", - " r[\"score\"] for r in remote_results.scores[\"answer_relevancy\"].score_rows\n", - " ],\n", - " },\n", - ").assign(diff=lambda df: df[\"remote\"] - df[\"inline\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -1242,7 +978,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.13.7" + "version": "3.12.11" } }, "nbformat": 4, diff --git a/distribution/run-inline.yaml b/distribution/run-inline.yaml new file mode 100644 index 00000000..7a40e2de --- /dev/null +++ b/distribution/run-inline.yaml @@ -0,0 +1,61 @@ +version: "2" +image_name: trustyai_ragas_distro_inline +apis: + - eval + - inference + - telemetry + - datasetio + - files + - benchmarks +providers: + eval: + - provider_id: trustyai_ragas + provider_type: inline::trustyai_ragas + module: llama_stack_provider_ragas.inline + config: + embedding_model: ${env.EMBEDDING_MODEL} + datasetio: + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/trustyai_ragas_distro_inline}/localfs_datasetio.db + inference: + - provider_id: ollama + provider_type: remote::ollama + config: + url: ${env.OLLAMA_URL:=http://localhost:11434} + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + 
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/trustyai_ragas_distro_inline}/trace_store.db + otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=} + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/trustyai_ragas_distro_inline/files} + metadata_store: + type: sqlite + db_path: ${env.METADATA_STORE_DB_PATH:=~/.llama/distributions/trustyai_ragas_distro_inline}/registry.db} + +models: + - metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: ollama + provider_model_id: all-minilm:latest + model_type: embedding + - metadata: {} + model_id: granite3.3:2b + provider_id: ollama + provider_model_id: granite3.3:2b + model_type: llm +server: + host: localhost + port: 8321 diff --git a/distribution/run.yaml b/distribution/run-remote.yaml similarity index 86% rename from distribution/run.yaml rename to distribution/run-remote.yaml index d13d4ce9..bbdaaaa3 100644 --- a/distribution/run.yaml +++ b/distribution/run-remote.yaml @@ -9,8 +9,8 @@ apis: - datasetio providers: eval: - - provider_id: trustyai_ragas_remote - provider_type: remote::trustyai_ragas_remote + - provider_id: trustyai_ragas + provider_type: remote::trustyai_ragas module: llama_stack_provider_ragas config: embedding_model: ${env.EMBEDDING_MODEL} @@ -20,12 +20,7 @@ providers: pipelines_endpoint: ${env.KUBEFLOW_PIPELINES_ENDPOINT} namespace: ${env.KUBEFLOW_NAMESPACE} llama_stack_url: ${env.KUBEFLOW_LLAMA_STACK_URL} - base_image: ${env.KUBEFLOW_BASE_IMAGE:=} - - provider_id: trustyai_ragas_inline - provider_type: inline::trustyai_ragas_inline - module: llama_stack_provider_ragas.inline - config: - embedding_model: ${env.EMBEDDING_MODEL} + base_image: ${env.KUBEFLOW_BASE_IMAGE} datasetio: - provider_id: localfs provider_type: inline::localfs From 5a055bb45d0a06d1e9fb989e3f5bfaa3c4130b6c Mon Sep 17 00:00:00 2001 From: Diego Maniloff Date: Wed, 15 Oct 2025 
11:24:11 -0400 Subject: [PATCH 18/22] revert changes to constants.py --- src/llama_stack_provider_ragas/constants.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/llama_stack_provider_ragas/constants.py b/src/llama_stack_provider_ragas/constants.py index 35b9ea23..800166d2 100644 --- a/src/llama_stack_provider_ragas/constants.py +++ b/src/llama_stack_provider_ragas/constants.py @@ -5,9 +5,6 @@ faithfulness, ) -PROVIDER_ID_INLINE = "trustyai_ragas_inline" -PROVIDER_ID_REMOTE = "trustyai_ragas_remote" - METRIC_MAPPING = { metric_func.name: metric_func for metric_func in [ @@ -23,6 +20,7 @@ # "rouge_score": RougeScore(), ] } + AVAILABLE_METRICS = list(METRIC_MAPPING.keys()) # Kubeflow ConfigMap keys and defaults for base image resolution From 0d14b2af958b791cb20dd869f49e50ae4bfa0f63 Mon Sep 17 00:00:00 2001 From: Diego Maniloff Date: Wed, 15 Oct 2025 11:24:31 -0400 Subject: [PATCH 19/22] revert doc changes. --- README.md | 40 +++++++++----------- docs/modules/ROOT/pages/index.adoc | 4 +- docs/modules/ROOT/pages/inline-provider.adoc | 4 +- docs/modules/ROOT/pages/remote-provider.adoc | 6 +-- 4 files changed, 25 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index 2642691e..9ba17cb6 100644 --- a/README.md +++ b/README.md @@ -14,8 +14,8 @@ This repository implements [Ragas](https://github.com/explodinggradients/ragas) The goal is to provide all of Ragas' evaluation functionality over Llama Stack's eval API, while leveraging the Llama Stack's built-in APIs for inference (llms and embeddings), datasets, and benchmarks. There are two versions of the provider: -- `inline`: runs the Ragas evaluation in the same process as the Llama Stack server. This is always available with the base installation. -- `remote`: runs the Ragas evaluation in a remote process, using Kubeflow Pipelines. Only available when remote dependencies are installed with `pip install llama-stack-provider-ragas[remote]`. 
+- `inline`: runs the Ragas evaluation in the same process as the Llama Stack server. +- `remote`: runs the Ragas evaluation in a remote process, using Kubeflow Pipelines. ## Prerequisites - Python 3.12 @@ -41,29 +41,12 @@ There are two versions of the provider: ``` - The sample LS distributions (one for inline and one for remote provider) is a simple LS distribution that uses Ollama for inference and embeddings. See the provider-specific sections below for setup and run commands. -### Inline provider (default with base installation) - -Create a `.env` file with the required environment variable: -```bash -EMBEDDING_MODEL=ollama/all-minilm:l6-v2 -``` - -Run the server: -```bash -dotenv run uv run llama stack run distribution/run.yaml -``` - -### Remote provider (requires optional dependencies) - -First install the remote dependencies: -```bash -uv pip install -e ".[remote]" -``` +### Remote provider (default) Create a `.env` file with the following: ```bash # Required for both inline and remote -EMBEDDING_MODEL=ollama/all-minilm:l6-v2 +EMBEDDING_MODEL=all-MiniLM-L6-v2 # Required for remote provider KUBEFLOW_LLAMA_STACK_URL= @@ -90,9 +73,22 @@ Where: Run the server: ```bash -dotenv run uv run llama stack run distribution/run.yaml +dotenv run uv run llama stack run distribution/run-remote.yaml +``` + +### Inline provider (need to specify `.inline` in the module name) + +Create a `.env` file with the required environment variable: +```bash +EMBEDDING_MODEL=all-MiniLM-L6-v2 +``` + +Run the server: +```bash +dotenv run uv run llama stack run distribution/run-inline.yaml ``` +You will notice that `run-inline.yaml` file has the module name as `llama_stack_provider_ragas.inline`, in order to specify the inline provider. ## Usage See the demos in the `demos` directory. 
diff --git a/docs/modules/ROOT/pages/index.adoc b/docs/modules/ROOT/pages/index.adoc index ffbe99e1..32baa94a 100644 --- a/docs/modules/ROOT/pages/index.adoc +++ b/docs/modules/ROOT/pages/index.adoc @@ -15,8 +15,8 @@ The goal is to provide all of Ragas' evaluation functionality over Llama Stack's There are two versions of the provider: -* `inline`: runs the Ragas evaluation in the same process as the Llama Stack server. This is always available with the base installation. -* `remote`: runs the Ragas evaluation in a remote process, using Kubeflow Pipelines. Only available when remote dependencies are installed with `pip install llama-stack-provider-ragas[remote]`. +* `remote`: runs the Ragas evaluation in a remote process, using Kubeflow Pipelines. This is the *default* when using the module-based import. +* `inline`: runs the Ragas evaluation in the same process as the Llama Stack server. == Getting Started diff --git a/docs/modules/ROOT/pages/inline-provider.adoc b/docs/modules/ROOT/pages/inline-provider.adoc index 49a8bcb8..6ed27b16 100644 --- a/docs/modules/ROOT/pages/inline-provider.adoc +++ b/docs/modules/ROOT/pages/inline-provider.adoc @@ -139,8 +139,8 @@ The inline provider is setup in the following lines of the `run-inline.yaml`: [,yaml] ---- eval: - - provider_id: trustyai_ragas_inline - provider_type: inline::trustyai_ragas_inline + - provider_id: trustyai_ragas + provider_type: inline::trustyai_ragas module: llama_stack_provider_ragas.inline config: embedding_model: ${env.EMBEDDING_MODEL} diff --git a/docs/modules/ROOT/pages/remote-provider.adoc b/docs/modules/ROOT/pages/remote-provider.adoc index 93e1d56e..f9f98cd8 100644 --- a/docs/modules/ROOT/pages/remote-provider.adoc +++ b/docs/modules/ROOT/pages/remote-provider.adoc @@ -195,9 +195,9 @@ The remote provider is setup in the following lines of the `run-remote.yaml`: [,yaml] ---- eval: - - provider_id: trustyai_ragas_remote - provider_type: remote::trustyai_ragas_remote - module: 
llama_stack_provider_ragas.remote + - provider_id: trustyai_ragas + provider_type: remote::trustyai_ragas + module: llama_stack_provider_ragas.remote # can also just be llama_stack_provider_ragas and it will default to remote config: embedding_model: ${env.EMBEDDING_MODEL} kubeflow_config: From 6982453f7c6f1df656832147688780c02b6f3e8d Mon Sep 17 00:00:00 2001 From: Diego Maniloff Date: Wed, 15 Oct 2025 11:40:32 -0400 Subject: [PATCH 20/22] part of revert changes to constants.py. --- tests/test_inline_evaluation.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_inline_evaluation.py b/tests/test_inline_evaluation.py index 1aea4580..aa66dcbd 100644 --- a/tests/test_inline_evaluation.py +++ b/tests/test_inline_evaluation.py @@ -3,8 +3,6 @@ import pytest from ragas.metrics import answer_relevancy -from llama_stack_provider_ragas.constants import PROVIDER_ID_INLINE - # mark as integration, see tool.pytest.ini_options in pyproject.toml pytestmark = pytest.mark.integration_test @@ -36,7 +34,7 @@ def test_single_metric_evaluation( benchmark_id=benchmark_id, dataset_id=dataset_id, scoring_functions=[metric_to_test.name], - provider_id=PROVIDER_ID_INLINE, + provider_id="trustyai_ragas", ) job = lls_client.eval.run_eval( From ab6253295e7b2b680c5932af1017e76d0cbf5220 Mon Sep 17 00:00:00 2001 From: Diego Maniloff Date: Wed, 15 Oct 2025 11:55:04 -0400 Subject: [PATCH 21/22] version bump. 
--- pyproject.toml | 2 +- uv.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 589e8302..26c7ec57 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "llama-stack-provider-ragas" -version = "0.3.4" +version = "0.4.0" description = "Ragas evaluation as an out-of-tree Llama Stack provider" readme = "README.md" requires-python = ">=3.12" diff --git a/uv.lock b/uv.lock index 1cad8b5c..d33a0db1 100644 --- a/uv.lock +++ b/uv.lock @@ -1463,7 +1463,7 @@ wheels = [ [[package]] name = "llama-stack-provider-ragas" -version = "0.3.4" +version = "0.4.0" source = { editable = "." } dependencies = [ { name = "datasets" }, From 208c880faad461030555da499384e214e9935f88 Mon Sep 17 00:00:00 2001 From: Diego Maniloff Date: Thu, 16 Oct 2025 12:36:17 -0400 Subject: [PATCH 22/22] Update llama-stack dependency to allow versions >=0.2.23 in pyproject.toml and uv.lock --- pyproject.toml | 2 +- uv.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 26c7ec57..31a1b244 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ authors = [ keywords = ["llama-stack", "ragas", "evaluation"] dependencies = [ "setuptools-scm", - "llama-stack==0.2.23", + "llama-stack>=0.2.23", "greenlet==3.2.4", # inline/files/localfs errors saying greenlet not found "ragas==0.3.0", "pandas<2.3.0", diff --git a/uv.lock b/uv.lock index d33a0db1..f57b0c2e 100644 --- a/uv.lock +++ b/uv.lock @@ -1519,7 +1519,7 @@ requires-dist = [ { name = "kfp", marker = "extra == 'remote'", specifier = ">=2.5.0" }, { name = "kfp-kubernetes", marker = "extra == 'remote'", specifier = ">=2.0.0" }, { name = "kubernetes", marker = "extra == 'remote'", specifier = ">=30.0.0" }, - { name = "llama-stack", specifier = "==0.2.23" }, + { name = "llama-stack", specifier = ">=0.2.23" }, { name = "llama-stack-provider-ragas", extras = ["distro"], marker = 
"extra == 'dev'" }, { name = "llama-stack-provider-ragas", extras = ["remote"], marker = "extra == 'dev'" }, { name = "mypy", marker = "extra == 'dev'" },