update readme

mike0sv · mike0sv · commit 72d6aa8820c8 · 2025-06-18T19:23:43.000+01:00
diff --git a/.github/workflows/cloud.yml b/.github/workflows/cloud.yml
@@ -1,9 +1,9 @@
 name: Cloud Evidently Check
 on:
   push:
-    branches: [main]
+    branches: [ main ]
   pull_request:
-    branches: [main]
+    branches: [ main ]
     types:
       - opened
       - reopened
diff --git a/.github/workflows/local.yml b/.github/workflows/local.yml
@@ -1,9 +1,9 @@
 name: Local Evidently Check
 on:
   push:
-    branches: [main]
+    branches: [ main ]
   pull_request:
-    branches: [main]
+    branches: [ main ]
     types:
       - opened
       - reopened
diff --git a/README.md b/README.md
@@ -64,7 +64,6 @@ my-llm-agent/
 │       └── evidently.yml
 ├── src/
 │   ├── my_agent.py
-│   ├── run_agent.py
 │   └── evidently_config.py
 ├── requirements.txt
 └── README.md
@@ -114,45 +113,72 @@ Create `src/my_agent.py`:
 ```python
 # src/my_agent.py
 
-```
-
-(*👉 Replace the function logic with your actual agent code later.*)
-
----
+from agents import Agent, WebSearchTool, Runner
 
-## 📑 Step 4: Write the Agent Runner
+my_agent = Agent(
+    name="Assistant",
+    instructions="You are a helpful assistant",
+    model="gpt-4.1",
+    tools=[
+        WebSearchTool(),
+    ],
+)
 
-We also need a way to run your custom agent code on test dataset. See *this section* if you want to do it via custom descriptor. 
 
-Create `src/run_agent.py`.
-This will load the dataset from cloud, run your agent on each row, and save the results locally for evidently action to use.
+def answer(question: str) -> str:
+    response = Runner.run_sync(my_agent, question)
+    return response.final_output
 
-```python
-# src/run_agent.py
 
 
 ```
 
+(*👉 Replace the function logic with your actual agent code later.*)
+
 ---
 
-## 📑 Step 5: Define Evidently Config
+## 📑 Step 4: Define Evidently Config
 
 Create `src/evidently_config.py`.
 This config defines:
 
+* A descriptor to run your agent
 * A descriptor to check your agent’s answers
 * Optionally a test summary metric
 
 ```python
 # src/evidently_config.py
 
+from evidently import ColumnType
+from evidently.cli.report import ReportConfig
+from evidently.core.datasets import DatasetColumn
+from evidently.metrics import MinValue
+from evidently.tests import gte, eq
+from evidently.descriptors import NegativityLLMEval, WordCount, CustomColumnDescriptor
+from src.my_agent import answer
+
+
+def answer_descriptor(col: DatasetColumn) -> DatasetColumn:
+    return DatasetColumn(ColumnType.Text, col.data.apply(answer))
+
+
+descr_conf = ReportConfig(descriptors=[
+    CustomColumnDescriptor("question", answer_descriptor, alias="answer"),
+    NegativityLLMEval("answer", provider="openai", model="gpt-4o-mini", alias="answer_negativity",
+                      tests=[eq("POSITIVE", column="answer_negativity")]),
+    WordCount("answer", alias="answer_word_count"),
+],
+    metrics=[MinValue(column="answer_word_count", tests=[gte(2)])]
+)
+
+
 ```
 
 (*👉 Adjust descriptor params to your needs later.*)
 
 ---
 
-## 📦 Step 6: Define Dependencies
+## 📦 Step 5: Define Dependencies
 
 Create `requirements.txt`:
 
@@ -163,25 +189,60 @@ openai-agents
 
 ---
 
-## ⚙️ Step 7: Create GitHub Actions Workflow
+## ⚙️ Step 6: Create GitHub Actions Workflow
 
 Create `.github/workflows/evidently.yml`:
 
 ```yaml
+name: Cloud Evidently Check
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+    types:
+      - opened
+      - reopened
+      - synchronize
+      - ready_for_review
+
+  workflow_dispatch:
+
+permissions:
+  contents: read
+  statuses: write
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  evidently-check:
+    name: Evidently Report
+    runs-on: ubuntu-22.04
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+          cache: "pip"
+          cache-dependency-path: requirements.txt
+      - name: "Install requirements"
+        run: pip install -r requirements.txt
+      - uses: evidentlyai/evidently-report-action@main
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+        with:
+          config_path: src/evidently_config.py
+          api_key: ${{ secrets.EVIDENTLY_API_KEY }}
+          input_path: cloud://<dataset_id>
+          output: cloud://<project_id>
 
 ```
 
 **Notes:**
-
-* Replace `YOUR_DATASET_ID` with the dataset ID from Evidently Cloud
-* This workflow:
-
-  * Downloads the base test dataset
-  * Runs your agent to produce answers
-  * Runs Evidently descriptors and metrics
-  * Uploads results to Evidently Cloud
-  * Uploads artifacts with report files
-  * Fails CI if any test fails
+Replace `<dataset_id>` and `<project_id>` in the workflow above with your dataset ID and project ID from Evidently Cloud.
 
 ---
 
@@ -196,13 +257,10 @@ Every time you push a commit or open a PR:
 * Fail workflow if test fails
 * Attach a link to the report in GitHub Check Annotations
 
----
-
-Excellent — here’s the **Local run** tutorial section in the same clean format. I’ll start with a short intro describing the difference from the Cloud run.
 
 ---
 
-## 📦 Tutorial: Continuous LLM Agent Evaluation with Evidently Local Run
+## 📦 Continuous LLM Agent Evaluation with Evidently Local Run
 
 In this tutorial, you’ll set up a GitHub Actions workflow to evaluate your LLM agent locally using Evidently’s CLI inside CI, without using Evidently Cloud.
 
@@ -251,39 +309,70 @@ question
 
 ---
 
-## 📑 Step 2: Use the Same Agent Code and Runner
+## 📑 Step 2: Use the Same Agent Code and Config
 
 Re-use the same:
 
 * `src/my_agent.py`
 * `src/run_agent.py`
+* `src/evidently_config.py`
+* `requirements.txt`
 
 No changes needed.
 
 ---
 
-## 📑 Step 3: Use the Same Evidently Config
-
-Re-use the same `src/evidently_config.py` from the cloud tutorial.
-
-No changes needed.
-
----
-
-## 📦 Step 4: Use the Same Dependencies
-
-Re-use the same `requirements.txt`.
-
-No changes needed.
-
----
-
-## ⚙️ Step 5: Create Local-Mode GitHub Actions Workflow
+## ⚙️ Step 3: Create Local-Mode GitHub Actions Workflow
 
 Create `.github/workflows/evidently.yml`:
 
 ```yaml
 
+name: Local Evidently Check
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+    types:
+      - opened
+      - reopened
+      - synchronize
+      - ready_for_review
+
+  workflow_dispatch:
+
+permissions:
+  contents: read
+  statuses: write
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  evidently-check:
+    name: Evidently Report
+    runs-on: ubuntu-22.04
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+          cache: "pip"
+          cache-dependency-path: requirements.txt
+      - name: "Install requirements"
+        run: pip install -r requirements.txt
+      - uses: evidentlyai/evidently-report-action@main
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+        with:
+          config_path: src/evidently_config.py
+          input_path: data/test_questions.csv
+          output: report.json
+          upload_artifacts: 'true'
+
 ```
 
 **Notes:**
@@ -308,24 +397,6 @@ On every commit:
 * Fails workflow if test fails
 
 
----
-
-## 🔥 Advanced: Custom Descriptor for Generating Responses
-
-If you'd like to integrate inference directly as an Evidently Descriptor:
-
-```python
-
-```
-
-Add it to config:
-
-```python
-
-```
-
-This runs your agent inference as part of the Evidently workflow inside CI.
-
 ---
 
 
@@ -340,5 +411,3 @@ This runs your agent inference as part of the Evidently workflow inside CI.
 ## ✅ Summary
 
 This setup makes it possible to automatically check that changes to your LLM agent’s prompt or logic still produce acceptable, positive results on a test set — in CI, using either local files or integrated Evidently Cloud datasets and storage.
-
-Would you like me to wrap this into a clean `.md` file now? I can draft it immediately.
diff --git a/src/evidently_config.py b/src/evidently_config.py
@@ -1,13 +1,21 @@
+from evidently import ColumnType
 from evidently.cli.report import ReportConfig
+from evidently.core.datasets import DatasetColumn
 from evidently.metrics import MinValue
 from evidently.tests import gte, eq
 from evidently.descriptors import NegativityLLMEval, WordCount, CustomColumnDescriptor
-from src.my_agent import answer_descriptor
+from src.my_agent import answer
+
+
+def answer_descriptor(col: DatasetColumn) -> DatasetColumn:
+    return DatasetColumn(ColumnType.Text, col.data.apply(answer))
+
 
 descr_conf = ReportConfig(descriptors=[
-   CustomColumnDescriptor("question", answer_descriptor, alias="answer"),
-   NegativityLLMEval("answer", provider="openai", model="gpt-4o-mini", alias="answer_negativity", tests=[eq("POSITIVE", column="answer_negativity")]),
-   WordCount("answer", alias="answer_word_count"),
+    CustomColumnDescriptor("question", answer_descriptor, alias="answer"),
+    NegativityLLMEval("answer", provider="openai", model="gpt-4o-mini", alias="answer_negativity",
+                      tests=[eq("POSITIVE", column="answer_negativity")]),
+    WordCount("answer", alias="answer_word_count"),
 ],
     metrics=[MinValue(column="answer_word_count", tests=[gte(2)])]
 )
diff --git a/src/my_agent.py b/src/my_agent.py
@@ -1,8 +1,5 @@
 from agents import Agent, WebSearchTool, Runner
 
-from evidently import ColumnType
-from evidently.core.datasets import DatasetColumn
-
 my_agent = Agent(
     name="Assistant",
     instructions="You are a helpful assistant",
@@ -17,5 +14,3 @@ def answer(question: str) -> str:
     response = Runner.run_sync(my_agent, question)
     return response.final_output
 
-def answer_descriptor(col: DatasetColumn) -> DatasetColumn:
-    return DatasetColumn(ColumnType.Text, col.data.apply(answer))