Skip to content

Commit a1b219f

Browse files
chore: merge upstream main and resolve conflicts
2 parents 1d85b51 + 805cd56 commit a1b219f

919 files changed

Lines changed: 59306 additions & 32346 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.cursor-plugin/plugin.json

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
{
2+
"name": "deepeval",
3+
"displayName": "DeepEval",
4+
"version": "1.0.0",
5+
"description": "Skills for adding DeepEval evaluations, tracing, datasets, Confident AI reports, and iterative improvement loops to AI applications.",
6+
"author": {
7+
"name": "Confident AI",
8+
"email": "founders@confident-ai.com"
9+
},
10+
"homepage": "https://deepeval.com",
11+
"repository": "https://github.com/confident-ai/deepeval",
12+
"license": "Apache-2.0",
13+
"keywords": [
14+
"deepeval",
15+
"llm",
16+
"evaluation",
17+
"tracing",
18+
"datasets",
19+
"confident-ai"
20+
],
21+
"category": "developer-tools",
22+
"skills": "./skills/"
23+
}

.github/workflows/changelog.yml

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
name: Generate Changelog
2+
3+
on:
4+
workflow_dispatch:
5+
inputs:
6+
mode:
7+
description: "Mode: year or range"
8+
required: true
9+
default: "year"
10+
year:
11+
description: "Year (e.g. 2025)"
12+
required: false
13+
from_tag:
14+
description: "From tag (e.g. v3.7.0)"
15+
required: false
16+
to_tag:
17+
description: "To tag (e.g. v3.9.0)"
18+
required: false
19+
20+
jobs:
21+
changelog:
22+
runs-on: ubuntu-latest
23+
24+
permissions:
25+
contents: write
26+
pull-requests: write
27+
28+
steps:
29+
- name: Checkout repo
30+
uses: actions/checkout@v4
31+
with:
32+
fetch-depth: 0
33+
34+
- name: Fetch tags
35+
run: git fetch --tags --force
36+
37+
- name: Set up Python
38+
uses: actions/setup-python@v5
39+
with:
40+
python-version: "3.11"
41+
42+
- name: Install dependencies
43+
run: |
44+
pip install rich pydantic deepeval
45+
46+
- name: Run changelog generator
47+
env:
48+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
49+
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
50+
run: |
51+
if [ "${{ github.event.inputs.mode }}" = "year" ]; then
52+
python .scripts/changelog/generate.py \
53+
--year ${{ github.event.inputs.year }} \
54+
--github --ai
55+
else
56+
python .scripts/changelog/generate.py \
57+
--range ${{ github.event.inputs.from_tag }} ${{ github.event.inputs.to_tag }} \
58+
--github --ai
59+
fi
60+
61+
- name: Create PR
62+
uses: peter-evans/create-pull-request@v6
63+
with:
64+
branch: chore/changelog-update
65+
title: "chore: update changelog"
66+
commit-message: "chore: update changelog"
67+
body: "Auto-generated changelog updates"

.github/workflows/full_test_core_for_pr.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,5 +102,5 @@ jobs:
102102
if: ${{ env.OPENAI_API_KEY != '' }}
103103
run: |
104104
poetry run pytest \
105-
tests/test_core/test_synthesizer/ tests/test_core/test_datasets/
105+
tests/test_core/test_synthesizer/ tests/test_core/test_datasets/ tests/test_core/test_simulator/
106106

.github/workflows/test_core.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,15 +89,15 @@ jobs:
8989
if: ${{ env.OPENAI_API_KEY != '' }}
9090
run: |
9191
poetry run pytest -vv -rA --maxfail=1 --capture=tee-sys -o faulthandler_timeout=300 \
92-
tests/test_core/test_synthesizer/ tests/test_core/test_datasets/
92+
tests/test_core/test_synthesizer/ tests/test_core/test_datasets/ tests/test_core/test_simulator/
9393
9494
# Dev tests (no secrets)
9595
- name: Run dev tests (no secrets)
9696
if: ${{ env.OPENAI_API_KEY == '' }}
9797
run: |
98-
poetry run pytest -vv -rA --maxfail=1 --capture=tee-sys tests/test_core/test_synthesizer/ tests/test_core/test_datasets/ \
98+
poetry run pytest -vv -rA --maxfail=1 --capture=tee-sys tests/test_core/test_synthesizer/ tests/test_core/test_datasets/ tests/test_core/test_simulator/ \
9999
--ignore=tests/test_core/test_tracing/test_dataset_iterator.py \
100100
--ignore=tests/test_core/test_synthesizer/test_context_generator.py \
101-
--ignore=tests/test_core/test_synthesizer/test_conversation_simulator.py \
101+
--ignore=tests/test_core/test_simulator/test_conversation_simulator.py \
102102
--ignore=tests/test_core/test_synthesizer/test_generate_from_goldens.py \
103103
--ignore=tests/test_core/test_synthesizer/test_synthesizer.py

.github/workflows/test_integrations.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ jobs:
4444
- name: Install Dependencies
4545
run: |
4646
poetry install --no-interaction --no-root --only main
47-
poetry install --with langchain
47+
poetry run pip install -U langgraph langchain langchain-openai
4848
4949
- name: Install Project
5050
run: poetry install --no-interaction --only main

.gitignore

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ dist/
1414
downloads/
1515
eggs/
1616
.eggs/
17-
lib/
18-
lib64/
17+
/lib/
18+
/lib64/
1919
parts/
2020
sdist/
2121
var/
@@ -173,3 +173,6 @@ secrets
173173
# Mac OS system files
174174
**/.DS_Store
175175

176+
# Cursor IDE local config (rules, etc.)
177+
.cursor/
178+

.scripts/changelog/generate.py

Lines changed: 28 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -25,15 +25,15 @@
2525
from typing import Callable, Dict, Iterable, List, Optional, Tuple
2626
from pydantic import BaseModel, Field, field_validator
2727

28-
2928
#################
3029
# Configuration #
3130
#################
3231

3332
OWNER = "confident-ai"
3433
REPO = "deepeval"
3534

36-
START_MARKER = "<!-- DeepEval release notes start -->"
35+
START_MARKER = "{/* DeepEval release notes start */}"
36+
LEGACY_START_MARKER = "<!-- DeepEval release notes start -->"
3737

3838
CATEGORY_ORDER = [
3939
"Backward Incompatible Change",
@@ -149,21 +149,25 @@ class AiMonthSummary(BaseModel):
149149
# - Prefer the stable marker (lets humans edit the visible link/text)
150150
# - Fall back to parsing the link if the marker is missing
151151
BULLET_PR_RE = re.compile(r"\[#(\d+)\]\(")
152-
BULLET_PR_MARKER_RE = re.compile(r"<!--\s*pr:(\d+)\s*-->")
152+
BULLET_PR_MARKER_RE = re.compile(
153+
r"(?:<!--\s*pr:(\d+)\s*-->|\{/\*\s*pr:(\d+)\s*\*/\})"
154+
)
153155
BULLET_TAIL_RE = re.compile(
154-
r"\s*\(\[#\d+\]\([^)]+\)\)\s*<!--\s*pr:\d+\s*-->.*$"
156+
r"\s*\(\[#\d+\]\([^)]+\)\)\s*(?:<!--\s*pr:\d+\s*-->|\{/\*\s*pr:\d+\s*\*/\}).*$"
155157
)
156158

157159
# Optional ignore list to be placed right after START_MARKER to avoid confusing the parser:
158160
# add a list of PR numbers you would like to be excluded from the generated changelog.
159-
# <!-- changelog-ignore:
161+
# {/* changelog-ignore:
160162
# - 1234
161163
# - 5678
162-
# -->
164+
# */}
163165
IGNORE_BLOCK_TOP_RE = re.compile(
164-
r"(?is)^\s*<!--\s*changelog-ignore:.*?-->\s*\n*"
166+
r"(?is)^\s*(?:<!--\s*changelog-ignore:.*?-->|\{/\*\s*changelog-ignore:.*?\*/\})\s*\n*"
167+
)
168+
IGNORE_BLOCK_ANY_RE = re.compile(
169+
r"(?is)(?:<!--\s*changelog-ignore:(.*?)-->|\{/\*\s*changelog-ignore:(.*?)\*/\})"
165170
)
166-
IGNORE_BLOCK_ANY_RE = re.compile(r"(?is)<!--\s*changelog-ignore:(.*?)-->")
167171

168172
###############
169173
# Git helpers #
@@ -762,8 +766,16 @@ def _pull_top_ignore_block(s: str) -> Tuple[str, str]:
762766
rest = s2[matched.end() :]
763767
return ignore_block.rstrip("\n") + "\n", rest
764768

765-
if START_MARKER in text:
766-
before, _, after = text.partition(START_MARKER)
769+
marker_in_text = next(
770+
(
771+
marker
772+
for marker in (START_MARKER, LEGACY_START_MARKER)
773+
if marker in text
774+
),
775+
None,
776+
)
777+
if marker_in_text:
778+
before, _, after = text.partition(marker_in_text)
767779
ignore_block, rest = _pull_top_ignore_block(after)
768780
prefix = before.rstrip() + "\n\n" + START_MARKER + "\n"
769781
if ignore_block:
@@ -793,21 +805,21 @@ def _pull_top_ignore_block(s: str) -> Tuple[str, str]:
793805

794806
def parse_ignore_prs(text: str) -> set[int]:
795807
"""
796-
Parse PR numbers from one or more `<!-- changelog-ignore: ... -->` HTML comment blocks.
808+
Parse PR numbers from one or more changelog-ignore comment blocks.
797809
798810
Should be placed immediately after the `START_MARKER`, for example:
799811
800-
<!-- changelog-ignore:
812+
{/* changelog-ignore:
801813
- 1234
802814
- 5678
803-
-->
815+
*/}
804816
805817
Lines may contain comments which can be used to document why a PR is being ignored
806818
Any integers found in the block are treated as PR numbers.
807819
"""
808820
ignored: set[int] = set()
809821
for matched in IGNORE_BLOCK_ANY_RE.finditer(text):
810-
block = matched.group(1)
822+
block = next(group for group in matched.groups() if group is not None)
811823
for line in block.splitlines():
812824
line = line.strip()
813825
if not line or line.startswith("#"):
@@ -876,7 +888,7 @@ def parse_body(body: str) -> ChangelogIndex:
876888
)
877889
if not matched:
878890
continue
879-
pr = int(matched.group(1))
891+
pr = int(next(group for group in matched.groups() if group))
880892
idx[month][category][version][pr] = line.rstrip()
881893

882894
return idx
@@ -1144,7 +1156,7 @@ def _tick() -> None:
11441156
author = f" ({user_display})"
11451157
line = (
11461158
f"- {title_out} ([#{pr_num}](https://github.com/{OWNER}/{REPO}/pull/{pr_num})) "
1147-
f"<!-- pr:{pr_num} -->{author}"
1159+
f"{{/* pr:{pr_num} */}}{author}"
11481160
)
11491161
idx[month][category][tag][pr_num] = line
11501162
_status(f"[{tag}] PR #{pr_num}: done")

CITATION.cff

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ authors:
66
- family-names: Vongthongsri
77
given-names: Kritin
88
title: deepeval
9-
version: 3.8.9
10-
date-released: "2026-01-07"
9+
version: 3.9.9
10+
date-released: "2026-04-26"
1111
url: https://confident-ai.com
1212
repository-code: https://github.com/confident-ai/deepeval
1313
license: Apache-2.0

README.md

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
<a href="#-metrics-and-features">Metrics and Features</a> |
2323
<a href="#-quickstart">Getting Started</a> |
2424
<a href="#-integrations">Integrations</a> |
25-
<a href="https://confident-ai.com?utm_source=GitHub">Confident AI</a>
25+
<a href="https://www.confident-ai.com?utm_source=deepeval&utm_medium=github&utm_content=header_nav">Confident AI</a>
2626
<p>
2727
</h4>
2828

@@ -58,7 +58,7 @@
5858
Whether you're building AI agents, RAG pipelines, or chatbots, implemented via LangChain or OpenAI, DeepEval has you covered. With it, you can easily determine the optimal models, prompts, and architecture to improve your AI quality, prevent prompt drifting, or even transition from OpenAI to Claude with confidence.
5959

6060
> [!IMPORTANT]
61-
> Need a place for your DeepEval testing data to live 🏡❤️? [Sign up to the DeepEval platform](https://confident-ai.com?utm_source=GitHub) to compare iterations of your LLM app, generate & share testing reports, and more.
61+
> Need a place for your DeepEval testing data to live 🏡❤️? [Sign up to the DeepEval platform](https://www.confident-ai.com?utm_source=deepeval&utm_medium=github&utm_content=signup_callout) to compare iterations of your LLM app, generate & share testing reports, and more.
6262
>
6363
> ![Demo GIF](assets/demo.gif)
6464
@@ -171,7 +171,7 @@ DeepEval plugs into any LLM framework — OpenAI Agents, LangChain, CrewAI, and
171171

172172
## ☁️ Platform + Ecosystem
173173

174-
[Confident AI](https://confident-ai.com?utm_source=GitHub) is an all-in-one platform that integrates natively with DeepEval.
174+
[Confident AI](https://www.confident-ai.com?utm_source=deepeval&utm_medium=github&utm_content=platform_section) is an all-in-one platform that integrates natively with DeepEval.
175175

176176
- Manage datasets, trace LLM applications, run evaluations, and monitor responses in production — all from one platform.
177177
- Don't need a UI? Confident AI can also be your data persistence layer - run evals, pull datasets, and inspect traces straight from Claude Code or Cursor via Confident AI's [MCP server](https://github.com/confident-ai/confident-mcp-server).
@@ -220,13 +220,13 @@ Open `test_chatbot.py` and write your first test case to run an **end-to-end** e
220220
import pytest
221221
from deepeval import assert_test
222222
from deepeval.metrics import GEval
223-
from deepeval.test_case import LLMTestCase, LLMTestCaseParams
223+
from deepeval.test_case import LLMTestCase, SingleTurnParams
224224

225225
def test_case():
226226
correctness_metric = GEval(
227227
name="Correctness",
228228
criteria="Determine if the 'actual output' is correct based on the 'expected output'.",
229-
evaluation_params=[LLMTestCaseParams.ACTUAL_OUTPUT, LLMTestCaseParams.EXPECTED_OUTPUT],
229+
evaluation_params=[SingleTurnParams.ACTUAL_OUTPUT, SingleTurnParams.EXPECTED_OUTPUT],
230230
threshold=0.5
231231
)
232232
test_case = LLMTestCase(
@@ -268,14 +268,14 @@ Use the `@observe` decorator to trace components (LLM calls, retrievers, tool ca
268268

269269
```python
270270
from deepeval.tracing import observe, update_current_span
271-
from deepeval.test_case import LLMTestCase, LLMTestCaseParams
271+
from deepeval.test_case import LLMTestCase, SingleTurnParams
272272
from deepeval.dataset import EvaluationDataset, Golden
273273
from deepeval.metrics import GEval
274274

275275
correctness = GEval(
276276
name="Correctness",
277277
criteria="Determine if the 'actual output' is correct based on the 'expected output'.",
278-
evaluation_params=[LLMTestCaseParams.ACTUAL_OUTPUT, LLMTestCaseParams.EXPECTED_OUTPUT],
278+
evaluation_params=[SingleTurnParams.ACTUAL_OUTPUT, SingleTurnParams.EXPECTED_OUTPUT],
279279
)
280280

281281
@observe(metrics=[correctness])
@@ -397,7 +397,7 @@ cp .env.example .env.local
397397

398398
# DeepEval With Confident AI
399399

400-
[Confident AI](https://confident-ai.com?utm_source=GitHub) is an all-in-one platform to manage datasets, trace LLM applications, and run evaluations in production. Log in from the CLI to get started:
400+
[Confident AI](https://www.confident-ai.com?utm_source=deepeval&utm_medium=github&utm_content=cli_login_section) is an all-in-one platform to manage datasets, trace LLM applications, and run evaluations in production. Log in from the CLI to get started:
401401

402402
```bash
403403
deepeval login
@@ -417,7 +417,7 @@ Prefer to stay in your IDE? Use DeepEval via [Confident AI's MCP server](https:/
417417
<img src="assets/confident-mcp-architecture.png" alt="Confident AI MCP Architecture" width="500">
418418
</p>
419419

420-
Everything on Confident AI is available [here](https://www.confident-ai.com/docs?utm_source=GitHub).
420+
Everything on Confident AI is available [here](https://www.confident-ai.com/docs?utm_source=deepeval&utm_medium=github&utm_content=cloud_docs).
421421

422422
<br />
423423

0 commit comments

Comments
 (0)