confident-ai
diff --git a/‎.cursor-plugin/plugin.json‎
Lines changed: 23 additions & 0 deletions b/‎.cursor-plugin/plugin.json‎
Lines changed: 23 additions & 0 deletions
diff --git a/‎.github/workflows/changelog.yml‎
Lines changed: 67 additions & 0 deletions b/‎.github/workflows/changelog.yml‎
Lines changed: 67 additions & 0 deletions
diff --git a/‎.github/workflows/full_test_core_for_pr.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/full_test_core_for_pr.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/test_core.yml‎
Lines changed: 3 additions & 3 deletions b/‎.github/workflows/test_core.yml‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎.github/workflows/test_integrations.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/test_integrations.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.gitignore‎
Lines changed: 5 additions & 2 deletions b/‎.gitignore‎
Lines changed: 5 additions & 2 deletions
diff --git a/‎.scripts/changelog/generate.py‎
Lines changed: 28 additions & 16 deletions b/‎.scripts/changelog/generate.py‎
Lines changed: 28 additions & 16 deletions
diff --git a/‎CITATION.cff‎
Lines changed: 2 additions & 2 deletions b/‎CITATION.cff‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎README.md‎
Lines changed: 9 additions & 9 deletions b/‎README.md‎
Lines changed: 9 additions & 9 deletions
@@ -0,0 +1,23 @@
+{
+  "name": "deepeval",
+  "displayName": "DeepEval",
+  "version": "1.0.0",
+  "description": "Skills for adding DeepEval evaluations, tracing, datasets, Confident AI reports, and iterative improvement loops to AI applications.",
+  "author": {
+    "name": "Confident AI",
+    "email": "founders@confident-ai.com"
+  },
+  "homepage": "https://deepeval.com",
+  "repository": "https://github.com/confident-ai/deepeval",
+  "license": "Apache-2.0",
+  "keywords": [
+    "deepeval",
+    "llm",
+    "evaluation",
+    "tracing",
+    "datasets",
+    "confident-ai"
+  ],
+  "category": "developer-tools",
+  "skills": "./skills/"
+}
@@ -0,0 +1,67 @@
+name: Generate Changelog
+
+on:
+  workflow_dispatch:  # manual trigger only; this workflow defines no push or schedule triggers
+    inputs:
+      mode:  # "year" runs the generator with --year; any other value runs it with --range
+        description: "Mode: year or range"
+        required: true
+        default: "year"
+      year:  # read only when mode is "year"
+        description: "Year (e.g. 2025)"
+        required: false
+      from_tag:  # read only when mode is not "year" (first --range argument)
+        description: "From tag (e.g. v3.7.0)"
+        required: false
+      to_tag:  # read only when mode is not "year" (second --range argument)
+        description: "To tag (e.g. v3.9.0)"
+        required: false
+
+jobs:
+  changelog:
+    runs-on: ubuntu-latest
+
+    permissions:  # NOTE(review): presumably needed by the "Create PR" step to push a branch and open a PR — confirm
+      contents: write
+      pull-requests: write
+
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # 0 = fetch full history instead of a shallow clone
+
+      - name: Fetch tags  # range mode passes tag names to the generator, so all tags must exist locally
+        run: git fetch --tags --force  # --force lets fetched tags overwrite differing local ones
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"  # quoted so YAML keeps it a string rather than a float
+
+      - name: Install dependencies  # NOTE(review): versions are unpinned and deepeval comes from PyPI, not this checkout — confirm intended
+        run: |
+          pip install rich pydantic deepeval
+
+      - name: Run changelog generator
+        env:  # dispatch inputs are passed as env vars so they are never spliced into the script text (prevents shell injection)
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          MODE: ${{ github.event.inputs.mode }}
+          YEAR: ${{ github.event.inputs.year }}
+          FROM_TAG: ${{ github.event.inputs.from_tag }}
+          TO_TAG: ${{ github.event.inputs.to_tag }}
+        run: |
+          if [ "$MODE" = "year" ]; then
+            python .scripts/changelog/generate.py --year "${YEAR:?year is required}" --github --ai
+          else
+            python .scripts/changelog/generate.py --range "${FROM_TAG:?from_tag is required}" "${TO_TAG:?to_tag is required}" --github --ai
+          fi
+
+      - name: Create PR  # opens a pull request via the third-party peter-evans/create-pull-request action
+        uses: peter-evans/create-pull-request@v6
+        with:
+          branch: chore/changelog-update  # fixed branch name, the same on every run
+          title: "chore: update changelog"  # quoted because the value contains ": "
+          commit-message: "chore: update changelog"
+          body: "Auto-generated changelog updates"
@@ -102,5 +102,5 @@ jobs:
         if: ${{ env.OPENAI_API_KEY != '' }}
         run: |
           poetry run pytest \
-            tests/test_core/test_synthesizer/ tests/test_core/test_datasets/
+            tests/test_core/test_synthesizer/ tests/test_core/test_datasets/ tests/test_core/test_simulator/
 
@@ -89,15 +89,15 @@ jobs:
         if: ${{ env.OPENAI_API_KEY != '' }}
         run: |
           poetry run pytest -vv -rA --maxfail=1 --capture=tee-sys -o faulthandler_timeout=300 \
-          tests/test_core/test_synthesizer/ tests/test_core/test_datasets/
+          tests/test_core/test_synthesizer/ tests/test_core/test_datasets/ tests/test_core/test_simulator/
 
       # Dev tests (no secrets)
       - name: Run dev tests (no secrets)
         if: ${{ env.OPENAI_API_KEY == '' }}
         run: |
-          poetry run pytest -vv -rA --maxfail=1 --capture=tee-sys tests/test_core/test_synthesizer/ tests/test_core/test_datasets/ \
+          poetry run pytest -vv -rA --maxfail=1 --capture=tee-sys tests/test_core/test_synthesizer/ tests/test_core/test_datasets/ tests/test_core/test_simulator/ \
           --ignore=tests/test_core/test_tracing/test_dataset_iterator.py            \
           --ignore=tests/test_core/test_synthesizer/test_context_generator.py       \
-          --ignore=tests/test_core/test_synthesizer/test_conversation_simulator.py  \
+          --ignore=tests/test_core/test_simulator/test_conversation_simulator.py    \
           --ignore=tests/test_core/test_synthesizer/test_generate_from_goldens.py   \
           --ignore=tests/test_core/test_synthesizer/test_synthesizer.py
@@ -44,7 +44,7 @@ jobs:
       - name: Install Dependencies
         run: |
           poetry install --no-interaction --no-root --only main
-          poetry install --with langchain
+          poetry run pip install -U langgraph langchain langchain-openai
 
       - name: Install Project
         run: poetry install --no-interaction --only main
 
@@ -14,8 +14,8 @@ dist/
 downloads/
 eggs/
 .eggs/
-lib/
-lib64/
+/lib/
+/lib64/
 parts/
 sdist/
 var/
@@ -173,3 +173,6 @@ secrets
 # Mac OS system files
 **/.DS_Store
 
+# Cursor IDE local config (rules, etc.)
+.cursor/
+
@@ -25,15 +25,15 @@
 from typing import Callable, Dict, Iterable, List, Optional, Tuple
 from pydantic import BaseModel, Field, field_validator
 
-
 #################
 # Configuration #
 #################
 
 OWNER = "confident-ai"
 REPO = "deepeval"
 
-START_MARKER = "<!-- DeepEval release notes start -->"
+START_MARKER = "{/* DeepEval release notes start */}"
+LEGACY_START_MARKER = "<!-- DeepEval release notes start -->"
 
 CATEGORY_ORDER = [
     "Backward Incompatible Change",
@@ -149,21 +149,25 @@ class AiMonthSummary(BaseModel):
 # - Prefer the stable marker (lets humans edit the visible link/text)
 # - Fall back to parsing the link if the marker is missing
 BULLET_PR_RE = re.compile(r"\[#(\d+)\]\(")
-BULLET_PR_MARKER_RE = re.compile(r"<!--\s*pr:(\d+)\s*-->")
+BULLET_PR_MARKER_RE = re.compile(
+    r"(?:<!--\s*pr:(\d+)\s*-->|\{/\*\s*pr:(\d+)\s*\*/\})"
+)
 BULLET_TAIL_RE = re.compile(
-    r"\s*\(\[#\d+\]\([^)]+\)\)\s*<!--\s*pr:\d+\s*-->.*$"
+    r"\s*\(\[#\d+\]\([^)]+\)\)\s*(?:<!--\s*pr:\d+\s*-->|\{/\*\s*pr:\d+\s*\*/\}).*$"
 )
 
 # Optional ignore list to be placed right after START_MARKER to avoid confusing the parser:
 # add a list of PR numbers you would like to be excluded from the generated changelog.
-# <!-- changelog-ignore:
+# {/* changelog-ignore:
 # - 1234
 # - 5678
-# -->
+# */}
 IGNORE_BLOCK_TOP_RE = re.compile(
-    r"(?is)^\s*<!--\s*changelog-ignore:.*?-->\s*\n*"
+    r"(?is)^\s*(?:<!--\s*changelog-ignore:.*?-->|\{/\*\s*changelog-ignore:.*?\*/\})\s*\n*"
+)
+IGNORE_BLOCK_ANY_RE = re.compile(
+    r"(?is)(?:<!--\s*changelog-ignore:(.*?)-->|\{/\*\s*changelog-ignore:(.*?)\*/\})"
 )
-IGNORE_BLOCK_ANY_RE = re.compile(r"(?is)<!--\s*changelog-ignore:(.*?)-->")
 
 ###############
 # Git helpers #
@@ -762,8 +766,16 @@ def _pull_top_ignore_block(s: str) -> Tuple[str, str]:
         rest = s2[matched.end() :]
         return ignore_block.rstrip("\n") + "\n", rest
 
-    if START_MARKER in text:
-        before, _, after = text.partition(START_MARKER)
+    marker_in_text = next(
+        (
+            marker
+            for marker in (START_MARKER, LEGACY_START_MARKER)
+            if marker in text
+        ),
+        None,
+    )
+    if marker_in_text:
+        before, _, after = text.partition(marker_in_text)
         ignore_block, rest = _pull_top_ignore_block(after)
         prefix = before.rstrip() + "\n\n" + START_MARKER + "\n"
         if ignore_block:
@@ -793,21 +805,21 @@ def _pull_top_ignore_block(s: str) -> Tuple[str, str]:
 
 def parse_ignore_prs(text: str) -> set[int]:
     """
-    Parse PR numbers from one or more `<!-- changelog-ignore: ... -->` HTML comment blocks.
+    Parse PR numbers from one or more changelog-ignore comment blocks.
 
     Should be placed immediately after the `START_MARKER`, for example:
 
-        <!-- changelog-ignore:
+        {/* changelog-ignore:
         - 1234
         - 5678
-        -->
+        */}
 
     Lines may contain comments which can be used to document why a PR is being ignored
     Any integers found in the block are treated as PR numbers.
     """
     ignored: set[int] = set()
     for matched in IGNORE_BLOCK_ANY_RE.finditer(text):
-        block = matched.group(1)
+        block = next(group for group in matched.groups() if group is not None)
         for line in block.splitlines():
             line = line.strip()
             if not line or line.startswith("#"):
@@ -876,7 +888,7 @@ def parse_body(body: str) -> ChangelogIndex:
             )
             if not matched:
                 continue
-            pr = int(matched.group(1))
+            pr = int(next(group for group in matched.groups() if group))
             idx[month][category][version][pr] = line.rstrip()
 
     return idx
@@ -1144,7 +1156,7 @@ def _tick() -> None:
                 author = f" ({user_display})"
         line = (
             f"- {title_out} ([#{pr_num}](https://github.com/{OWNER}/{REPO}/pull/{pr_num})) "
-            f"<!-- pr:{pr_num} -->{author}"
+            f"{{/* pr:{pr_num} */}}{author}"
         )
         idx[month][category][tag][pr_num] = line
         _status(f"[{tag}] PR #{pr_num}: done")
 
@@ -6,8 +6,8 @@ authors:
   - family-names: Vongthongsri
     given-names: Kritin
 title: deepeval
-version: 3.8.9
-date-released: "2026-01-07"
+version: 3.9.9
+date-released: "2026-04-26"
 url: https://confident-ai.com
 repository-code: https://github.com/confident-ai/deepeval
 license: Apache-2.0
 
@@ -22,7 +22,7 @@
         <a href="#-metrics-and-features">Metrics and Features</a> |
         <a href="#-quickstart">Getting Started</a> |
         <a href="#-integrations">Integrations</a> |
-        <a href="https://confident-ai.com?utm_source=GitHub">Confident AI</a>
+        <a href="https://www.confident-ai.com?utm_source=deepeval&utm_medium=github&utm_content=header_nav">Confident AI</a>
     <p>
 </h4>
 
@@ -58,7 +58,7 @@
 Whether you're building AI agents, RAG pipelines, or chatbots, implemented via LangChain or OpenAI, DeepEval has you covered. With it, you can easily determine the optimal models, prompts, and architecture to improve your AI quality, prevent prompt drifting, or even transition from OpenAI to Claude with confidence.
 
 > [!IMPORTANT]
-> Need a place for your DeepEval testing data to live 🏡❤️? [Sign up to the DeepEval platform](https://confident-ai.com?utm_source=GitHub) to compare iterations of your LLM app, generate & share testing reports, and more.
+> Need a place for your DeepEval testing data to live 🏡❤️? [Sign up to the DeepEval platform](https://www.confident-ai.com?utm_source=deepeval&utm_medium=github&utm_content=signup_callout) to compare iterations of your LLM app, generate & share testing reports, and more.
 >
 > ![Demo GIF](assets/demo.gif)
 
@@ -171,7 +171,7 @@ DeepEval plugs into any LLM framework — OpenAI Agents, LangChain, CrewAI, and
 
 ## ☁️ Platform + Ecosystem
 
-[Confident AI](https://confident-ai.com?utm_source=GitHub) is an all-in-one platform that integrates natively with DeepEval.
+[Confident AI](https://www.confident-ai.com?utm_source=deepeval&utm_medium=github&utm_content=platform_section) is an all-in-one platform that integrates natively with DeepEval.
 
 - Manage datasets, trace LLM applications, run evaluations, and monitor responses in production — all from one platform.
 - Don't need a UI? Confident AI can also be your data persistence layer — run evals, pull datasets, and inspect traces straight from Claude Code or Cursor via Confident AI's [MCP server](https://github.com/confident-ai/confident-mcp-server).
@@ -220,13 +220,13 @@ Open `test_chatbot.py` and write your first test case to run an **end-to-end** e
 import pytest
 from deepeval import assert_test
 from deepeval.metrics import GEval
-from deepeval.test_case import LLMTestCase, LLMTestCaseParams
+from deepeval.test_case import LLMTestCase, SingleTurnParams
 
 def test_case():
     correctness_metric = GEval(
         name="Correctness",
         criteria="Determine if the 'actual output' is correct based on the 'expected output'.",
-        evaluation_params=[LLMTestCaseParams.ACTUAL_OUTPUT, LLMTestCaseParams.EXPECTED_OUTPUT],
+        evaluation_params=[SingleTurnParams.ACTUAL_OUTPUT, SingleTurnParams.EXPECTED_OUTPUT],
         threshold=0.5
     )
     test_case = LLMTestCase(
@@ -268,14 +268,14 @@ Use the `@observe` decorator to trace components (LLM calls, retrievers, tool ca
 
 ```python
 from deepeval.tracing import observe, update_current_span
-from deepeval.test_case import LLMTestCase, LLMTestCaseParams
+from deepeval.test_case import LLMTestCase, SingleTurnParams
 from deepeval.dataset import EvaluationDataset, Golden
 from deepeval.metrics import GEval
 
 correctness = GEval(
     name="Correctness",
     criteria="Determine if the 'actual output' is correct based on the 'expected output'.",
-    evaluation_params=[LLMTestCaseParams.ACTUAL_OUTPUT, LLMTestCaseParams.EXPECTED_OUTPUT],
+    evaluation_params=[SingleTurnParams.ACTUAL_OUTPUT, SingleTurnParams.EXPECTED_OUTPUT],
 )
 
 @observe(metrics=[correctness])
@@ -397,7 +397,7 @@ cp .env.example .env.local
 
 # DeepEval With Confident AI
 
-[Confident AI](https://confident-ai.com?utm_source=GitHub) is an all-in-one platform to manage datasets, trace LLM applications, and run evaluations in production. Log in from the CLI to get started:
+[Confident AI](https://www.confident-ai.com?utm_source=deepeval&utm_medium=github&utm_content=cli_login_section) is an all-in-one platform to manage datasets, trace LLM applications, and run evaluations in production. Log in from the CLI to get started:
 
 ```bash
 deepeval login
@@ -417,7 +417,7 @@ Prefer to stay in your IDE? Use DeepEval via [Confident AI's MCP server](https:/
   <img src="assets/confident-mcp-architecture.png" alt="Confident AI MCP Architecture" width="500">
 </p>
 
-Everything on Confident AI is available [here](https://www.confident-ai.com/docs?utm_source=GitHub).
+Everything on Confident AI is available [here](https://www.confident-ai.com/docs?utm_source=deepeval&utm_medium=github&utm_content=cloud_docs).
 
 <br />