Skip to content

Commit 1f78f4a

Browse files
committed
fix(async,ci): prevent coroutine re-await & stabilize async evals
- Track and gather only tasks created on the current loop during evals to avoid “cannot reuse already awaited coroutine” and cross-loop errors. - Introduce coerce_to_task() to normalize Task/Future/coroutine/awaitable inputs. - Refactor EvaluationTasks to store asyncio Futures, return copies from get_tasks(), and provide a clear_tasks() that cancels pending tasks before clearing. - Call loop.shutdown_asyncgens() in the tracing worker before close. - Replace blocking time.sleep() with await asyncio.sleep() in the async test helper. - CI: add a maintainer-only full test workflow with secrets gating and concurrency control. - Ruff cleanups.
1 parent 7c9c985 commit 1f78f4a

9 files changed

Lines changed: 243 additions & 45 deletions

File tree

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
# Manually dispatched workflow that checks out a pull request ref and runs the
# core and dev test suites in the `ci-secrets` environment.
name: Full Tests (maintainer only)

on:
  workflow_dispatch:
    inputs:
      pr:
        description: "PR number"
        required: true
      ref_kind:
        description: "Which ref to test (merge|head)"
        required: false
        default: "merge"

permissions:
  contents: read

# A newer dispatch for the same PR and ref kind cancels the run in progress.
concurrency:
  group: full-tests-pr-${{ github.event.inputs.pr }}-${{ github.event.inputs.ref_kind }}
  cancel-in-progress: true

jobs:
  full-tests:
    if: ${{ github.repository_owner == 'confident-ai' }}
    runs-on: ubuntu-latest
    timeout-minutes: 60
    environment: ci-secrets
    env:
      # Exposed at job level so step `if:` conditions can test it via `env.*`.
      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

    steps:
      - name: Resolve ref
        id: refsel
        # Inputs are passed through the environment rather than interpolated
        # into the script text, so their content is never parsed as shell.
        env:
          PR_NUMBER: ${{ github.event.inputs.pr }}
          REF_KIND: ${{ github.event.inputs.ref_kind }}
        run: |
          # Reject anything that is not a plain positive integer; this also
          # prevents newline injection into $GITHUB_OUTPUT.
          case "$PR_NUMBER" in
            ''|*[!0-9]*)
              echo "::error::PR number must be numeric"
              exit 1
              ;;
          esac
          if [ "$REF_KIND" = "head" ]; then
            echo "ref=refs/pull/${PR_NUMBER}/head" >> "$GITHUB_OUTPUT"
          else
            # test what would merge
            echo "ref=refs/pull/${PR_NUMBER}/merge" >> "$GITHUB_OUTPUT"
          fi

      - name: Checkout PR ref
        uses: actions/checkout@v4
        with:
          ref: ${{ steps.refsel.outputs.ref }}
          fetch-depth: 0

      - name: Set up Python
        id: setup-python
        uses: actions/setup-python@v4
        with:
          python-version: "3.11"

      - name: Install Poetry
        uses: snok/install-poetry@v1
        with:
          virtualenvs-create: true
          virtualenvs-in-project: true
          installer-parallel: true

      - name: Cache virtualenv
        id: cached-poetry-dependencies
        uses: actions/cache@v3
        with:
          path: .venv
          key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}

      # Core deps only (main)
      - name: Install dependencies (main)
        if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
        run: poetry install --no-interaction --no-root --only main

      - name: Install project (main)
        run: poetry install --no-interaction --only main

      #----------------------------------------------
      # run test suite
      #----------------------------------------------

      # Run Core tests
      - name: Run core tests (with secrets)
        if: ${{ env.OPENAI_API_KEY != '' }}
        run: |
          poetry run pytest -vv -rA --maxfail=1 --capture=tee-sys \
            tests/test_core/ \
            --ignore=tests/test_core/test_synthesizer/ \
            --ignore=tests/test_core/test_datasets/

      # Without an API key, two additional test modules are ignored.
      - name: Run core tests (no secrets)
        if: ${{ env.OPENAI_API_KEY == '' }}
        run: |
          poetry run pytest -vv -rA --maxfail=1 --capture=tee-sys \
            tests/test_core/ \
            --ignore=tests/test_core/test_synthesizer/ \
            --ignore=tests/test_core/test_datasets/ \
            --ignore=tests/test_core/test_evaluation/test_end_to_end/test_configs.py \
            --ignore=tests/test_core/test_tracing/test_dataset_iterator.py

      # Install dev dependencies and run dev tests
      - name: Install dev dependencies
        run: poetry install --no-interaction --with dev

      - name: Run dev tests (with secrets)
        if: ${{ env.OPENAI_API_KEY != '' }}
        run: |
          poetry run pytest -vv -rA --maxfail=1 --capture=tee-sys -o faulthandler_timeout=300 \
            tests/test_core/test_synthesizer/ tests/test_core/test_datasets/

      # Without an API key, only the dataset tests are run.
      - name: Run dev tests (no secrets)
        if: ${{ env.OPENAI_API_KEY == '' }}
        run: |
          poetry run pytest -vv -rA --maxfail=1 --capture=tee-sys -o faulthandler_timeout=300 \
            tests/test_core/test_datasets/

.github/workflows/test_core.yml

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@ on:
88
jobs:
99
test:
1010
runs-on: ubuntu-latest
11+
env:
12+
# Expose once at job level: the `secrets` context cannot be referenced in step `if:` conditions, and forked PRs receive no secrets (the value is empty).
13+
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
14+
1115
steps:
1216
#----------------------------------------------
1317
# check-out repo and set-up python
@@ -54,20 +58,43 @@ jobs:
5458
#----------------------------------------------
5559
# run test suite
5660
#----------------------------------------------
61+
62+
# Run tests (with secrets): full suite
5763
- name: Run tests
58-
env:
59-
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
64+
if: ${{ env.OPENAI_API_KEY != '' }}
65+
run: |
66+
poetry run pytest -vv -rA --maxfail=1 --capture=tee-sys \
67+
tests/test_core/ \
68+
--ignore=tests/test_core/test_synthesizer/ \
69+
--ignore=tests/test_core/test_datasets/
70+
71+
# Run tests (no secrets): skip e2e that require API keys
72+
- name: Run tests (no secrets)
73+
if: ${{ env.OPENAI_API_KEY == '' }}
6074
run: |
61-
poetry run pytest -vv -rA --maxfail=1 --capture=tee-sys tests/test_core/ --ignore=tests/test_core/test_synthesizer/ --ignore=tests/test_core/test_datasets/
75+
poetry run pytest -vv -rA --maxfail=1 --capture=tee-sys \
76+
tests/test_core/ \
77+
--ignore=tests/test_core/test_synthesizer/ \
78+
--ignore=tests/test_core/test_datasets/ \
79+
--ignore=tests/test_core/test_evaluation/test_end_to_end/test_configs.py \
80+
--ignore=tests/test_core/test_tracing/test_dataset_iterator.py
6281
6382
#----------------------------------------------
6483
# install dev dependencies (including chromadb) and run synthesizer tests
6584
#----------------------------------------------
6685
- name: Install dev dependencies
6786
run: poetry install --no-interaction --with dev
6887

69-
- name: Run core tests with dev dependencies
70-
env:
71-
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
88+
# Dev tests (with secrets)
89+
- name: Run dev tests
90+
if: ${{ env.OPENAI_API_KEY != '' }}
91+
run: |
92+
poetry run pytest -vv -rA --maxfail=1 --capture=tee-sys -o faulthandler_timeout=300 \
93+
tests/test_core/test_synthesizer/ tests/test_core/test_datasets/
94+
95+
# Dev tests (no secrets)
96+
- name: Run dev tests (no secrets)
97+
if: ${{ env.OPENAI_API_KEY == '' }}
7298
run: |
73-
poetry run pytest -vv -rA --maxfail=1 --capture=tee-sys tests/test_core/test_synthesizer/ tests/test_core/test_datasets/
99+
poetry run pytest -vv -rA --maxfail=1 --capture=tee-sys -o faulthandler_timeout=300 \
100+
tests/test_core/test_datasets/

deepeval/dataset/dataset.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
11
from asyncio import Task
2-
from typing import Iterator, List, Optional, Union, Literal
2+
from typing import TYPE_CHECKING, Iterator, List, Optional, Union, Literal
33
from dataclasses import dataclass, field
44
from opentelemetry.trace import Tracer
55
from opentelemetry.context import Context, attach, detach
66
from rich.console import Console
77
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn
88
import json
99
import csv
10-
import webbrowser
1110
import os
1211
import datetime
1312
import time
@@ -17,6 +16,7 @@
1716

1817
from deepeval.confident.api import Api, Endpoints, HttpMethods
1918
from deepeval.dataset.utils import (
19+
coerce_to_task,
2020
convert_test_cases_to_goldens,
2121
convert_goldens_to_test_cases,
2222
convert_convo_goldens_to_convo_test_cases,
@@ -54,6 +54,14 @@
5454
from deepeval.tracing import trace_manager
5555
from deepeval.tracing.tracing import EVAL_DUMMY_SPAN_NAME
5656

57+
if TYPE_CHECKING:
58+
from deepeval.evaluate.configs import (
59+
AsyncConfig,
60+
DisplayConfig,
61+
CacheConfig,
62+
ErrorConfig,
63+
)
64+
5765

5866
valid_file_types = ["csv", "json", "jsonl"]
5967

@@ -1230,7 +1238,7 @@ def evals_iterator(
12301238
)
12311239

12321240
def evaluate(self, task: Task):
1233-
global_evaluation_tasks.append(task)
1241+
global_evaluation_tasks.append(coerce_to_task(task))
12341242

12351243
def _start_otel_test_run(self, tracer: Optional[Tracer] = None) -> Context:
12361244
_tracer = check_tracer(tracer)

deepeval/dataset/types.py

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,28 @@
1+
import asyncio
2+
3+
from typing import Any
4+
from deepeval.dataset.utils import coerce_to_task
5+
6+
17
class EvaluationTasks:
2-
tasks: list = []
38

4-
def append(self, t):
5-
self.tasks.append(t)
9+
def __init__(self):
10+
self._tasks: list[asyncio.Future] = []
11+
12+
def append(self, obj: Any):
13+
self._tasks.append(coerce_to_task(obj))
614

7-
def get_tasks(self):
8-
return self.tasks
15+
def get_tasks(self) -> list[asyncio.Future]:
16+
return list(self._tasks)
917

1018
def num_tasks(self):
11-
return len(self.tasks)
19+
return len(self._tasks)
1220

13-
def clear_tasks(self):
14-
self.tasks.clear()
21+
def clear_tasks(self) -> None:
22+
for t in self._tasks:
23+
if not t.done():
24+
t.cancel()
25+
self._tasks.clear()
1526

1627

1728
global_evaluation_tasks = EvaluationTasks()

deepeval/dataset/utils.py

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1-
from typing import List, Optional, Any
1+
import asyncio
2+
import inspect
23
import json
34
import re
45

6+
from typing import List, Optional, Any
57
from opentelemetry.trace import Tracer
6-
from opentelemetry import trace
7-
from opentelemetry.trace import NoOpTracerProvider
88

99
from deepeval.dataset.api import Golden
1010
from deepeval.dataset.golden import ConversationalGolden
@@ -174,3 +174,31 @@ def check_tracer(tracer: Optional[Tracer] = None) -> Tracer:
174174
)
175175

176176
return GLOBAL_TEST_RUN_TRACER
177+
178+
179+
def coerce_to_task(obj: Any) -> asyncio.Future[Any]:
    """Normalize ``obj`` into a Task/Future that can be tracked and gathered.

    Args:
        obj: An ``asyncio.Task``, a Future, a coroutine object, or any other
            object implementing ``__await__``.

    Returns:
        ``obj`` itself when it is already a Task/Future; otherwise a new Task
        created on the currently running event loop.

    Raises:
        TypeError: If ``obj`` is not a Task, Future, coroutine or awaitable.
        RuntimeError: If ``obj`` has to be scheduled but no event loop is
            running.
    """
    # Tasks are Futures, so one check covers both; they are already bound to
    # a loop and are returned unchanged.
    if asyncio.isfuture(obj):
        return obj

    # A bare coroutine must be scheduled explicitly so it is bound to the
    # running loop and can be tracked.
    if asyncio.iscoroutine(obj):
        return asyncio.create_task(obj)

    # A generic awaitable (any object with __await__) is wrapped in a
    # coroutine because create_task only accepts coroutines.
    if inspect.isawaitable(obj):
        # Resolve the loop BEFORE building the wrapper: if no loop is running
        # this raises RuntimeError without leaving behind a wrapper coroutine
        # that would later warn "coroutine ... was never awaited".
        loop = asyncio.get_running_loop()

        async def _wrap(awaitable):
            return await awaitable

        return loop.create_task(_wrap(obj))

    # Not awaitable at all: fail loudly.
    raise TypeError(
        f"Expected Task/Future/coroutine/awaitable, got {type(obj).__name__}"
    )

0 commit comments

Comments
 (0)