Skip to content

Commit 1fcfa81

Browse files
committed
fix(ci): add a test for example_bench
Signed-off-by: Tarek <[email protected]>
1 parent 07971b4 commit 1fcfa81

File tree

9 files changed

+129
-119
lines changed

9 files changed

+129
-119
lines changed

.github/workflows/test.yml

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,12 @@ jobs:
1515
fail-fast: false
1616
matrix:
1717
benchmark:
18-
- arteval_bench
19-
- cache_bench
20-
- course_exam_bench
21-
- course_project_bench
2218
- example_bench
19+
# TODO: Re-enable the benchmarks below once they have tests; they are commented out for now because they have none.
20+
# - arteval_bench
21+
# - cache_bench
22+
# - course_exam_bench
23+
# - course_project_bench
2324

2425
steps:
2526
- name: Checkout code
@@ -30,7 +31,7 @@ jobs:
3031
with:
3132
python-version: '3.9'
3233

33-
- name: Install dependencies for ${{ matrix.benchmark }}
34+
- name: Install dependencies
3435
working-directory: benchmarks/${{ matrix.benchmark }}
3536
run: |
3637
python -m venv env${{ matrix.benchmark }}
@@ -40,16 +41,10 @@ jobs:
4041
if [ -f requirements.txt ]; then
4142
pip install -r requirements.txt
4243
fi
43-
pip install -e ../../sdk
4444
deactivate
4545
46-
- name: Run tests for ${{ matrix.benchmark }}
47-
working-directory: benchmarks/${{ matrix.benchmark }}
46+
- name: Run tests
4847
run: |
49-
source env${{ matrix.benchmark }}/bin/activate
50-
pytest --version
51-
pytest -v
48+
source benchmarks/${{ matrix.benchmark }}/env${{ matrix.benchmark }}/bin/activate
49+
pytest benchmarks/${{ matrix.benchmark }}/tests -v
5250
deactivate
53-
54-
- name: Test completed
55-
run: echo "${{ matrix.benchmark }} tests completed successfully."

.gitignore

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
# Python
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
*.so
6+
.Python
7+
build/
8+
develop-eggs/
9+
dist/
10+
downloads/
11+
eggs/
12+
.eggs/
13+
lib/
14+
lib64/
15+
parts/
16+
sdist/
17+
var/
18+
wheels/
19+
pip-wheel-metadata/
20+
share/python-wheels/
21+
*.egg-info/
22+
.installed.cfg
23+
*.egg
24+
25+
# Virtual environments
26+
.venv
27+
venv/
28+
ENV/
29+
env/
30+
.env
31+
env*/
32+
33+
# IDE
34+
.vscode/
35+
.idea/
36+
*.swp
37+
*.swo
38+
*~
39+
40+
# Testing
41+
.pytest_cache/
42+
.coverage
43+
htmlcov/
44+
.tox/
45+
46+
# Logs
47+
logs/
48+
*.log
49+
50+
# OS
51+
.DS_Store
52+
Thumbs.db
53+
54+
# Project specific
55+
outputs/
56+
*.jsonl
57+
!benchmarks/*/data/**/*.jsonl

benchmarks/arteval_bench/tests/test_sdk.py

Lines changed: 0 additions & 21 deletions
This file was deleted.

benchmarks/cache_bench/tests/test_sdk.py

Lines changed: 0 additions & 21 deletions
This file was deleted.

benchmarks/course_exam_bench/tests/test_sdk.py

Lines changed: 0 additions & 21 deletions
This file was deleted.

benchmarks/course_project_bench/tests/test_sdk.py

Lines changed: 0 additions & 21 deletions
This file was deleted.
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
"""Tests for the example benchmark."""
2+
3+
import json
4+
import unittest
5+
from pathlib import Path
6+
7+
8+
class TestExampleBenchmark(unittest.TestCase):
    """Validate the JSONL data file shipped with the example benchmark."""

    # Keys every record must contain; each value must be a string.
    REQUIRED_FIELDS = ("id", "sys_prompt", "user_prompt", "response")

    def test_data_format(self):
        """Test that benchmark data is in the correct format.

        The data file is resolved relative to this test module. Every line
        must be a JSON object that contains all ``REQUIRED_FIELDS`` with
        string values, and the file must contain at least one record.
        """
        data_path = (
            Path(__file__).parent.parent
            / "data"
            / "benchmark"
            / "example_bench_benchmark_timestamp.jsonl"
        )

        self.assertTrue(
            data_path.exists(), f"Benchmark data file not found: {data_path}"
        )

        record_count = 0
        with open(data_path, encoding="utf-8") as f:
            for line_num, line in enumerate(f, 1):
                # Report malformed JSON as a test failure that names the
                # offending line instead of an unlabelled decoder traceback.
                try:
                    data = json.loads(line)
                except json.JSONDecodeError as err:
                    self.fail(f"Line {line_num}: invalid JSON ({err})")

                # `in` also works on strings and lists, so a non-object
                # record must be rejected before the key checks below.
                self.assertIsInstance(
                    data, dict, f"Line {line_num}: record must be a JSON object"
                )
                record_count += 1

                # Check required fields
                for field in self.REQUIRED_FIELDS:
                    self.assertIn(
                        field, data, f'Line {line_num}: missing "{field}" field'
                    )

                # Check field types
                for field in self.REQUIRED_FIELDS:
                    self.assertIsInstance(
                        data[field],
                        str,
                        f'Line {line_num}: "{field}" must be a string',
                    )

        # Without this, an empty file would pass because the loop never runs.
        self.assertGreater(
            record_count, 0, f"Benchmark data file is empty: {data_path}"
        )
57+
58+
59+
if __name__ == "__main__":
60+
unittest.main()

benchmarks/example_bench/tests/test_sdk.py

Lines changed: 0 additions & 21 deletions
This file was deleted.

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,6 @@ convention = "google"
4949
[tool.ruff.format]
5050
docstring-code-format = true
5151
quote-style = "single"
52+
53+
[tool.pytest.ini_options]
54+
pythonpath = ["."]

0 commit comments

Comments
 (0)