Commit 24d1347 (1 parent: 82d992b)

add tests for scoring script

File tree: 1 file changed, +258 −0 lines

tests/test_score.py
@@ -0,0 +1,258 @@
"""Test for score script.

These tests are designed to test the general functionality for
interacting with ORCA, NOT the actual scoring process written.
"""

import json
import os
from unittest.mock import patch

import pytest
import typer
from typer.testing import CliRunner

from score import main, score

app = typer.Typer()
app.command()(main)

runner = CliRunner()


# ----- Tests for score() function -----
def test_score_valid_task_number(gt_file, pred_file):
    """Test: score() returns a dict for valid task number."""
    task_number = 1
    res = score(
        task_number=task_number,
        gt_file=gt_file,
        pred_file=pred_file,
    )
    assert isinstance(res, dict)


def test_score_invalid_task_number():
    """Test: score() raises KeyError for invalid task number."""
    task_number = 99999
    with pytest.raises(KeyError):
        score(
            task_number=task_number,
            gt_file="dummy_gt.csv",
            pred_file="dummy_pred.csv",
        )


# ----- Tests for main() function -----
@patch("score.extract_gt_file")
@patch("score.score")
def test_main_invalid_task_number(
    mock_score, mock_extract_gt_file, gt_file, pred_file, temp_dir
):
    """Test: final results should be INVALID for invalid task number."""
    task_number = 99999
    mock_extract_gt_file.return_value = gt_file
    mock_score.side_effect = KeyError

    groundtruth_dir = os.path.dirname(gt_file)
    output_file = os.path.join(temp_dir, "results.json")
    with open(output_file, "w") as f:
        pass
    result = runner.invoke(
        app,
        [
            "-p",
            pred_file,
            "-g",
            groundtruth_dir,
            "-t",
            str(task_number),  # CLI args passed to runner.invoke must be strings
"-o",
71+
output_file,
72+
],
73+
)
74+
assert result.exit_code == 0
75+
assert result.stdout.strip() == "INVALID"
76+
with open(output_file, "r") as f:
77+
output_data = json.load(f)
78+
assert output_data["score_status"] == "INVALID"
79+
assert (
80+
output_data["score_errors"]
81+
== f"Invalid challenge task number specified: `{task_number}`"
82+
)
83+
mock_extract_gt_file.assert_called_once_with(groundtruth_dir)
84+
mock_score.assert_called_once_with(
85+
task_number=task_number, gt_file=gt_file, pred_file=pred_file
86+
)
87+
88+
89+
@patch("score.extract_gt_file")
90+
@patch("score.score")
91+
def test_main_prior_validation_failed(
92+
mock_score,
93+
mock_extract_gt_file,
94+
temp_dir,
95+
invalid_predictions_json,
96+
groundtruth_dir,
97+
):
98+
"""Test: final results should be INVALID for invalid predictions file."""
99+
output_file = os.path.join(temp_dir, "results.json")
100+
with open(output_file, "w") as f:
101+
f.write(invalid_predictions_json)
102+
result = runner.invoke(
103+
app,
104+
[
105+
"-p",
106+
"dummy_pred.csv",
107+
"-g",
108+
groundtruth_dir,
109+
"-t",
110+
"1",
111+
"-o",
112+
output_file,
113+
],
114+
)
115+
assert result.exit_code == 0
116+
assert result.stdout.strip() == "INVALID"
117+
with open(output_file, "r") as f:
118+
output_data = json.load(f)
119+
assert output_data["score_status"] == "INVALID"
120+
assert (
121+
output_data["score_errors"]
122+
== "Submission could not be evaluated due to validation errors."
123+
)
124+
mock_extract_gt_file.assert_not_called()
125+
mock_score.assert_not_called()
126+
127+
128+
@patch("score.extract_gt_file")
129+
@patch("score.score")
130+
def test_main_no_prior_validations(
131+
mock_score,
132+
mock_extract_gt_file,
133+
gt_file,
134+
pred_file,
135+
temp_dir,
136+
):
137+
"""Test: notice about no prior validation results should be given."""
138+
mock_extract_gt_file.return_value = gt_file
139+
groundtruth_dir = os.path.dirname(gt_file)
140+
output_file = os.path.join(temp_dir, "dummy_results.json")
141+
result = runner.invoke(
142+
app,
143+
[
144+
"-p",
145+
pred_file,
146+
"-g",
147+
groundtruth_dir,
148+
"-t",
149+
"1",
150+
"-o",
151+
output_file,
152+
],
153+
)
154+
assert result.exit_code == 0
155+
assert result.stdout.strip() in {"SCORED", "INVALID"}
156+
with open(output_file) as f:
157+
output_data = json.load(f)
158+
assert output_data["validation_status"] == ""
159+
assert output_data["validation_errors"] == (
160+
"Validation results not found. Proceeding with scoring but it "
161+
"may fail or results may be inaccurate."
162+
)
163+
mock_extract_gt_file.assert_called_once_with(groundtruth_dir)
164+
mock_score.assert_called_once_with(
165+
task_number=1, gt_file=gt_file, pred_file=pred_file
166+
)
167+
168+
169+
@patch("score.extract_gt_file")
170+
@patch("score.score")
171+
def test_main_valid_predictions_cannot_score(
172+
mock_score,
173+
mock_extract_gt_file,
174+
valid_predictions_json,
175+
gt_file,
176+
pred_file,
177+
temp_dir,
178+
):
179+
"""
180+
Test: final results should be INVALID when predictions cannot be scored
181+
(indicated by ValueError).
182+
"""
183+
mock_extract_gt_file.return_value = gt_file
184+
mock_score.side_effect = ValueError
185+
186+
groundtruth_dir = os.path.dirname(gt_file)
187+
output_file = os.path.join(temp_dir, "results.json")
188+
with open(output_file, "w") as f:
189+
f.write(valid_predictions_json)
190+
result = runner.invoke(
191+
app,
192+
[
193+
"-p",
194+
pred_file,
195+
"-g",
196+
groundtruth_dir,
197+
"-t",
198+
"1",
199+
"-o",
200+
output_file,
201+
],
202+
)
203+
assert result.exit_code == 0
204+
assert result.stdout.strip() == "INVALID"
205+
with open(output_file) as f:
206+
output_data = json.load(f)
207+
assert output_data["score_status"] == "INVALID"
208+
assert (
209+
output_data["score_errors"]
210+
== "Error encountered during scoring; submission not evaluated."
211+
)
212+
mock_extract_gt_file.assert_called_once_with(groundtruth_dir)
213+
mock_score.assert_called_once_with(
214+
task_number=1, gt_file=gt_file, pred_file=pred_file
215+
)
216+
217+
218+
@patch("score.extract_gt_file")
219+
@patch("score.score")
220+
def test_main_valid_predictions_can_score(
221+
mock_score,
222+
mock_extract_gt_file,
223+
valid_predictions_json,
224+
gt_file,
225+
pred_file,
226+
temp_dir,
227+
):
228+
"""
229+
Test: final results should be SCORED for successful scoring.
230+
"""
231+
mock_extract_gt_file.return_value = gt_file
232+
groundtruth_dir = os.path.dirname(gt_file)
233+
output_file = os.path.join(temp_dir, "results.json")
234+
with open(output_file, "w") as f:
235+
f.write(valid_predictions_json)
236+
result = runner.invoke(
237+
app,
238+
[
239+
"-p",
240+
pred_file,
241+
"-g",
242+
groundtruth_dir,
243+
"-t",
244+
"1",
245+
"-o",
246+
output_file,
247+
],
248+
)
249+
assert result.exit_code == 0
250+
assert result.stdout.strip() == "SCORED"
251+
with open(output_file) as f:
252+
output_data = json.load(f)
253+
assert output_data["score_status"] == "SCORED"
254+
assert output_data["score_errors"] == ""
255+
mock_extract_gt_file.assert_called_once_with(groundtruth_dir)
256+
mock_score.assert_called_once_with(
257+
task_number=1, gt_file=gt_file, pred_file=pred_file
258+
)
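
Note: these tests rely on pytest fixtures that are not included in this commit — gt_file, pred_file, temp_dir, groundtruth_dir, valid_predictions_json, and invalid_predictions_json — presumably provided by a tests/conftest.py. The sketch below shows one minimal way such fixtures could be defined; the file names, CSV columns, and JSON fields are illustrative assumptions, not the project's actual schema.

tests/conftest.py (hypothetical sketch, not part of this commit)

"""Illustrative fixtures assumed by tests/test_score.py.

All paths, columns, and JSON fields below are placeholder assumptions.
"""

import json
import os

import pytest


@pytest.fixture
def temp_dir(tmp_path):
    """Scratch directory for output files."""
    return str(tmp_path)


@pytest.fixture
def gt_file(tmp_path):
    """Minimal ground-truth CSV (placeholder columns)."""
    path = tmp_path / "goldstandard.csv"
    path.write_text("id,label\n1,0\n2,1\n")
    return str(path)


@pytest.fixture
def pred_file(tmp_path):
    """Minimal predictions CSV matching the ground-truth ids."""
    path = tmp_path / "predictions.csv"
    path.write_text("id,label\n1,0\n2,1\n")
    return str(path)


@pytest.fixture
def groundtruth_dir(gt_file):
    """Directory that contains the ground-truth file."""
    return os.path.dirname(gt_file)


@pytest.fixture
def valid_predictions_json():
    """Prior validation results for a submission that passed validation."""
    return json.dumps({"validation_status": "VALIDATED", "validation_errors": ""})


@pytest.fixture
def invalid_predictions_json():
    """Prior validation results for a submission that failed validation."""
    return json.dumps(
        {"validation_status": "INVALID", "validation_errors": "mock validation error"}
    )

With fixtures along these lines in place, the suite runs with: pytest tests/test_score.py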
