
Commit f40a97a

Add module sandbox fixture with improved architecture (#16)
1 parent dd44e60 commit f40a97a

File tree

9 files changed: +441 −37 lines changed


.github/workflows/lint.yaml

Lines changed: 2 additions & 0 deletions
@@ -3,6 +3,8 @@ name: Lint
 on:
   push:
     branches: [main]
+  pull_request:
+    branches: [main]
 
 jobs:
   Lint:

.github/workflows/test.yaml

Lines changed: 2 additions & 0 deletions
@@ -3,6 +3,8 @@ name: Tests
 on:
   push:
     branches: [main]
+  pull_request:
+    branches: [main]
 
 jobs:
   Tests:

src/pytest_pl_grader/plugin.py

Lines changed: 141 additions & 37 deletions
@@ -22,6 +22,7 @@
 from .fixture import StudentFixture
 from .utils import GradingOutputLevel
 from .utils import NamesForUserInfo
+from .utils import ProcessStartResponse
 from .utils import ProcessStatusCode
 from .utils import get_output_level_marker
 
@@ -81,9 +82,16 @@ def data_json(request: pytest.FixtureRequest) -> dict[str, Any] | None:
         # raise ValueError(f"Data file '{data_file.name}' not found in '{datadir}'.")
 
 
-# TODO maybe change to the module scope??
-@pytest.fixture
-def sandbox(request: pytest.FixtureRequest, data_json: dict[str, Any] | None) -> Iterable[StudentFixture]:
+def _initialize_sandbox_fixture(
+    request: pytest.FixtureRequest,
+    data_json: dict[str, Any] | None,
+    file_names: StudentFiles,
+) -> tuple[StudentFixture, int]:
+    """
+    Common initialization logic for both sandbox and module_sandbox fixtures.
+    Handles parameter parsing, timeout configuration, and StudentFixture creation.
+    Returns the fixture and the initialization timeout.
+    """
     # Default timeout TODO make this a command line option?
     initialization_timeout = 1
 
@@ -97,10 +105,8 @@ def sandbox(request: pytest.FixtureRequest, data_json: dict[str, Any] | None) ->
 
     # TODO make sure this contains only valid builtins
     builtin_whitelist = params_dict.get("builtin_whitelist")
-
     names_for_user_list = cast(list[NamesForUserInfo] | None, params_dict.get("names_for_user"))
 
-    # TODO maybe make it possible to add custom generators for starting variables?
     starting_vars: dict[str, Any] = {
         "__data_params": deepcopy(params_dict) if data_json is not None else {},
     }
@@ -120,14 +126,11 @@ def sandbox(request: pytest.FixtureRequest, data_json: dict[str, Any] | None) ->
 
     # Check for the custom mark
     marker = request.node.get_closest_marker("sandbox_timeout")
-    if marker:
-        # Markers can have positional arguments (args) or keyword arguments (kwargs)
-        # We'll assume the timeout is the first positional argument
-        if marker.args:
-            initialization_timeout = marker.args[0]
+    if marker and marker.args:
+        initialization_timeout = marker.args[0]
 
     fixture = StudentFixture(
-        file_names=request.param,
+        file_names=file_names,
         import_whitelist=import_whitelist,
         import_blacklist=import_blacklist,
         starting_vars=starting_vars,
@@ -136,51 +139,150 @@ def sandbox(request: pytest.FixtureRequest, data_json: dict[str, Any] | None) ->
         worker_username=request.config.getoption("--worker-username"),
     )
 
+    return fixture, initialization_timeout
+
+
+def _handle_sandbox_startup_errors(
+    request: pytest.FixtureRequest,
+    response: ProcessStartResponse,
+    initialization_timeout: int,
+) -> None:
+    """
+    Common error handling logic for sandbox fixture startup failures.
+    Handles exceptions, timeouts, and other error conditions.
+    """
+    response_status = response["status"]
+
+    if response_status == ProcessStatusCode.EXCEPTION:
+        output_level: GradingOutputLevel = get_output_level_marker(request.node.get_closest_marker("output"))
+
+        logger.debug(f"Grading output level set to: {output_level}")
+        exception_name = response.get("execution_error", "Unknown error")
+        fail_message = f"Student code execution failed with an exception: {exception_name}"
+
+        if output_level == GradingOutputLevel.ExceptionName:
+            pytest.fail(fail_message, pytrace=False)
+
+        exception_message = response.get("execution_message", "")
+        fail_message += f"{os.linesep}Exception Message: {exception_message}"
+
+        if output_level == GradingOutputLevel.ExceptionMessage:
+            pytest.fail(fail_message, pytrace=False)
+
+        assert output_level == GradingOutputLevel.FullTraceback
+
+        if exception_traceback := response.get("execution_traceback", ""):
+            fail_message += f"{os.linesep * 2}{exception_traceback}"
+
+        pytest.fail(fail_message, pytrace=False)
+
+    elif response_status == ProcessStatusCode.TIMEOUT:
+        pytest.fail("Student code initialization timed out", pytrace=False)
+
+    elif response_status == ProcessStatusCode.NO_RESPONSE:
+        pytest.fail(f"No response from initialization with timeout {initialization_timeout}", pytrace=False)
+
+    elif response_status != ProcessStatusCode.SUCCESS:
+        logger.warning(f"Unexpected status in response from student code server: {response}")
+        pytest.fail(f"Unexpected status from student code server: {response_status}", pytrace=False)
+
+
+@pytest.fixture
+def sandbox(request: pytest.FixtureRequest, data_json: dict[str, Any] | None) -> Iterable[StudentFixture]:
+    fixture, initialization_timeout = _initialize_sandbox_fixture(request, data_json, request.param)
+
     try:
         # TODO make sure to read student output and include in the exception message
         # TODO also get this configuration by reading from the marker
         response = fixture.start_student_code_server(initialization_timeout=initialization_timeout)
-        response_status = response["status"]
+        _handle_sandbox_startup_errors(request, response, initialization_timeout)
 
-        if response_status == ProcessStatusCode.EXCEPTION:
-            output_level: GradingOutputLevel = get_output_level_marker(request.node.get_closest_marker("output"))
+        yield fixture
+    finally:
+        fixture._cleanup()
 
-            logger.debug(f"Grading output level set to: {output_level}")
-            exception_name = response.get("execution_error", "Unknown error")
-            fail_message = f"Student code execution failed with an exception: {exception_name}"
 
-            if output_level == GradingOutputLevel.ExceptionName:
-                pytest.fail(fail_message, pytrace=False)
+@pytest.fixture(scope="module")
+def module_sandbox(request: pytest.FixtureRequest, data_json: dict[str, Any] | None) -> Iterable[StudentFixture]:
+    """
+    Module-scoped sandbox fixture that shares the same student code server across all tests in a module.
+    Each different student code instance gets its own sandbox, but tests within the same module
+    using the same student code instance share the sandbox for better performance.
 
-            exception_message = response.get("execution_message", "")
-            fail_message += f"{os.linesep}Exception Message: {exception_message}"
+    Important: This fixture only supports scenarios with a single student_code.py file.
+    If multiple student code files are detected (e.g., student_code_1.py, student_code_2.py),
+    it will raise an error. Use the regular 'sandbox' fixture for parameterized testing
+    across multiple student code variants.
 
-            if output_level == GradingOutputLevel.ExceptionMessage:
-                pytest.fail(fail_message, pytrace=False)
+    Note: This fixture doesn't support parameterization like the regular sandbox fixture
+    because module-scoped fixtures are created once per module before parameterization occurs.
+    """
+    # Get the plugin instance to access the cache
+    plugin = request.config.result_collector_plugin  # type: ignore[attr-defined]
+
+    # Get the test module directory to find student code
+    module_name = request.module.__name__
+    module_file = Path(request.module.__file__)
+    data_dir = module_file.parent
+
+    # Find student code files - first try the same directory as the test module
+    student_code_pattern = "student_code*.py"
+    student_code_files = list(data_dir.glob(student_code_pattern))
+
+    # If not found in the same directory, look for a subdirectory with the module name
+    if not student_code_files:
+        subdirectory = data_dir / module_file.stem  # e.g., test_module_sandbox
+        if subdirectory.is_dir():
+            student_code_files = list(subdirectory.glob(student_code_pattern))
+            data_dir = subdirectory  # Update data_dir to the subdirectory
+
+    if not student_code_files:
+        pytest.fail(f"No student code files found matching pattern '{student_code_pattern}' in {data_dir} or {data_dir}/{module_file.stem}")
+
+    # Check for multiple student code files and raise an error
+    if len(student_code_files) > 1:
+        student_file_names = [f.name for f in student_code_files]
+        pytest.fail(
+            f"Multiple student code files found: {student_file_names}. "
+            f"The module_sandbox fixture does not support parameterization across multiple student files. "
+            f"Use the regular 'sandbox' fixture instead to test all student code variants, "
+            f"or reduce to a single student_code.py file for module-level caching."
+        )
 
-            # TODO make this not an assert?
-            assert output_level == GradingOutputLevel.FullTraceback
+    # Use the first (and only) student code file found
+    student_code_file = student_code_files[0]
 
-            if exception_traceback := response.get("execution_traceback", ""):
-                fail_message += f"{os.linesep * 2}{exception_traceback}"
+    # Create cache key
+    cache_key = (module_name, str(student_code_file))
 
-            pytest.fail(fail_message, pytrace=False)
+    # Check if we already have a cached sandbox for this module/student code combination
+    if cache_key in plugin.module_sandbox_cache:
+        yield plugin.module_sandbox_cache[cache_key]
+        return
+
+    # Set up file paths (similar to regular sandbox)
+    leading_file = data_dir / "leading_code.py"
+    trailing_file = data_dir / "trailing_code.py"
+    setup_code_file = data_dir / "setup_code.py"
+
+    student_files = StudentFiles(leading_file, trailing_file, student_code_file, setup_code_file)
 
-        elif response_status == ProcessStatusCode.TIMEOUT:
-            # Don't get the exception message since there usually isn't one for timeouts
-            pytest.fail("Student code initialization timed out", pytrace=False)
+    # Use shared initialization logic
+    fixture, initialization_timeout = _initialize_sandbox_fixture(request, data_json, student_files)
 
-        elif response_status == ProcessStatusCode.NO_RESPONSE:
-            # Don't get the exception message since there usually isn't one for timeouts
-            pytest.fail(f"No response from initialization with timeout {initialization_timeout}", pytrace=False)
+    try:
+        response = fixture.start_student_code_server(initialization_timeout=initialization_timeout)
+        _handle_sandbox_startup_errors(request, response, initialization_timeout)
 
-        elif response_status != ProcessStatusCode.SUCCESS:
-            logger.warning(f"Unexpected status in response from student code server: {response}")
-            pytest.fail(f"Unexpected status from student code server: {response_status}", pytrace=False)
+        # Cache the fixture for reuse within this module
+        plugin.module_sandbox_cache[cache_key] = fixture
 
         yield fixture
     finally:
+        # Only cleanup when the module scope ends, not after each test
         fixture._cleanup()
+        # Remove from cache when cleaned up
+        plugin.module_sandbox_cache.pop(cache_key, None)
 
 
 def pytest_generate_tests(metafunc: pytest.Metafunc) -> None:
@@ -301,11 +403,13 @@ class ResultCollectorPlugin:
     collected_results: dict[str, TestResult]
     student_feedback_data: dict[str, FeedbackFixture]
    grading_data: dict[str, Any]
+    module_sandbox_cache: dict[tuple[str, str], StudentFixture]
 
     def __init__(self) -> None:
         self.collected_results = {}
         self.student_feedback_data = {}
         self.grading_data = {}
+        self.module_sandbox_cache = {}
 
     def pytest_configure(self, config: Config) -> None:
         """

tests/scenario_root/test_module_sandbox/__init__.py

Whitespace-only changes.
Lines changed: 46 additions & 0 deletions
@@ -0,0 +1,46 @@
+{
+    "params": {
+        "multiplier": 3,
+        "base_value": 42,
+        "greeting_message": "Hello from data.json!",
+        "counter_increment": 1,
+        "test_array": [1, 2, 3, 4, 5],
+        "names_for_user": [
+            {
+                "name": "MULTIPLIER",
+                "type": "int",
+                "description": "Multiplier value from data.json"
+            },
+            {
+                "name": "BASE_VALUE",
+                "type": "int",
+                "description": "Base value from data.json"
+            },
+            {
+                "name": "SETUP_CONSTANT",
+                "type": "str",
+                "description": "Setup completion message"
+            },
+            {
+                "name": "GREETING_MESSAGE",
+                "type": "str",
+                "description": "Greeting message from data.json"
+            },
+            {
+                "name": "TEST_ARRAY",
+                "type": "list",
+                "description": "Test array from data.json"
+            },
+            {
+                "name": "get_setup_value",
+                "type": "function",
+                "description": "Function to get setup value"
+            },
+            {
+                "name": "multiply_by_setup_value",
+                "type": "function",
+                "description": "Function to multiply using setup value"
+            }
+        ]
+    }
+}
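The new data file above supplies the parameters consumed by the data_json fixture. A rough sketch of what a check like the scenario's test_data_json_integration might assert follows; it assumes data_json returns this parsed JSON and that the params_dict used by _initialize_sandbox_fixture is its "params" entry, since the actual test module is not shown in this diff view.

# Sketch only, under the assumptions stated above.
def test_data_json_integration(module_sandbox, data_json):
    params = (data_json or {}).get("params", {})

    assert params.get("multiplier") == 3
    assert params.get("base_value") == 42

    # 42 * 3 = 126: the product the student code can compute from the
    # MULTIPLIER and BASE_VALUE names listed under names_for_user.
    assert params["base_value"] * params["multiplier"] == 126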
Lines changed: 79 additions & 0 deletions
@@ -0,0 +1,79 @@
+{
+    "expected_data_object": {
+        "score": 1.0,
+        "tests": [
+            {
+                "max_points": 1,
+                "name": "test_module_shared_counter_1",
+                "points_frac": 1.0,
+                "points": 1.0,
+                "test_id": "test_module_sandbox.py::test_module_shared_counter_1",
+                "outcome": "passed"
+            },
+            {
+                "max_points": 1,
+                "name": "test_module_shared_counter_2",
+                "points_frac": 1.0,
+                "points": 1.0,
+                "test_id": "test_module_sandbox.py::test_module_shared_counter_2",
+                "outcome": "passed"
+            },
+            {
+                "max_points": 1,
+                "name": "test_module_shared_counter_3",
+                "points_frac": 1.0,
+                "points": 1.0,
+                "test_id": "test_module_sandbox.py::test_module_shared_counter_3",
+                "outcome": "passed"
+            },
+            {
+                "max_points": 1,
+                "name": "test_basic_functionality",
+                "points_frac": 1.0,
+                "points": 1.0,
+                "test_id": "test_module_sandbox.py::test_basic_functionality",
+                "outcome": "passed"
+            },
+            {
+                "max_points": 1,
+                "name": "test_function_with_output",
+                "points_frac": 1.0,
+                "points": 1.0,
+                "test_id": "test_module_sandbox.py::test_function_with_output",
+                "outcome": "passed"
+            },
+            {
+                "max_points": 1,
+                "name": "test_data_json_integration",
+                "points_frac": 1.0,
+                "points": 1.0,
+                "test_id": "test_module_sandbox.py::test_data_json_integration",
+                "outcome": "passed"
+            },
+            {
+                "max_points": 1,
+                "name": "test_setup_code_integration",
+                "points_frac": 1.0,
+                "points": 1.0,
+                "test_id": "test_module_sandbox.py::test_setup_code_integration",
+                "outcome": "passed"
+            },
+            {
+                "max_points": 1,
+                "name": "test_array_processing",
+                "points_frac": 1.0,
+                "points": 1.0,
+                "test_id": "test_module_sandbox.py::test_array_processing",
+                "outcome": "passed"
+            },
+            {
+                "max_points": 1,
+                "name": "test_setup_function_usage",
+                "points_frac": 1.0,
+                "points": 1.0,
+                "test_id": "test_module_sandbox.py::test_setup_function_usage",
+                "outcome": "passed"
+            }
+        ]
+    }
+}
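As a sanity check, the expected score of 1.0 is consistent with the per-test entries: nine tests, each with max_points 1 and points 1.0. A small sketch of that arithmetic (the plugin's actual scoring code is not part of this commit):

# Nine expected tests, each fully earned.
tests = [{"max_points": 1, "points": 1.0}] * 9

earned = sum(t["points"] for t in tests)        # 9.0
possible = sum(t["max_points"] for t in tests)  # 9
score = earned / possible

assert score == 1.0  # matches the "score" field above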
