forked from allenai/asta-bench
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstateful_python.py
More file actions
109 lines (79 loc) · 3.09 KB
/
Copy pathstateful_python.py
File metadata and controls
109 lines (79 loc) · 3.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import json
import logging
from contextvars import ContextVar
from inspect_ai.tool import tool
from inspect_ai.util import sandbox
from astabench.util.sandbox import SandboxJupyter
from astabench.util.sandbox.static.sandbox_types import (
JUPYTER_HISTORY_FILE,
CellHistoryItem,
)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Context variable holding the SandboxJupyter instance for the current context.
# It is created without a default, so callers must pass a fallback to .get()
# (as get_sandbox_jupyter does) or be prepared for LookupError when unset.
sandbox_jupyter = ContextVar[SandboxJupyter]("sandbox_jupyter")
async def get_sandbox_jupyter():
    """Return the SandboxJupyter stored in the ``sandbox_jupyter`` context variable.

    If the context variable holds no (truthy) instance yet, a
    ``SandboxJupyter("default")`` is constructed, its ``setup()`` coroutine is
    awaited, and the instance is stored in the context variable before it is
    returned.
    """
    existing = sandbox_jupyter.get(None)
    if existing:
        return existing

    # First use in this context: build the instance and remember it.
    created = SandboxJupyter("default")
    await created.setup()
    sandbox_jupyter.set(created)
    return created
async def exec_python_session(code: str):
    """
    Run python code in a notebook environment.  This tool is stateful and
    maintains variables across calls.  Like Jupyter, it returns the value of
    the final statement in the code as well as any print statements.  It works
    with notebook syntax like "!" to run shell commands.

    Args:
        code: The python code to execute.
    """
    # NOTE(review): this function is returned by the @tool-decorated
    # python_session factory, so the docstring above is presumably the
    # description shown to the model -- confirm before rewording it.
    #
    # The SandboxJupyter comes from the sandbox_jupyter contextvar so that
    # each sample gets its own instance.
    session = await get_sandbox_jupyter()
    output = await session.run_code(code=code)
    return output
# Tool factory: passes exec_python_session through the `tool` decorator
# imported from inspect_ai.tool. The factory itself takes no arguments and
# simply returns the coroutine function that does the work.
@tool
def python_session():
    return exec_python_session
async def continue_wait_impl():
    """
    Continue waiting for the previous code from the stateful python tool to
    finish.  This tool has the same timeout as the execution tool and can be
    used any number of times to allow arbitrary waiting.  It will return any
    subsequent output from the command.

    This will only work if the previous code timed out and is still running
    (not interrupted).
    """
    # TODO: not implemented yet.  The intended body is to await
    # get_sandbox_jupyter() and return the result of awaiting its
    # continue_after_timeout() method; until then every call fails.
    reason = "continue_wait can't currently be used as a standalone tool, because the jupyter contextvar is not in a session"
    raise NotImplementedError(reason)
# Tool factory for the "continue waiting" tool.  It is deliberately disabled:
# calling the factory raises NotImplementedError instead of returning a tool.
@tool
def continue_wait():
    # TODO: return continue_wait_impl here once the jupyter contextvar is
    # available in a session.  The former `return continue_wait_impl` placed
    # after this raise was unreachable and has been removed.
    raise NotImplementedError(
        "continue_wait can't currently be used as a standalone tool, because the jupyter contextvar is not in a session"
    )
async def interrupt_kernel_impl():
    """
    Interrupt the currently running code in the stateful python tool, if it
    previously timed out.  This is typically not needed, because the code is
    automatically interrupted if a new code block is executed.
    """
    # NOTE(review): this function is returned by the @tool-decorated
    # interrupt_kernel factory, so the docstring above is presumably the
    # description shown to the model -- keep it user-facing.
    session = await get_sandbox_jupyter()
    return await session.interrupt_kernel()
# Tool factory: passes interrupt_kernel_impl through the `tool` decorator
# imported from inspect_ai.tool. The factory itself takes no arguments and
# simply returns the coroutine function that does the work.
@tool
def interrupt_kernel():
    return interrupt_kernel_impl
async def get_cell_history(
    filename: str = JUPYTER_HISTORY_FILE,
) -> list[CellHistoryItem] | None:
    """Load the history of code cells executed in the Jupyter environment.

    Args:
        filename: Path of the JSON history file to read from the sandbox.

    Returns:
        One ``CellHistoryItem`` per entry of the decoded JSON, or ``None``
        (after logging a warning) when the file is missing from the sandbox.
    """
    try:
        contents = await sandbox().read_file(filename)
    except FileNotFoundError:
        logger.warning(f"Missing {filename} in sandbox; can't score trajectory metrics")
        return None

    # Each decoded entry supplies the keyword arguments for one history item.
    entries = json.loads(contents)
    return [CellHistoryItem(**entry) for entry in entries]