Skip to content

Commit f16a1dd

Browse files
authored
Merge pull request #120 from bastoica/bugfix-wasabi
[arteval] Bugfix for WASABI
2 parents 0bc0c00 + f5aba88 commit f16a1dd

File tree

6 files changed

+964
-876
lines changed

6 files changed

+964
-876
lines changed

benchmarks/arteval_bench/data/benchmark/sosp24_wasabi/_agent_eval/main.py

Lines changed: 135 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,52 +1,130 @@
11
#!/usr/bin/env python3
2-
"""Runs environment setup checks for WASABI."""
2+
"""Runs environment setup, build, benchmark prep, and experiment runs checks for WASABI."""
33

44
from __future__ import annotations
5+
56
from pathlib import Path
67
from typing import Dict
78
import os
89
import sys
910

10-
11-
_AGENT_EVAL_DIR = Path(__file__).resolve().parent
12-
_AGENT_SRC_DIR = _AGENT_EVAL_DIR.parents[3] / "src"
13-
sys.path.append(str(_AGENT_SRC_DIR))
14-
15-
1611
from evaluator.utils import (
17-
EntryConfig,
18-
LoggerConfig,
19-
get_logger,
20-
record_result,
12+
EntryConfig,
13+
LoggerConfig,
14+
get_logger,
15+
record_result,
2116
)
2217
from oracle_artifact_build import OracleArtifactBuild
2318
from oracle_env_setup import OracleEnvSetup
2419
from oracle_benchmark_prep import OracleBenchmarkPrep
2520
from oracle_experiment_runs import OracleExperimentRuns
2621

2722

28-
# NOTE: WASABI bundle layout mirrors the legacy constants, but we build it directly
29-
# from EntryConfig rather than importing legacy globals.
30-
_WASABI_HOME = Path.home() / "sosp24_wasabi"
31-
_WASABI_REPO = _WASABI_HOME / "wasabi"
32-
_WASABI_BENCH = _WASABI_HOME / "benchmarks"
33-
34-
35-
WASABI_CONFIG = EntryConfig(
36-
name = "sosp24-wasabi",
37-
home_dir = _WASABI_HOME,
38-
repository_paths = {
39-
"sosp24-wasabi": _WASABI_REPO,
40-
"benchmarks": _WASABI_BENCH,
41-
},
42-
results_paths = {
43-
"results_root": _WASABI_REPO / "results",
44-
},
45-
ground_truth_paths = {
46-
"bugs_ground_truth": _WASABI_REPO / "bugs_ground_truth.txt",
47-
},
48-
similarity_ratio = 0.75,
49-
)
23+
def _resolve_workspace_paths() -> tuple[Path, Path, Path]:
24+
"""Resolve and validate _agent_eval/ and wasabi/ locations.
25+
This expectes that either:
26+
(1) _agent_eval/ and wasabi/ are located in the same root directory; or
27+
(2) _AGENT_EVAL_DIR and _WASABI_HOME are set by the user
28+
"""
29+
try:
30+
env_agent_eval = os.environ.get("_AGENT_EVAL_DIR")
31+
env_wasabi_home = os.environ.get("_WASABI_HOME")
32+
33+
if env_agent_eval:
34+
agent_eval_dir = Path(env_agent_eval).expanduser().resolve()
35+
else:
36+
agent_eval_dir = Path(__file__).resolve().parent
37+
38+
if env_wasabi_home:
39+
wasabi_home = Path(env_wasabi_home).expanduser().resolve()
40+
else:
41+
wasabi_home = agent_eval_dir.parent.resolve()
42+
43+
if not agent_eval_dir.exists() or not agent_eval_dir.is_dir():
44+
raise RuntimeError(
45+
f"Invalid _agent_eval dir: {agent_eval_dir}\n"
46+
f"This runner expects _agent_eval/ and wasabi/ to be located in the same root directory.\n"
47+
f"Set _AGENT_EVAL_DIR to the directory containing main.py if needed."
48+
)
49+
50+
wasabi_repo_root = wasabi_home / "wasabi"
51+
if not wasabi_repo_root.exists() or not wasabi_repo_root.is_dir():
52+
raise RuntimeError(
53+
f"Invalid WASABI workspace: {wasabi_home}\n"
54+
f"Expected to find a 'wasabi/' directory at: {wasabi_repo_root}\n"
55+
f"This runner expects _agent_eval/ and wasabi/ to be located in the same root directory.\n"
56+
f"Set _WASABI_HOME to the workspace root if needed."
57+
)
58+
59+
workspace_root = wasabi_home
60+
return agent_eval_dir, wasabi_home, workspace_root
61+
62+
except OSError as exc:
63+
raise RuntimeError(f"Failed to resolve workspace paths: {exc}") from exc
64+
65+
66+
def _build_configs(*, agent_eval_dir: Path, workspace_root: Path) -> EntryConfig:
    """Constructs EntryConfig for the WASABI evaluation bundle from resolved paths."""
    repo_dir = (workspace_root / "wasabi").resolve()
    bench_dir = (workspace_root / "benchmarks").resolve()

    # Pinned Apache benchmark repos; every entry shares the same pom layout.
    pinned_commits = {"hadoop": "60867de", "hbase": "89ca7f4", "hive": "e08a600"}
    benchmark_table = {
        project: {
            "repo_url": f"https://github.com/apache/{project}.git",
            "commit": sha,
            "pom_file": "pom.xml",
            "pom_backup": "pom-original.xml",
        }
        for project, sha in pinned_commits.items()
    }

    # Bytecode markers that indicate AspectJ weaving actually happened.
    weaving_markers = [
        "ajc$preClinit",
        "ajc$initFailureCause",
        "ajc$tjp",
        "ajc$before$",
        "ajc$after$",
        "ajc$around$",
        "ajc$interField$",
        "ajc$interMethod$",
        "org.aspectj.runtime.reflect.Factory",
        "org.aspectj.runtime.internal.AroundClosure",
        "org.aspectj.lang.JoinPoint",
        "org.aspectj.lang.JoinPoint$StaticPart",
        "org.aspectj.lang.ProceedingJoinPoint",
        "org.aspectj.lang.Signature",
        "org.aspectj.lang.NoAspectBoundException",
    ]

    extra = {
        "maven_repo_dir": Path.home() / ".m2" / "repository",
        "weaving_plugin_signature": "aspectj-maven-plugin",
        "primary_artifact": "edu.uchicago.cs.systems:wasabi",
        "benchmarks": benchmark_table,
        "aspectj_markers": weaving_markers,
    }

    return EntryConfig(
        name="sosp24-wasabi",
        home_dir=workspace_root,
        repository_paths={
            "sosp24-wasabi": repo_dir,
            "benchmarks": bench_dir,
        },
        results_paths={"results_root": repo_dir / "results"},
        ground_truth_paths={
            "bugs_ground_truth": agent_eval_dir / "refs" / "bugs_ground_truth.csv",
        },
        similarity_ratio=0.75,
        metadata=extra,
    )
50128

51129

52130
def main(argv: list[str]) -> int:
@@ -56,27 +134,30 @@ def main(argv: list[str]) -> int:
56134
score = 0
57135

58136
logger_name = os.environ.get("EVAL_LOGGER_NAME", "WASABI-AGENT-EVALUATOR")
59-
logger = get_logger(LoggerConfig(root_name = logger_name))
60-
61-
env_checker = OracleEnvSetup(config = WASABI_CONFIG, logger = logger)
62-
score += record_result(
63-
logger, results, type(env_checker).__name__, env_checker.run(verbose = verbose)
64-
)
65-
66-
build_checker = OracleArtifactBuild(config = WASABI_CONFIG, logger = logger)
67-
score += record_result(
68-
logger, results, type(build_checker).__name__, build_checker.run(verbose = verbose)
69-
)
70-
71-
prep_checker = OracleBenchmarkPrep(config = WASABI_CONFIG, logger = logger)
72-
score += record_result(
73-
logger, results, type(prep_checker).__name__, prep_checker.run(verbose = verbose)
74-
)
75-
76-
runs_checker = OracleExperimentRuns(config = WASABI_CONFIG, logger = logger)
77-
score += record_result(
78-
logger, results, type(runs_checker).__name__, runs_checker.run(verbose = verbose)
79-
)
137+
logger = get_logger(LoggerConfig(root_name=logger_name))
138+
139+
try:
140+
agent_eval_dir, _wasabi_home, workspace_root = _resolve_workspace_paths()
141+
wasabi_config = _build_configs(agent_eval_dir=agent_eval_dir, workspace_root=workspace_root)
142+
except RuntimeError as exc:
143+
# Keep failure message clean and actionable
144+
raise SystemExit(str(exc)) from exc
145+
146+
env_checker = OracleEnvSetup(config=wasabi_config, logger=logger)
147+
env_ok = env_checker.run(verbose=verbose)
148+
score += record_result(results, type(env_checker).__name__, env_ok)
149+
150+
build_checker = OracleArtifactBuild(config=wasabi_config, logger=logger)
151+
build_ok = build_checker.run(verbose=verbose)
152+
score += record_result(results, type(build_checker).__name__, build_ok)
153+
154+
prep_checker = OracleBenchmarkPrep(config=wasabi_config, logger=logger)
155+
prep_ok = prep_checker.run(verbose=verbose)
156+
score += record_result(results, type(prep_checker).__name__, prep_ok)
157+
158+
runs_checker = OracleExperimentRuns(config=wasabi_config, logger=logger)
159+
runs_ok = runs_checker.run(verbose=verbose)
160+
score += record_result(results, type(runs_checker).__name__, runs_ok)
80161

81162
logger.info("Agent scores: %s", results)
82163
return score

0 commit comments

Comments
 (0)