Skip to content

Commit 17b4c1f

Browse files
authored
Merge pull request #126 from bastoica/bugfix-egwalker
[arteval] Bugfix for EGWALKER
2 parents f16a1dd + edbad61 commit 17b4c1f

File tree

6 files changed

+514
-432
lines changed

6 files changed

+514
-432
lines changed

benchmarks/arteval_bench/data/benchmark/eurosys25_egwalker/_agent_eval/main.py

Lines changed: 91 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,86 +1,124 @@
11
#!/usr/bin/env python3
2-
"""Runs environment setup, build, benchmark prep, and experiment runs checks for EGWALKER."""
2+
"""Runs environment setup, build, benchmark prep, and experiment runs checks for EGWALKER (EuroSys'25)."""
33

44
from __future__ import annotations
55

6-
import os
7-
import sys
86
from pathlib import Path
97
from typing import Dict
8+
import os
9+
import sys
10+
1011

1112
_AGENT_EVAL_DIR = Path(__file__).resolve().parent
1213
_AGENT_SRC_DIR = _AGENT_EVAL_DIR.parents[3] / "src"
1314
sys.path.append(str(_AGENT_SRC_DIR))
1415

16+
17+
from evaluator.utils import (
18+
EntryConfig,
19+
LoggerConfig,
20+
get_logger,
21+
record_result,
22+
)
1523
from oracle_artifact_build import OracleArtifactBuild
16-
from oracle_benchmark_prep import OracleBenchmarkPrep
1724
from oracle_env_setup import OracleEnvSetup
25+
from oracle_benchmark_prep import OracleBenchmarkPrep
1826
from oracle_experiment_runs import OracleExperimentRuns
19-
from evaluator.utils import EntryConfig, LoggerConfig, get_logger, record_result
2027

2128

22-
EGWALKER_CONFIG = EntryConfig(
23-
name="eurosys25-egwalker",
24-
home_dir=Path.home() / "eurosys25_egwalker",
25-
repository_paths={
26-
"eurosys25-egwalker": Path.home() / "eurosys25_egwalker" / "egwalker",
29+
def _resolve_workspace_paths() -> tuple[Path, Path]:
    """Resolve and validate the _agent_eval/ and egwalker/ locations.

    This expects that either:
      (1) _agent_eval/ and egwalker/ are located in the same root directory; or
      (2) _AGENT_EVAL_DIR and _EGWALKER_HOME are set by the user.

    Returns:
      A ``(agent_eval_dir, workspace_root)`` pair of resolved paths, where
      ``workspace_root`` is the directory expected to contain ``egwalker/``.

    Raises:
      RuntimeError: If the _agent_eval directory or the ``egwalker/``
        directory is missing or is not a directory, or if an OSError is
        raised while resolving the paths.
    """
    try:
        env_agent_eval = os.environ.get("_AGENT_EVAL_DIR")
        env_egwalker_home = os.environ.get("_EGWALKER_HOME")

        # An explicit environment override wins; otherwise fall back to the
        # directory that holds this file.
        if env_agent_eval:
            agent_eval_dir = Path(env_agent_eval).expanduser().resolve()
        else:
            agent_eval_dir = Path(__file__).resolve().parent

        # Without an override, the workspace root is the parent of
        # _agent_eval/, i.e. both directories are siblings.
        if env_egwalker_home:
            egwalker_home = Path(env_egwalker_home).expanduser().resolve()
        else:
            egwalker_home = agent_eval_dir.parent.resolve()

        # is_dir() is already False for a non-existent path, so a separate
        # exists() check is unnecessary.
        if not agent_eval_dir.is_dir():
            raise RuntimeError(
                f"Invalid _agent_eval dir: {agent_eval_dir}\n"
                "This runner expects _agent_eval/ and egwalker/ to be located in the same root directory.\n"
                "Set _AGENT_EVAL_DIR to the directory containing main.py if needed."
            )

        egwalker_repo_root = egwalker_home / "egwalker"
        if not egwalker_repo_root.is_dir():
            raise RuntimeError(
                f"Invalid EGWALKER workspace: {egwalker_home}\n"
                f"Expected to find a 'egwalker/' directory at: {egwalker_repo_root}\n"
                "This runner expects _agent_eval/ and egwalker/ to be located in the same root directory.\n"
                "Set _EGWALKER_HOME to the workspace root if needed."
            )

        workspace_root = egwalker_home
        return agent_eval_dir, workspace_root

    except OSError as exc:
        # Surface filesystem failures under the same exception type as the
        # validation errors above, so callers handle a single error type.
        raise RuntimeError(f"Failed to resolve workspace paths: {exc}") from exc
70+
71+
72+
def _build_egwalker_config(*, agent_eval_dir: Path, workspace_root: Path) -> EntryConfig:
73+
"""Constructs EntryConfig for the EGWALKER evaluation bundle from resolved paths."""
74+
egwalker_repo = (workspace_root / "egwalker").resolve()
75+
egwalker_agent_eval = agent_eval_dir.resolve()
76+
egwalker_refs = (egwalker_agent_eval / "refs").resolve()
77+
egwalker_results = (egwalker_repo / "results").resolve()
78+
79+
return EntryConfig(
80+
name = "eurosys25-egwalker",
81+
home_dir = workspace_root,
82+
repository_paths = {
83+
"eurosys25-egwalker": egwalker_repo,
2784
},
28-
results_paths={
29-
# Matches legacy: <repo>/results/timings.json
30-
"timings": Path.home()
31-
/ "eurosys25_egwalker"
32-
/ "egwalker"
33-
/ "results"
34-
/ "timings.json",
85+
results_paths = {
86+
"timings": egwalker_results / "timings.json",
3587
},
36-
ground_truth_paths={
37-
"datasets": (
38-
Path.home()
39-
/ "eurosys25_egwalker"
40-
/ "_agent_eval"
41-
/ "refs"
42-
/ "datasets.ref.json"
43-
),
44-
"timings": (
45-
Path.home()
46-
/ "eurosys25_egwalker"
47-
/ "_agent_eval"
48-
/ "refs"
49-
/ "timings.ref.json"
50-
),
88+
ground_truth_paths = {
89+
"datasets": egwalker_refs / "datasets.ref.json",
90+
"timings": egwalker_refs / "timings.ref.json",
5191
},
52-
similarity_ratio=0.75,
53-
)
92+
similarity_ratio = 0.75,
93+
)
5494

5595

5696
def main(argv: list[str]) -> int:
97+
verbose = "--verbose" in argv
98+
5799
results: Dict[str, int] = {}
58100
score = 0
59101

60-
verbose = "--verbose" in argv
102+
logger_name = os.environ.get("EVAL_LOGGER_NAME", "EGWALKER-AGENT-EVALUATOR")
103+
logger = get_logger(LoggerConfig(root_name = logger_name))
61104

62-
logger_name = os.environ.get("EVAL_LOGGER_NAME", "EGWALKER-EVAL")
63-
logger = get_logger(LoggerConfig(root_name=logger_name))
105+
try:
106+
agent_eval_dir, workspace_root = _resolve_workspace_paths()
107+
EGWALKER_CONFIG = _build_egwalker_config(agent_eval_dir = agent_eval_dir, workspace_root = workspace_root)
108+
except RuntimeError as exc:
109+
raise SystemExit(str(exc)) from exc
64110

65-
env_checker = OracleEnvSetup(config=EGWALKER_CONFIG, logger=logger)
66-
score += record_result(
67-
logger, results, type(env_checker).__name__, env_checker.run(verbose=verbose)
68-
)
111+
env_checker = OracleEnvSetup(config = EGWALKER_CONFIG, logger = logger)
112+
score += record_result(results, type(env_checker).__name__, env_checker.run(verbose = verbose))
69113

70-
build_checker = OracleArtifactBuild(config=EGWALKER_CONFIG, logger=logger)
71-
score += record_result(
72-
logger, results, type(build_checker).__name__, build_checker.run(verbose=verbose)
73-
)
114+
build_checker = OracleArtifactBuild(config = EGWALKER_CONFIG, logger = logger)
115+
score += record_result(results, type(build_checker).__name__, build_checker.run(verbose = verbose))
74116

75-
prep_checker = OracleBenchmarkPrep(config=EGWALKER_CONFIG, logger=logger)
76-
score += record_result(
77-
logger, results, type(prep_checker).__name__, prep_checker.run(verbose=verbose)
78-
)
117+
prep_checker = OracleBenchmarkPrep(config = EGWALKER_CONFIG, logger = logger)
118+
score += record_result(results, type(prep_checker).__name__, prep_checker.run(verbose = verbose))
79119

80-
runs_checker = OracleExperimentRuns(config=EGWALKER_CONFIG, logger=logger)
81-
score += record_result(
82-
logger, results, type(runs_checker).__name__, runs_checker.run(verbose=verbose)
83-
)
120+
runs_checker = OracleExperimentRuns(config = EGWALKER_CONFIG, logger = logger)
121+
score += record_result(results, type(runs_checker).__name__, runs_checker.run(verbose = verbose))
84122

85123
logger.info("Agent scores: %s", results)
86124
return score
Lines changed: 99 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
"""Artifact build oracle for the Eurosys'25 EGWALKER artifact.
1+
"""Artifact build oracle for EGWALKER (EuroSys'25).
22
33
Validates:
4-
- Required repository working directories exist.
4+
- Repository working directory exists.
55
- Build commands execute successfully (captures stdout/stderr/return code).
66
"""
77

@@ -13,102 +13,113 @@
1313
from pathlib import Path
1414

1515
from evaluator.oracle_artifact_build_primitives import (
16-
BuildCommandRequirement,
17-
BuildRequirement,
18-
OracleArtifactBuildBase,
16+
BuildCommandRequirement,
17+
OracleArtifactBuildBase,
1918
)
20-
from evaluator.utils import EntryConfig
19+
from evaluator.utils import EntryConfig, BaseRequirement
2120

2221

23-
@dataclass(frozen = True, slots = True, kw_only = True)
22+
@dataclass(frozen=True, slots=True, kw_only=True)
2423
class BuildTarget:
25-
"""Declarative description of one build command to run."""
24+
"""Declarative description of one build command to run.
2625
27-
name: str
28-
command: Sequence[str]
29-
cwd_relative: Path | None = None
30-
optional: bool = False
31-
timeout_seconds: float = 60.0
32-
env_overrides: Mapping[str, str] = field(default_factory = dict)
26+
Kept intentionally thin: the base primitive (BuildCommandRequirement) performs
27+
the authoritative validation and normalization.
28+
"""
3329

34-
def __post_init__(self) -> None:
35-
if not self.name:
36-
raise ValueError("BuildTarget.name must be non-empty")
37-
if not self.command:
38-
raise ValueError(f"{self.name}: command must be non-empty")
39-
if self.timeout_seconds <= 0:
40-
raise ValueError(f"{self.name}: timeout_seconds must be > 0")
30+
name: str
31+
cmd: Sequence[str]
32+
relative_workdir: Path | None = None
33+
optional: bool = False
34+
timeout_seconds: float = 60.0
35+
env_overrides: Mapping[str, str] = field(default_factory=dict)
4136

42-
# Normalize for downstream requirements.
43-
if self.cwd_relative is not None and not isinstance(self.cwd_relative, Path):
44-
object.__setattr__(self, "cwd_relative", Path(self.cwd_relative))
37+
def __post_init__(self) -> None:
38+
if not self.name:
39+
raise ValueError("BuildTarget.name must be non-empty")
4540

46-
# Freeze command to avoid accidental mutation.
47-
object.__setattr__(self, "command", tuple(self.command))
41+
object.__setattr__(self, "cmd", tuple(self.cmd))
42+
43+
if self.relative_workdir is not None and not isinstance(
44+
self.relative_workdir, Path
45+
):
46+
object.__setattr__(self, "relative_workdir", Path(self.relative_workdir))
4847

4948

5049
class OracleArtifactBuild(OracleArtifactBuildBase):
51-
"""The artifact build oracle for artifact-core.
52-
53-
Defaults:
54-
* Runs build commands in the repo keyed by config.name.
55-
* EntryConfig.repository_paths must contain an entry for config.name.
56-
"""
57-
58-
_DEFAULT_TARGET_SPECS: tuple[tuple[str, tuple[str, ...], float], ...] = (
59-
(
60-
"artifact-core: make tools",
61-
(
62-
"make",
63-
"-j8",
64-
"tools/diamond-types/target/release/dt",
65-
"tools/crdt-converter/target/release/crdt-converter",
66-
"tools/diamond-types/target/release/paper-stats",
67-
"tools/paper-benchmarks/target/memusage/paper-benchmarks",
68-
"tools/paper-benchmarks/target/release/paper-benchmarks",
69-
"tools/ot-bench/target/memusage/ot-bench",
70-
"tools/ot-bench/target/release/ot-bench",
71-
),
72-
60.0,
73-
),
74-
)
75-
76-
def __init__(
77-
self,
78-
*,
79-
config: EntryConfig,
80-
logger: logging.Logger,
81-
targets: Sequence[BuildTarget] | None = None,
82-
) -> None:
83-
super().__init__(logger = logger)
84-
self._config = config
85-
86-
if targets is None:
87-
targets = self._make_default_targets()
88-
self._targets = tuple(targets)
89-
90-
names = [t.name for t in self._targets]
91-
if len(names) != len(set(names)):
92-
raise ValueError(f"Duplicate build target names: {names!r}")
93-
94-
def _make_default_targets(self) -> tuple[BuildTarget, ...]:
95-
"""Creates default targets (stored in the EntryConfig object)."""
96-
return tuple(
97-
BuildTarget(name = name, command = command, timeout_seconds = timeout_seconds)
98-
for (name, command, timeout_seconds) in self._DEFAULT_TARGET_SPECS
50+
"""The artifact build oracle for artifact-core.
51+
52+
Defaults:
53+
* Runs build commands in the repo keyed by config.name.
54+
* EntryConfig.repository_paths is expected to contain an entry for config.name.
55+
"""
56+
57+
_DEFAULT_TARGET_SPECS: tuple[tuple[str, tuple[str, ...], float], ...] = (
58+
(
59+
"artifact-core: make tools",
60+
(
61+
"make",
62+
"-j8",
63+
"tools/diamond-types/target/release/dt",
64+
"tools/crdt-converter/target/release/crdt-converter",
65+
"tools/diamond-types/target/release/paper-stats",
66+
"tools/paper-benchmarks/target/memusage/paper-benchmarks",
67+
"tools/paper-benchmarks/target/release/paper-benchmarks",
68+
"tools/ot-bench/target/memusage/ot-bench",
69+
"tools/ot-bench/target/release/ot-bench",
70+
),
71+
300.0,
72+
),
9973
)
10074

101-
def requirements(self) -> Sequence[BuildRequirement]:
102-
"""Returns an ordered list of build requirements to validate."""
103-
return tuple(
104-
BuildCommandRequirement(
105-
name = target.name,
106-
optional = target.optional,
107-
cwd = self._config.repository_paths[self._config.name],
108-
command = target.command,
109-
cwd_relative = target.cwd_relative,
110-
timeout_seconds = target.timeout_seconds,
111-
env_overrides = target.env_overrides,
112-
)
113-
for target in self._targets
114-
)
75+
def __init__(
76+
self,
77+
*,
78+
config: EntryConfig,
79+
logger: logging.Logger,
80+
targets: Sequence[BuildTarget] | None = None,
81+
) -> None:
82+
super().__init__(logger=logger)
83+
self._config = config
84+
85+
if targets is None:
86+
targets = self._make_default_targets()
87+
self._targets = tuple(targets)
88+
89+
names = [t.name for t in self._targets]
90+
if len(names) != len(set(names)):
91+
raise ValueError(f"Duplicate build target names: {names!r}")
92+
93+
def _make_default_targets(self) -> tuple[BuildTarget, ...]:
94+
return tuple(
95+
BuildTarget(name=name, cmd=cmd, timeout_seconds=timeout_seconds)
96+
for (name, cmd, timeout_seconds) in self._DEFAULT_TARGET_SPECS
97+
)
98+
99+
def requirements(self) -> Sequence[BaseRequirement]:
100+
"""Returns an ordered list of build requirements to validate."""
101+
repo_root = self._config.repository_paths.get(self._config.name)
102+
103+
if repo_root is None:
104+
return (
105+
BuildCommandRequirement(
106+
name=f"config: missing repository_paths entry for {self._config.name!r}",
107+
optional=False,
108+
cwd=Path(self._config.home_dir) / "__MISSING_REPOSITORY_PATH__",
109+
cmd=("true",),
110+
timeout_seconds=1.0,
111+
),
112+
)
113+
114+
return tuple(
115+
BuildCommandRequirement(
116+
name=target.name,
117+
optional=target.optional,
118+
cwd=repo_root,
119+
cmd=target.cmd,
120+
relative_workdir=target.relative_workdir,
121+
timeout_seconds=target.timeout_seconds,
122+
env_overrides=target.env_overrides,
123+
)
124+
for target in self._targets
125+
)

0 commit comments

Comments
 (0)