Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,86 +1,124 @@
#!/usr/bin/env python3
"""Runs environment setup, build, benchmark prep, and experiment runs checks for EGWALKER."""
"""Runs environment setup, build, benchmark prep, and experiment runs checks for EGWALKER (EuroSys'25)."""

from __future__ import annotations

import os
import sys
from pathlib import Path
from typing import Dict
import os
import sys


_AGENT_EVAL_DIR = Path(__file__).resolve().parent
_AGENT_SRC_DIR = _AGENT_EVAL_DIR.parents[3] / "src"
sys.path.append(str(_AGENT_SRC_DIR))


from evaluator.utils import (
EntryConfig,
LoggerConfig,
get_logger,
record_result,
)
from oracle_artifact_build import OracleArtifactBuild
from oracle_benchmark_prep import OracleBenchmarkPrep
from oracle_env_setup import OracleEnvSetup
from oracle_benchmark_prep import OracleBenchmarkPrep
from oracle_experiment_runs import OracleExperimentRuns
from evaluator.utils import EntryConfig, LoggerConfig, get_logger, record_result


EGWALKER_CONFIG = EntryConfig(
name="eurosys25-egwalker",
home_dir=Path.home() / "eurosys25_egwalker",
repository_paths={
"eurosys25-egwalker": Path.home() / "eurosys25_egwalker" / "egwalker",
def _resolve_workspace_paths() -> tuple[Path, Path]:
    """Resolve and validate the _agent_eval/ and egwalker/ locations.

    This expects that either:
      (1) _agent_eval/ and egwalker/ are located in the same root directory; or
      (2) _AGENT_EVAL_DIR and _EGWALKER_HOME are set by the user.

    Returns:
      A ``(agent_eval_dir, workspace_root)`` pair of resolved absolute paths,
      where ``workspace_root`` is the directory that contains ``egwalker/``.

    Raises:
      RuntimeError: If the _agent_eval directory or the ``egwalker/`` checkout
        is missing or is not a directory, or if resolving the paths fails with
        an ``OSError``.
    """
    try:
        env_agent_eval = os.environ.get("_AGENT_EVAL_DIR")
        env_egwalker_home = os.environ.get("_EGWALKER_HOME")

        # An empty environment variable is treated the same as an unset one.
        if env_agent_eval:
            agent_eval_dir = Path(env_agent_eval).expanduser().resolve()
        else:
            agent_eval_dir = Path(__file__).resolve().parent

        if env_egwalker_home:
            egwalker_home = Path(env_egwalker_home).expanduser().resolve()
        else:
            # Default layout: egwalker/ is a sibling of _agent_eval/.
            egwalker_home = agent_eval_dir.parent.resolve()

        # is_dir() is already False for a path that does not exist.
        if not agent_eval_dir.is_dir():
            raise RuntimeError(
                f"Invalid _agent_eval dir: {agent_eval_dir}\n"
                "This runner expects _agent_eval/ and egwalker/ to be located in the same root directory.\n"
                "Set _AGENT_EVAL_DIR to the directory containing main.py if needed."
            )

        egwalker_repo_root = egwalker_home / "egwalker"
        if not egwalker_repo_root.is_dir():
            raise RuntimeError(
                f"Invalid EGWALKER workspace: {egwalker_home}\n"
                f"Expected to find a 'egwalker/' directory at: {egwalker_repo_root}\n"
                "This runner expects _agent_eval/ and egwalker/ to be located in the same root directory.\n"
                "Set _EGWALKER_HOME to the workspace root if needed."
            )

        return agent_eval_dir, egwalker_home

    except OSError as exc:
        raise RuntimeError(f"Failed to resolve workspace paths: {exc}") from exc


def _build_egwalker_config(*, agent_eval_dir: Path, workspace_root: Path) -> EntryConfig:
    """Construct the EntryConfig for the EGWALKER evaluation bundle.

    Args:
      agent_eval_dir: Directory of the evaluation harness; the reference files
        are looked up under its ``refs/`` subdirectory.
      workspace_root: Directory that contains the ``egwalker/`` checkout.

    Returns:
      An EntryConfig whose repository, results and ground-truth paths are all
      derived from the two arguments.
    """
    egwalker_repo = (workspace_root / "egwalker").resolve()
    egwalker_agent_eval = agent_eval_dir.resolve()
    egwalker_refs = (egwalker_agent_eval / "refs").resolve()
    egwalker_results = (egwalker_repo / "results").resolve()

    return EntryConfig(
        name="eurosys25-egwalker",
        home_dir=workspace_root,
        repository_paths={
            "eurosys25-egwalker": egwalker_repo,
        },
        results_paths={
            "timings": egwalker_results / "timings.json",
        },
        ground_truth_paths={
            "datasets": egwalker_refs / "datasets.ref.json",
            "timings": egwalker_refs / "timings.ref.json",
        },
        similarity_ratio=0.75,
    )


def main(argv: list[str]) -> int:
    """Run every EGWALKER oracle in order and return the accumulated score.

    Args:
      argv: Command-line arguments; ``--verbose`` turns on verbose oracle runs.

    Returns:
      The sum of the values returned by ``record_result`` for each oracle.

    Raises:
      SystemExit: If the workspace paths cannot be resolved; the exit message
        is the text of the underlying RuntimeError.
    """
    results: Dict[str, int] = {}
    score = 0

    verbose = "--verbose" in argv
    logger_name = os.environ.get("EVAL_LOGGER_NAME", "EGWALKER-AGENT-EVALUATOR")
    logger = get_logger(LoggerConfig(root_name=logger_name))

    try:
        agent_eval_dir, workspace_root = _resolve_workspace_paths()
        config = _build_egwalker_config(
            agent_eval_dir=agent_eval_dir,
            workspace_root=workspace_root,
        )
    except RuntimeError as exc:
        raise SystemExit(str(exc)) from exc

    # Order matters: environment setup, build, benchmark prep, experiment runs.
    checker_classes = (
        OracleEnvSetup,
        OracleArtifactBuild,
        OracleBenchmarkPrep,
        OracleExperimentRuns,
    )
    for checker_cls in checker_classes:
        checker = checker_cls(config=config, logger=logger)
        score += record_result(
            results, type(checker).__name__, checker.run(verbose=verbose)
        )

    logger.info("Agent scores: %s", results)
    return score
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Artifact build oracle for the Eurosys'25 EGWALKER artifact.
"""Artifact build oracle for EGWALKER (EuroSys'25).

Validates:
- Required repository working directories exist.
- Repository working directory exists.
- Build commands execute successfully (captures stdout/stderr/return code).
"""

Expand All @@ -13,102 +13,113 @@
from pathlib import Path

from evaluator.oracle_artifact_build_primitives import (
BuildCommandRequirement,
BuildRequirement,
OracleArtifactBuildBase,
BuildCommandRequirement,
OracleArtifactBuildBase,
)
from evaluator.utils import EntryConfig
from evaluator.utils import EntryConfig, BaseRequirement


@dataclass(frozen = True, slots = True, kw_only = True)
@dataclass(frozen=True, slots=True, kw_only=True)
class BuildTarget:
"""Declarative description of one build command to run."""
"""Declarative description of one build command to run.

name: str
command: Sequence[str]
cwd_relative: Path | None = None
optional: bool = False
timeout_seconds: float = 60.0
env_overrides: Mapping[str, str] = field(default_factory = dict)
Kept intentionally thin: the base primitive (BuildCommandRequirement) performs
the authoritative validation and normalization.
"""

def __post_init__(self) -> None:
if not self.name:
raise ValueError("BuildTarget.name must be non-empty")
if not self.command:
raise ValueError(f"{self.name}: command must be non-empty")
if self.timeout_seconds <= 0:
raise ValueError(f"{self.name}: timeout_seconds must be > 0")
name: str
cmd: Sequence[str]
relative_workdir: Path | None = None
optional: bool = False
timeout_seconds: float = 60.0
env_overrides: Mapping[str, str] = field(default_factory=dict)

# Normalize for downstream requirements.
if self.cwd_relative is not None and not isinstance(self.cwd_relative, Path):
object.__setattr__(self, "cwd_relative", Path(self.cwd_relative))
def __post_init__(self) -> None:
if not self.name:
raise ValueError("BuildTarget.name must be non-empty")

# Freeze command to avoid accidental mutation.
object.__setattr__(self, "command", tuple(self.command))
object.__setattr__(self, "cmd", tuple(self.cmd))

if self.relative_workdir is not None and not isinstance(
self.relative_workdir, Path
):
object.__setattr__(self, "relative_workdir", Path(self.relative_workdir))


class OracleArtifactBuild(OracleArtifactBuildBase):
    """The artifact build oracle for artifact-core.

    Defaults:
      * Runs build commands in the repo keyed by config.name.
      * EntryConfig.repository_paths is expected to contain an entry for config.name.
    """

    # One (name, argv, timeout_seconds) triple per default build target.
    _DEFAULT_TARGET_SPECS: tuple[tuple[str, tuple[str, ...], float], ...] = (
        (
            "artifact-core: make tools",
            (
                "make",
                "-j8",
                "tools/diamond-types/target/release/dt",
                "tools/crdt-converter/target/release/crdt-converter",
                "tools/diamond-types/target/release/paper-stats",
                "tools/paper-benchmarks/target/memusage/paper-benchmarks",
                "tools/paper-benchmarks/target/release/paper-benchmarks",
                "tools/ot-bench/target/memusage/ot-bench",
                "tools/ot-bench/target/release/ot-bench",
            ),
            300.0,
        ),
    )

    def __init__(
        self,
        *,
        config: EntryConfig,
        logger: logging.Logger,
        targets: Sequence[BuildTarget] | None = None,
    ) -> None:
        """Store the config and the build targets to validate.

        Args:
          config: Entry configuration; its ``repository_paths`` is looked up
            by ``config.name`` when the requirements are built.
          logger: Logger forwarded to the base oracle.
          targets: Build targets to run; the defaults from
            ``_DEFAULT_TARGET_SPECS`` are used when None.

        Raises:
          ValueError: If two targets share the same name.
        """
        super().__init__(logger=logger)
        self._config = config

        if targets is None:
            targets = self._make_default_targets()
        self._targets = tuple(targets)

        names = [t.name for t in self._targets]
        if len(names) != len(set(names)):
            raise ValueError(f"Duplicate build target names: {names!r}")

    def _make_default_targets(self) -> tuple[BuildTarget, ...]:
        """Creates the default targets described by _DEFAULT_TARGET_SPECS."""
        return tuple(
            BuildTarget(name=name, cmd=cmd, timeout_seconds=timeout_seconds)
            for (name, cmd, timeout_seconds) in self._DEFAULT_TARGET_SPECS
        )

    def requirements(self) -> Sequence[BaseRequirement]:
        """Returns an ordered list of build requirements to validate."""
        repo_root = self._config.repository_paths.get(self._config.name)

        if repo_root is None:
            # No repository is configured for this entry: return one mandatory
            # requirement, named after the configuration problem, that points
            # at a placeholder directory under home_dir.
            return (
                BuildCommandRequirement(
                    name=f"config: missing repository_paths entry for {self._config.name!r}",
                    optional=False,
                    cwd=Path(self._config.home_dir) / "__MISSING_REPOSITORY_PATH__",
                    cmd=("true",),
                    timeout_seconds=1.0,
                ),
            )

        return tuple(
            BuildCommandRequirement(
                name=target.name,
                optional=target.optional,
                cwd=repo_root,
                cmd=target.cmd,
                relative_workdir=target.relative_workdir,
                timeout_seconds=target.timeout_seconds,
                env_overrides=target.env_overrides,
            )
            for target in self._targets
        )
Loading
Loading