sys-intelligence
diff --git a/‎benchmarks/arteval_bench/data/benchmark/arteval_tasks.jsonl‎
Lines changed: 3 additions & 1 deletion b/‎benchmarks/arteval_bench/data/benchmark/arteval_tasks.jsonl‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎benchmarks/arteval_bench/data/benchmark/osdi24_anvil/_agent_eval/main.py‎
Lines changed: 32 additions & 0 deletions b/‎benchmarks/arteval_bench/data/benchmark/osdi24_anvil/_agent_eval/main.py‎
Lines changed: 32 additions & 0 deletions
diff --git a/‎benchmarks/arteval_bench/data/benchmark/osdi24_anvil/_agent_eval/oracle_artifact_build.py‎
Lines changed: 86 additions & 0 deletions b/‎benchmarks/arteval_bench/data/benchmark/osdi24_anvil/_agent_eval/oracle_artifact_build.py‎
Lines changed: 86 additions & 0 deletions
diff --git a/‎benchmarks/arteval_bench/data/benchmark/osdi24_anvil/_agent_eval/oracle_benchmark_prep.py‎
Lines changed: 140 additions & 0 deletions b/‎benchmarks/arteval_bench/data/benchmark/osdi24_anvil/_agent_eval/oracle_benchmark_prep.py‎
Lines changed: 140 additions & 0 deletions
@@ -1 +1,3 @@
-{"artifact_id": "sosp24_wasabi", "artifact_dir": "sosp24_wasabi", "artifact_readme": "sosp24_wasabi/wasabi/README.md", "artifact_url": "https://github.com/bastoica/wasabi/tree/sosp24-ae", "evaluator": "sosp24_wasabi/wasabi/_agent_eval/main.py", "expected_score": 4, "docer_env": "bastoica/ae-agent-ubuntu24.04:latest"}
+{"artifact_id": "sosp24_wasabi", "artifact_dir": "sosp24_wasabi", "artifact_readme": "sosp24_wasabi/wasabi/README.md", "artifact_url": "https://github.com/bastoica/wasabi/tree/sosp24-ae", "evaluator": "sosp24_wasabi/wasabi/_agent_eval/main.py", "expected_score": 4, "docer_env": "bastoica/ae-agent-ubuntu24.04:latest"}
+{"artifact_id": "osdi24_anvil", "artifact_dir": "osdi24_anvil", "artifact_readme": "osdi24_anvil/anvil/README.md", "artifact_url": "https://github.com/anvil-verifier/anvil", "evaluator": "osdi24_anvil/anvil/_agent_eval/main.py", "expected_score": 4, "docer_env": "bastoica/ae-agent-ubuntu24.04:latest"}
+{"artifact_id": "sosp23_acto", "artifact_dir": "sosp23_acto", "artifact_readme": "sosp23_acto/acto/README.md", "artifact_url": "https://github.com/xlab-uiuc/acto", "evaluator": "sosp23_acto/acto/_agent_eval/main.py", "expected_score": 4, "docer_env": "bastoica/ae-agent-ubuntu24.04:latest"}
@@ -0,0 +1,32 @@
+#!/usr/bin/env python3
+import sys
+from typing import Dict
+
+from oracle_artifact_build import OracleArtifactBuild
+from oracle_env_setup import OracleEnvSetup
+from oracle_benchmark_prep import OracleBenchmarkPrep
+from oracle_experiment_runs import OracleExperimentRuns
+
+from utils import logger
+
+def main():
+  """
+  Run every oracle checker in a fixed order and tally the passes.
+
+  Each oracle class is instantiated without arguments and the truthiness of
+  its run() result decides pass/fail. One PASS/FAIL line per oracle and a
+  final summary dict are written to the logger.
+
+  Returns:
+    The number of oracles whose run() returned a truthy value.
+  """
+  oracles = (OracleEnvSetup, OracleArtifactBuild, OracleBenchmarkPrep, OracleExperimentRuns)
+  results: Dict[str, int] = {}
+
+  for oracle_cls in oracles:
+    passed = oracle_cls().run()
+    label = oracle_cls.__name__
+    verdict = "PASS" if passed else "FAIL"
+    logger.info(f"{label}: {verdict}")
+    results[label] = 1 if passed else 0
+
+  logger.info(f"Agent scores: {results}")
+  # Each entry is 0 or 1 and the class names are distinct, so the sum is the pass count.
+  return sum(results.values())
+
+
+# Script entry point. The score returned by main() is discarded here, so the
+# outcome is observable only through the logger output.
+if __name__ == "__main__":
+  main()
@@ -0,0 +1,86 @@
+import os
+import subprocess
+from dataclasses import dataclass
+from typing import Iterable, Optional, Tuple
+from pathlib import Path
+
+from utils import REPO_DIRS
+from utils import logger
+
+
+@dataclass(frozen=True)
+class BuildTarget:
+  # Immutable description of one build step: which repository to build and
+  # which command to run inside it.
+
+  # Label used to identify the target in failure messages.
+  name: str
+  # Key used to look up the repository directory in REPO_DIRS.
+  repo_key: str
+  # Command as an argument list, run from the repository directory.
+  cmd: list[str]
+
+
+# Targets built by OracleArtifactBuild.build_check(), in list order.
+# NOTE(review): this module sits under osdi24_anvil/_agent_eval, yet the only
+# target is the "acto" repository built with `make lib` - confirm this is the
+# intended build for the Anvil artifact and not carried over from the acto
+# evaluator.
+BUILD_TARGETS: list[BuildTarget] = [
+  BuildTarget(
+    name="acto",
+    repo_key="acto",
+    cmd=["make", "lib"],
+  ),
+]
+
+
+class OracleArtifactBuild:
+  """
+  Oracle that runs every configured build command and passes only if all of
+  them exit with status 0.
+  """
+
+  def __init__(self) -> None:
+    # Mapping from repo key to repository directory, shared with the other oracles.
+    self.repo_dirs = REPO_DIRS
+
+  def run_shell_command(
+    self,
+    cmd: Iterable[str],
+    cwd: Optional[Path] = None,
+  ) -> Tuple[int, str, str]:
+    """
+    Run a command and return (rc, stdout, stderr) tuple.
+
+    Launch failures never raise. A missing executable or working directory
+    yields rc 127; any other OS-level failure (for example permission denied,
+    or a cwd that is not a directory) yields rc 126. In both cases the error
+    text is returned in the stderr slot.
+    """
+    try:
+      cp = subprocess.run(
+        list(cmd),  # materialise so any iterable of arguments is accepted
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        text=True,
+        cwd=str(cwd) if cwd is not None else None,
+      )
+    except FileNotFoundError as exc:
+      return 127, "", str(exc)
+    except OSError as exc:
+      return 126, "", str(exc)
+    return cp.returncode, cp.stdout or "", cp.stderr or ""
+
+  def build_target(self, target: BuildTarget) -> Optional[str]:
+    """
+    Build a single target using its configured repository and command.
+
+    Returns None on success, otherwise a short description of the problem.
+    """
+    repo_dir = self.repo_dirs.get(target.repo_key, "")
+    if not repo_dir:
+      return f"{target.name} repo directory undefined"
+
+    repo_path = Path(os.path.expanduser(repo_dir))
+    # is_dir() rather than exists(): a regular file at this path cannot be
+    # used as the working directory for the build command.
+    if not repo_path.is_dir():
+      return f"{target.name} repo directory missing"
+
+    rc, out, err = self.run_shell_command(target.cmd, cwd=repo_path)
+    if rc != 0:
+      # Surface the last line of output so the failure can be diagnosed from the log.
+      tail = (err.strip() or out.strip()).splitlines()
+      detail = f": {tail[-1].strip()}" if tail else ""
+      return f"{target.name} build failed (rc={rc}){detail}"
+
+    return None
+
+  def build_check(self) -> Tuple[bool, str]:
+    """
+    Run builds for all configured targets and collect failures.
+
+    Returns (True, "") when every target builds, otherwise (False, reasons)
+    with the individual failure messages joined by "; ".
+    """
+    problems = [msg for msg in map(self.build_target, BUILD_TARGETS) if msg]
+    return not problems, "; ".join(problems)
+
+  def run(self) -> bool:
+    """
+    Execute the build check, log the outcome, and return True on success.
+    """
+    ok, why = self.build_check()
+    status = "PASS" if ok else "FAIL" + (f" - {why}" if why else "")
+    logger.info(f"Build: {status}")
+    return ok
@@ -0,0 +1,140 @@
+#!/usr/bin/env python3
+import sys
+import subprocess
+from pathlib import Path
+
+from utils import REPO_DIRS, logger
+
+
+class OracleBenchmarkPrep:
+  """
+  Oracle that validates the acto checkout: it must be a git working tree
+  whose origin remote and current branch match the expected values, with any
+  configured submodules initialized.
+  """
+
+  def __init__(self):
+    # Expand a leading "~" so a home-relative entry in REPO_DIRS resolves to a
+    # real directory instead of failing the is_dir() check.
+    self.repo_root = Path(REPO_DIRS["acto"]).expanduser()
+    self.expected_remote = "https://github.com/xlab-uiuc/acto.git"
+    self.expected_branch = "anvil-dev"
+
+  def run_shell_command(self, cmd):
+    """
+    Run a command and return (rc, stdout, stderr) tuple.
+
+    stdout and stderr are stripped of surrounding whitespace. Launch failures
+    never raise: a missing executable yields rc 127 and any other OS-level
+    failure yields rc 126, with the error text in the stderr slot.
+    """
+    try:
+      cp = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+    except FileNotFoundError as e:
+      return 127, "", str(e)
+    except OSError as e:
+      return 126, "", str(e)
+    return cp.returncode, (cp.stdout or "").strip(), (cp.stderr or "").strip()
+
+  def _git(self, *args):
+    """
+    Run a git subcommand against the repository root.
+    """
+    return self.run_shell_command(["git", "-C", str(self.repo_root), *args])
+
+  def check_repo_exists(self):
+    """
+    Check that repository root exists and is a git working tree.
+    """
+    if not self.repo_root.is_dir():
+      return False, f"acto: FAIL (repo) - directory not found: {self.repo_root}"
+
+    rc, out, err = self._git("rev-parse", "--is-inside-work-tree")
+    if rc != 0 or out != "true":
+      return False, f"acto: FAIL (repo) - not a git working tree: {err or out}"
+
+    return True, "acto: PASS (repo) - git working tree present"
+
+  def check_remote_origin(self):
+    """
+    Check that <origin> remote matches the expected repository URL.
+
+    The comparison ignores surrounding whitespace, a trailing "/" and a
+    trailing ".git", so equivalent spellings of the same URL are accepted.
+    """
+    rc, out, err = self._git("remote", "get-url", "origin")
+    if rc != 0:
+      return False, f"acto: FAIL (remote) - cannot read origin remote: {err or out}"
+
+    origin_url = (out or "").strip()
+
+    def normalize(url: str) -> str:
+      url = url.strip().rstrip("/")
+      return url[:-4] if url.endswith(".git") else url
+
+    if normalize(origin_url) != normalize(self.expected_remote):
+      return False, (
+        "acto: FAIL (remote) - origin URL "
+        f"{origin_url!r} does not match expected {self.expected_remote!r}"
+      )
+
+    return True, f"acto: PASS (remote) - origin URL matches {self.expected_remote}"
+
+  def check_branch_and_head(self):
+    """
+    Check that the current branch is the expected one and that the current
+    commit resolves to a valid hash.
+    """
+    rc, out, err = self._git("rev-parse", "--abbrev-ref", "HEAD")
+    if rc != 0:
+      return False, f"acto: FAIL (branch) - cannot read current branch: {err or out}"
+
+    branch = (out or "").strip()
+    if branch != self.expected_branch:
+      return False, f"acto: FAIL (branch) - {branch!r} != expected {self.expected_branch!r}"
+
+    rc, out, err = self._git("rev-parse", "HEAD")
+    if rc != 0:
+      return False, f"acto: FAIL (commit) - cannot read HEAD: {err or out}"
+
+    head = (out or "").strip()
+    if not head:
+      return False, "acto: FAIL (commit) - empty HEAD hash"
+
+    return True, f"acto: PASS (branch/commit) - {branch}@{head[:12]}"
+
+  def check_submodules_recursive(self):
+    """
+    Check that submodules (if any) are initialized, approximating a --recursive clone.
+    """
+    gitmodules = self.repo_root / ".gitmodules"
+    if not gitmodules.exists():
+      # No submodules configured; nothing to check
+      return True, "acto: PASS (submodules) - no submodules configured"
+
+    rc, out, err = self._git("submodule", "status", "--recursive")
+    if rc != 0:
+      return False, f"acto: FAIL (submodules) - git submodule status failed: {err or out}"
+
+    # Heuristic: lines starting with '-' indicate uninitialized submodules
+    if any(line.startswith("-") for line in out.splitlines()):
+      return False, (
+        "acto: FAIL (submodules) - uninitialized submodules present "
+        "(clone may have been done without --recursive)"
+      )
+
+    return True, "acto: PASS (submodules) - all submodules initialized"
+
+  def run(self):
+    """
+    Run all repository checks and return True on overall success.
+
+    Every check runs and logs its message even after an earlier one has
+    failed, so the log always contains one line per check.
+    """
+    checks = (
+      self.check_repo_exists,
+      self.check_remote_origin,
+      self.check_branch_and_head,
+      self.check_submodules_recursive,
+    )
+
+    results: list[bool] = []
+    for check in checks:
+      ok, msg = check()
+      logger.info(msg)
+      results.append(ok)
+
+    return all(results)