feat: reusable GitHub Action and GITHUB_STEP_SUMMARY support (#152)

Siddhant-K-code · ona-agent · web-flow · commit 954dbd18fad4 · 2026-05-31T08:11:15.000+05:30
- action.yml: composite action (uses: Siddhant-K-code/agent-trace@main) that installs agent-trace, runs eval ci, posts to the step summary, and uploads .agent-traces as a workflow artifact
- eval/runner.py: write to $GITHUB_STEP_SUMMARY when set (GitHub Actions native), fall back to .agent-traces/eval-summary.md otherwise
- examples/ci/agent-eval.yml: updated to use the reusable action

Closes #130

Co-authored-by: Ona <no-reply@ona.com>
diff --git a/action.yml b/action.yml
@@ -0,0 +1,116 @@
+# Composite action: installs agent-strace, runs `agent-strace eval ci` (which
+# writes its report to the step summary), and uploads the trace directory as a
+# workflow artifact.
+name: agent-trace eval
+description: Run agent-trace evals in CI, post results to the step summary, and fail on regression.
+
+inputs:
+  config:
+    description: Path to eval config file
+    default: .agent-evals.yaml
+  baseline:
+    description: Path to baseline scores file (optional)
+    default: ""
+  save-baseline:
+    description: Save scores as new baseline after run
+    default: "false"
+  tolerance:
+    description: Regression tolerance (0.0–1.0, default 0.05)
+    default: "0.05"
+  trace-dir:
+    description: Directory where agent-trace sessions are stored
+    default: .agent-traces
+  python-version:
+    description: Python version to use
+    default: "3.12"
+  install-extras:
+    description: Comma-separated optional extras to install (e.g. openai,anthropic)
+    default: ""
+
+outputs:
+  passed:
+    description: "true if all scorers passed, false otherwise"
+    value: ${{ steps.eval.outputs.passed }}
+  summary-path:
+    description: Path to the written eval summary markdown file
+    value: ${{ steps.eval.outputs.summary-path }}
+
+runs:
+  using: composite
+  steps:
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ inputs.python-version }}
+
+    - name: Install agent-trace
+      shell: bash
+      # Inputs are handed to the scripts via env vars instead of being expanded
+      # into the script text, so their contents are never parsed as shell code.
+      env:
+        EXTRAS: ${{ inputs.install-extras }}
+      run: |
+        # NOTE(review): installs `agent-strace`, matching the CLI invoked below; confirm the published package name.
+        if [ -n "$EXTRAS" ]; then
+          pip install "agent-strace[$EXTRAS]"
+        else
+          pip install agent-strace
+        fi
+
+    - name: Run evals
+      id: eval
+      shell: bash
+      # GITHUB_STEP_SUMMARY / GITHUB_OUTPUT are set by the runner for each step and
+      # are not part of the `env` context, so they must not be re-mapped here.
+      env:
+        EVAL_CONFIG: ${{ inputs.config }}
+        EVAL_BASELINE: ${{ inputs.baseline }}
+        EVAL_SAVE_BASELINE: ${{ inputs.save-baseline }}
+        EVAL_TOLERANCE: ${{ inputs.tolerance }}
+        EVAL_TRACE_DIR: ${{ inputs.trace-dir }}
+      run: |
+        # A bash array keeps every value one argument, even if it contains spaces.
+        ARGS=(--config "$EVAL_CONFIG" --trace-dir "$EVAL_TRACE_DIR")
+        ARGS+=(--tolerance "$EVAL_TOLERANCE" --github-summary)
+
+        if [ -n "$EVAL_BASELINE" ]; then
+          ARGS+=(--baseline "$EVAL_BASELINE")
+        fi
+
+        if [ "$EVAL_SAVE_BASELINE" = "true" ]; then
+          ARGS+=(--save-baseline)
+        fi
+
+        STATUS=0
+        agent-strace eval ci "${ARGS[@]}" || STATUS=$?
+
+        if [ "$STATUS" -eq 0 ]; then
+          echo "passed=true" >> "$GITHUB_OUTPUT"
+        else
+          echo "passed=false" >> "$GITHUB_OUTPUT"
+        fi
+
+        # Publish summary-path before failing the step so it is set on regressions
+        # too. When the CLI wrote to the step summary instead of the local file,
+        # keep a copy in the trace dir so it outlives this step and is uploaded.
+        SUMMARY=".agent-traces/eval-summary.md"
+        if [ -f "$SUMMARY" ]; then
+          echo "summary-path=$SUMMARY" >> "$GITHUB_OUTPUT"
+        elif [ -s "${GITHUB_STEP_SUMMARY:-}" ]; then
+          SUMMARY="$EVAL_TRACE_DIR/eval-summary.md"
+          mkdir -p "$EVAL_TRACE_DIR"
+          cp "$GITHUB_STEP_SUMMARY" "$SUMMARY"
+          echo "summary-path=$SUMMARY" >> "$GITHUB_OUTPUT"
+        fi
+
+        if [ "$STATUS" -ne 0 ]; then
+          exit 1
+        fi
+
+    - name: Upload trace artifacts
+      if: always()
+      uses: actions/upload-artifact@v4
+      with:
+        name: agent-traces
+        path: ${{ inputs.trace-dir }}
+        if-no-files-found: ignore
diff --git a/examples/ci/agent-eval.yml b/examples/ci/agent-eval.yml
@@ -2,7 +2,7 @@
 #
 # Runs eval scorers on every PR that touches agent config files.
 # Fails the PR if any scorer drops below its threshold.
-# Posts a score summary as a PR comment.
+# Posts a score summary to the GitHub Actions step summary.
 #
 # Prerequisites:
 #   1. Capture at least one session: agent-strace record -- <your-agent-command>
@@ -27,49 +27,18 @@ jobs:
     steps:
       - uses: actions/checkout@v4
 
-      - name: Set up Python
-        uses: actions/setup-python@v5
+      # Uses the reusable action bundled with agent-trace.
+      # Results appear in the GitHub Actions step summary automatically.
+      - name: Run agent-trace eval
+        uses: Siddhant-K-code/agent-trace@main
         with:
-          python-version: "3.12"
-
-      - name: Install agent-strace
-        run: pip install agent-strace
-
-      # Score the latest session in the dataset against all configured scorers.
-      # Exits 1 if any scorer is below threshold or regresses vs baseline.
-      - name: Run eval
+          baseline: .agent-traces/baselines/main.json
+          tolerance: "0.05"
         env:
           # Required only if using the llm_judge scorer.
-          # Remove if using heuristic scorers only (no_errors, cost_under, etc.)
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-        run: |
-          agent-strace eval ci \
-            --baseline .agent-traces/baselines/main.json \
-            --tolerance 0.05 \
-            --github-summary
-
-      # Post the Markdown summary as a PR comment so reviewers see the score delta.
-      - name: Post eval summary
-        if: always()
-        uses: actions/github-script@v7
-        with:
-          script: |
-            const fs = require('fs');
-            const summaryPath = '.agent-traces/eval-summary.md';
-            if (!fs.existsSync(summaryPath)) {
-              console.log('No eval summary found — skipping comment.');
-              return;
-            }
-            const summary = fs.readFileSync(summaryPath, 'utf8');
-            await github.rest.issues.createComment({
-              issue_number: context.issue.number,
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              body: summary,
-            });
 
-  # Optional: update the baseline on every merge to main.
-  # Commit the updated baseline back to the repo so future PRs compare against it.
+  # Update the baseline on every merge to main so future PRs compare against it.
   update-baseline:
     if: github.event_name == 'push' && github.ref == 'refs/heads/main'
     runs-on: ubuntu-latest
diff --git a/src/agent_trace/__init__.py b/src/agent_trace/__init__.py
@@ -1,3 +1,3 @@
 """agent-trace: strace for AI agents."""
 
-__version__ = "0.54.0"
+__version__ = "0.55.0"
diff --git a/src/agent_trace/eval/runner.py b/src/agent_trace/eval/runner.py
@@ -7,6 +7,7 @@
 
 import argparse
 import json
+import os
 import sys
 from pathlib import Path
 from dataclasses import dataclass, field
@@ -267,10 +268,19 @@ def _write_github_summary(report: "EvalReport", baseline: dict[str, float], tole
         lines.append("")
         lines.append("</details>")
 
-    summary_path = Path(".agent-traces/eval-summary.md")
-    summary_path.parent.mkdir(parents=True, exist_ok=True)
-    summary_path.write_text("\n".join(lines) + "\n")
-    sys.stderr.write(f"GitHub summary written to {summary_path}\n")
+    content = "\n".join(lines) + "\n"
+
+    # Write to $GITHUB_STEP_SUMMARY when running inside GitHub Actions
+    gha_summary = os.environ.get("GITHUB_STEP_SUMMARY", "")
+    if gha_summary:
+        with open(gha_summary, "a", encoding="utf-8") as f:
+            f.write(content)
+        sys.stderr.write(f"GitHub Actions step summary written to {gha_summary}\n")
+    else:
+        summary_path = Path(".agent-traces/eval-summary.md")
+        summary_path.parent.mkdir(parents=True, exist_ok=True)
+        summary_path.write_text(content)
+        sys.stderr.write(f"GitHub summary written to {summary_path}\n")
 
 
 def cmd_eval_ci(args: argparse.Namespace) -> int:
diff --git a/tests/test_eval_extensions.py b/tests/test_eval_extensions.py
@@ -398,6 +398,33 @@ def test_github_summary_no_baseline_shows_dashes(self):
             os.chdir(orig)
         self.assertIn("—", summary)
 
+    def test_github_step_summary_env_var_used_when_set(self):
+        """When GITHUB_STEP_SUMMARY is set, write there instead of the local file."""
+        import os, tempfile
+        report = _make_report([("no_errors", 1.0, 1.0, True)])
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".md", delete=False) as f:
+            gha_path = f.name
+        try:
+            orig_env = os.environ.get("GITHUB_STEP_SUMMARY")
+            os.environ["GITHUB_STEP_SUMMARY"] = gha_path
+            orig = os.getcwd()
+            os.chdir(self.tmp)
+            try:
+                _write_github_summary(report, {}, tolerance=0.0)
+            finally:
+                os.chdir(orig)
+                if orig_env is None:
+                    os.environ.pop("GITHUB_STEP_SUMMARY", None)
+                else:
+                    os.environ["GITHUB_STEP_SUMMARY"] = orig_env
+
+            content = Path(gha_path).read_text()
+            self.assertIn("agent-strace eval", content)
+            # Local fallback file must NOT have been written
+            self.assertFalse((Path(self.tmp) / ".agent-traces" / "eval-summary.md").exists())
+        finally:
+            os.unlink(gha_path)
+
 
 if __name__ == "__main__":
     unittest.main()

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -1,3 +1,3 @@` |
| `1` | `1` | `"""agent-trace: strace for AI agents."""` |
| `2` | `2` |  |
| `3` |  | `-__version__ = "0.54.0"` |
|  | `3` | `+__version__ = "0.55.0"` |