Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions .github/reviewer-bot-tests/test_reviewer_bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -1320,6 +1320,48 @@ def test_trusted_pr_comment_workflow_preflights_same_repo_before_mutation():
assert "RUN_TRUSTED_PR_COMMENT" in workflow_text


def test_pr_comment_observer_routes_through_reviewer_bot_payload_builder():
    """Check the observer workflow's step order and its call into the payload builder."""
    workflow_path = Path(".github/workflows/reviewer-bot-pr-comment-observer.yml")
    workflow_text = workflow_path.read_text(encoding="utf-8")
    observer_steps = yaml.safe_load(workflow_text)["jobs"]["observer"]["steps"]

    # The first four steps must appear in exactly this order.
    expected_step_names = (
        "Install uv",
        "Fetch trusted bot source tarball",
        "Build deferred comment artifact",
        "Upload deferred comment artifact",
    )
    for position, expected_name in enumerate(expected_step_names):
        assert observer_steps[position]["name"] == expected_name

    # The payload must be built by the bot module, run from the fetched source root.
    assert "build_pr_comment_observer_payload" in workflow_text
    assert 'uv run --project "$BOT_SRC_ROOT" python - <<\'PY\'' in workflow_text


def test_build_pr_comment_observer_payload_marks_trusted_direct_same_repo_as_observer_noop(monkeypatch):
    """A same-repo comment from an allow-listed user yields an observer no-op payload."""
    repository = "rustfoundation/safety-critical-rust-coding-guidelines"
    comment_environment = {
        "IS_PULL_REQUEST": "true",
        "GITHUB_REPOSITORY": repository,
        "COMMENT_USER_TYPE": "User",
        "COMMENT_AUTHOR": "PLeVasseur",
        "COMMENT_AUTHOR_ASSOCIATION": "COLLABORATOR",
        "COMMENT_SENDER_TYPE": "User",
        "COMMENT_INSTALLATION_ID": "",
        "COMMENT_PERFORMED_VIA_GITHUB_APP": "false",
        "COMMENT_BODY": "@guidelines-bot /r? @felix91gr",
        "COMMENT_ID": "100",
        "COMMENT_AUTHOR_ID": "123",
        "COMMENT_CREATED_AT": "2026-03-20T20:48:25Z",
        "GITHUB_RUN_ID": "999",
        "GITHUB_RUN_ATTEMPT": "1",
    }
    for variable, value in comment_environment.items():
        monkeypatch.setenv(variable, value)

    def fake_github_api(method, endpoint, data=None):
        # Stub PR metadata: the head repo equals GITHUB_REPOSITORY (same-repo PR).
        return {
            "head": {"repo": {"full_name": "rustfoundation/safety-critical-rust-coding-guidelines"}},
            "user": {"login": "PLeVasseur"},
        }

    monkeypatch.setattr(reviewer_bot, "github_api", fake_github_api)

    observed = reviewer_bot.build_pr_comment_observer_payload(42)

    assert observed["kind"] == "observer_noop"
    assert observed["reason"] == "trusted_direct_same_repo_human_comment"
    assert observed["source_event_key"] == "issue_comment:100"


def test_issue_comment_direct_workflow_exports_issue_state():
    """The direct issue-comment workflow must export the issue state as ISSUE_STATE."""
    workflow_path = Path(".github/workflows/reviewer-bot-issue-comment-direct.yml")
    expected_export = "ISSUE_STATE: ${{ github.event.issue.state }}"
    assert expected_export in workflow_path.read_text(encoding="utf-8")
Expand Down
92 changes: 30 additions & 62 deletions .github/workflows/reviewer-bot-pr-comment-observer.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,32 @@ jobs:
permissions:
contents: read
steps:
- name: Install uv
run: python -m pip install uv
- name: Fetch trusted bot source tarball
  env:
    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  run: |
    python - <<'PY'
    # Download this repository's tarball at GITHUB_SHA through the GitHub API,
    # unpack it under RUNNER_TEMP, and export the extracted root directory as
    # BOT_SRC_ROOT for later steps.
    import io, os, tarfile, urllib.request
    from pathlib import Path

    req = urllib.request.Request(
        f"https://api.github.com/repos/{os.environ['GITHUB_REPOSITORY']}/tarball/{os.environ['GITHUB_SHA']}",
        headers={'Authorization': f"Bearer {os.environ['GITHUB_TOKEN']}", 'Accept': 'application/vnd.github+json'},
    )
    target = Path(os.environ['RUNNER_TEMP']) / 'reviewer-bot-src'
    target.mkdir(parents=True, exist_ok=True)
    with urllib.request.urlopen(req) as response:
        data = response.read()
    with tarfile.open(fileobj=io.BytesIO(data), mode='r:gz') as archive:
        # Use the 'data' extraction filter when this Python provides it so that
        # archive members cannot escape the target directory.
        if hasattr(tarfile, 'data_filter'):
            archive.extractall(target, filter='data')
        else:
            archive.extractall(target)
    # Fail closed unless the archive produced exactly one top-level directory,
    # instead of exporting whichever entry iterdir() happens to list first.
    roots = sorted(path for path in target.iterdir() if path.is_dir())
    if len(roots) != 1:
        raise SystemExit(f'Expected exactly one extracted source root, found {len(roots)}')
    # Append through a context manager so the GITHUB_ENV handle is flushed and closed.
    with open(os.environ['GITHUB_ENV'], 'a', encoding='utf-8') as env_file:
        env_file.write(f'BOT_SRC_ROOT={roots[0]}\n')
    PY
- name: Build deferred comment artifact
env:
BOT_SRC_ROOT: ${{ env.BOT_SRC_ROOT }}
PAYLOAD_PATH: ${{ runner.temp }}/deferred-comment.json
COMMENT_BODY: ${{ github.event.comment.body }}
PR_NUMBER: ${{ github.event.issue.number }}
Expand All @@ -29,68 +53,12 @@ jobs:
COMMENT_INSTALLATION_ID: ${{ github.event.installation.id }}
COMMENT_PERFORMED_VIA_GITHUB_APP: ${{ github.event.comment.performed_via_github_app != null }}
run: |
python - <<'PY'
import hashlib, json, os, re
body = os.environ['COMMENT_BODY'].replace('\r\n', '\n')
normalized = '\n'.join(line.rstrip() for line in body.split('\n')).strip()
comment_user_type = os.environ.get('COMMENT_USER_TYPE', '').strip()
comment_author = os.environ.get('COMMENT_AUTHOR', '').strip()
sender_type = os.environ.get('COMMENT_SENDER_TYPE', '').strip()
installation_id = os.environ.get('COMMENT_INSTALLATION_ID', '').strip()
via_github_app = os.environ.get('COMMENT_PERFORMED_VIA_GITHUB_APP', '').strip().lower()
noop_reason = None
if comment_user_type == 'Bot' or comment_author.endswith('[bot]') or comment_author == 'guidelines-bot':
noop_reason = 'ignored_non_human_automation'
elif installation_id or via_github_app == 'true' or (sender_type and sender_type not in {'User', 'Bot'}):
noop_reason = 'ignored_non_human_automation'
if noop_reason is not None:
payload = {
'schema_version': 1,
'kind': 'observer_noop',
'reason': noop_reason,
'source_workflow_name': 'Reviewer Bot PR Comment Observer',
'source_workflow_file': '.github/workflows/reviewer-bot-pr-comment-observer.yml',
'source_run_id': int(os.environ['GITHUB_RUN_ID']),
'source_run_attempt': int(os.environ['GITHUB_RUN_ATTEMPT']),
'source_event_name': 'issue_comment',
'source_event_action': 'created',
'source_event_key': f"issue_comment:{os.environ['COMMENT_ID']}",
'pr_number': int(os.environ['PR_NUMBER']),
}
else:
command_pattern = re.compile(r'^@guidelines\-bot\s+/[A-Za-z0-9?_\-]+(?:\s+.*)?$')
lines = [line for line in normalized.splitlines() if line.strip()]
command_lines = [line for line in lines if command_pattern.match(line.strip())]
non_command_lines = [line for line in lines if not command_pattern.match(line.strip())]
if not normalized:
comment_class = 'empty_or_whitespace'
elif command_lines and not non_command_lines:
comment_class = 'command_only'
elif command_lines and non_command_lines:
comment_class = 'command_plus_text'
else:
comment_class = 'plain_text'
digest = hashlib.sha256(normalized.encode('utf-8')).hexdigest()
payload = {
'schema_version': 2,
'source_workflow_name': 'Reviewer Bot PR Comment Observer',
'source_workflow_file': '.github/workflows/reviewer-bot-pr-comment-observer.yml',
'source_run_id': int(os.environ['GITHUB_RUN_ID']),
'source_run_attempt': int(os.environ['GITHUB_RUN_ATTEMPT']),
'source_event_name': 'issue_comment',
'source_event_action': 'created',
'source_event_key': f"issue_comment:{os.environ['COMMENT_ID']}",
'pr_number': int(os.environ['PR_NUMBER']),
'comment_id': int(os.environ['COMMENT_ID']),
'comment_class': comment_class,
'has_non_command_text': bool(non_command_lines),
'source_body_digest': digest,
'source_created_at': os.environ['COMMENT_CREATED_AT'],
'actor_login': os.environ['COMMENT_AUTHOR'],
'actor_id': int(os.environ['COMMENT_AUTHOR_ID']),
'actor_class': 'repo_user_principal' if comment_user_type == 'User' else 'unknown_actor',
'source_artifact_name': f"reviewer-bot-comment-context-{os.environ['GITHUB_RUN_ID']}-attempt-{os.environ['GITHUB_RUN_ATTEMPT']}",
}
uv run --project "$BOT_SRC_ROOT" python - <<'PY'
import json
import os
import scripts.reviewer_bot as reviewer_bot

payload = reviewer_bot.build_pr_comment_observer_payload(int(os.environ['PR_NUMBER']))
with open(os.environ['PAYLOAD_PATH'], 'w', encoding='utf-8') as handle:
json.dump(payload, handle)
PY
Expand Down
4 changes: 4 additions & 0 deletions scripts/reviewer_bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -789,6 +789,10 @@ def handle_comment_event(state: dict) -> bool:
return comment_routing_module.handle_comment_event(_runtime_bot(), state)


def build_pr_comment_observer_payload(issue_number: int) -> dict:
    """Build the PR comment observer payload for ``issue_number``.

    Thin module-level entry point: forwards to
    ``comment_routing_module.build_pr_comment_observer_payload`` with the
    object returned by ``_runtime_bot()`` as the bot argument.
    """
    build_payload = comment_routing_module.build_pr_comment_observer_payload
    return build_payload(_runtime_bot(), issue_number)


def handle_manual_dispatch(state: dict) -> bool:
    """Handle a manual dispatch against ``state``.

    Thin module-level entry point: forwards to
    ``maintenance_module.handle_manual_dispatch`` with the object returned by
    ``_runtime_bot()`` as the bot argument and returns its result.
    """
    dispatch = maintenance_module.handle_manual_dispatch
    return dispatch(_runtime_bot(), state)

Expand Down
83 changes: 71 additions & 12 deletions scripts/reviewer_bot_lib/comment_routing.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def _fetch_pr_metadata(bot, issue_number: int) -> dict:
return pull_request


def route_issue_comment_trust(bot, issue_number: int) -> str:
def classify_pr_comment_processing_target(bot, issue_number: int) -> str:
actor_class = classify_issue_comment_actor()
if actor_class in {"bot_account", "github_app_or_other_automation"} or _is_self_comment(bot, os.environ.get("COMMENT_AUTHOR", "")):
return "safe_noop"
Expand All @@ -136,23 +136,82 @@ def route_issue_comment_trust(bot, issue_number: int) -> str:
pr_author = pull_request.get("user", {}).get("login")
is_dependabot_restricted = pr_author == "dependabot[bot]"
author_association = os.environ.get("COMMENT_AUTHOR_ASSOCIATION", "").strip()
workflow_file = os.environ.get("CURRENT_WORKFLOW_FILE", "").strip()
workflow_ref = os.environ.get("GITHUB_REF", "").strip()
direct_match = (
not is_cross_repo
and not is_dependabot_restricted
and actor_class == "repo_user_principal"
and author_association in bot.AUTHOR_ASSOCIATION_TRUST_ALLOWLIST
and workflow_file == ".github/workflows/reviewer-bot-pr-comment-trusted.yml"
and workflow_ref == "refs/heads/main"
)
trusted_principal = actor_class == "repo_user_principal" and author_association in bot.AUTHOR_ASSOCIATION_TRUST_ALLOWLIST
if is_cross_repo or is_dependabot_restricted:
return "pr_deferred_reconcile"
if direct_match:
if trusted_principal:
return "pr_trusted_direct"
raise RuntimeError("Ambiguous same-repo PR comment trust posture; failing closed")


def route_issue_comment_trust(bot, issue_number: int) -> str:
    """Return the routing target for a PR comment, gating the trusted-direct path.

    The target comes from ``classify_pr_comment_processing_target``. Every
    target other than ``"pr_trusted_direct"`` is returned unchanged. The
    trusted-direct target is confirmed only when ``CURRENT_WORKFLOW_FILE``
    names the trusted PR comment workflow and ``GITHUB_REF`` is
    ``refs/heads/main``.

    Raises:
        RuntimeError: the comment classifies as trusted-direct but the current
            workflow file or ref does not match (fail closed).
    """
    routed = classify_pr_comment_processing_target(bot, issue_number)
    if routed == "pr_trusted_direct":
        running_workflow = os.environ.get("CURRENT_WORKFLOW_FILE", "").strip()
        running_ref = os.environ.get("GITHUB_REF", "").strip()
        required = (".github/workflows/reviewer-bot-pr-comment-trusted.yml", "refs/heads/main")
        if (running_workflow, running_ref) != required:
            raise RuntimeError("Ambiguous same-repo PR comment trust posture; failing closed")
        return "pr_trusted_direct"
    return routed


def build_pr_comment_observer_payload(bot, issue_number: int) -> dict:
    """Build the observer workflow's payload for a comment on PR ``issue_number``.

    Comment and run details are read from environment variables
    (``COMMENT_*``, ``GITHUB_RUN_ID``, ``GITHUB_RUN_ATTEMPT``).

    Returns:
        A schema-version-1 ``observer_noop`` payload when the comment comes
        from a bot/automation actor or from the bot itself, or when the
        comment classifies as ``"pr_trusted_direct"``. Otherwise a
        schema-version-2 payload describing the comment: its class, whether it
        has non-command text, a body digest, and actor details.

    Raises:
        KeyError: a required environment variable is missing.
        ValueError: a numeric environment variable is not an integer.
    """
    actor_class = classify_issue_comment_actor()
    comment_id = int(os.environ["COMMENT_ID"])
    source_fields = {
        "source_workflow_name": "Reviewer Bot PR Comment Observer",
        "source_workflow_file": ".github/workflows/reviewer-bot-pr-comment-observer.yml",
        "source_run_id": int(os.environ["GITHUB_RUN_ID"]),
        "source_run_attempt": int(os.environ["GITHUB_RUN_ATTEMPT"]),
        "source_event_name": "issue_comment",
        "source_event_action": "created",
        "source_event_key": f"issue_comment:{comment_id}",
        "pr_number": issue_number,
    }

    def noop_payload(reason: str) -> dict:
        # Both no-op outcomes share this shape and differ only in the reason.
        return {
            "schema_version": 1,
            "kind": "observer_noop",
            "reason": reason,
            **source_fields,
        }

    is_automation = actor_class in {"bot_account", "github_app_or_other_automation"}
    if is_automation or _is_self_comment(bot, os.environ.get("COMMENT_AUTHOR", "")):
        return noop_payload("ignored_non_human_automation")

    if classify_pr_comment_processing_target(bot, issue_number) == "pr_trusted_direct":
        return noop_payload("trusted_direct_same_repo_human_comment")

    raw_body = os.environ["COMMENT_BODY"]
    normalized = _normalize_comment_body(raw_body)
    command_re = re.compile(r"^@guidelines\-bot\s+/[A-Za-z0-9?_\-]+(?:\s+.*)?$")

    # Split the non-blank lines into bot commands and everything else.
    command_lines = []
    non_command_lines = []
    for line in normalized.splitlines():
        if not line.strip():
            continue
        bucket = command_lines if command_re.match(line.strip()) else non_command_lines
        bucket.append(line)

    if not normalized:
        comment_class = "empty_or_whitespace"
    elif not command_lines:
        comment_class = "plain_text"
    elif non_command_lines:
        comment_class = "command_plus_text"
    else:
        comment_class = "command_only"

    if actor_class == "repo_user_principal":
        reported_actor_class = "repo_user_principal"
    else:
        reported_actor_class = "unknown_actor"

    return {
        "schema_version": 2,
        **source_fields,
        "comment_id": comment_id,
        "comment_class": comment_class,
        "has_non_command_text": bool(non_command_lines),
        "source_body_digest": _digest_body(raw_body),
        "source_created_at": os.environ["COMMENT_CREATED_AT"],
        "actor_login": os.environ["COMMENT_AUTHOR"],
        "actor_id": int(os.environ["COMMENT_AUTHOR_ID"]),
        "actor_class": reported_actor_class,
        "source_artifact_name": f"reviewer-bot-comment-context-{os.environ['GITHUB_RUN_ID']}-attempt-{os.environ['GITHUB_RUN_ATTEMPT']}",
    }


def _record_conversation_freshness(bot, state: dict, issue_number: int, comment_author: str, comment_id: int, created_at: str) -> bool:
review_data = bot.ensure_review_entry(state, issue_number, create=True)
if review_data is None:
Expand Down
Loading