From 29a528fbd6b8ff97be5c60443047d5d2d193f89f Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 21:16:52 -0700 Subject: [PATCH 01/50] Add methodology analysis phase for RLCR loop exit Add a pre-exit analysis phase that spawns an Opus agent to review development records from a methodology perspective. The agent produces a sanitized report (no project-specific info) and optionally helps the user file a GitHub issue with improvement suggestions. New --privacy flag disables the feature; legacy loops default to privacy=true (opt-in only for new loops). The phase integrates into all three non-manual exit paths (complete, stop, maxiter) using the established Finalize Phase pattern with state file renaming and a completion artifact gate. --- commands/cancel-rlcr-loop.md | 3 +- commands/start-rlcr-loop.md | 2 +- hooks/lib/loop-common.sh | 37 +++- hooks/lib/methodology-analysis.sh | 186 ++++++++++++++++++ hooks/loop-bash-validator.sh | 30 ++- hooks/loop-codex-stop-hook.sh | 45 ++++- hooks/loop-edit-validator.sh | 9 +- hooks/loop-write-validator.sh | 13 +- .../claude/methodology-analysis-prompt.md | 73 +++++++ scripts/cancel-rlcr-loop.sh | 13 +- scripts/lib/monitor-common.sh | 6 +- scripts/setup-rlcr-loop.sh | 7 + skills/humanize/SKILL.md | 4 + 13 files changed, 410 insertions(+), 18 deletions(-) create mode 100644 hooks/lib/methodology-analysis.sh create mode 100644 prompt-template/claude/methodology-analysis-prompt.md diff --git a/commands/cancel-rlcr-loop.md b/commands/cancel-rlcr-loop.md index f6891406..eceb580e 100644 --- a/commands/cancel-rlcr-loop.md +++ b/commands/cancel-rlcr-loop.md @@ -17,6 +17,7 @@ To cancel the active loop: 2. Check the first line of output: - **NO_LOOP** or **NO_ACTIVE_LOOP**: Say "No active RLCR loop found." 
- **CANCELLED**: Report the cancellation message from the output + - **CANCELLED_METHODOLOGY_ANALYSIS**: Report the cancellation message from the output - **CANCELLED_FINALIZE**: Report the cancellation message from the output - **FINALIZE_NEEDS_CONFIRM**: The loop is in Finalize Phase. Continue to step 3 @@ -33,6 +34,6 @@ To cancel the active loop: - **If user chooses "No, let it finish"**: - Report: "Understood. The Finalize Phase will continue. Once complete, the loop will end normally." -**Key principle**: The script handles all cancellation logic. A loop is active if `state.md` (normal loop) or `finalize-state.md` (Finalize Phase) exists in the newest loop directory. +**Key principle**: The script handles all cancellation logic. A loop is active if `state.md` (normal loop), `methodology-analysis-state.md` (Methodology Analysis Phase), or `finalize-state.md` (Finalize Phase) exists in the newest loop directory. The loop directory with summaries, review results, and state information will be preserved for reference. 
diff --git a/commands/start-rlcr-loop.md b/commands/start-rlcr-loop.md index 8d08ce6a..a73ea27f 100644 --- a/commands/start-rlcr-loop.md +++ b/commands/start-rlcr-loop.md @@ -1,6 +1,6 @@ --- description: "Start iterative loop with Codex review" -argument-hint: "[path/to/plan.md | --plan-file path/to/plan.md] [--max N] [--codex-model MODEL:EFFORT] [--codex-timeout SECONDS] [--track-plan-file] [--push-every-round] [--base-branch BRANCH] [--full-review-round N] [--skip-impl] [--claude-answer-codex] [--agent-teams]" +argument-hint: "[path/to/plan.md | --plan-file path/to/plan.md] [--max N] [--codex-model MODEL:EFFORT] [--codex-timeout SECONDS] [--track-plan-file] [--push-every-round] [--base-branch BRANCH] [--full-review-round N] [--skip-impl] [--claude-answer-codex] [--agent-teams] [--privacy]" allowed-tools: - "Bash(${CLAUDE_PLUGIN_ROOT}/scripts/setup-rlcr-loop.sh:*)" - "Read" diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index 5151018f..ea8f62c7 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -38,6 +38,7 @@ readonly FIELD_FULL_REVIEW_ROUND="full_review_round" readonly FIELD_ASK_CODEX_QUESTION="ask_codex_question" readonly FIELD_SESSION_ID="session_id" readonly FIELD_AGENT_TEAMS="agent_teams" +readonly FIELD_PRIVACY_MODE="privacy_mode" # Default Codex configuration (single source of truth - all scripts reference this) # Scripts can pre-set DEFAULT_CODEX_MODEL/DEFAULT_CODEX_EFFORT before sourcing to override. 
@@ -216,7 +217,9 @@ extract_session_id() { resolve_active_state_file() { local loop_dir="$1" - if [[ -f "$loop_dir/finalize-state.md" ]]; then + if [[ -f "$loop_dir/methodology-analysis-state.md" ]]; then + echo "$loop_dir/methodology-analysis-state.md" + elif [[ -f "$loop_dir/finalize-state.md" ]]; then echo "$loop_dir/finalize-state.md" elif [[ -f "$loop_dir/state.md" ]]; then echo "$loop_dir/state.md" @@ -234,7 +237,10 @@ resolve_any_state_file() { local loop_dir="$1" # Prefer active states - if [[ -f "$loop_dir/finalize-state.md" ]]; then + if [[ -f "$loop_dir/methodology-analysis-state.md" ]]; then + echo "$loop_dir/methodology-analysis-state.md" + return + elif [[ -f "$loop_dir/finalize-state.md" ]]; then echo "$loop_dir/finalize-state.md" return elif [[ -f "$loop_dir/state.md" ]]; then @@ -364,6 +370,7 @@ _parse_state_fields() { STATE_ASK_CODEX_QUESTION=$(echo "$STATE_FRONTMATTER" | grep "^${FIELD_ASK_CODEX_QUESTION}:" | sed "s/${FIELD_ASK_CODEX_QUESTION}: *//" | tr -d ' ' || true) STATE_SESSION_ID=$(echo "$STATE_FRONTMATTER" | grep "^${FIELD_SESSION_ID}:" | sed "s/${FIELD_SESSION_ID}: *//" || true) STATE_AGENT_TEAMS=$(echo "$STATE_FRONTMATTER" | grep "^${FIELD_AGENT_TEAMS}:" | sed "s/${FIELD_AGENT_TEAMS}: *//" | tr -d ' ' || true) + STATE_PRIVACY_MODE=$(echo "$STATE_FRONTMATTER" | grep "^${FIELD_PRIVACY_MODE}:" | sed "s/${FIELD_PRIVACY_MODE}: *//" | tr -d ' ' || true) } # Parse state file frontmatter and set variables (tolerant mode with defaults) @@ -406,6 +413,8 @@ parse_state_file() { STATE_FULL_REVIEW_ROUND="${STATE_FULL_REVIEW_ROUND:-5}" STATE_ASK_CODEX_QUESTION="${STATE_ASK_CODEX_QUESTION:-true}" STATE_AGENT_TEAMS="${STATE_AGENT_TEAMS:-false}" + # Default privacy_mode to "true" for legacy loops that pre-date this field + STATE_PRIVACY_MODE="${STATE_PRIVACY_MODE:-true}" # STATE_REVIEW_STARTED left as-is (empty if missing, to allow schema validation) return 0 @@ -683,6 +692,21 @@ is_finalize_state_file_path() { echo "$path_lower" | grep -qE 
'finalize-state\.md$' } +# Check if a path (lowercase) targets methodology-analysis-state.md +is_methodology_analysis_state_file_path() { + local path_lower="$1" + echo "$path_lower" | grep -qE 'methodology-analysis-state\.md$' +} + +# Standard message for blocking methodology-analysis-state file modifications +methodology_analysis_state_file_blocked_message() { + local fallback="# Methodology Analysis State File Modification Blocked + +You cannot modify methodology-analysis-state.md. This file is managed by the loop system during the Methodology Analysis Phase." + + load_and_render_safe "$TEMPLATE_DIR" "block/methodology-analysis-state-file-modification.md" "$fallback" +} + # Check if a path (lowercase) targets finalize-summary.md is_finalize_summary_path() { local path_lower="$1" @@ -847,7 +871,8 @@ is_cancel_authorized() { src=$(_normalize_path "$src") local expected_src_state="${loop_dir_lower}state.md" local expected_src_finalize="${loop_dir_lower}finalize-state.md" - if [[ "$src" != "$expected_src_state" ]] && [[ "$src" != "$expected_src_finalize" ]]; then + local expected_src_methodology="${loop_dir_lower}methodology-analysis-state.md" + if [[ "$src" != "$expected_src_state" ]] && [[ "$src" != "$expected_src_finalize" ]] && [[ "$src" != "$expected_src_methodology" ]]; then return 5 fi @@ -860,9 +885,11 @@ is_cancel_authorized() { # SECURITY: Reject if source file is a symlink (filesystem check) # Determine source file by comparing against expected paths (not substring match) - # This avoids vulnerability when loop directory path contains "finalize" + # This avoids vulnerability when loop directory path contains "finalize" or "methodology" local src_original - if [[ "$src" == "$expected_src_finalize" ]]; then + if [[ "$src" == "$expected_src_methodology" ]]; then + src_original="${active_loop_dir}/methodology-analysis-state.md" + elif [[ "$src" == "$expected_src_finalize" ]]; then src_original="${active_loop_dir}/finalize-state.md" else 
src_original="${active_loop_dir}/state.md" diff --git a/hooks/lib/methodology-analysis.sh b/hooks/lib/methodology-analysis.sh new file mode 100644 index 00000000..0b61d5ae --- /dev/null +++ b/hooks/lib/methodology-analysis.sh @@ -0,0 +1,186 @@ +#!/bin/bash +# +# Methodology Analysis Phase library +# +# Provides functions for the methodology improvement analysis phase that runs +# before the RLCR loop truly exits. An independent Opus agent analyzes the +# development records from a pure methodology perspective and optionally helps +# the user file a GitHub issue with improvement suggestions. +# +# This library is sourced by loop-codex-stop-hook.sh. +# + +# Source guard: prevent double-sourcing +[[ -n "${_METHODOLOGY_ANALYSIS_LOADED:-}" ]] && return 0 2>/dev/null || true +_METHODOLOGY_ANALYSIS_LOADED=1 + +# Enter the methodology analysis phase +# +# Renames the current state file to methodology-analysis-state.md, records the +# exit reason, renders the analysis prompt, and outputs a block JSON response. 
+# +# Arguments: +# $1 - exit_reason: "complete", "stop", or "maxiter" +# $2 - exit_reason_description: human-readable explanation of why the loop is exiting +# +# Globals read: +# PRIVACY_MODE - "true" to skip analysis, "false" to proceed +# STATE_FILE - path to the current active state file +# LOOP_DIR - path to the loop directory +# CURRENT_ROUND - current round number +# MAX_ITERATIONS - max iterations setting +# TEMPLATE_DIR - template directory for prompt rendering +# +# Returns: +# 0 - analysis phase entered, block JSON has been output, caller should exit 0 +# 1 - analysis should be skipped (privacy on, already done, or re-entry) +# +enter_methodology_analysis_phase() { + local exit_reason="$1" + local exit_reason_description="$2" + + # Skip if privacy mode is on + if [[ "$PRIVACY_MODE" == "true" ]]; then + echo "Methodology analysis skipped (privacy mode enabled)" >&2 + return 1 + fi + + # Prevent re-entry: if methodology-analysis-state.md already exists, skip + if [[ -f "$LOOP_DIR/methodology-analysis-state.md" ]]; then + echo "Methodology analysis phase already active, skipping re-entry" >&2 + return 1 + fi + + # Skip if already completed in a previous attempt + if [[ -f "$LOOP_DIR/methodology-analysis-done.md" ]]; then + local done_content + done_content=$(cat "$LOOP_DIR/methodology-analysis-done.md" 2>/dev/null || echo "") + if [[ -n "$done_content" ]]; then + echo "Methodology analysis already completed, skipping" >&2 + return 1 + fi + fi + + # Rename current state file to methodology-analysis-state.md + mv "$STATE_FILE" "$LOOP_DIR/methodology-analysis-state.md" + echo "State file renamed to: $LOOP_DIR/methodology-analysis-state.md" >&2 + + # Record the original exit reason so the completion handler can finalize + echo "$exit_reason" > "$LOOP_DIR/.methodology-exit-reason" + + # Create empty placeholder for the completion artifact + touch "$LOOP_DIR/methodology-analysis-done.md" + + # Render prompt template + local fallback="# Methodology Analysis Phase 
+ +Please analyze the development records in $LOOP_DIR and provide methodology improvement suggestions. +Write your analysis to $LOOP_DIR/methodology-analysis-report.md. +When done, write a completion note to $LOOP_DIR/methodology-analysis-done.md." + + local analysis_prompt + analysis_prompt=$(load_and_render_safe "$TEMPLATE_DIR" "claude/methodology-analysis-prompt.md" "$fallback" \ + "LOOP_DIR=$LOOP_DIR" \ + "EXIT_REASON=$exit_reason" \ + "EXIT_REASON_DESCRIPTION=$exit_reason_description" \ + "CURRENT_ROUND=$CURRENT_ROUND" \ + "MAX_ITERATIONS=$MAX_ITERATIONS") + + # Output block JSON with the rendered prompt + jq -n \ + --arg reason "$analysis_prompt" \ + --arg msg "Loop: Methodology Analysis Phase - analyzing development methodology" \ + '{ + "decision": "block", + "reason": $reason, + "systemMessage": $msg + }' + + return 0 +} + +# Complete the methodology analysis phase +# +# Checks the completion artifact, reads the original exit reason, renames the +# state file to the appropriate terminal state, and cleans up marker files. +# +# Globals read: +# LOOP_DIR - path to the loop directory +# +# Returns: +# 0 - completion successful, caller should exit 0 (allow exit) +# 1 - completion artifact missing or empty, caller should block +# +complete_methodology_analysis() { + local done_file="$LOOP_DIR/methodology-analysis-done.md" + + # Check completion artifact has actual content (not just empty placeholder) + if [[ ! 
-f "$done_file" ]]; then + return 1 + fi + + local done_content + done_content=$(cat "$done_file" 2>/dev/null || echo "") + if [[ -z "$done_content" ]]; then + return 1 + fi + + # Read exit reason + local exit_reason="complete" + if [[ -f "$LOOP_DIR/.methodology-exit-reason" ]]; then + exit_reason=$(cat "$LOOP_DIR/.methodology-exit-reason" 2>/dev/null || echo "complete") + exit_reason=$(echo "$exit_reason" | tr -d '[:space:]') + fi + + # Validate exit reason + case "$exit_reason" in + complete|stop|maxiter) + ;; + *) + echo "Warning: Invalid methodology exit reason '$exit_reason', defaulting to complete" >&2 + exit_reason="complete" + ;; + esac + + # Rename methodology-analysis-state.md to the terminal state + local target_name="${exit_reason}-state.md" + mv "$LOOP_DIR/methodology-analysis-state.md" "$LOOP_DIR/$target_name" + echo "Methodology analysis complete. State preserved as: $LOOP_DIR/$target_name" >&2 + + # Clean up marker file + rm -f "$LOOP_DIR/.methodology-exit-reason" + + return 0 +} + +# Block exit because methodology analysis is incomplete +# +# Outputs a block JSON instructing Claude to complete the analysis before exiting. +# +# Globals read: +# LOOP_DIR - path to the loop directory +# +block_methodology_analysis_incomplete() { + local done_file="$LOOP_DIR/methodology-analysis-done.md" + + local reason="# Methodology Analysis Incomplete + +Please complete the methodology analysis before exiting. + +You need to: +1. Spawn an Opus agent to analyze the development records +2. Review the analysis report +3. Optionally help the user file a GitHub issue +4. Write a completion note to: $done_file + +The completion marker file must contain actual content (not be empty) to signal that the analysis is done." 
+ + jq -n \ + --arg reason "$reason" \ + --arg msg "Loop: Methodology Analysis Phase - please complete the analysis" \ + '{ + "decision": "block", + "reason": $reason, + "systemMessage": $msg + }' +} diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index 948612e1..211e1478 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -148,6 +148,15 @@ if [[ -n "$ACTIVE_LOOP_DIR" ]]; then # 1. command_modifies_file checks if DESTINATION contains state.md # 2. Additional check below catches if SOURCE contains state.md (e.g., mv state.md /tmp/foo) +if command_modifies_file "$COMMAND_LOWER" "methodology-analysis-state\.md"; then + # Check for cancel signal file - allow authorized cancel operation + if is_cancel_authorized "$ACTIVE_LOOP_DIR" "$COMMAND_LOWER"; then + exit 0 + fi + methodology_analysis_state_file_blocked_message >&2 + exit 2 +fi + if command_modifies_file "$COMMAND_LOWER" "finalize-state\.md"; then # Check for cancel signal file - allow authorized cancel operation if is_cancel_authorized "$ACTIVE_LOOP_DIR" "$COMMAND_LOWER"; then @@ -182,6 +191,7 @@ fi # This catches chained commands like: true; mv state.md /tmp/foo MV_CP_SOURCE_PATTERN="^[[:space:]]*(sudo([[:space:]]+-?[^[:space:];&|]+)*[[:space:]]+)?(env[[:space:]]+[^;&|]*[[:space:]]+)?(command([[:space:]]+-?[^[:space:];&|]+)*[[:space:]]+)?(mv|cp)[[:space:]].*[[:space:]/\"']state\.md" MV_CP_FINALIZE_SOURCE_PATTERN="^[[:space:]]*(sudo([[:space:]]+-?[^[:space:];&|]+)*[[:space:]]+)?(env[[:space:]]+[^;&|]*[[:space:]]+)?(command([[:space:]]+-?[^[:space:];&|]+)*[[:space:]]+)?(mv|cp)[[:space:]].*[[:space:]/\"']finalize-state\.md" +MV_CP_METHODOLOGY_SOURCE_PATTERN="^[[:space:]]*(sudo([[:space:]]+-?[^[:space:];&|]+)*[[:space:]]+)?(env[[:space:]]+[^;&|]*[[:space:]]+)?(command([[:space:]]+-?[^[:space:];&|]+)*[[:space:]]+)?(mv|cp)[[:space:]].*[[:space:]/\"']methodology-analysis-state\.md" # Replace shell operators with newlines, then check each segment # Order matters: |& 
before |, && before single & @@ -295,7 +305,17 @@ while IFS= read -r SEGMENT; do t again ') - # Check for finalize-state.md as SOURCE first (more specific pattern) + # Check for methodology-analysis-state.md as SOURCE first (most specific pattern) + if echo "$SEGMENT_CLEANED" | grep -qE "$MV_CP_METHODOLOGY_SOURCE_PATTERN"; then + # Check for cancel signal file - allow authorized cancel operation + if is_cancel_authorized "$ACTIVE_LOOP_DIR" "$COMMAND_LOWER"; then + exit 0 + fi + methodology_analysis_state_file_blocked_message >&2 + exit 2 + fi + + # Check for finalize-state.md as SOURCE (more specific than state.md) if echo "$SEGMENT_CLEANED" | grep -qE "$MV_CP_FINALIZE_SOURCE_PATTERN"; then # Check for cancel signal file - allow authorized cancel operation if is_cancel_authorized "$ACTIVE_LOOP_DIR" "$COMMAND_LOWER"; then @@ -319,6 +339,14 @@ done <<< "$COMMAND_SEGMENTS" # This catches bypass attempts like: sh -c 'mv state.md /tmp/foo' # Pattern: look for sh/bash with -c flag and state.md or finalize-state.md in the payload if echo "$COMMAND_LOWER" | grep -qE "(^|[[:space:]/])(sh|bash)[[:space:]]+-c[[:space:]]"; then + # Shell wrapper detected - check if payload contains mv/cp methodology-analysis-state.md (most specific) + if echo "$COMMAND_LOWER" | grep -qE "(mv|cp)[[:space:]].*methodology-analysis-state\.md"; then + if is_cancel_authorized "$ACTIVE_LOOP_DIR" "$COMMAND_LOWER"; then + exit 0 + fi + methodology_analysis_state_file_blocked_message >&2 + exit 2 + fi # Shell wrapper detected - check if payload contains mv/cp finalize-state.md (check first, more specific) if echo "$COMMAND_LOWER" | grep -qE "(mv|cp)[[:space:]].*finalize-state\.md"; then # Check for cancel signal file - allow authorized cancel operation diff --git a/hooks/loop-codex-stop-hook.sh b/hooks/loop-codex-stop-hook.sh index 25142818..9339713d 100755 --- a/hooks/loop-codex-stop-hook.sh +++ b/hooks/loop-codex-stop-hook.sh @@ -50,6 +50,9 @@ source "$SCRIPT_DIR/lib/loop-common.sh" PLUGIN_ROOT="$(cd 
"$SCRIPT_DIR/.." && pwd)" source "$PLUGIN_ROOT/scripts/portable-timeout.sh" +# Source methodology analysis library +source "$SCRIPT_DIR/lib/methodology-analysis.sh" + # Default timeout for git operations (30 seconds) GIT_TIMEOUT=30 @@ -80,6 +83,9 @@ fi IS_FINALIZE_PHASE=false [[ "$STATE_FILE" == *"/finalize-state.md" ]] && IS_FINALIZE_PHASE=true +IS_METHODOLOGY_ANALYSIS_PHASE=false +[[ "$STATE_FILE" == *"/methodology-analysis-state.md" ]] && IS_METHODOLOGY_ANALYSIS_PHASE=true + # ======================================== # Parse State File (using shared function) # ======================================== @@ -120,6 +126,7 @@ CODEX_REVIEW_EFFORT="high" CODEX_TIMEOUT="${STATE_CODEX_TIMEOUT:-${CODEX_TIMEOUT:-$DEFAULT_CODEX_TIMEOUT}}" ASK_CODEX_QUESTION="${STATE_ASK_CODEX_QUESTION:-false}" AGENT_TEAMS="${STATE_AGENT_TEAMS:-false}" +PRIVACY_MODE="${STATE_PRIVACY_MODE:-true}" BITLESSON_REQUIRED="false" if [[ -n "$RAW_BITLESSON_REQUIRED" ]]; then BITLESSON_REQUIRED=$(echo "$RAW_BITLESSON_REQUIRED" | sed 's/^bitlesson_required:[[:space:]]*//' | tr -d ' "') @@ -675,6 +682,25 @@ Please push before exiting." fi fi +# ======================================== +# Methodology Analysis Phase Completion Handler +# ======================================== +# When in methodology analysis phase, check if the analysis is done. +# If done, rename state to the original exit reason's terminal state. +# If not done, block and ask Claude to complete the analysis. +# All other checks (summary, bitlesson, goal tracker, max iterations) are skipped. 
+ +if [[ "$IS_METHODOLOGY_ANALYSIS_PHASE" == "true" ]]; then + if complete_methodology_analysis; then + # Analysis complete, allow exit + exit 0 + else + # Analysis not yet complete, block + block_methodology_analysis_incomplete + exit 0 + fi +fi + # ======================================== # Check Summary File Exists # ======================================== @@ -823,6 +849,10 @@ NEXT_ROUND=$((CURRENT_ROUND + 1)) # - Review Phase: must continue until [P?] issues are cleared, regardless of iteration count if [[ "$IS_FINALIZE_PHASE" != "true" ]] && [[ "$REVIEW_STARTED" != "true" ]] && [[ $NEXT_ROUND -gt $MAX_ITERATIONS ]]; then echo "RLCR loop did not complete, but reached max iterations ($MAX_ITERATIONS). Exiting." >&2 + # Try to enter methodology analysis phase before final exit + if enter_methodology_analysis_phase "maxiter" "Reached max iterations ($MAX_ITERATIONS) without completion"; then + exit 0 + fi end_loop "$LOOP_DIR" "$STATE_FILE" "$EXIT_MAXITER" exit 0 fi @@ -834,8 +864,12 @@ fi # No Codex review is performed - this is the final step after Codex already confirmed COMPLETE if [[ "$IS_FINALIZE_PHASE" == "true" ]]; then - echo "Finalize Phase complete. All checks passed. Loop finished!" >&2 - # Rename finalize-state.md to complete-state.md + echo "Finalize Phase complete. All checks passed." >&2 + # Try to enter methodology analysis phase before final exit + if enter_methodology_analysis_phase "complete" "All acceptance criteria met and code review passed"; then + exit 0 + fi + # Methodology analysis skipped or already done - proceed with normal exit mv "$STATE_FILE" "$LOOP_DIR/complete-state.md" echo "State preserved as: $LOOP_DIR/complete-state.md" >&2 exit 0 @@ -1547,6 +1581,9 @@ if [[ "$LAST_LINE_TRIMMED" == "$MARKER_COMPLETE" ]]; then # Max iterations check if [[ $CURRENT_ROUND -ge $MAX_ITERATIONS ]]; then echo "Codex review passed but at max iterations ($MAX_ITERATIONS). Terminating as MAXITER." 
>&2 + if enter_methodology_analysis_phase "maxiter" "Codex confirmed COMPLETE but at max iterations ($MAX_ITERATIONS)"; then + exit 0 + fi end_loop "$LOOP_DIR" "$STATE_FILE" "$EXIT_MAXITER" exit 0 fi @@ -1640,6 +1677,10 @@ if [[ "$LAST_LINE_TRIMMED" == "$MARKER_STOP" ]]; then echo " $REVIEW_RESULT_FILE" >&2 fi echo "========================================" >&2 + # Try to enter methodology analysis phase before final exit + if enter_methodology_analysis_phase "stop" "Circuit breaker triggered - stagnation detected at round $CURRENT_ROUND"; then + exit 0 + fi end_loop "$LOOP_DIR" "$STATE_FILE" "$EXIT_STOP" exit 0 fi diff --git a/hooks/loop-edit-validator.sh b/hooks/loop-edit-validator.sh index 76cf9c03..851ad5c0 100755 --- a/hooks/loop-edit-validator.sh +++ b/hooks/loop-edit-validator.sh @@ -110,9 +110,14 @@ fi CURRENT_ROUND="$STATE_CURRENT_ROUND" # ======================================== -# Block State File Edits (state.md and finalize-state.md) +# Block State File Edits (state.md, finalize-state.md, methodology-analysis-state.md) # ======================================== -# NOTE: Check finalize-state.md FIRST because is_state_file_path also matches finalize-state.md +# NOTE: Check most specific patterns first because is_state_file_path matches any *state.md + +if is_methodology_analysis_state_file_path "$FILE_PATH_LOWER"; then + methodology_analysis_state_file_blocked_message >&2 + exit 2 +fi if is_finalize_state_file_path "$FILE_PATH_LOWER"; then finalize_state_file_blocked_message >&2 diff --git a/hooks/loop-write-validator.sh b/hooks/loop-write-validator.sh index 02090265..633caf73 100755 --- a/hooks/loop-write-validator.sh +++ b/hooks/loop-write-validator.sh @@ -109,12 +109,12 @@ if [[ "$IS_SUMMARY_FILE" == "false" ]] && [[ "$IS_FINALIZE_SUMMARY" == "false" ] exit 0 fi -# For state.md, finalize-state.md, goal-tracker.md, and plan.md in .humanize/rlcr, we need further validation +# For state.md, finalize-state.md, methodology-analysis-state.md, 
goal-tracker.md, and plan.md in .humanize/rlcr, we need further validation # For other files in .humanize/rlcr that aren't summaries, allow them FILENAME=$(basename "$FILE_PATH") IS_PLAN_BACKUP=$([[ "$FILENAME" == "plan.md" ]] && echo "true" || echo "false") if [[ "$IN_HUMANIZE_LOOP_DIR" == "true" ]] && [[ "$IS_SUMMARY_FILE" == "false" ]] && [[ "$IS_FINALIZE_SUMMARY" == "false" ]]; then - if ! is_state_file_path "$FILE_PATH_LOWER" && ! is_finalize_state_file_path "$FILE_PATH_LOWER" && ! is_goal_tracker_path "$FILE_PATH_LOWER" && [[ "$IS_PLAN_BACKUP" != "true" ]]; then + if ! is_state_file_path "$FILE_PATH_LOWER" && ! is_finalize_state_file_path "$FILE_PATH_LOWER" && ! is_methodology_analysis_state_file_path "$FILE_PATH_LOWER" && ! is_goal_tracker_path "$FILE_PATH_LOWER" && [[ "$IS_PLAN_BACKUP" != "true" ]]; then exit 0 fi fi @@ -147,9 +147,14 @@ fi CURRENT_ROUND="$STATE_CURRENT_ROUND" # ======================================== -# Block State File Writes (state.md and finalize-state.md) +# Block State File Writes (state.md, finalize-state.md, methodology-analysis-state.md) # ======================================== -# NOTE: Check finalize-state.md FIRST because is_state_file_path also matches finalize-state.md +# NOTE: Check most specific patterns first because is_state_file_path matches any *state.md + +if is_methodology_analysis_state_file_path "$FILE_PATH_LOWER"; then + methodology_analysis_state_file_blocked_message >&2 + exit 2 +fi if is_finalize_state_file_path "$FILE_PATH_LOWER"; then finalize_state_file_blocked_message >&2 diff --git a/prompt-template/claude/methodology-analysis-prompt.md b/prompt-template/claude/methodology-analysis-prompt.md new file mode 100644 index 00000000..f8a7ec4b --- /dev/null +++ b/prompt-template/claude/methodology-analysis-prompt.md @@ -0,0 +1,73 @@ +# Methodology Analysis Phase + +The RLCR loop has reached its exit point. 
+ +**Exit reason**: {{EXIT_REASON}} - {{EXIT_REASON_DESCRIPTION}} +**Rounds completed**: {{CURRENT_ROUND}} of {{MAX_ITERATIONS}} + +Before the loop fully exits, please perform a methodology improvement analysis. This analysis helps improve the Humanize development methodology itself -- it is NOT about the project you just worked on. + +## Instructions + +### 1. Spawn an Opus Agent for Sanitized Analysis + +Use the Agent tool with `model: "opus"` to spawn an analysis agent. Give it this task: + +**Agent prompt**: Read the development records in `{{LOOP_DIR}}`: +- All files matching `round-*-summary.md` +- All files matching `round-*-review-result.md` + +Analyze these records from a **pure methodology perspective** and write your findings to `{{LOOP_DIR}}/methodology-analysis-report.md`. + +**CRITICAL SANITIZATION RULES** - The report MUST NOT contain: +- File paths, directory paths, or module paths +- Function names, variable names, class names, or method names +- Branch names, commit hashes, or git identifiers +- Business domain terms, product names, or feature names +- Code snippets or code fragments of any kind +- Raw error messages or stack traces +- Project-specific URLs or endpoints +- Any information that could identify the specific project + +**Focus areas for analysis**: +- Iteration efficiency: Were rounds productive or did they repeat similar work? +- Feedback loop quality: Did reviewer feedback lead to meaningful improvements? +- Stagnation patterns: Were there signs of going in circles? +- Review effectiveness: Did reviews catch real issues or create false positives? +- Plan-to-execution alignment: Did execution follow the plan or drift? +- Round count vs. progress ratio: Was the number of rounds proportional to progress? +- Communication clarity: Were summaries and reviews clear and actionable? + +**Output format**: Write a structured report with methodology improvement suggestions. 
Each suggestion should describe a general pattern observed and a concrete improvement to the RLCR methodology. If no improvements are found, write a brief note saying the methodology worked well for this session. + +### 2. Read the Analysis Report + +After the agent completes, read `{{LOOP_DIR}}/methodology-analysis-report.md`. ALL subsequent user-facing content MUST be derived solely from this report -- do NOT reference raw development records directly. + +### 3. Handle Results + +**If no improvements found**: Briefly inform the user that the methodology analysis found no significant improvement suggestions. Then write a completion note to `{{LOOP_DIR}}/methodology-analysis-done.md` and exit. + +**If improvements found**: + +a) Report to the user: + - Brief summary of the exit reason ({{EXIT_REASON}}: {{EXIT_REASON_DESCRIPTION}}) + - Methodology improvement suggestions from the report + +b) Use `AskUserQuestion` to ask if the user would like to help improve Humanize by opening a GitHub issue with these suggestions. Emphasize: + - This is completely voluntary + - The content is fully sanitized (no project-specific information) + - It helps improve the methodology for everyone + +c) **If user declines**: Thank them, write completion marker to `{{LOOP_DIR}}/methodology-analysis-done.md`, and exit. + +d) **If user agrees**: + - Draft a GitHub issue title and body from the analysis report + - Show the draft via a second `AskUserQuestion` for the user to review and confirm + - If confirmed: run `gh issue create --repo humania-org/humanize --title "..." --body "..."` + - If `gh` is not available, provide the title and body so the user can create the issue manually + - Write completion marker to `{{LOOP_DIR}}/methodology-analysis-done.md` and exit + +### 4. Completion Marker + +You MUST write meaningful content to `{{LOOP_DIR}}/methodology-analysis-done.md` before exiting. This file signals that the analysis phase is complete. 
A brief summary of what was done (e.g., "Analysis complete, no suggestions" or "Analysis complete, issue filed") is sufficient. diff --git a/scripts/cancel-rlcr-loop.sh b/scripts/cancel-rlcr-loop.sh index 907b051e..8f58b873 100755 --- a/scripts/cancel-rlcr-loop.sh +++ b/scripts/cancel-rlcr-loop.sh @@ -50,7 +50,7 @@ DESCRIPTION: Cancels the active RLCR loop by: 1. Finding the most recent loop directory 2. Creating a .cancel-requested signal file - 3. Renaming state.md or finalize-state.md to cancel-state.md + 3. Renaming state.md, methodology-analysis-state.md, or finalize-state.md to cancel-state.md HELP_EOF exit 0 ;; @@ -98,11 +98,15 @@ fi STATE_FILE="$LOOP_DIR/state.md" FINALIZE_STATE_FILE="$LOOP_DIR/finalize-state.md" +METHODOLOGY_ANALYSIS_STATE_FILE="$LOOP_DIR/methodology-analysis-state.md" CANCEL_SIGNAL="$LOOP_DIR/.cancel-requested" if [[ -f "$STATE_FILE" ]]; then LOOP_STATE="NORMAL_LOOP" ACTIVE_STATE_FILE="$STATE_FILE" +elif [[ -f "$METHODOLOGY_ANALYSIS_STATE_FILE" ]]; then + LOOP_STATE="METHODOLOGY_ANALYSIS_PHASE" + ACTIVE_STATE_FILE="$METHODOLOGY_ANALYSIS_STATE_FILE" elif [[ -f "$FINALIZE_STATE_FILE" ]]; then LOOP_STATE="FINALIZE_PHASE" ACTIVE_STATE_FILE="$FINALIZE_STATE_FILE" @@ -151,6 +155,9 @@ touch "$CANCEL_SIGNAL" # Clean up any pending session_id signal file (setup may not have completed) rm -f "$PROJECT_ROOT/.humanize/.pending-session-id" +# Clean up methodology analysis marker files if present +rm -f "$LOOP_DIR/.methodology-exit-reason" + # Rename state file to cancel-state.md mv "$ACTIVE_STATE_FILE" "$LOOP_DIR/cancel-state.md" @@ -162,6 +169,10 @@ if [[ "$LOOP_STATE" == "NORMAL_LOOP" ]]; then echo "CANCELLED" echo "Cancelled RLCR loop (was at round $CURRENT_ROUND of $MAX_ITERATIONS)." echo "State preserved as cancel-state.md" +elif [[ "$LOOP_STATE" == "METHODOLOGY_ANALYSIS_PHASE" ]]; then + echo "CANCELLED_METHODOLOGY_ANALYSIS" + echo "Cancelled RLCR loop during Methodology Analysis Phase (was at round $CURRENT_ROUND of $MAX_ITERATIONS)." 
+ echo "State preserved as cancel-state.md" else echo "CANCELLED_FINALIZE" echo "Cancelled RLCR loop during Finalize Phase (was at round $CURRENT_ROUND of $MAX_ITERATIONS)." diff --git a/scripts/lib/monitor-common.sh b/scripts/lib/monitor-common.sh index 26bdaa9b..405b25fd 100644 --- a/scripts/lib/monitor-common.sh +++ b/scripts/lib/monitor-common.sh @@ -159,7 +159,11 @@ monitor_find_state_file() { return fi - # Priority 1: state.md indicates active loop + # Priority 1: Active state files indicate running loop + if [[ -f "$session_dir/methodology-analysis-state.md" ]]; then + echo "$session_dir/methodology-analysis-state.md|methodology-analysis" + return + fi if [[ -f "$session_dir/state.md" ]]; then echo "$session_dir/state.md|active" return diff --git a/scripts/setup-rlcr-loop.sh b/scripts/setup-rlcr-loop.sh index 1b449712..59300bb4 100755 --- a/scripts/setup-rlcr-loop.sh +++ b/scripts/setup-rlcr-loop.sh @@ -51,6 +51,7 @@ SKIP_IMPL_NO_PLAN="false" ASK_CODEX_QUESTION="true" AGENT_TEAMS="false" BITLESSON_ALLOW_EMPTY_NONE="true" +PRIVACY_MODE="false" show_help() { cat <&2 echo "Use --help for usage information" >&2 @@ -840,6 +846,7 @@ review_started: $INITIAL_REVIEW_STARTED ask_codex_question: $ASK_CODEX_QUESTION session_id: agent_teams: $AGENT_TEAMS +privacy_mode: $PRIVACY_MODE bitlesson_required: $BITLESSON_STATE_VALUE bitlesson_file: $BITLESSON_FILE_REL bitlesson_allow_empty_none: $BITLESSON_ALLOW_EMPTY_NONE diff --git a/skills/humanize/SKILL.md b/skills/humanize/SKILL.md index c3219d78..f5800f2d 100644 --- a/skills/humanize/SKILL.md +++ b/skills/humanize/SKILL.md @@ -96,6 +96,7 @@ Transforms a rough draft document into a structured implementation plan with: - `--push-every-round` - Require git push after each round - `--claude-answer-codex` - Let Claude answer Codex Open Questions directly (default is AskUserQuestion) - `--agent-teams` - Enable Agent Teams mode +- `--privacy` - Disable methodology analysis at loop exit (default: analysis enabled) ### Cancel RLCR 
Loop @@ -226,6 +227,9 @@ Humanize stores all data in `.humanize/`: │ ├── round-N-review-result.md │ ├── finalize-state.md │ ├── finalize-summary.md +│ ├── methodology-analysis-state.md +│ ├── methodology-analysis-report.md +│ ├── methodology-analysis-done.md │ └── complete-state.md ├── pr-loop/ # PR loop data │ └── / From d3ded24b793e3b48988cb245d279ce44aab25e38 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 21:21:52 -0700 Subject: [PATCH 02/50] Fix methodology analysis completion: require report and fail closed Address Codex review findings: - Require methodology-analysis-report.md to exist before allowing completion (prevents silent no-op when Opus agent does not run) - Fail closed when .methodology-exit-reason marker is missing or invalid instead of defaulting to "complete" (prevents misreporting stop/maxiter loops as successful) --- hooks/lib/methodology-analysis.sh | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/hooks/lib/methodology-analysis.sh b/hooks/lib/methodology-analysis.sh index 0b61d5ae..adbde197 100644 --- a/hooks/lib/methodology-analysis.sh +++ b/hooks/lib/methodology-analysis.sh @@ -109,10 +109,11 @@ When done, write a completion note to $LOOP_DIR/methodology-analysis-done.md." # # Returns: # 0 - completion successful, caller should exit 0 (allow exit) -# 1 - completion artifact missing or empty, caller should block +# 1 - incomplete (done marker missing/empty, report missing, or exit reason invalid) # complete_methodology_analysis() { local done_file="$LOOP_DIR/methodology-analysis-done.md" + local report_file="$LOOP_DIR/methodology-analysis-report.md" # Check completion artifact has actual content (not just empty placeholder) if [[ ! 
-f "$done_file" ]]; then @@ -125,20 +126,29 @@ complete_methodology_analysis() { return 1 fi - # Read exit reason - local exit_reason="complete" - if [[ -f "$LOOP_DIR/.methodology-exit-reason" ]]; then - exit_reason=$(cat "$LOOP_DIR/.methodology-exit-reason" 2>/dev/null || echo "complete") - exit_reason=$(echo "$exit_reason" | tr -d '[:space:]') + # Require the analysis report to exist (ensures the Opus agent actually ran) + if [[ ! -f "$report_file" ]]; then + echo "Warning: methodology-analysis-report.md missing, blocking completion" >&2 + return 1 + fi + + # Read exit reason (fail closed: missing marker blocks completion) + if [[ ! -f "$LOOP_DIR/.methodology-exit-reason" ]]; then + echo "Error: .methodology-exit-reason marker missing, cannot determine terminal state" >&2 + return 1 fi - # Validate exit reason + local exit_reason + exit_reason=$(cat "$LOOP_DIR/.methodology-exit-reason" 2>/dev/null || echo "") + exit_reason=$(echo "$exit_reason" | tr -d '[:space:]') + + # Validate exit reason (fail closed on invalid values) case "$exit_reason" in complete|stop|maxiter) ;; *) - echo "Warning: Invalid methodology exit reason '$exit_reason', defaulting to complete" >&2 - exit_reason="complete" + echo "Error: Invalid methodology exit reason '$exit_reason', blocking completion" >&2 + return 1 ;; esac From 7282f6e0729f3197a2d4cbb4175f587f083951d4 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 21:27:09 -0700 Subject: [PATCH 03/50] Fix read validator to allow historical round access during methodology analysis The Opus analysis agent needs to read all round-*-summary.md and round-*-review-result.md files, but the read validator was blocking access to summaries from non-current rounds. Bypass the round number check when methodology-analysis-state.md is the active state file, while still requiring files to be within the active loop directory. 
--- hooks/loop-read-validator.sh | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index f0b6f71f..b483e15f 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -88,9 +88,18 @@ if [[ -z "$ACTIVE_LOOP_DIR" ]]; then exit 0 fi -# Detect if we're in Finalize Phase (finalize-state.md exists) +# Detect loop phase from state file STATE_FILE_TO_PARSE=$(resolve_active_state_file "$ACTIVE_LOOP_DIR") +# In Methodology Analysis Phase, allow reading all round files (summaries and review results) +# The analysis agent needs access to the full development history +if [[ "$STATE_FILE_TO_PARSE" == *"/methodology-analysis-state.md" ]]; then + # Only allow reads within the active loop directory + if [[ "$FILE_PATH" == "$ACTIVE_LOOP_DIR/"* ]]; then + exit 0 + fi +fi + # Parse state file using strict validation (fail closed on malformed state) if ! parse_state_file_strict "$STATE_FILE_TO_PARSE" 2>/dev/null; then echo "Error: Malformed state file, blocking operation for safety" >&2 From 50e8bbbf1b8d4947da8d182228cc55df09eefecf Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 21:32:31 -0700 Subject: [PATCH 04/50] Harden methodology analysis read bypass with path canonicalization and allowlist Address Codex review findings: - Use realpath to canonicalize paths before prefix check, preventing directory traversal attacks (e.g., LOOP_DIR/../sensitive-file) - Restrict allowed reads to an explicit allowlist of files the analysis agent actually needs: round summaries, review results, and its own artifacts. This prevents exposing plan.md, prompt files, and other project-specific loop metadata that would undercut sanitization. 
--- hooks/loop-read-validator.sh | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index b483e15f..a948cd22 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -91,12 +91,26 @@ fi # Detect loop phase from state file STATE_FILE_TO_PARSE=$(resolve_active_state_file "$ACTIVE_LOOP_DIR") -# In Methodology Analysis Phase, allow reading all round files (summaries and review results) -# The analysis agent needs access to the full development history +# In Methodology Analysis Phase, allow reading specific analysis-related files only +# The Opus agent needs round summaries, review results, and its own artifacts if [[ "$STATE_FILE_TO_PARSE" == *"/methodology-analysis-state.md" ]]; then - # Only allow reads within the active loop directory - if [[ "$FILE_PATH" == "$ACTIVE_LOOP_DIR/"* ]]; then - exit 0 + # Canonicalize to prevent path traversal (e.g., $LOOP_DIR/../secrets) + local_real_path=$(realpath "$FILE_PATH" 2>/dev/null || echo "") + local_real_loop=$(realpath "$ACTIVE_LOOP_DIR" 2>/dev/null || echo "") + if [[ -n "$local_real_path" ]] && [[ -n "$local_real_loop" ]] && \ + [[ "$local_real_path" == "$local_real_loop/"* ]]; then + local_basename=$(basename "$local_real_path") + # Allowlist: only files the analysis agent needs + # - round-*-summary.md: development record summaries + # - round-*-review-result.md: Codex review feedback + # - methodology-analysis-report.md: the agent's own output + # - methodology-analysis-done.md: completion marker + # - methodology-analysis-state.md: state file (for parsing) + case "$local_basename" in + round-*-summary.md|round-*-review-result.md|methodology-analysis-report.md|methodology-analysis-done.md|methodology-analysis-state.md) + exit 0 + ;; + esac fi fi From f34d45f07c698d742bda15d0dc32d652a5abaf4f Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 21:43:08 -0700 Subject: [PATCH 05/50] 
Enforce read-only project files during methodology analysis phase Add methodology analysis restrictions to all four validators: - Read validator: restrict loop dir reads to allowlisted artifacts only - Write validator: block all writes except methodology report/done marker - Edit validator: block all edits except methodology report/done marker - Bash validator: block git write commands and in-place file editing tools This prevents source code modifications after Codex has signed off and prevents project-specific information from leaking into the analysis report. --- hooks/loop-bash-validator.sh | 29 ++++++++++++++ hooks/loop-edit-validator.sh | 30 ++++++++++++++ hooks/loop-read-validator.sh | 75 +++++++++++++++++++++++------------ hooks/loop-write-validator.sh | 30 ++++++++++++++ 4 files changed, 138 insertions(+), 26 deletions(-) diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index 211e1478..a39bd9ff 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -97,6 +97,35 @@ if [[ -n "$ACTIVE_LOOP_DIR" ]]; then fi CURRENT_ROUND="$STATE_CURRENT_ROUND" + # ======================================== + # Methodology Analysis Phase Bash Restriction + # ======================================== + # During methodology analysis, block file-modifying bash commands. + # Only gh commands and read-only operations are allowed. + # This prevents source code modifications after Codex has signed off. + + if [[ "$STATE_FILE" == *"/methodology-analysis-state.md" ]]; then + # Allow gh commands for issue creation + if [[ "$COMMAND_LOWER" =~ ^[[:space:]]*gh[[:space:]] ]]; then + exit 0 + fi + # Block git commands that modify the working tree + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])git[[:space:]]+(commit|add|reset|checkout|merge|rebase|cherry-pick|am|apply|stash|push)'; then + echo "# Bash Blocked During Methodology Analysis + +Git commands that modify the working tree are not allowed during the methodology analysis phase." 
>&2 + exit 2 + fi + # Block in-place file editing tools (bypass for Write/Edit tool restriction) + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])(tee|install)[[:space:]]' || \ + echo "$COMMAND_LOWER" | grep -qE 'sed[[:space:]]+-i|awk[[:space:]]+-i[[:space:]]+inplace|perl[[:space:]]+-[^[:space:]]*i'; then + echo "# Bash Blocked During Methodology Analysis + +File modification commands are not allowed during the methodology analysis phase." >&2 + exit 2 + fi + fi + # ======================================== # Block Git Push When push_every_round is false # ======================================== diff --git a/hooks/loop-edit-validator.sh b/hooks/loop-edit-validator.sh index 851ad5c0..a52604b7 100755 --- a/hooks/loop-edit-validator.sh +++ b/hooks/loop-edit-validator.sh @@ -79,6 +79,36 @@ if [[ "$IN_PR_LOOP_DIR" == "true" ]]; then fi fi +# ======================================== +# Methodology Analysis Phase Edit Restriction +# ======================================== +# During methodology analysis, only methodology artifacts can be edited. +# This prevents source code modifications after Codex has signed off. +# This check MUST come before the humanize loop dir early exit below. 
+ +PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" +LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" +_MA_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" + +if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.md" ]]; then + _ma_real_path=$(realpath "$FILE_PATH" 2>/dev/null || echo "") + _ma_real_loop=$(realpath "$_MA_LOOP_DIR" 2>/dev/null || echo "") + if [[ -n "$_ma_real_path" ]] && [[ -n "$_ma_real_loop" ]] && \ + [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then + _ma_basename=$(basename "$_ma_real_path") + case "$_ma_basename" in + methodology-analysis-report.md|methodology-analysis-done.md) + exit 0 + ;; + esac + fi + echo "# Edit Blocked During Methodology Analysis + +During the methodology analysis phase, only methodology artifacts can be edited. +Allowed: methodology-analysis-report.md, methodology-analysis-done.md" >&2 + exit 2 +fi + # ======================================== # Check if File is in .humanize/rlcr # ======================================== diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index a948cd22..e33554a0 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -65,6 +65,53 @@ if is_round_file_type "$FILE_PATH_LOWER" "todos"; then fi fi +# ======================================== +# Methodology Analysis Phase Read Restriction +# ======================================== +# During methodology analysis, restrict reads of files within the loop +# directory to only the artifacts the analysis agent needs. This prevents +# project-specific information from leaking into the analysis report. +# Files outside the loop directory are allowed (Claude needs system files). +# This check MUST come before the summary/prompt early exit below, +# otherwise non-summary/prompt files in the loop dir escape restriction. 
+ +PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" +LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" +ACTIVE_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" + +if [[ -n "$ACTIVE_LOOP_DIR" ]]; then + _MA_STATE=$(resolve_active_state_file "$ACTIVE_LOOP_DIR") + if [[ "$_MA_STATE" == *"/methodology-analysis-state.md" ]]; then + # Canonicalize to prevent path traversal + _ma_real_path=$(realpath "$FILE_PATH" 2>/dev/null || echo "") + _ma_real_loop=$(realpath "$ACTIVE_LOOP_DIR" 2>/dev/null || echo "") + if [[ -n "$_ma_real_path" ]] && [[ -n "$_ma_real_loop" ]] && \ + [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then + _ma_basename=$(basename "$_ma_real_path") + # Allowlist: only files the analysis agent needs + # - round-*-summary.md: development record summaries + # - round-*-review-result.md: review feedback + # - methodology-analysis-report.md: the agent's own output + # - methodology-analysis-done.md: completion marker + # - methodology-analysis-state.md: state file (for parsing) + case "$_ma_basename" in + round-*-summary.md|round-*-review-result.md|methodology-analysis-report.md|methodology-analysis-done.md|methodology-analysis-state.md) + exit 0 + ;; + *) + echo "# Read Blocked During Methodology Analysis + +Only analysis artifacts can be read from the loop directory during this phase. 
+Allowed: round-*-summary.md, round-*-review-result.md, methodology-analysis-*.md" >&2 + exit 2 + ;; + esac + fi + # Files outside loop dir are allowed (Claude needs system files to function) + exit 0 + fi +fi + # ======================================== # Check for Round Files (summary/prompt) # ======================================== @@ -80,9 +127,8 @@ IN_HUMANIZE_LOOP_DIR=$(is_in_humanize_loop_dir "$FILE_PATH" && echo "true" || ec # Find Active Loop and Current Round # ======================================== -PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" -LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" -ACTIVE_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" +# Re-use ACTIVE_LOOP_DIR if already set by methodology analysis check above +ACTIVE_LOOP_DIR="${ACTIVE_LOOP_DIR:-${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}}" if [[ -z "$ACTIVE_LOOP_DIR" ]]; then exit 0 @@ -91,29 +137,6 @@ fi # Detect loop phase from state file STATE_FILE_TO_PARSE=$(resolve_active_state_file "$ACTIVE_LOOP_DIR") -# In Methodology Analysis Phase, allow reading specific analysis-related files only -# The Opus agent needs round summaries, review results, and its own artifacts -if [[ "$STATE_FILE_TO_PARSE" == *"/methodology-analysis-state.md" ]]; then - # Canonicalize to prevent path traversal (e.g., $LOOP_DIR/../secrets) - local_real_path=$(realpath "$FILE_PATH" 2>/dev/null || echo "") - local_real_loop=$(realpath "$ACTIVE_LOOP_DIR" 2>/dev/null || echo "") - if [[ -n "$local_real_path" ]] && [[ -n "$local_real_loop" ]] && \ - [[ "$local_real_path" == "$local_real_loop/"* ]]; then - local_basename=$(basename "$local_real_path") - # Allowlist: only files the analysis agent needs - # - round-*-summary.md: development record summaries - # - round-*-review-result.md: Codex review feedback - # - methodology-analysis-report.md: the agent's own output - # - methodology-analysis-done.md: completion marker - # - 
methodology-analysis-state.md: state file (for parsing) - case "$local_basename" in - round-*-summary.md|round-*-review-result.md|methodology-analysis-report.md|methodology-analysis-done.md|methodology-analysis-state.md) - exit 0 - ;; - esac - fi -fi - # Parse state file using strict validation (fail closed on malformed state) if ! parse_state_file_strict "$STATE_FILE_TO_PARSE" 2>/dev/null; then echo "Error: Malformed state file, blocking operation for safety" >&2 diff --git a/hooks/loop-write-validator.sh b/hooks/loop-write-validator.sh index 633caf73..4b7fca83 100755 --- a/hooks/loop-write-validator.sh +++ b/hooks/loop-write-validator.sh @@ -96,6 +96,36 @@ if [[ "$IN_PR_LOOP_DIR" == "true" ]]; then fi fi +# ======================================== +# Methodology Analysis Phase Write Restriction +# ======================================== +# During methodology analysis, only methodology artifacts can be written. +# This prevents source code modifications after Codex has signed off. +# This check MUST come before the file type early exits below. + +PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" +LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" +_MA_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" + +if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.md" ]]; then + _ma_real_path=$(realpath "$FILE_PATH" 2>/dev/null || echo "") + _ma_real_loop=$(realpath "$_MA_LOOP_DIR" 2>/dev/null || echo "") + if [[ -n "$_ma_real_path" ]] && [[ -n "$_ma_real_loop" ]] && \ + [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then + _ma_basename=$(basename "$_ma_real_path") + case "$_ma_basename" in + methodology-analysis-report.md|methodology-analysis-done.md) + exit 0 + ;; + esac + fi + echo "# Write Blocked During Methodology Analysis + +During the methodology analysis phase, only methodology artifacts can be written. 
+Allowed: methodology-analysis-report.md, methodology-analysis-done.md" >&2 + exit 2 +fi + # ======================================== # Determine File Types # ======================================== From 87687a2ccf563c0bb51b920acfd38c4a4fbb072d Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 21:49:39 -0700 Subject: [PATCH 06/50] Fix spawned agent bypass and broaden bash restriction coverage All four validators now try unfiltered loop search when session-filtered search returns empty, so spawned agents (with different session_id) are also subject to methodology analysis restrictions. Bash validator now blocks: touch, mv, cp, rm, dd, truncate, chmod, chown, output redirection to non-/dev/ paths, and all git write commands. --- hooks/loop-bash-validator.sh | 79 ++++++++++++++++++++++------------- hooks/loop-edit-validator.sh | 6 +++ hooks/loop-read-validator.sh | 13 ++++-- hooks/loop-write-validator.sh | 6 +++ 4 files changed, 72 insertions(+), 32 deletions(-) diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index a39bd9ff..27fe3858 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -64,6 +64,56 @@ ACTIVE_LOOP_DIR=$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID") PR_LOOP_BASE_DIR="$PROJECT_ROOT/.humanize/pr-loop" ACTIVE_PR_LOOP_DIR=$(find_active_pr_loop "$PR_LOOP_BASE_DIR") +# ======================================== +# Methodology Analysis Phase Bash Restriction +# ======================================== +# During methodology analysis, block file-modifying bash commands. +# Only gh commands and read-only operations are allowed. +# This prevents source code modifications after Codex has signed off. +# Uses unfiltered search to also apply to spawned agents with different session_id. 
+ +_MA_BASH_DIR="$ACTIVE_LOOP_DIR" +if [[ -z "$_MA_BASH_DIR" ]]; then + _MA_BASH_DIR=$(find_active_loop "$LOOP_BASE_DIR" "") +fi + +if [[ -n "$_MA_BASH_DIR" ]] && [[ -f "$_MA_BASH_DIR/methodology-analysis-state.md" ]]; then + # Allow gh commands for issue creation + if [[ "$COMMAND_LOWER" =~ ^[[:space:]]*gh[[:space:]] ]]; then + exit 0 + fi + # Block git commands that modify the working tree + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])git[[:space:]]+(commit|add|reset|checkout|merge|rebase|cherry-pick|am|apply|stash|push)'; then + echo "# Bash Blocked During Methodology Analysis + +Git write commands are not allowed during the methodology analysis phase." >&2 + exit 2 + fi + # Block file manipulation commands (touch, mv, cp, rm, etc.) + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])(tee|install|touch|mv|cp|rm|dd|truncate|chmod|chown)[[:space:]]'; then + echo "# Bash Blocked During Methodology Analysis + +File modification commands are not allowed during the methodology analysis phase." >&2 + exit 2 + fi + # Block in-place file editing tools + if echo "$COMMAND_LOWER" | grep -qE 'sed[[:space:]]+-i|awk[[:space:]]+-i[[:space:]]+inplace|perl[[:space:]]+-[^[:space:]]*i'; then + echo "# Bash Blocked During Methodology Analysis + +In-place file editing is not allowed during the methodology analysis phase." >&2 + exit 2 + fi + # Block output redirection to files (catches cat > file, echo > file, etc.) + # Strip safe redirections (/dev/ paths, fd duplication) then check for remaining > + _ma_stripped=$(echo "$COMMAND_LOWER" | sed 's|[0-9]*>[>]*[[:space:]]*/dev/[^[:space:]]*||g; s|[0-9]*>&[0-9]*||g') + if echo "$_ma_stripped" | grep -qE '[>]'; then + echo "# Bash Blocked During Methodology Analysis + +File redirection is not allowed during the methodology analysis phase." 
>&2 + exit 2 + fi +fi + # If no active loop of either type, allow all commands if [[ -z "$ACTIVE_LOOP_DIR" ]] && [[ -z "$ACTIVE_PR_LOOP_DIR" ]]; then exit 0 @@ -97,35 +147,6 @@ if [[ -n "$ACTIVE_LOOP_DIR" ]]; then fi CURRENT_ROUND="$STATE_CURRENT_ROUND" - # ======================================== - # Methodology Analysis Phase Bash Restriction - # ======================================== - # During methodology analysis, block file-modifying bash commands. - # Only gh commands and read-only operations are allowed. - # This prevents source code modifications after Codex has signed off. - - if [[ "$STATE_FILE" == *"/methodology-analysis-state.md" ]]; then - # Allow gh commands for issue creation - if [[ "$COMMAND_LOWER" =~ ^[[:space:]]*gh[[:space:]] ]]; then - exit 0 - fi - # Block git commands that modify the working tree - if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])git[[:space:]]+(commit|add|reset|checkout|merge|rebase|cherry-pick|am|apply|stash|push)'; then - echo "# Bash Blocked During Methodology Analysis - -Git commands that modify the working tree are not allowed during the methodology analysis phase." >&2 - exit 2 - fi - # Block in-place file editing tools (bypass for Write/Edit tool restriction) - if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])(tee|install)[[:space:]]' || \ - echo "$COMMAND_LOWER" | grep -qE 'sed[[:space:]]+-i|awk[[:space:]]+-i[[:space:]]+inplace|perl[[:space:]]+-[^[:space:]]*i'; then - echo "# Bash Blocked During Methodology Analysis - -File modification commands are not allowed during the methodology analysis phase." 
>&2 - exit 2 - fi - fi - # ======================================== # Block Git Push When push_every_round is false # ======================================== diff --git a/hooks/loop-edit-validator.sh b/hooks/loop-edit-validator.sh index a52604b7..52b7ec66 100755 --- a/hooks/loop-edit-validator.sh +++ b/hooks/loop-edit-validator.sh @@ -90,6 +90,12 @@ PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" _MA_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" +# Spawned agents (e.g., Opus analysis agent) have a different session_id. +# Try unfiltered search to detect methodology analysis phase for them. +if [[ -z "$_MA_LOOP_DIR" ]]; then + _MA_LOOP_DIR=$(find_active_loop "$LOOP_BASE_DIR" "") +fi + if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.md" ]]; then _ma_real_path=$(realpath "$FILE_PATH" 2>/dev/null || echo "") _ma_real_loop=$(realpath "$_MA_LOOP_DIR" 2>/dev/null || echo "") diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index e33554a0..efb66e4c 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -79,12 +79,19 @@ PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" ACTIVE_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" -if [[ -n "$ACTIVE_LOOP_DIR" ]]; then - _MA_STATE=$(resolve_active_state_file "$ACTIVE_LOOP_DIR") +# Spawned agents (e.g., Opus analysis agent) have a different session_id. +# Try unfiltered search to detect methodology analysis phase for them. 
+_MA_CHECK_DIR="$ACTIVE_LOOP_DIR" +if [[ -z "$_MA_CHECK_DIR" ]]; then + _MA_CHECK_DIR=$(find_active_loop "$LOOP_BASE_DIR" "") +fi + +if [[ -n "$_MA_CHECK_DIR" ]]; then + _MA_STATE=$(resolve_active_state_file "$_MA_CHECK_DIR") if [[ "$_MA_STATE" == *"/methodology-analysis-state.md" ]]; then # Canonicalize to prevent path traversal _ma_real_path=$(realpath "$FILE_PATH" 2>/dev/null || echo "") - _ma_real_loop=$(realpath "$ACTIVE_LOOP_DIR" 2>/dev/null || echo "") + _ma_real_loop=$(realpath "$_MA_CHECK_DIR" 2>/dev/null || echo "") if [[ -n "$_ma_real_path" ]] && [[ -n "$_ma_real_loop" ]] && \ [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then _ma_basename=$(basename "$_ma_real_path") diff --git a/hooks/loop-write-validator.sh b/hooks/loop-write-validator.sh index 4b7fca83..9ce8cdf8 100755 --- a/hooks/loop-write-validator.sh +++ b/hooks/loop-write-validator.sh @@ -107,6 +107,12 @@ PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" _MA_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" +# Spawned agents (e.g., Opus analysis agent) have a different session_id. +# Try unfiltered search to detect methodology analysis phase for them. +if [[ -z "$_MA_LOOP_DIR" ]]; then + _MA_LOOP_DIR=$(find_active_loop "$LOOP_BASE_DIR" "") +fi + if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.md" ]]; then _ma_real_path=$(realpath "$FILE_PATH" 2>/dev/null || echo "") _ma_real_loop=$(realpath "$_MA_LOOP_DIR" 2>/dev/null || echo "") From 9888044b0adac29990a56788916cea6af8b673a5 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 21:53:31 -0700 Subject: [PATCH 07/50] Fix realpath for non-existent files and narrow gh allowlist Handle BSD/macOS where realpath fails for non-existent files by resolving the parent directory and appending the basename. This allows the initial Write to methodology-analysis-report.md to succeed. 
Narrow the bash gh allowlist from all gh commands to only gh issue subcommands, preventing workspace mutations via gh pr checkout, gh repo clone, or mutating gh api calls. --- hooks/loop-bash-validator.sh | 5 +++-- hooks/loop-edit-validator.sh | 5 +++++ hooks/loop-read-validator.sh | 5 +++++ hooks/loop-write-validator.sh | 5 +++++ 4 files changed, 18 insertions(+), 2 deletions(-) diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index 27fe3858..285df8e1 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -78,8 +78,9 @@ if [[ -z "$_MA_BASH_DIR" ]]; then fi if [[ -n "$_MA_BASH_DIR" ]] && [[ -f "$_MA_BASH_DIR/methodology-analysis-state.md" ]]; then - # Allow gh commands for issue creation - if [[ "$COMMAND_LOWER" =~ ^[[:space:]]*gh[[:space:]] ]]; then + # Allow only gh issue commands (for optional feedback issue creation) + # Block other gh subcommands (pr checkout, repo clone, api, etc.) + if [[ "$COMMAND_LOWER" =~ ^[[:space:]]*gh[[:space:]]+issue[[:space:]] ]]; then exit 0 fi # Block git commands that modify the working tree diff --git a/hooks/loop-edit-validator.sh b/hooks/loop-edit-validator.sh index 52b7ec66..685b44a8 100755 --- a/hooks/loop-edit-validator.sh +++ b/hooks/loop-edit-validator.sh @@ -97,7 +97,12 @@ if [[ -z "$_MA_LOOP_DIR" ]]; then fi if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.md" ]]; then + # If realpath fails (file doesn't exist yet on BSD/macOS), resolve parent dir _ma_real_path=$(realpath "$FILE_PATH" 2>/dev/null || echo "") + if [[ -z "$_ma_real_path" ]]; then + _ma_parent=$(realpath "$(dirname "$FILE_PATH")" 2>/dev/null || echo "") + [[ -n "$_ma_parent" ]] && _ma_real_path="$_ma_parent/$(basename "$FILE_PATH")" + fi _ma_real_loop=$(realpath "$_MA_LOOP_DIR" 2>/dev/null || echo "") if [[ -n "$_ma_real_path" ]] && [[ -n "$_ma_real_loop" ]] && \ [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then diff --git a/hooks/loop-read-validator.sh 
b/hooks/loop-read-validator.sh index efb66e4c..428a0bec 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -90,7 +90,12 @@ if [[ -n "$_MA_CHECK_DIR" ]]; then _MA_STATE=$(resolve_active_state_file "$_MA_CHECK_DIR") if [[ "$_MA_STATE" == *"/methodology-analysis-state.md" ]]; then # Canonicalize to prevent path traversal + # If realpath fails (file doesn't exist yet on BSD/macOS), resolve parent dir _ma_real_path=$(realpath "$FILE_PATH" 2>/dev/null || echo "") + if [[ -z "$_ma_real_path" ]]; then + _ma_parent=$(realpath "$(dirname "$FILE_PATH")" 2>/dev/null || echo "") + [[ -n "$_ma_parent" ]] && _ma_real_path="$_ma_parent/$(basename "$FILE_PATH")" + fi _ma_real_loop=$(realpath "$_MA_CHECK_DIR" 2>/dev/null || echo "") if [[ -n "$_ma_real_path" ]] && [[ -n "$_ma_real_loop" ]] && \ [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then diff --git a/hooks/loop-write-validator.sh b/hooks/loop-write-validator.sh index 9ce8cdf8..775bccbd 100755 --- a/hooks/loop-write-validator.sh +++ b/hooks/loop-write-validator.sh @@ -114,7 +114,12 @@ if [[ -z "$_MA_LOOP_DIR" ]]; then fi if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.md" ]]; then + # If realpath fails (file doesn't exist yet on BSD/macOS), resolve parent dir _ma_real_path=$(realpath "$FILE_PATH" 2>/dev/null || echo "") + if [[ -z "$_ma_real_path" ]]; then + _ma_parent=$(realpath "$(dirname "$FILE_PATH")" 2>/dev/null || echo "") + [[ -n "$_ma_parent" ]] && _ma_real_path="$_ma_parent/$(basename "$FILE_PATH")" + fi _ma_real_loop=$(realpath "$_MA_LOOP_DIR" 2>/dev/null || echo "") if [[ -n "$_ma_real_path" ]] && [[ -n "$_ma_real_loop" ]] && \ [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then From 4c48209459159b8521bdd4db459d2780b6898b41 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 21:59:47 -0700 Subject: [PATCH 08/50] Block project-root reads and interpreters during methodology analysis Read validator now blocks reads of files within the project root 
(not just loop dir) during methodology analysis, while still allowing system files outside the project (CLAUDE.md, configs). This prevents the analysis agent from accessing source code that could leak into the report. Bash validator now blocks common interpreter commands (python, ruby, node, perl, php) during methodology analysis as defense-in-depth against file write bypasses. --- hooks/loop-bash-validator.sh | 7 +++++++ hooks/loop-read-validator.sh | 16 +++++++++++++++- hooks/loop-write-validator.sh | 3 +++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index 285df8e1..f9477520 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -104,6 +104,13 @@ File modification commands are not allowed during the methodology analysis phase In-place file editing is not allowed during the methodology analysis phase." >&2 exit 2 fi + # Block common interpreters that could write files (defense-in-depth) + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])(python[23]?|ruby|node|perl|php)[[:space:]]'; then + echo "# Bash Blocked During Methodology Analysis + +Running interpreters is not allowed during the methodology analysis phase." >&2 + exit 2 + fi # Block output redirection to files (catches cat > file, echo > file, etc.) 
# Strip safe redirections (/dev/ paths, fd duplication) then check for remaining > _ma_stripped=$(echo "$COMMAND_LOWER" | sed 's|[0-9]*>[>]*[[:space:]]*/dev/[^[:space:]]*||g; s|[0-9]*>&[0-9]*||g') diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index 428a0bec..4bbf55e0 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -119,7 +119,21 @@ Allowed: round-*-summary.md, round-*-review-result.md, methodology-analysis-*.md ;; esac fi - # Files outside loop dir are allowed (Claude needs system files to function) + # Files within the project root are blocked (project-specific information) + # Files outside the project root are allowed (system files, config, etc.) + _ma_project_real=$(realpath "$PROJECT_ROOT" 2>/dev/null || echo "") + if [[ -n "$_ma_project_real" ]]; then + _ma_path_check="${_ma_real_path:-$FILE_PATH}" + if [[ "$_ma_path_check" == "$_ma_project_real/"* ]] || \ + [[ "$_ma_path_check" == "$PROJECT_ROOT/"* ]]; then + echo "# Read Blocked During Methodology Analysis + +Reading project files is not allowed during the methodology analysis phase. +Only analysis artifacts within the loop directory can be read. +Allowed: round-*-summary.md, round-*-review-result.md, methodology-analysis-*.md" >&2 + exit 2 + fi + fi exit 0 fi fi diff --git a/hooks/loop-write-validator.sh b/hooks/loop-write-validator.sh index 775bccbd..8fda456c 100755 --- a/hooks/loop-write-validator.sh +++ b/hooks/loop-write-validator.sh @@ -109,6 +109,9 @@ _MA_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID" # Spawned agents (e.g., Opus analysis agent) have a different session_id. # Try unfiltered search to detect methodology analysis phase for them. +# Note: This may briefly affect concurrent sessions in the same repo, but +# methodology analysis is short-lived and this ensures spawned agents +# cannot bypass the write freeze after Codex has signed off. 
if [[ -z "$_MA_LOOP_DIR" ]]; then _MA_LOOP_DIR=$(find_active_loop "$LOOP_BASE_DIR" "") fi From 786ed3259fb11b91c7e90f4dbddf9bfb1804741b Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 22:09:02 -0700 Subject: [PATCH 09/50] Block shell entry points and fix concurrent session binding in methodology analysis Add find_methodology_analysis_loop() that scans all loop directories for methodology-analysis-state.md instead of using the unfiltered find_active_loop fallback which only returns the newest active loop. This prevents spawned agents from binding to a wrong concurrent session during methodology analysis. Block shell script entry points (bash/sh/zsh, build tools, source/dot commands, direct script execution) in the bash validator during methodology analysis to prevent bypassing file modification restrictions via wrapper binaries. --- hooks/lib/loop-common.sh | 32 ++++++++++++++++++++++++++++++++ hooks/loop-bash-validator.sh | 35 +++++++++++++++++++++++++++++++++-- hooks/loop-edit-validator.sh | 7 ++++--- hooks/loop-read-validator.sh | 7 ++++--- hooks/loop-write-validator.sh | 10 ++++------ 5 files changed, 77 insertions(+), 14 deletions(-) diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index ea8f62c7..8b465f5d 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -332,6 +332,38 @@ find_active_loop() { echo "" } +# Find any active loop directory currently in methodology analysis phase. +# Unlike find_active_loop() which returns the newest active loop (possibly the +# wrong one when multiple concurrent sessions exist), this function specifically +# searches for a loop with methodology-analysis-state.md present. +# This ensures spawned agents (which have different session_ids) always bind to +# the correct originating loop during methodology analysis. 
+# +# Args: +# $1 - loop_base_dir: path to .humanize/rlcr +# +# Outputs the directory path to stdout, or empty string if none found +find_methodology_analysis_loop() { + local loop_base_dir="$1" + + if [[ ! -d "$loop_base_dir" ]]; then + echo "" + return + fi + + local dir + while IFS= read -r dir; do + [[ -z "$dir" ]] && continue + local trimmed_dir="${dir%/}" + if [[ -f "$trimmed_dir/methodology-analysis-state.md" ]]; then + echo "$trimmed_dir" + return + fi + done < <(ls -1d "$loop_base_dir"/*/ 2>/dev/null | sort -r) + + echo "" +} + # Extract current round number from state.md # Outputs the round number to stdout, defaults to 0 # Note: For full state parsing, use parse_state_file() instead diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index f9477520..62323f46 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -73,8 +73,11 @@ ACTIVE_PR_LOOP_DIR=$(find_active_pr_loop "$PR_LOOP_BASE_DIR") # Uses unfiltered search to also apply to spawned agents with different session_id. _MA_BASH_DIR="$ACTIVE_LOOP_DIR" -if [[ -z "$_MA_BASH_DIR" ]]; then - _MA_BASH_DIR=$(find_active_loop "$LOOP_BASE_DIR" "") +if [[ -z "$_MA_BASH_DIR" ]] || [[ ! -f "$_MA_BASH_DIR/methodology-analysis-state.md" ]]; then + # Spawned agents have a different session_id, so session-filtered search may + # miss the originating loop. Use targeted search that scans ALL loops for + # methodology-analysis-state.md to avoid binding to a wrong concurrent session. + _MA_BASH_DIR=$(find_methodology_analysis_loop "$LOOP_BASE_DIR") fi if [[ -n "$_MA_BASH_DIR" ]] && [[ -f "$_MA_BASH_DIR/methodology-analysis-state.md" ]]; then @@ -111,6 +114,34 @@ In-place file editing is not allowed during the methodology analysis phase." >&2 Running interpreters is not allowed during the methodology analysis phase." >&2 exit 2 fi + # Block shell script entry points (bash script.sh, sh script.sh, source, .) 
+ if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])(/usr/bin/env[[:space:]]+)?(bash|sh|zsh|/bin/bash|/bin/sh|/bin/zsh)[[:space:]]'; then + echo "# Bash Blocked During Methodology Analysis + +Running shell scripts is not allowed during the methodology analysis phase." >&2 + exit 2 + fi + # Block build tools that execute arbitrary commands + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])(make|cmake|ninja|gradle|mvn|ant|cargo|go[[:space:]]+run|go[[:space:]]+generate|npm[[:space:]]+run|yarn[[:space:]]+run|npx|pnpm)[[:space:]]'; then + echo "# Bash Blocked During Methodology Analysis + +Build tools are not allowed during the methodology analysis phase." >&2 + exit 2 + fi + # Block source/dot commands (source script.sh, . script.sh) + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])(source|\.)[ ]+[^[:space:]]'; then + echo "# Bash Blocked During Methodology Analysis + +Sourcing scripts is not allowed during the methodology analysis phase." >&2 + exit 2 + fi + # Block direct script execution (./script.sh, ../script.sh, /path/to/script) + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])\.{0,2}/[^[:space:]>|&;]*\.(sh|bash|py|rb|pl|js)'; then + echo "# Bash Blocked During Methodology Analysis + +Direct script execution is not allowed during the methodology analysis phase." >&2 + exit 2 + fi # Block output redirection to files (catches cat > file, echo > file, etc.) 
# Strip safe redirections (/dev/ paths, fd duplication) then check for remaining > _ma_stripped=$(echo "$COMMAND_LOWER" | sed 's|[0-9]*>[>]*[[:space:]]*/dev/[^[:space:]]*||g; s|[0-9]*>&[0-9]*||g') diff --git a/hooks/loop-edit-validator.sh b/hooks/loop-edit-validator.sh index 685b44a8..89604d37 100755 --- a/hooks/loop-edit-validator.sh +++ b/hooks/loop-edit-validator.sh @@ -91,9 +91,10 @@ LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" _MA_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" # Spawned agents (e.g., Opus analysis agent) have a different session_id. -# Try unfiltered search to detect methodology analysis phase for them. -if [[ -z "$_MA_LOOP_DIR" ]]; then - _MA_LOOP_DIR=$(find_active_loop "$LOOP_BASE_DIR" "") +# Use targeted search that scans ALL loops for methodology-analysis-state.md +# to avoid binding to a wrong concurrent session. +if [[ -z "$_MA_LOOP_DIR" ]] || [[ ! -f "$_MA_LOOP_DIR/methodology-analysis-state.md" ]]; then + _MA_LOOP_DIR=$(find_methodology_analysis_loop "$LOOP_BASE_DIR") fi if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.md" ]]; then diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index 4bbf55e0..2229ff94 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -80,10 +80,11 @@ LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" ACTIVE_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" # Spawned agents (e.g., Opus analysis agent) have a different session_id. -# Try unfiltered search to detect methodology analysis phase for them. +# Use targeted search that scans ALL loops for methodology-analysis-state.md +# to avoid binding to a wrong concurrent session. _MA_CHECK_DIR="$ACTIVE_LOOP_DIR" -if [[ -z "$_MA_CHECK_DIR" ]]; then - _MA_CHECK_DIR=$(find_active_loop "$LOOP_BASE_DIR" "") +if [[ -z "$_MA_CHECK_DIR" ]] || [[ ! 
-f "$_MA_CHECK_DIR/methodology-analysis-state.md" ]]; then + _MA_CHECK_DIR=$(find_methodology_analysis_loop "$LOOP_BASE_DIR") fi if [[ -n "$_MA_CHECK_DIR" ]]; then diff --git a/hooks/loop-write-validator.sh b/hooks/loop-write-validator.sh index 8fda456c..2831f428 100755 --- a/hooks/loop-write-validator.sh +++ b/hooks/loop-write-validator.sh @@ -108,12 +108,10 @@ LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" _MA_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" # Spawned agents (e.g., Opus analysis agent) have a different session_id. -# Try unfiltered search to detect methodology analysis phase for them. -# Note: This may briefly affect concurrent sessions in the same repo, but -# methodology analysis is short-lived and this ensures spawned agents -# cannot bypass the write freeze after Codex has signed off. -if [[ -z "$_MA_LOOP_DIR" ]]; then - _MA_LOOP_DIR=$(find_active_loop "$LOOP_BASE_DIR" "") +# Use targeted search that scans ALL loops for methodology-analysis-state.md +# to avoid binding to a wrong concurrent session. +if [[ -z "$_MA_LOOP_DIR" ]] || [[ ! -f "$_MA_LOOP_DIR/methodology-analysis-state.md" ]]; then + _MA_LOOP_DIR=$(find_methodology_analysis_loop "$LOOP_BASE_DIR") fi if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.md" ]]; then From f2a12c6947ec187d43ae67f9303cbea1dac5a3ab Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 22:13:58 -0700 Subject: [PATCH 10/50] Allow cancel script during methodology analysis, remove gh issue early exit Remove the gh issue early exit that short-circuited all subsequent methodology analysis checks. Commands like 'gh issue create; rm file' were bypassing blockers. Now all commands go through the full blocklist; pure gh issue commands pass naturally since they match no blocker. Add cancel-rlcr-loop.sh to the allowlist so the cancel command works during the methodology analysis phase. 
Document concurrent methodology analysis limitation in find_methodology_analysis_loop. --- hooks/lib/loop-common.sh | 6 ++++++ hooks/loop-bash-validator.sh | 5 ++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index 8b465f5d..6405c4b9 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -339,6 +339,12 @@ find_active_loop() { # This ensures spawned agents (which have different session_ids) always bind to # the correct originating loop during methodology analysis. # +# Limitation: If two loops are simultaneously in methodology analysis, this +# returns the newest one. The older session's spawned agents would bind to the +# wrong loop. This is accepted because concurrent methodology analyses are +# extremely unlikely (the phase is short-lived and requires two active RLCR +# sessions to overlap at this specific point). +# # Args: # $1 - loop_base_dir: path to .humanize/rlcr # diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index 62323f46..3f97c775 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -81,9 +81,8 @@ if [[ -z "$_MA_BASH_DIR" ]] || [[ ! -f "$_MA_BASH_DIR/methodology-analysis-state fi if [[ -n "$_MA_BASH_DIR" ]] && [[ -f "$_MA_BASH_DIR/methodology-analysis-state.md" ]]; then - # Allow only gh issue commands (for optional feedback issue creation) - # Block other gh subcommands (pr checkout, repo clone, api, etc.) 
- if [[ "$COMMAND_LOWER" =~ ^[[:space:]]*gh[[:space:]]+issue[[:space:]] ]]; then + # Allow cancel-rlcr-loop.sh (user must be able to cancel during this phase) + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:]])([^[:space:]]*/)?cancel-rlcr-loop\.sh'; then exit 0 fi # Block git commands that modify the working tree From 7b91c4701b0bf685e059dab9aaec28228db4e1f7 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 22:20:17 -0700 Subject: [PATCH 11/50] Scope methodology fallback to spawned agents only, add missing git subcommands Only fall back to find_methodology_analysis_loop when NO session-matched loop was found (spawned agent case). If a session has its own active loop, do NOT search for another session's methodology analysis state -- that would apply restrictions to an unrelated concurrent session. Add git restore, clean, rm, mv to the methodology analysis git command blocklist to prevent working tree modifications after Codex signoff. --- hooks/loop-bash-validator.sh | 10 +++++----- hooks/loop-edit-validator.sh | 8 ++++---- hooks/loop-read-validator.sh | 8 ++++---- hooks/loop-write-validator.sh | 8 ++++---- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index 3f97c775..28c30567 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -73,10 +73,10 @@ ACTIVE_PR_LOOP_DIR=$(find_active_pr_loop "$PR_LOOP_BASE_DIR") # Uses unfiltered search to also apply to spawned agents with different session_id. _MA_BASH_DIR="$ACTIVE_LOOP_DIR" -if [[ -z "$_MA_BASH_DIR" ]] || [[ ! -f "$_MA_BASH_DIR/methodology-analysis-state.md" ]]; then - # Spawned agents have a different session_id, so session-filtered search may - # miss the originating loop. Use targeted search that scans ALL loops for - # methodology-analysis-state.md to avoid binding to a wrong concurrent session. 
+if [[ -z "$_MA_BASH_DIR" ]]; then + # Only fall back when NO session-matched loop was found (spawned agent case). + # If the session has its own active loop, do NOT search for another session's + # methodology analysis -- that would incorrectly restrict the current session. _MA_BASH_DIR=$(find_methodology_analysis_loop "$LOOP_BASE_DIR") fi @@ -86,7 +86,7 @@ if [[ -n "$_MA_BASH_DIR" ]] && [[ -f "$_MA_BASH_DIR/methodology-analysis-state.m exit 0 fi # Block git commands that modify the working tree - if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])git[[:space:]]+(commit|add|reset|checkout|merge|rebase|cherry-pick|am|apply|stash|push)'; then + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])git[[:space:]]+(commit|add|reset|checkout|merge|rebase|cherry-pick|am|apply|stash|push|restore|clean|rm|mv)'; then echo "# Bash Blocked During Methodology Analysis Git write commands are not allowed during the methodology analysis phase." >&2 diff --git a/hooks/loop-edit-validator.sh b/hooks/loop-edit-validator.sh index 89604d37..5f6bed92 100755 --- a/hooks/loop-edit-validator.sh +++ b/hooks/loop-edit-validator.sh @@ -90,10 +90,10 @@ PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" _MA_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" -# Spawned agents (e.g., Opus analysis agent) have a different session_id. -# Use targeted search that scans ALL loops for methodology-analysis-state.md -# to avoid binding to a wrong concurrent session. -if [[ -z "$_MA_LOOP_DIR" ]] || [[ ! -f "$_MA_LOOP_DIR/methodology-analysis-state.md" ]]; then +# Only fall back when NO session-matched loop was found (spawned agent case). +# If the session has its own active loop, do NOT search for another session's +# methodology analysis -- that would incorrectly restrict the current session. 
+if [[ -z "$_MA_LOOP_DIR" ]]; then _MA_LOOP_DIR=$(find_methodology_analysis_loop "$LOOP_BASE_DIR") fi diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index 2229ff94..6d9c6c38 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -79,11 +79,11 @@ PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" ACTIVE_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" -# Spawned agents (e.g., Opus analysis agent) have a different session_id. -# Use targeted search that scans ALL loops for methodology-analysis-state.md -# to avoid binding to a wrong concurrent session. +# Only fall back when NO session-matched loop was found (spawned agent case). +# If the session has its own active loop, do NOT search for another session's +# methodology analysis -- that would incorrectly restrict the current session. _MA_CHECK_DIR="$ACTIVE_LOOP_DIR" -if [[ -z "$_MA_CHECK_DIR" ]] || [[ ! -f "$_MA_CHECK_DIR/methodology-analysis-state.md" ]]; then +if [[ -z "$_MA_CHECK_DIR" ]]; then _MA_CHECK_DIR=$(find_methodology_analysis_loop "$LOOP_BASE_DIR") fi diff --git a/hooks/loop-write-validator.sh b/hooks/loop-write-validator.sh index 2831f428..0239a9e3 100755 --- a/hooks/loop-write-validator.sh +++ b/hooks/loop-write-validator.sh @@ -107,10 +107,10 @@ PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" _MA_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" -# Spawned agents (e.g., Opus analysis agent) have a different session_id. -# Use targeted search that scans ALL loops for methodology-analysis-state.md -# to avoid binding to a wrong concurrent session. -if [[ -z "$_MA_LOOP_DIR" ]] || [[ ! -f "$_MA_LOOP_DIR/methodology-analysis-state.md" ]]; then +# Only fall back when NO session-matched loop was found (spawned agent case). 
+# If the session has its own active loop, do NOT search for another session's +# methodology analysis -- that would incorrectly restrict the current session. +if [[ -z "$_MA_LOOP_DIR" ]]; then _MA_LOOP_DIR=$(find_methodology_analysis_loop "$LOOP_BASE_DIR") fi From 3c8533258a519b03a1d944afc94067678069a8f6 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 22:26:17 -0700 Subject: [PATCH 12/50] Remove cross-session methodology fallback, add realpath raw path fallback Remove unfiltered find_methodology_analysis_loop fallback from all validators. The fallback incorrectly applied methodology analysis restrictions to unrelated sessions opened in the same repo. Now only the originating session (matched by session_id) gets restricted. Spawned agents rely on their prompt for guidance. Add raw path fallback when realpath is unavailable (older macOS/BSD) to prevent deadlock where the originating session cannot write completion artifacts. --- hooks/loop-bash-validator.sh | 9 +++------ hooks/loop-edit-validator.sh | 16 +++++++--------- hooks/loop-read-validator.sh | 18 ++++++++---------- hooks/loop-write-validator.sh | 16 +++++++--------- 4 files changed, 25 insertions(+), 34 deletions(-) diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index 28c30567..282e6c32 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -72,13 +72,10 @@ ACTIVE_PR_LOOP_DIR=$(find_active_pr_loop "$PR_LOOP_BASE_DIR") # This prevents source code modifications after Codex has signed off. # Uses unfiltered search to also apply to spawned agents with different session_id. +# Use only the session-matched loop. Do NOT fall back to an unfiltered search, +# as that would incorrectly restrict unrelated sessions opened in the same repo. +# Spawned agents (with different session_ids) are guided by their prompt instead. 
_MA_BASH_DIR="$ACTIVE_LOOP_DIR" -if [[ -z "$_MA_BASH_DIR" ]]; then - # Only fall back when NO session-matched loop was found (spawned agent case). - # If the session has its own active loop, do NOT search for another session's - # methodology analysis -- that would incorrectly restrict the current session. - _MA_BASH_DIR=$(find_methodology_analysis_loop "$LOOP_BASE_DIR") -fi if [[ -n "$_MA_BASH_DIR" ]] && [[ -f "$_MA_BASH_DIR/methodology-analysis-state.md" ]]; then # Allow cancel-rlcr-loop.sh (user must be able to cancel during this phase) diff --git a/hooks/loop-edit-validator.sh b/hooks/loop-edit-validator.sh index 5f6bed92..3b5becde 100755 --- a/hooks/loop-edit-validator.sh +++ b/hooks/loop-edit-validator.sh @@ -88,15 +88,11 @@ fi PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" +# Use only the session-matched loop. Do NOT fall back to an unfiltered search, +# as that would incorrectly restrict unrelated sessions opened in the same repo. +# Spawned agents (with different session_ids) are guided by their prompt instead. _MA_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" -# Only fall back when NO session-matched loop was found (spawned agent case). -# If the session has its own active loop, do NOT search for another session's -# methodology analysis -- that would incorrectly restrict the current session. 
-if [[ -z "$_MA_LOOP_DIR" ]]; then - _MA_LOOP_DIR=$(find_methodology_analysis_loop "$LOOP_BASE_DIR") -fi - if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.md" ]]; then # If realpath fails (file doesn't exist yet on BSD/macOS), resolve parent dir _ma_real_path=$(realpath "$FILE_PATH" 2>/dev/null || echo "") @@ -105,8 +101,10 @@ if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.m [[ -n "$_ma_parent" ]] && _ma_real_path="$_ma_parent/$(basename "$FILE_PATH")" fi _ma_real_loop=$(realpath "$_MA_LOOP_DIR" 2>/dev/null || echo "") - if [[ -n "$_ma_real_path" ]] && [[ -n "$_ma_real_loop" ]] && \ - [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then + # Fallback to raw paths when realpath is unavailable (older macOS/BSD) + [[ -z "$_ma_real_path" ]] && _ma_real_path="$FILE_PATH" + [[ -z "$_ma_real_loop" ]] && _ma_real_loop="$_MA_LOOP_DIR" + if [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then _ma_basename=$(basename "$_ma_real_path") case "$_ma_basename" in methodology-analysis-report.md|methodology-analysis-done.md) diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index 6d9c6c38..bddb4776 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -77,15 +77,11 @@ fi PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" +# Use only the session-matched loop. Do NOT fall back to an unfiltered search, +# as that would incorrectly restrict unrelated sessions opened in the same repo. +# Spawned agents (with different session_ids) are guided by their prompt instead. ACTIVE_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" - -# Only fall back when NO session-matched loop was found (spawned agent case). -# If the session has its own active loop, do NOT search for another session's -# methodology analysis -- that would incorrectly restrict the current session. 
_MA_CHECK_DIR="$ACTIVE_LOOP_DIR" -if [[ -z "$_MA_CHECK_DIR" ]]; then - _MA_CHECK_DIR=$(find_methodology_analysis_loop "$LOOP_BASE_DIR") -fi if [[ -n "$_MA_CHECK_DIR" ]]; then _MA_STATE=$(resolve_active_state_file "$_MA_CHECK_DIR") @@ -98,8 +94,10 @@ if [[ -n "$_MA_CHECK_DIR" ]]; then [[ -n "$_ma_parent" ]] && _ma_real_path="$_ma_parent/$(basename "$FILE_PATH")" fi _ma_real_loop=$(realpath "$_MA_CHECK_DIR" 2>/dev/null || echo "") - if [[ -n "$_ma_real_path" ]] && [[ -n "$_ma_real_loop" ]] && \ - [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then + # Fallback to raw paths when realpath is unavailable (older macOS/BSD) + [[ -z "$_ma_real_path" ]] && _ma_real_path="$FILE_PATH" + [[ -z "$_ma_real_loop" ]] && _ma_real_loop="$_MA_CHECK_DIR" + if [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then _ma_basename=$(basename "$_ma_real_path") # Allowlist: only files the analysis agent needs # - round-*-summary.md: development record summaries @@ -122,7 +120,7 @@ Allowed: round-*-summary.md, round-*-review-result.md, methodology-analysis-*.md fi # Files within the project root are blocked (project-specific information) # Files outside the project root are allowed (system files, config, etc.) - _ma_project_real=$(realpath "$PROJECT_ROOT" 2>/dev/null || echo "") + _ma_project_real=$(realpath "$PROJECT_ROOT" 2>/dev/null || echo "$PROJECT_ROOT") if [[ -n "$_ma_project_real" ]]; then _ma_path_check="${_ma_real_path:-$FILE_PATH}" if [[ "$_ma_path_check" == "$_ma_project_real/"* ]] || \ diff --git a/hooks/loop-write-validator.sh b/hooks/loop-write-validator.sh index 0239a9e3..5b116391 100755 --- a/hooks/loop-write-validator.sh +++ b/hooks/loop-write-validator.sh @@ -105,15 +105,11 @@ fi PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" +# Use only the session-matched loop. Do NOT fall back to an unfiltered search, +# as that would incorrectly restrict unrelated sessions opened in the same repo. 
+# Spawned agents (with different session_ids) are guided by their prompt instead. _MA_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" -# Only fall back when NO session-matched loop was found (spawned agent case). -# If the session has its own active loop, do NOT search for another session's -# methodology analysis -- that would incorrectly restrict the current session. -if [[ -z "$_MA_LOOP_DIR" ]]; then - _MA_LOOP_DIR=$(find_methodology_analysis_loop "$LOOP_BASE_DIR") -fi - if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.md" ]]; then # If realpath fails (file doesn't exist yet on BSD/macOS), resolve parent dir _ma_real_path=$(realpath "$FILE_PATH" 2>/dev/null || echo "") @@ -122,8 +118,10 @@ if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.m [[ -n "$_ma_parent" ]] && _ma_real_path="$_ma_parent/$(basename "$FILE_PATH")" fi _ma_real_loop=$(realpath "$_MA_LOOP_DIR" 2>/dev/null || echo "") - if [[ -n "$_ma_real_path" ]] && [[ -n "$_ma_real_loop" ]] && \ - [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then + # Fallback to raw paths when realpath is unavailable (older macOS/BSD) + [[ -z "$_ma_real_path" ]] && _ma_real_path="$FILE_PATH" + [[ -z "$_ma_real_loop" ]] && _ma_real_loop="$_MA_LOOP_DIR" + if [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then _ma_basename=$(basename "$_ma_real_path") case "$_ma_basename" in methodology-analysis-report.md|methodology-analysis-done.md) From 5c123c2e1208b56c21532bd492e481ee964ee7a5 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 22:33:06 -0700 Subject: [PATCH 13/50] Expand methodology bash blocklist, require non-empty report, fix monitor status Add git switch/pull/clone/submodule/worktree and mkdir/rmdir/ln/mktemp to the methodology analysis bash command blocklist. Require methodology-analysis-report.md to have content (not just exist) before allowing the methodology analysis phase to complete. 
Add methodology-analysis-state.md to statusline state file resolution and show "Analyzing" status. Treat methodology-analysis as active phase in monitor color. --- hooks/lib/methodology-analysis.sh | 9 ++++++++- hooks/loop-bash-validator.sh | 6 +++--- scripts/lib/monitor-common.sh | 2 +- scripts/statusline.sh | 8 ++++++-- 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/hooks/lib/methodology-analysis.sh b/hooks/lib/methodology-analysis.sh index adbde197..a7743410 100644 --- a/hooks/lib/methodology-analysis.sh +++ b/hooks/lib/methodology-analysis.sh @@ -126,11 +126,18 @@ complete_methodology_analysis() { return 1 fi - # Require the analysis report to exist (ensures the Opus agent actually ran) + # Require the analysis report to exist with content (ensures the Opus agent + # actually produced an analysis, not just an empty/truncated file) if [[ ! -f "$report_file" ]]; then echo "Warning: methodology-analysis-report.md missing, blocking completion" >&2 return 1 fi + local report_content + report_content=$(cat "$report_file" 2>/dev/null || echo "") + if [[ -z "$report_content" ]]; then + echo "Warning: methodology-analysis-report.md is empty, blocking completion" >&2 + return 1 + fi # Read exit reason (fail closed: missing marker blocks completion) if [[ ! 
-f "$LOOP_DIR/.methodology-exit-reason" ]]; then diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index 282e6c32..1f95d18c 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -83,14 +83,14 @@ if [[ -n "$_MA_BASH_DIR" ]] && [[ -f "$_MA_BASH_DIR/methodology-analysis-state.m exit 0 fi # Block git commands that modify the working tree - if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])git[[:space:]]+(commit|add|reset|checkout|merge|rebase|cherry-pick|am|apply|stash|push|restore|clean|rm|mv)'; then + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])git[[:space:]]+(commit|add|reset|checkout|merge|rebase|cherry-pick|am|apply|stash|push|restore|clean|rm|mv|switch|pull|clone|submodule|worktree)'; then echo "# Bash Blocked During Methodology Analysis Git write commands are not allowed during the methodology analysis phase." >&2 exit 2 fi - # Block file manipulation commands (touch, mv, cp, rm, etc.) - if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])(tee|install|touch|mv|cp|rm|dd|truncate|chmod|chown)[[:space:]]'; then + # Block file manipulation commands (touch, mv, cp, rm, mkdir, ln, etc.) + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&])(tee|install|touch|mv|cp|rm|dd|truncate|chmod|chown|mkdir|rmdir|ln|mktemp)[[:space:]]'; then echo "# Bash Blocked During Methodology Analysis File modification commands are not allowed during the methodology analysis phase." 
>&2 diff --git a/scripts/lib/monitor-common.sh b/scripts/lib/monitor-common.sh index 405b25fd..5eb606d5 100644 --- a/scripts/lib/monitor-common.sh +++ b/scripts/lib/monitor-common.sh @@ -130,7 +130,7 @@ monitor_restore_terminal() { monitor_get_status_color() { local status="$1" case "$status" in - active) echo "\033[1;32m" ;; # green + active|methodology-analysis) echo "\033[1;32m" ;; # green completed) echo "\033[1;36m" ;; # cyan failed|error|timeout) echo "\033[1;31m" ;; # red cancelled) echo "\033[1;33m" ;; # yellow diff --git a/scripts/statusline.sh b/scripts/statusline.sh index b6430052..dafc0bdf 100755 --- a/scripts/statusline.sh +++ b/scripts/statusline.sh @@ -37,7 +37,9 @@ format_duration() { _resolve_rlcr_display() { local session_dir="$1" - if [[ -f "$session_dir/finalize-state.md" ]]; then + if [[ -f "$session_dir/methodology-analysis-state.md" ]]; then + echo "Analyzing" + elif [[ -f "$session_dir/finalize-state.md" ]]; then echo "Finalizing" elif [[ -f "$session_dir/state.md" ]]; then echo "Active" @@ -95,7 +97,9 @@ get_rlcr_status() { [[ -z "$dir" ]] && continue local trimmed="${dir%/}" local any_state="" - if [[ -f "$trimmed/finalize-state.md" ]]; then + if [[ -f "$trimmed/methodology-analysis-state.md" ]]; then + any_state="$trimmed/methodology-analysis-state.md" + elif [[ -f "$trimmed/finalize-state.md" ]]; then any_state="$trimmed/finalize-state.md" elif [[ -f "$trimmed/state.md" ]]; then any_state="$trimmed/state.md" From 03ac7d0e72c469ac0f82e7b28cdc7da4fb27ce9d Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 22:36:56 -0700 Subject: [PATCH 14/50] Tighten cancel allowlist, document spawned agent limitation, remove dead code Require cancel-rlcr-loop.sh to be a standalone command (no shell operators) to prevent chained commands from bypassing methodology analysis restrictions. Document that spawned agents are not restricted by hooks due to session_id mismatch -- their sanitization is enforced by the analysis prompt. 
This is an inherent limitation of the hook architecture. Remove unused find_methodology_analysis_loop function. --- hooks/lib/loop-common.sh | 37 ----------------------------------- hooks/loop-bash-validator.sh | 14 ++++++++----- hooks/loop-edit-validator.sh | 3 ++- hooks/loop-read-validator.sh | 3 ++- hooks/loop-write-validator.sh | 3 ++- 5 files changed, 15 insertions(+), 45 deletions(-) diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index 6405c4b9..21813aca 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -332,43 +332,6 @@ find_active_loop() { echo "" } -# Find any active loop directory currently in methodology analysis phase. -# Unlike find_active_loop() which returns the newest active loop (possibly the -# wrong one when multiple concurrent sessions exist), this function specifically -# searches for a loop with methodology-analysis-state.md present. -# This ensures spawned agents (which have different session_ids) always bind to -# the correct originating loop during methodology analysis. -# -# Limitation: If two loops are simultaneously in methodology analysis, this -# returns the newest one. The older session's spawned agents would bind to the -# wrong loop. This is accepted because concurrent methodology analyses are -# extremely unlikely (the phase is short-lived and requires two active RLCR -# sessions to overlap at this specific point). -# -# Args: -# $1 - loop_base_dir: path to .humanize/rlcr -# -# Outputs the directory path to stdout, or empty string if none found -find_methodology_analysis_loop() { - local loop_base_dir="$1" - - if [[ ! 
-d "$loop_base_dir" ]]; then - echo "" - return - fi - - local dir - while IFS= read -r dir; do - [[ -z "$dir" ]] && continue - local trimmed_dir="${dir%/}" - if [[ -f "$trimmed_dir/methodology-analysis-state.md" ]]; then - echo "$trimmed_dir" - return - fi - done < <(ls -1d "$loop_base_dir"/*/ 2>/dev/null | sort -r) - - echo "" -} # Extract current round number from state.md # Outputs the round number to stdout, defaults to 0 diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index 1f95d18c..0192bf62 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -68,18 +68,22 @@ ACTIVE_PR_LOOP_DIR=$(find_active_pr_loop "$PR_LOOP_BASE_DIR") # Methodology Analysis Phase Bash Restriction # ======================================== # During methodology analysis, block file-modifying bash commands. -# Only gh commands and read-only operations are allowed. +# Only read-only operations and cancel-rlcr-loop.sh are allowed. # This prevents source code modifications after Codex has signed off. -# Uses unfiltered search to also apply to spawned agents with different session_id. - +# # Use only the session-matched loop. Do NOT fall back to an unfiltered search, # as that would incorrectly restrict unrelated sessions opened in the same repo. -# Spawned agents (with different session_ids) are guided by their prompt instead. +# Limitation: Spawned agents (different session_id) are not restricted by hooks; +# their sanitization is enforced by the analysis prompt. This is an inherent +# limitation of the hook architecture which cannot distinguish spawned agents +# from unrelated sessions. 
_MA_BASH_DIR="$ACTIVE_LOOP_DIR" if [[ -n "$_MA_BASH_DIR" ]] && [[ -f "$_MA_BASH_DIR/methodology-analysis-state.md" ]]; then # Allow cancel-rlcr-loop.sh (user must be able to cancel during this phase) - if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:]])([^[:space:]]*/)?cancel-rlcr-loop\.sh'; then + # Only allow standalone invocation -- reject if chained with shell operators + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:]])([^[:space:]]*/)?cancel-rlcr-loop\.sh' && \ + ! echo "$COMMAND_LOWER" | grep -qE '[;|&]'; then exit 0 fi # Block git commands that modify the working tree diff --git a/hooks/loop-edit-validator.sh b/hooks/loop-edit-validator.sh index 3b5becde..de5ac333 100755 --- a/hooks/loop-edit-validator.sh +++ b/hooks/loop-edit-validator.sh @@ -90,7 +90,8 @@ PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" # Use only the session-matched loop. Do NOT fall back to an unfiltered search, # as that would incorrectly restrict unrelated sessions opened in the same repo. -# Spawned agents (with different session_ids) are guided by their prompt instead. +# Limitation: Spawned agents (different session_id) are not restricted by hooks; +# their sanitization is enforced by the analysis prompt. _MA_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.md" ]]; then diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index bddb4776..ee33ac1d 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -79,7 +79,8 @@ PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" # Use only the session-matched loop. Do NOT fall back to an unfiltered search, # as that would incorrectly restrict unrelated sessions opened in the same repo. 
-# Spawned agents (with different session_ids) are guided by their prompt instead. +# Limitation: Spawned agents (different session_id) are not restricted by hooks; +# their sanitization is enforced by the analysis prompt. ACTIVE_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" _MA_CHECK_DIR="$ACTIVE_LOOP_DIR" diff --git a/hooks/loop-write-validator.sh b/hooks/loop-write-validator.sh index 5b116391..45cb41c5 100755 --- a/hooks/loop-write-validator.sh +++ b/hooks/loop-write-validator.sh @@ -107,7 +107,8 @@ PROJECT_ROOT="${PROJECT_ROOT:-${CLAUDE_PROJECT_DIR:-$(pwd)}}" LOOP_BASE_DIR="${LOOP_BASE_DIR:-$PROJECT_ROOT/.humanize/rlcr}" # Use only the session-matched loop. Do NOT fall back to an unfiltered search, # as that would incorrectly restrict unrelated sessions opened in the same repo. -# Spawned agents (with different session_ids) are guided by their prompt instead. +# Limitation: Spawned agents (different session_id) are not restricted by hooks; +# their sanitization is enforced by the analysis prompt. _MA_LOOP_DIR="${LOOP_DIR:-$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID")}" if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.md" ]]; then From 5a083299c1a5d9621ff4717299f00e0f362561be Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 22:41:56 -0700 Subject: [PATCH 15/50] Remove raw records from read allowlist, document bash read limitation Remove round-*-summary.md and round-*-review-result.md from the methodology analysis Read allowlist. The originating session should only read the sanitized methodology-analysis-report.md, not raw development records that contain project-specific information. The spawned agent reads raw records directly (not restricted by hooks due to different session_id). Document that read-only bash commands are intentionally not blocked during methodology analysis -- blocking them would break basic operations. 
The analysis prompt is the primary enforcement mechanism for sanitization. --- hooks/loop-bash-validator.sh | 13 +++++++++---- hooks/loop-read-validator.sh | 15 ++++++++------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index 0192bf62..a69ce625 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -71,12 +71,17 @@ ACTIVE_PR_LOOP_DIR=$(find_active_pr_loop "$PR_LOOP_BASE_DIR") # Only read-only operations and cancel-rlcr-loop.sh are allowed. # This prevents source code modifications after Codex has signed off. # +# Accepted limitations: +# - Read-only bash commands (cat, grep, find, etc.) are NOT blocked. Blocking +# them would break basic Claude operations. The analysis prompt directs Claude +# to derive user-facing content only from methodology-analysis-report.md. +# - Spawned agents (different session_id) are not restricted by hooks; their +# sanitization is enforced by the analysis prompt. This is an inherent +# limitation of the hook architecture which cannot distinguish spawned agents +# from unrelated sessions. +# # Use only the session-matched loop. Do NOT fall back to an unfiltered search, # as that would incorrectly restrict unrelated sessions opened in the same repo. -# Limitation: Spawned agents (different session_id) are not restricted by hooks; -# their sanitization is enforced by the analysis prompt. This is an inherent -# limitation of the hook architecture which cannot distinguish spawned agents -# from unrelated sessions. 
_MA_BASH_DIR="$ACTIVE_LOOP_DIR" if [[ -n "$_MA_BASH_DIR" ]] && [[ -f "$_MA_BASH_DIR/methodology-analysis-state.md" ]]; then diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index ee33ac1d..4c8a7f21 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -100,14 +100,15 @@ if [[ -n "$_MA_CHECK_DIR" ]]; then [[ -z "$_ma_real_loop" ]] && _ma_real_loop="$_MA_CHECK_DIR" if [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then _ma_basename=$(basename "$_ma_real_path") - # Allowlist: only files the analysis agent needs - # - round-*-summary.md: development record summaries - # - round-*-review-result.md: review feedback - # - methodology-analysis-report.md: the agent's own output - # - methodology-analysis-done.md: completion marker - # - methodology-analysis-state.md: state file (for parsing) + # Allowlist: only methodology artifacts (not raw development records). + # Raw records (round-*-summary.md, round-*-review-result.md) are + # intentionally excluded so the originating session cannot read + # project-specific content and must rely solely on the sanitized + # methodology-analysis-report.md for all user-facing output. + # The spawned Opus agent reads raw records directly (not restricted + # by hooks due to different session_id -- see limitation comment above). case "$_ma_basename" in - round-*-summary.md|round-*-review-result.md|methodology-analysis-report.md|methodology-analysis-done.md|methodology-analysis-state.md) + methodology-analysis-report.md|methodology-analysis-done.md|methodology-analysis-state.md) exit 0 ;; *) From 2cba950a862e496c64b68ac9e735519acd8fcb0e Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 22:47:07 -0700 Subject: [PATCH 16/50] Fix stale error messages in read validator methodology analysis block Update blocked-read messages to match actual allowlist (methodology-analysis-* files only). 
Previous messages incorrectly listed round-*-summary.md and round-*-review-result.md which were removed from the allowlist. --- hooks/loop-read-validator.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index 4c8a7f21..110b66a1 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -114,8 +114,8 @@ if [[ -n "$_MA_CHECK_DIR" ]]; then *) echo "# Read Blocked During Methodology Analysis -Only analysis artifacts can be read from the loop directory during this phase. -Allowed: round-*-summary.md, round-*-review-result.md, methodology-analysis-*.md" >&2 +Only methodology artifacts can be read from the loop directory during this phase. +Allowed: methodology-analysis-report.md, methodology-analysis-done.md, methodology-analysis-state.md" >&2 exit 2 ;; esac @@ -130,8 +130,8 @@ Allowed: round-*-summary.md, round-*-review-result.md, methodology-analysis-*.md echo "# Read Blocked During Methodology Analysis Reading project files is not allowed during the methodology analysis phase. -Only analysis artifacts within the loop directory can be read. -Allowed: round-*-summary.md, round-*-review-result.md, methodology-analysis-*.md" >&2 +Only methodology artifacts within the loop directory can be read. +Allowed: methodology-analysis-report.md, methodology-analysis-done.md, methodology-analysis-state.md" >&2 exit 2 fi fi From 3eed7d69e17e683530ae178663acebf6466b9efd Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 22:53:48 -0700 Subject: [PATCH 17/50] Add missing template for methodology analysis state file block message The template was referenced in loop-common.sh but never created, causing the CI template-references test to fail. 
--- .../methodology-analysis-state-file-modification.md | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 prompt-template/block/methodology-analysis-state-file-modification.md diff --git a/prompt-template/block/methodology-analysis-state-file-modification.md b/prompt-template/block/methodology-analysis-state-file-modification.md new file mode 100644 index 00000000..16020c14 --- /dev/null +++ b/prompt-template/block/methodology-analysis-state-file-modification.md @@ -0,0 +1,9 @@ +# Methodology Analysis State File Modification Blocked + +You cannot modify `methodology-analysis-state.md`. This file is managed by the loop system during the Methodology Analysis Phase. + +The Methodology Analysis Phase runs before the loop fully exits. Focus on: +1. Spawning an Opus agent to analyze development records +2. Reviewing the sanitized analysis report +3. Optionally helping the user file a GitHub issue with improvement suggestions +4. Writing your completion marker to `methodology-analysis-done.md` From 13a47fb2260667a272b448e8d3c1a521f2382590 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Thu, 12 Mar 2026 23:02:02 -0700 Subject: [PATCH 18/50] Fix cancel allowlist regex and move methodology handler before git-clean The cancel-rlcr-loop.sh allowlist in the bash validator matched when the script name appeared as an argument to another command (e.g. cp). Anchor the regex to the start of the command string so only direct invocations are allowed. Move the methodology analysis completion handler in the stop hook to run before the git-clean check. Writing methodology artifacts can make the working tree appear dirty when .humanize is tracked, which would block exit before the handler ever ran. 
--- hooks/loop-bash-validator.sh | 6 ++--- hooks/loop-codex-stop-hook.sh | 41 +++++++++++++++++++---------------- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index a69ce625..a991dde5 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -85,9 +85,9 @@ ACTIVE_PR_LOOP_DIR=$(find_active_pr_loop "$PR_LOOP_BASE_DIR") _MA_BASH_DIR="$ACTIVE_LOOP_DIR" if [[ -n "$_MA_BASH_DIR" ]] && [[ -f "$_MA_BASH_DIR/methodology-analysis-state.md" ]]; then - # Allow cancel-rlcr-loop.sh (user must be able to cancel during this phase) - # Only allow standalone invocation -- reject if chained with shell operators - if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:]])([^[:space:]]*/)?cancel-rlcr-loop\.sh' && \ + # Allow cancel-rlcr-loop.sh only as the leading command (not as an argument + # to another command like cp/mv). Reject if chained with shell operators. + if echo "$COMMAND_LOWER" | grep -qE '^[[:space:]]*("?[^"]*/?)?cancel-rlcr-loop\.sh' && \ ! echo "$COMMAND_LOWER" | grep -qE '[;|&]'; then exit 0 fi diff --git a/hooks/loop-codex-stop-hook.sh b/hooks/loop-codex-stop-hook.sh index 9339713d..191d2381 100755 --- a/hooks/loop-codex-stop-hook.sh +++ b/hooks/loop-codex-stop-hook.sh @@ -582,6 +582,28 @@ Split these into smaller modules before continuing." fi fi +# ======================================== +# Methodology Analysis Phase Completion Handler +# ======================================== +# When in methodology analysis phase, check if the analysis is done. +# If done, rename state to the original exit reason's terminal state. +# If not done, block and ask Claude to complete the analysis. +# All other checks (summary, bitlesson, goal tracker, max iterations) are skipped. +# IMPORTANT: This MUST run before the git-clean check, because methodology +# artifacts (.humanize/rlcr/...) 
may make the working tree appear dirty +# if .humanize is tracked, which would block exit before reaching this handler. + +if [[ "$IS_METHODOLOGY_ANALYSIS_PHASE" == "true" ]]; then + if complete_methodology_analysis; then + # Analysis complete, allow exit + exit 0 + else + # Analysis not yet complete, block + block_methodology_analysis_incomplete + exit 0 + fi +fi + # ======================================== # Quick Check: Git Clean and Pushed? # ======================================== @@ -682,25 +704,6 @@ Please push before exiting." fi fi -# ======================================== -# Methodology Analysis Phase Completion Handler -# ======================================== -# When in methodology analysis phase, check if the analysis is done. -# If done, rename state to the original exit reason's terminal state. -# If not done, block and ask Claude to complete the analysis. -# All other checks (summary, bitlesson, goal tracker, max iterations) are skipped. - -if [[ "$IS_METHODOLOGY_ANALYSIS_PHASE" == "true" ]]; then - if complete_methodology_analysis; then - # Analysis complete, allow exit - exit 0 - else - # Analysis not yet complete, block - block_methodology_analysis_incomplete - exit 0 - fi -fi - # ======================================== # Check Summary File Exists # ======================================== From 9b961a83365863d39d9c87a33f0ce0004cf0ed46 Mon Sep 17 00:00:00 2001 From: tastynoob <934348725@qq.com> Date: Sun, 15 Mar 2026 14:56:07 +0800 Subject: [PATCH 19/50] Harden RLCR against mainline drift --- hooks/check-todos-from-transcript.py | 29 +- hooks/lib/loop-common.sh | 235 +++++++++++++- hooks/loop-bash-validator.sh | 24 +- hooks/loop-codex-stop-hook.sh | 283 ++++++++++++++-- hooks/loop-edit-validator.sh | 63 +++- hooks/loop-read-validator.sh | 55 +++- hooks/loop-write-validator.sh | 77 +++-- .../block/finalize-contract-access.md | 7 + .../block/goal-tracker-modification.md | 29 +- prompt-template/block/mainline-drift-stop.md | 14 + 
.../block/mainline-verdict-missing.md | 13 + .../block/round-contract-bash-write.md | 7 + .../block/round-contract-missing.md | 13 + .../block/wrong-contract-location.md | 5 + prompt-template/claude/drift-replan-prompt.md | 68 ++++ .../claude/finalize-phase-prompt.md | 7 +- .../claude/finalize-phase-skipped-prompt.md | 7 +- .../claude/goal-tracker-update-request.md | 7 +- prompt-template/claude/next-round-prompt.md | 51 ++- .../claude/post-alignment-action-items.md | 1 + prompt-template/claude/review-phase-prompt.md | 37 ++- .../codex/full-alignment-review.md | 29 +- .../codex/goal-tracker-update-section.md | 13 +- prompt-template/codex/regular-review.md | 21 +- scripts/humanize.sh | 77 ++++- scripts/lib/monitor-common.sh | 49 ++- scripts/setup-rlcr-loop.sh | 304 +++++++++++++++--- .../test-goal-tracker-robustness.sh | 59 ++++ .../robustness/test-hook-system-robustness.sh | 192 ++++++++++- .../test-setup-scripts-robustness.sh | 64 ++++ .../robustness/test-state-file-robustness.sh | 49 +++ tests/test-agent-teams.sh | 57 ++++ tests/test-allowlist-validators.sh | 105 +++++- tests/test-finalize-phase.sh | 209 ++++++++++++ tests/test-plan-file-hooks.sh | 63 +++- tests/test-task-tag-routing.sh | 11 + tests/test-todo-checker.sh | 81 +++++ 37 files changed, 2242 insertions(+), 173 deletions(-) create mode 100644 prompt-template/block/finalize-contract-access.md create mode 100644 prompt-template/block/mainline-drift-stop.md create mode 100644 prompt-template/block/mainline-verdict-missing.md create mode 100644 prompt-template/block/round-contract-bash-write.md create mode 100644 prompt-template/block/round-contract-missing.md create mode 100644 prompt-template/block/wrong-contract-location.md create mode 100644 prompt-template/claude/drift-replan-prompt.md diff --git a/hooks/check-todos-from-transcript.py b/hooks/check-todos-from-transcript.py index af577a5c..31ec6e5e 100755 --- a/hooks/check-todos-from-transcript.py +++ b/hooks/check-todos-from-transcript.py @@ -15,11 
+15,26 @@ echo '{"session_id": "...", "transcript_path": "/path/to/transcript.jsonl"}' | python3 check-todos-from-transcript.py """ import json +import re import sys from pathlib import Path from typing import List, Tuple +LANE_PREFIX_PATTERN = re.compile(r"^\s*\[(mainline|blocking|queued)\](?:\s|$)", re.IGNORECASE) + + +def classify_lane(*parts: str) -> str: + """Infer the task lane from content, defaulting to blocking for safety.""" + for part in parts: + if not part: + continue + match = LANE_PREFIX_PATTERN.match(part) + if match: + return match.group(1).lower() + return "blocking" + + def extract_tool_calls_from_entry(entry: dict) -> List[Tuple[str, dict]]: """ Extract tool calls from a transcript entry. @@ -92,10 +107,14 @@ def find_incomplete_todos_from_transcript(transcript_path: Path) -> List[dict]: status = todo.get("status", "") content = todo.get("content", "") if status != "completed": + lane = classify_lane(content) + if lane == "queued": + continue incomplete.append({ "status": status, "content": content, "source": "todo", + "lane": lane, }) return incomplete @@ -134,11 +153,15 @@ def find_incomplete_tasks_from_directory(session_id: str, tasks_base_dir: str = description = task.get("description", "") task_id = task_file.stem # Filename without .json content = subject or description or f"Task {task_id}" + lane = classify_lane(subject, description) + if lane == "queued": + continue incomplete.append({ "status": status, "content": content, "source": "task", "task_id": task_id, + "lane": lane, }) except (json.JSONDecodeError, OSError): # Skip malformed or unreadable task files @@ -184,11 +207,13 @@ def main(): status = item.get("status", "unknown") content = item.get("content", "") source = item.get("source", "unknown") + lane = item.get("lane", "blocking") + lane_marker = f"[{lane}]" if source == "task": task_id = item.get("task_id", "?") - output_lines.append(f" - [{status}] (Task #{task_id}) {content}") + output_lines.append(f" - [{status}] 
{lane_marker} (Task #{task_id}) {content}") else: - output_lines.append(f" - [{status}] {content}") + output_lines.append(f" - [{status}] {lane_marker} {content}") # Output marker and incomplete items both to stdout print("INCOMPLETE_TODOS") diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index 5151018f..b6bc2e5b 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -38,6 +38,17 @@ readonly FIELD_FULL_REVIEW_ROUND="full_review_round" readonly FIELD_ASK_CODEX_QUESTION="ask_codex_question" readonly FIELD_SESSION_ID="session_id" readonly FIELD_AGENT_TEAMS="agent_teams" +readonly FIELD_MAINLINE_STALL_COUNT="mainline_stall_count" +readonly FIELD_LAST_MAINLINE_VERDICT="last_mainline_verdict" +readonly FIELD_DRIFT_STATUS="drift_status" + +readonly MAINLINE_VERDICT_ADVANCED="advanced" +readonly MAINLINE_VERDICT_STALLED="stalled" +readonly MAINLINE_VERDICT_REGRESSED="regressed" +readonly MAINLINE_VERDICT_UNKNOWN="unknown" + +readonly DRIFT_STATUS_NORMAL="normal" +readonly DRIFT_STATUS_REPLAN_REQUIRED="replan_required" # Default Codex configuration (single source of truth - all scripts reference this) # Scripts can pre-set DEFAULT_CODEX_MODEL/DEFAULT_CODEX_EFFORT before sourcing to override. 
@@ -364,6 +375,9 @@ _parse_state_fields() { STATE_ASK_CODEX_QUESTION=$(echo "$STATE_FRONTMATTER" | grep "^${FIELD_ASK_CODEX_QUESTION}:" | sed "s/${FIELD_ASK_CODEX_QUESTION}: *//" | tr -d ' ' || true) STATE_SESSION_ID=$(echo "$STATE_FRONTMATTER" | grep "^${FIELD_SESSION_ID}:" | sed "s/${FIELD_SESSION_ID}: *//" || true) STATE_AGENT_TEAMS=$(echo "$STATE_FRONTMATTER" | grep "^${FIELD_AGENT_TEAMS}:" | sed "s/${FIELD_AGENT_TEAMS}: *//" | tr -d ' ' || true) + STATE_MAINLINE_STALL_COUNT=$(echo "$STATE_FRONTMATTER" | grep "^${FIELD_MAINLINE_STALL_COUNT}:" | sed "s/${FIELD_MAINLINE_STALL_COUNT}: *//" | tr -d ' ' || true) + STATE_LAST_MAINLINE_VERDICT=$(echo "$STATE_FRONTMATTER" | grep "^${FIELD_LAST_MAINLINE_VERDICT}:" | sed "s/${FIELD_LAST_MAINLINE_VERDICT}: *//" | tr -d ' ' || true) + STATE_DRIFT_STATUS=$(echo "$STATE_FRONTMATTER" | grep "^${FIELD_DRIFT_STATUS}:" | sed "s/${FIELD_DRIFT_STATUS}: *//" | tr -d ' ' || true) } # Parse state file frontmatter and set variables (tolerant mode with defaults) @@ -384,6 +398,9 @@ _parse_state_fields() { # STATE_FULL_REVIEW_ROUND - interval for Full Alignment Check (default: 5) # STATE_ASK_CODEX_QUESTION - "true" or "false" (v1.6.5+) # STATE_AGENT_TEAMS - "true" or "false" +# STATE_MAINLINE_STALL_COUNT - consecutive stalled/regressed implementation rounds +# STATE_LAST_MAINLINE_VERDICT - advanced/stalled/regressed/unknown +# STATE_DRIFT_STATUS - normal/replan_required # Returns: 0 on success, 1 if file not found # Note: For strict validation, use parse_state_file_strict() instead parse_state_file() { @@ -406,6 +423,9 @@ parse_state_file() { STATE_FULL_REVIEW_ROUND="${STATE_FULL_REVIEW_ROUND:-5}" STATE_ASK_CODEX_QUESTION="${STATE_ASK_CODEX_QUESTION:-true}" STATE_AGENT_TEAMS="${STATE_AGENT_TEAMS:-false}" + STATE_MAINLINE_STALL_COUNT="${STATE_MAINLINE_STALL_COUNT:-0}" + STATE_LAST_MAINLINE_VERDICT="${STATE_LAST_MAINLINE_VERDICT:-$MAINLINE_VERDICT_UNKNOWN}" + STATE_DRIFT_STATUS="${STATE_DRIFT_STATUS:-$DRIFT_STATUS_NORMAL}" # 
STATE_REVIEW_STARTED left as-is (empty if missing, to allow schema validation) return 0 @@ -481,10 +501,116 @@ parse_state_file_strict() { STATE_FULL_REVIEW_ROUND="${STATE_FULL_REVIEW_ROUND:-5}" STATE_ASK_CODEX_QUESTION="${STATE_ASK_CODEX_QUESTION:-true}" STATE_AGENT_TEAMS="${STATE_AGENT_TEAMS:-false}" + STATE_MAINLINE_STALL_COUNT="${STATE_MAINLINE_STALL_COUNT:-0}" + STATE_LAST_MAINLINE_VERDICT="${STATE_LAST_MAINLINE_VERDICT:-$MAINLINE_VERDICT_UNKNOWN}" + STATE_DRIFT_STATUS="${STATE_DRIFT_STATUS:-$DRIFT_STATUS_NORMAL}" return 0 } +# Normalize mainline progress verdict to a safe enum. +# Usage: normalize_mainline_progress_verdict "ADVANCED" +normalize_mainline_progress_verdict() { + local verdict_lower + verdict_lower=$(printf '%s' "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]') + + case "$verdict_lower" in + "$MAINLINE_VERDICT_ADVANCED"|"$MAINLINE_VERDICT_STALLED"|"$MAINLINE_VERDICT_REGRESSED") + echo "$verdict_lower" + ;; + *) + echo "$MAINLINE_VERDICT_UNKNOWN" + ;; + esac +} + +# Normalize drift status to a safe enum. +# Usage: normalize_drift_status "replan_required" +normalize_drift_status() { + local status_lower + status_lower=$(printf '%s' "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]') + + case "$status_lower" in + "$DRIFT_STATUS_REPLAN_REQUIRED") + echo "$DRIFT_STATUS_REPLAN_REQUIRED" + ;; + *) + echo "$DRIFT_STATUS_NORMAL" + ;; + esac +} + +# Extract "Mainline Progress Verdict" from Codex review content. 
+# Outputs one of: advanced, stalled, regressed, unknown +# Usage: extract_mainline_progress_verdict "$review_content" +extract_mainline_progress_verdict() { + local review_content="$1" + local verdict_line + local verdict_value + + verdict_line=$(printf '%s\n' "$review_content" | grep -Ei 'Mainline Progress Verdict:[[:space:]]*(ADVANCED|STALLED|REGRESSED)([^A-Za-z]|$)' | tail -1 || true) + if [[ -z "$verdict_line" ]]; then + echo "$MAINLINE_VERDICT_UNKNOWN" + return + fi + + verdict_value=$(printf '%s\n' "$verdict_line" | sed -E 's/.*Mainline Progress Verdict:[[:space:]]*(ADVANCED|STALLED|REGRESSED).*/\1/I') + normalize_mainline_progress_verdict "$verdict_value" +} + +# Upsert simple YAML frontmatter fields in a state file. +# Values must not contain newlines. +# Usage: upsert_state_fields "/path/to/state.md" "field=value" "other=value" +upsert_state_fields() { + local state_file="$1" + shift + + local temp_file="${state_file}.tmp.$$" + + awk -v assignments="$*" ' + BEGIN { + count = split(assignments, pairs, " "); + for (i = 1; i <= count; i++) { + split(pairs[i], kv, "="); + keys[kv[1]] = kv[2]; + order[i] = kv[1]; + } + separator_count = 0; + } + { + if ($0 == "---") { + separator_count++; + if (separator_count == 2) { + for (i = 1; i <= count; i++) { + key = order[i]; + if (!(key in seen)) { + print key ": " keys[key]; + seen[key] = 1; + } + } + } + print; + next; + } + + handled = 0; + for (i = 1; i <= count; i++) { + key = order[i]; + if ($0 ~ ("^" key ":")) { + print key ": " keys[key]; + seen[key] = 1; + handled = 1; + break; + } + } + + if (!handled) { + print; + } + } + ' "$state_file" > "$temp_file" && mv "$temp_file" "$state_file" +} + # Detect review issues from codex review log file # Returns: # 0 - issues found (caller should continue review loop) @@ -562,7 +688,7 @@ to_lower() { } # Check if a path (lowercase) matches a round file pattern -# Usage: is_round_file "$lowercase_path" "summary|prompt|todos" +# Usage: is_round_file "$lowercase_path" 
"summary|prompt|todos|contract" is_round_file_type() { local path_lower="$1" local file_type="$2" @@ -579,7 +705,7 @@ extract_round_number() { filename_lower=$(to_lower "$filename") # Use sed for portable regex extraction (works in both bash and zsh) - echo "$filename_lower" | sed -n 's/.*round-\([0-9][0-9]*\)-\(summary\|prompt\|todos\)\.md$/\1/p' + echo "$filename_lower" | sed -n 's/.*round-\([0-9][0-9]*\)-\(summary\|prompt\|todos\|contract\)\.md$/\1/p' } # Check if a file is in the allowlist for the active loop @@ -643,6 +769,21 @@ You cannot modify finalize-state.md. This file is managed by the loop system dur load_and_render_safe "$TEMPLATE_DIR" "block/finalize-state-file-modification.md" "$fallback" } +# Standard message for blocking round contract access during Finalize Phase +# Usage: finalize_contract_blocked_message "read" +finalize_contract_blocked_message() { + local action="$1" + local fallback="# Finalize Contract Access Blocked + +There is no active round contract during the Finalize Phase. + +Do not {{ACTION}} historical round contract files. +Use finalize-summary.md for finalize-only notes and goal-tracker.md for current state." + + load_and_render_safe "$TEMPLATE_DIR" "block/finalize-contract-access.md" "$fallback" \ + "ACTION=$action" +} + # Standard message for blocking summary file modifications via Bash # Usage: summary_bash_blocked_message "$correct_summary_path" summary_bash_blocked_message() { @@ -671,6 +812,79 @@ is_goal_tracker_path() { echo "$path_lower" | grep -qE 'goal-tracker\.md$' } +# Extract the immutable section from a goal-tracker content stream. +# Supports both current trackers (with --- separator) and older trackers +# that jump directly from IMMUTABLE SECTION to MUTABLE SECTION. 
+extract_goal_tracker_immutable_from_stream() { + awk ' + /^## IMMUTABLE SECTION[[:space:]]*$/ { capture=1 } + capture && /^## MUTABLE SECTION[[:space:]]*$/ { exit } + capture && /^---[[:space:]]*$/ { exit } + capture { print } + ' +} + +# Extract the immutable section from an on-disk goal-tracker file. +# Usage: extract_goal_tracker_immutable_from_file "/path/to/goal-tracker.md" +extract_goal_tracker_immutable_from_file() { + local tracker_file="$1" + if [[ ! -f "$tracker_file" ]]; then + return 1 + fi + extract_goal_tracker_immutable_from_stream < "$tracker_file" +} + +# Extract the immutable section from an in-memory goal-tracker string. +# Usage: extract_goal_tracker_immutable_from_text "$content" +extract_goal_tracker_immutable_from_text() { + local tracker_content="$1" + printf '%s' "$tracker_content" | extract_goal_tracker_immutable_from_stream +} + +# Check whether a proposed goal-tracker update preserves the immutable section. +# Usage: goal_tracker_mutable_update_allowed "/path/to/current.md" "$new_content" +goal_tracker_mutable_update_allowed() { + local tracker_file="$1" + local updated_content="$2" + + local current_immutable="" + local updated_immutable="" + current_immutable=$(extract_goal_tracker_immutable_from_file "$tracker_file" 2>/dev/null || true) + updated_immutable=$(extract_goal_tracker_immutable_from_text "$updated_content" 2>/dev/null || true) + + [[ -n "$current_immutable" ]] || return 1 + [[ "$current_immutable" == "$updated_immutable" ]] +} + +# Render the post-edit contents for a literal Edit operation. +# Returns non-zero if the edit preview cannot be produced. 
+# Usage: preview_edit_result "/path/to/file" "$old_string" "$new_string" "true|false" +preview_edit_result() { + local file_path="$1" + local old_string="$2" + local new_string="$3" + local replace_all="${4:-false}" + + command -v perl >/dev/null 2>&1 || return 1 + + FILE_PATH="$file_path" \ + OLD_STRING="$old_string" \ + NEW_STRING="$new_string" \ + REPLACE_ALL="$replace_all" \ + perl -0pe ' + BEGIN { + $old = $ENV{"OLD_STRING"}; + $new = $ENV{"NEW_STRING"}; + $replace_all = $ENV{"REPLACE_ALL"} eq "true"; + } + if ($replace_all) { + s/\Q$old\E/$new/g; + } else { + s/\Q$old\E/$new/; + } + ' "$file_path" +} + # Check if a path (lowercase) targets state.md is_state_file_path() { local path_lower="$1" @@ -1275,17 +1489,24 @@ command_modifies_file() { } # Standard message for blocking goal-tracker modifications after Round 0 -# Usage: goal_tracker_blocked_message "$current_round" "$summary_file_path" +# Usage: goal_tracker_blocked_message "$current_round" "$correct_goal_tracker_path" goal_tracker_blocked_message() { local current_round="$1" - local summary_file="$2" - local fallback="# Goal Tracker Modification Blocked (Round {{CURRENT_ROUND}}) + local correct_path="$2" + local fallback="# Goal Tracker Update Blocked (Round {{CURRENT_ROUND}}) + +After Round 0, you may update only the **MUTABLE SECTION** of the active goal tracker. + +Use Write or Edit on: {{CORRECT_PATH}} -After Round 0, only Codex can modify the Goal Tracker. 
Include a Goal Tracker Update Request in your summary: {{SUMMARY_FILE}}" +Rules: +- Keep the **IMMUTABLE SECTION** unchanged +- Do not modify `goal-tracker.md` via Bash +- Do not write to an old loop session's tracker" load_and_render_safe "$TEMPLATE_DIR" "block/goal-tracker-modification.md" "$fallback" \ "CURRENT_ROUND=$current_round" \ - "SUMMARY_FILE=$summary_file" + "CORRECT_PATH=$correct_path" } # End the loop by renaming state.md to indicate exit reason diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index 948612e1..7a5fdec1 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -6,7 +6,7 @@ # - cat/echo/printf > file.md (redirection) # - tee file.md # - sed -i file.md (in-place edit) -# - goal-tracker.md modifications after Round 0 +# - goal-tracker.md modifications via Bash # - PR loop state.md modifications # - PR loop read-only file modifications (pr-comment, prompt, codex-prompt, etc.) # @@ -359,12 +359,11 @@ fi # Round > 0: prompt to put request in summary if command_modifies_file "$COMMAND_LOWER" "goal-tracker\.md"; then + GOAL_TRACKER_PATH="$ACTIVE_LOOP_DIR/goal-tracker.md" if [[ "$CURRENT_ROUND" -eq 0 ]]; then - GOAL_TRACKER_PATH="$ACTIVE_LOOP_DIR/goal-tracker.md" goal_tracker_bash_blocked_message "$GOAL_TRACKER_PATH" >&2 else - SUMMARY_FILE="$ACTIVE_LOOP_DIR/round-${CURRENT_ROUND}-summary.md" - goal_tracker_blocked_message "$CURRENT_ROUND" "$SUMMARY_FILE" >&2 + goal_tracker_blocked_message "$CURRENT_ROUND" "$GOAL_TRACKER_PATH" >&2 fi exit 2 fi @@ -390,6 +389,23 @@ if command_modifies_file "$COMMAND_LOWER" "round-[0-9]+-summary\.md"; then exit 2 fi +# ======================================== +# Block Round Contract File Modifications (All Rounds) +# ======================================== +# Round contracts should be written using Write or Edit tools so round scoping +# stays aligned with the current loop state. 
+ +if command_modifies_file "$COMMAND_LOWER" "round-[0-9]+-contract\.md"; then + CORRECT_PATH="$ACTIVE_LOOP_DIR/round-${CURRENT_ROUND}-contract.md" + FALLBACK="# Round Contract Bash Write Blocked + +Do not use Bash commands to modify round contract files. +Use the Write or Edit tool instead: {{CORRECT_PATH}}" + load_and_render_safe "$TEMPLATE_DIR" "block/round-contract-bash-write.md" "$FALLBACK" \ + "CORRECT_PATH=$CORRECT_PATH" >&2 + exit 2 +fi + # ======================================== # Block Todos File Modifications (All Rounds) # ======================================== diff --git a/hooks/loop-codex-stop-hook.sh b/hooks/loop-codex-stop-hook.sh index 25142818..95783918 100755 --- a/hooks/loop-codex-stop-hook.sh +++ b/hooks/loop-codex-stop-hook.sh @@ -148,6 +148,9 @@ fi if [[ "$BITLESSON_ALLOW_EMPTY_NONE" != "true" && "$BITLESSON_ALLOW_EMPTY_NONE" != "false" ]]; then BITLESSON_ALLOW_EMPTY_NONE="true" fi +MAINLINE_STALL_COUNT="${STATE_MAINLINE_STALL_COUNT:-0}" +LAST_MAINLINE_VERDICT="${STATE_LAST_MAINLINE_VERDICT:-$MAINLINE_VERDICT_UNKNOWN}" +DRIFT_STATUS="${STATE_DRIFT_STATUS:-$DRIFT_STATUS_NORMAL}" # Re-validate Codex Model and Effort for YAML safety (in case state.md was manually edited) # Use same validation patterns as setup-rlcr-loop.sh if [[ ! "$CODEX_EXEC_MODEL" =~ ^[a-zA-Z0-9._-]+$ ]]; then @@ -189,6 +192,13 @@ if [[ ! "$MAX_ITERATIONS" =~ ^[0-9]+$ ]]; then MAX_ITERATIONS=42 fi +if [[ ! 
"$MAINLINE_STALL_COUNT" =~ ^[0-9]+$ ]]; then + echo "Warning: Invalid mainline_stall_count '$MAINLINE_STALL_COUNT', defaulting to 0" >&2 + MAINLINE_STALL_COUNT=0 +fi +LAST_MAINLINE_VERDICT=$(normalize_mainline_progress_verdict "$LAST_MAINLINE_VERDICT") +DRIFT_STATUS=$(normalize_drift_status "$DRIFT_STATUS") + # ======================================== # Quick-check 0: Schema Validation (v1.1.2+ fields) # ======================================== @@ -682,8 +692,10 @@ fi # In Finalize Phase, expect finalize-summary.md instead of round-N-summary.md if [[ "$IS_FINALIZE_PHASE" == "true" ]]; then SUMMARY_FILE="$LOOP_DIR/finalize-summary.md" + ROUND_CONTRACT_FILE="" else SUMMARY_FILE="$LOOP_DIR/round-${CURRENT_ROUND}-summary.md" + ROUND_CONTRACT_FILE="$LOOP_DIR/round-${CURRENT_ROUND}-contract.md" fi if [[ ! -f "$SUMMARY_FILE" ]]; then @@ -713,6 +725,36 @@ Please write your work summary to: {{SUMMARY_FILE}}" exit 0 fi +# Check Round Contract Exists +# ======================================== + +if [[ "$IS_FINALIZE_PHASE" != "true" ]]; then + if [[ ! 
-f "$ROUND_CONTRACT_FILE" ]]; then + FALLBACK="# Round Contract Missing + +Before trying to exit, write the current round contract to: {{ROUND_CONTRACT_FILE}} + +The round contract must restate: +- The single mainline objective for this round +- The target ACs +- Which side issues are truly blocking +- Which side issues are queued and out of scope +- The success criteria for this round" + REASON=$(load_and_render_safe "$TEMPLATE_DIR" "block/round-contract-missing.md" "$FALLBACK" \ + "ROUND_CONTRACT_FILE=$ROUND_CONTRACT_FILE") + + jq -n \ + --arg reason "$REASON" \ + --arg msg "Loop: Round contract missing for round $CURRENT_ROUND" \ + '{ + "decision": "block", + "reason": $reason, + "systemMessage": $msg + }' + exit 0 + fi +fi + # ======================================== # Check BitLesson Delta Section (all non-finalize rounds) # ======================================== @@ -742,7 +784,7 @@ GOAL_TRACKER_FILE="$LOOP_DIR/goal-tracker.md" # Skip this check in Finalize Phase, Review Phase, or when review_started is already true (skip-impl mode) # - Finalize Phase: goal tracker was already initialized before COMPLETE -# - Review Phase (review_started=true): skip-impl mode skips implementation, no goal tracker needed +# - Review Phase: later rounds may update only the mutable section, so Round 0 placeholder checks no longer apply if [[ "$IS_FINALIZE_PHASE" != "true" ]] && [[ "$REVIEW_STARTED" != "true" ]] && [[ "$CURRENT_ROUND" -eq 0 ]] && [[ -f "$GOAL_TRACKER_FILE" ]]; then # Check if goal-tracker.md still contains placeholder text # Extract each section and check for generic placeholder pattern within that section @@ -1235,6 +1277,79 @@ Follow the plan's per-task routing tags strictly: ROUTING_EOF } +# Stop the loop when mainline progress has stalled for too many consecutive rounds. 
# Arguments: $1=stall_count, $2=last_verdict
# Writes the stall count, last verdict and the replan-required drift status to
# $STATE_FILE, renders the stop message, calls end_loop with $EXIT_STOP, prints
# a "block" decision as JSON on stdout, and exits the calling shell with 0.
stop_for_mainline_drift() {
    local stalled_rounds="$1"
    local final_verdict="$2"
    local default_text
    local rendered

    # State is updated before end_loop is invoked.
    upsert_state_fields "$STATE_FILE" \
        "${FIELD_MAINLINE_STALL_COUNT}=${stalled_rounds}" \
        "${FIELD_LAST_MAINLINE_VERDICT}=${final_verdict}" \
        "${FIELD_DRIFT_STATUS}=${DRIFT_STATUS_REPLAN_REQUIRED}"

    # Inline text handed to load_and_render_safe as its fallback argument.
    default_text="# Mainline Drift Circuit Breaker

The RLCR loop has been stopped because the mainline failed to advance for {{STALL_COUNT}} consecutive implementation rounds.

- Last mainline verdict: {{LAST_VERDICT}}
- Drift status: replan_required

This loop should not continue automatically. Revisit the original plan, recover the round contract, and restart with a narrower mainline objective."

    rendered=$(load_and_render_safe "$TEMPLATE_DIR" "block/mainline-drift-stop.md" "$default_text" \
        "STALL_COUNT=$stalled_rounds" \
        "LAST_VERDICT=$final_verdict" \
        "PLAN_FILE=$PLAN_FILE")

    end_loop "$LOOP_DIR" "$STATE_FILE" "$EXIT_STOP"

    jq -n \
        --arg reason "$rendered" \
        --arg msg "Loop: Stopped - mainline drift circuit breaker triggered" \
        '{
            "decision": "block",
            "reason": $reason,
            "systemMessage": $msg
        }'
    exit 0
}

# Block exit when implementation review output omits the required mainline verdict.
# Arguments: $1=review_result_file, $2=review_prompt_file
# Renders the "verdict missing" message, prints a "block" decision as JSON on
# stdout, and exits the calling shell with status 0. No state is modified here.
block_missing_mainline_verdict() {
    local result_path="$1"
    local prompt_path="$2"
    local default_text
    local rendered

    # Inline text handed to load_and_render_safe as its fallback argument.
    default_text="# Mainline Verdict Missing

The implementation review output is missing the required line:

\`Mainline Progress Verdict: ADVANCED / STALLED / REGRESSED\`

Humanize cannot safely update drift state or choose the correct next-round prompt without this verdict.

Retry the exit so Codex reruns the implementation review.

Files:
- Review result: {{REVIEW_RESULT_FILE}}
- Review prompt: {{REVIEW_PROMPT_FILE}}"

    rendered=$(load_and_render_safe "$TEMPLATE_DIR" "block/mainline-verdict-missing.md" "$default_text" \
        "REVIEW_RESULT_FILE=$result_path" \
        "REVIEW_PROMPT_FILE=$prompt_path")

    jq -n \
        --arg reason "$rendered" \
        --arg msg "Loop: Blocked - implementation review missing Mainline Progress Verdict" \
        '{
            "decision": "block",
            "reason": $reason,
            "systemMessage": $msg
        }'
    exit 0
}
Write your summary to: {{SUMMARY_FILE}}" load_and_render_safe "$TEMPLATE_DIR" "claude/review-phase-prompt.md" "$fallback" \ "REVIEW_CONTENT=$review_content" \ - "SUMMARY_FILE=$next_summary_file" > "$next_prompt_file" + "SUMMARY_FILE=$next_summary_file" \ + "BITLESSON_FILE=$BITLESSON_FILE" \ + "PLAN_FILE=$PLAN_FILE" \ + "GOAL_TRACKER_FILE=$GOAL_TRACKER_FILE" \ + "ROUND_CONTRACT_FILE=$next_contract_file" \ + "CURRENT_ROUND=$round" > "$next_prompt_file" + if [[ "$BITLESSON_REQUIRED" == "true" ]] && ! grep -q 'bitlesson-selector' "$next_prompt_file"; then + cat >> "$next_prompt_file" << EOF + +## BitLesson Selection (REQUIRED FOR EACH FIX TASK) + +Before implementing each fix task, you MUST: + +1. Read @$BITLESSON_FILE +2. Run \`bitlesson-selector\` for each fix task/sub-task to select relevant lesson IDs +3. Follow the selected lesson IDs (or \`NONE\`) during implementation + +Reference: @$BITLESSON_FILE +EOF + fi append_task_tag_routing_note "$next_prompt_file" jq -n \ @@ -1536,6 +1673,53 @@ REVIEW_CONTENT=$(cat "$REVIEW_RESULT_FILE") LAST_LINE=$(echo "$REVIEW_CONTENT" | grep -v '^[[:space:]]*$' | tail -1) LAST_LINE_TRIMMED=$(echo "$LAST_LINE" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//') +NEXT_MAINLINE_STALL_COUNT="$MAINLINE_STALL_COUNT" +NEXT_LAST_MAINLINE_VERDICT="$LAST_MAINLINE_VERDICT" +NEXT_DRIFT_STATUS="$DRIFT_STATUS" +DRIFT_REPLAN_REQUIRED=false +MAINLINE_DRIFT_STOP=false + +if [[ "$REVIEW_STARTED" != "true" ]]; then + EXTRACTED_MAINLINE_VERDICT=$(extract_mainline_progress_verdict "$REVIEW_CONTENT") + + if [[ "$LAST_LINE_TRIMMED" != "$MARKER_STOP" ]] && [[ "$EXTRACTED_MAINLINE_VERDICT" == "$MAINLINE_VERDICT_UNKNOWN" ]]; then + echo "Implementation review output is missing Mainline Progress Verdict. Blocking exit for safety." 
>&2 + block_missing_mainline_verdict "$REVIEW_RESULT_FILE" "$REVIEW_PROMPT_FILE" + fi + + case "$EXTRACTED_MAINLINE_VERDICT" in + "$MAINLINE_VERDICT_ADVANCED") + NEXT_MAINLINE_STALL_COUNT=0 + NEXT_LAST_MAINLINE_VERDICT="$MAINLINE_VERDICT_ADVANCED" + NEXT_DRIFT_STATUS="$DRIFT_STATUS_NORMAL" + ;; + "$MAINLINE_VERDICT_STALLED"|"$MAINLINE_VERDICT_REGRESSED") + NEXT_MAINLINE_STALL_COUNT=$((MAINLINE_STALL_COUNT + 1)) + NEXT_LAST_MAINLINE_VERDICT="$EXTRACTED_MAINLINE_VERDICT" + if [[ "$NEXT_MAINLINE_STALL_COUNT" -ge 2 ]]; then + NEXT_DRIFT_STATUS="$DRIFT_STATUS_REPLAN_REQUIRED" + DRIFT_REPLAN_REQUIRED=true + else + NEXT_DRIFT_STATUS="$DRIFT_STATUS_NORMAL" + fi + if [[ "$NEXT_MAINLINE_STALL_COUNT" -ge 3 ]]; then + MAINLINE_DRIFT_STOP=true + fi + ;; + *) + : + ;; + esac + + if [[ "$LAST_LINE_TRIMMED" == "$MARKER_COMPLETE" ]]; then + NEXT_MAINLINE_STALL_COUNT=0 + NEXT_LAST_MAINLINE_VERDICT="$MAINLINE_VERDICT_ADVANCED" + NEXT_DRIFT_STATUS="$DRIFT_STATUS_NORMAL" + DRIFT_REPLAN_REQUIRED=false + MAINLINE_DRIFT_STOP=false + fi +fi + # Handle COMPLETE - enter Review Phase or Finalize Phase if [[ "$LAST_LINE_TRIMMED" == "$MARKER_COMPLETE" ]]; then # In review phase, COMPLETE signal is ignored - only absence of [P0-9] triggers finalize @@ -1563,10 +1747,12 @@ if [[ "$LAST_LINE_TRIMMED" == "$MARKER_COMPLETE" ]]; then else echo "Implementation complete. Entering Review Phase..." >&2 - # Update state to indicate review phase has started - TEMP_FILE="${STATE_FILE}.tmp.$$" - sed "s/^review_started: .*/review_started: true/" "$STATE_FILE" > "$TEMP_FILE" - mv "$TEMP_FILE" "$STATE_FILE" + # Update state to indicate review phase has started and clear drift counters. 
+ upsert_state_fields "$STATE_FILE" \ + "${FIELD_REVIEW_STARTED}=true" \ + "${FIELD_MAINLINE_STALL_COUNT}=0" \ + "${FIELD_LAST_MAINLINE_VERDICT}=${MAINLINE_VERDICT_ADVANCED}" \ + "${FIELD_DRIFT_STATUS}=${DRIFT_STATUS_NORMAL}" REVIEW_STARTED="true" # Create marker file to validate review phase was properly entered @@ -1614,6 +1800,11 @@ Use \`/humanize:cancel-rlcr-loop\` to end this loop." run_and_handle_code_review "$((CURRENT_ROUND + 1))" "Loop: Finalize Phase - Code review passed" fi +if [[ "$MAINLINE_DRIFT_STOP" == "true" ]] && [[ "$LAST_LINE_TRIMMED" != "$MARKER_STOP" ]] && [[ "$LAST_LINE_TRIMMED" != "$MARKER_COMPLETE" ]]; then + echo "Mainline progress stalled for $NEXT_MAINLINE_STALL_COUNT consecutive rounds. Triggering drift circuit breaker." >&2 + stop_for_mainline_drift "$NEXT_MAINLINE_STALL_COUNT" "$NEXT_LAST_MAINLINE_VERDICT" +fi + # Handle STOP - circuit breaker triggered if [[ "$LAST_LINE_TRIMMED" == "$MARKER_STOP" ]]; then echo "" >&2 @@ -1649,9 +1840,11 @@ fi # ======================================== # Update state file for next round -TEMP_FILE="${STATE_FILE}.tmp.$$" -sed "s/^current_round: .*/current_round: $NEXT_ROUND/" "$STATE_FILE" > "$TEMP_FILE" -mv "$TEMP_FILE" "$STATE_FILE" +upsert_state_fields "$STATE_FILE" \ + "${FIELD_CURRENT_ROUND}=${NEXT_ROUND}" \ + "${FIELD_MAINLINE_STALL_COUNT}=${NEXT_MAINLINE_STALL_COUNT}" \ + "${FIELD_LAST_MAINLINE_VERDICT}=${NEXT_LAST_MAINLINE_VERDICT}" \ + "${FIELD_DRIFT_STATUS}=${NEXT_DRIFT_STATUS}" # Create next round prompt NEXT_PROMPT_FILE="$LOOP_DIR/round-${NEXT_ROUND}-prompt.md" @@ -1678,6 +1871,7 @@ if [[ ! 
-f "$NEXT_SUMMARY_FILE" ]]; then - Notes: [what changed and why] EOF fi +NEXT_CONTRACT_FILE="$LOOP_DIR/round-${NEXT_ROUND}-contract.md" # Build the next round prompt from templates NEXT_ROUND_FALLBACK="# Next Round Instructions @@ -1692,12 +1886,60 @@ Before executing tasks in this round: ## Codex Review {{REVIEW_CONTENT}} -Reference: {{PLAN_FILE}}, {{GOAL_TRACKER_FILE}}, {{BITLESSON_FILE}}" -load_and_render_safe "$TEMPLATE_DIR" "claude/next-round-prompt.md" "$NEXT_ROUND_FALLBACK" \ - "PLAN_FILE=$PLAN_FILE" \ - "REVIEW_CONTENT=$REVIEW_CONTENT" \ - "GOAL_TRACKER_FILE=$GOAL_TRACKER_FILE" \ - "BITLESSON_FILE=$BITLESSON_FILE" > "$NEXT_PROMPT_FILE" +Reference: {{PLAN_FILE}}, {{GOAL_TRACKER_FILE}}, {{ROUND_CONTRACT_FILE}}, {{BITLESSON_FILE}}" +DRIFT_REPLAN_FALLBACK="# Drift Recovery Required + +The mainline has not advanced for {{STALL_COUNT}} consecutive implementation rounds. + +Last mainline verdict: {{LAST_MAINLINE_VERDICT}} + +Before writing code: +- Re-read @{{PLAN_FILE}} +- Re-read @{{GOAL_TRACKER_FILE}} +- Re-read the recent round summaries and review results +- Rewrite @{{ROUND_CONTRACT_FILE}} with a recovery-focused mainline objective + +Do not spend this round clearing queued work. Recover mainline progress first. 
+ +## Codex Review +{{REVIEW_CONTENT}}" + +if [[ "$DRIFT_REPLAN_REQUIRED" == "true" ]]; then + load_and_render_safe "$TEMPLATE_DIR" "claude/drift-replan-prompt.md" "$DRIFT_REPLAN_FALLBACK" \ + "PLAN_FILE=$PLAN_FILE" \ + "REVIEW_CONTENT=$REVIEW_CONTENT" \ + "GOAL_TRACKER_FILE=$GOAL_TRACKER_FILE" \ + "BITLESSON_FILE=$BITLESSON_FILE" \ + "ROUND_CONTRACT_FILE=$NEXT_CONTRACT_FILE" \ + "CURRENT_ROUND=$NEXT_ROUND" \ + "STALL_COUNT=$NEXT_MAINLINE_STALL_COUNT" \ + "LAST_MAINLINE_VERDICT=$NEXT_LAST_MAINLINE_VERDICT" > "$NEXT_PROMPT_FILE" +else + load_and_render_safe "$TEMPLATE_DIR" "claude/next-round-prompt.md" "$NEXT_ROUND_FALLBACK" \ + "PLAN_FILE=$PLAN_FILE" \ + "REVIEW_CONTENT=$REVIEW_CONTENT" \ + "GOAL_TRACKER_FILE=$GOAL_TRACKER_FILE" \ + "BITLESSON_FILE=$BITLESSON_FILE" \ + "ROUND_CONTRACT_FILE=$NEXT_CONTRACT_FILE" \ + "CURRENT_ROUND=$NEXT_ROUND" \ + "STALL_COUNT=$NEXT_MAINLINE_STALL_COUNT" \ + "LAST_MAINLINE_VERDICT=$NEXT_LAST_MAINLINE_VERDICT" > "$NEXT_PROMPT_FILE" +fi + +if [[ "$DRIFT_REPLAN_REQUIRED" == "true" ]] && [[ "$BITLESSON_REQUIRED" == "true" ]] && ! grep -q 'bitlesson-selector' "$NEXT_PROMPT_FILE"; then + cat >> "$NEXT_PROMPT_FILE" << EOF + +## BitLesson Selection (REQUIRED FOR EACH TASK) + +Before executing each task or sub-task, you MUST: + +1. Read @$BITLESSON_FILE +2. Run \`bitlesson-selector\` for each task/sub-task to select relevant lesson IDs +3. Follow the selected lesson IDs (or \`NONE\`) during implementation + +Reference: @$BITLESSON_FILE +EOF +fi if [[ "$AGENT_TEAMS" == "true" ]]; then ENFORCEMENT_BLOCK="**Delegation Warning**: Do NOT implement code yourself in Agent Teams mode; delegate all coding tasks to team members." 
@@ -1814,6 +2056,9 @@ fi # Build system message SYSTEM_MSG="Loop: Round $NEXT_ROUND/$MAX_ITERATIONS - Codex found issues to address" +if [[ "$DRIFT_REPLAN_REQUIRED" == "true" ]]; then + SYSTEM_MSG="Loop: Round $NEXT_ROUND/$MAX_ITERATIONS - Mainline drift detected, replan required" +fi # Block exit and send review feedback jq -n \ diff --git a/hooks/loop-edit-validator.sh b/hooks/loop-edit-validator.sh index 76cf9c03..7259dce8 100755 --- a/hooks/loop-edit-validator.sh +++ b/hooks/loop-edit-validator.sh @@ -6,7 +6,8 @@ # - Todos files (should use native Task tools instead) # - Prompt files (read-only, generated by Codex) # - State files (managed by hooks, not Claude) -# - Goal tracker after Round 0 +# - Wrong round number contract files +# - Goal tracker edits outside the active loop or that alter the immutable section # - PR loop state files (.humanize/pr-loop/) # - PR loop read-only files (pr-comment, prompt, codex-prompt, pr-check, pr-feedback) # @@ -101,6 +102,10 @@ fi # Detect if we're in Finalize Phase (finalize-state.md exists) STATE_FILE_TO_PARSE=$(resolve_active_state_file "$ACTIVE_LOOP_DIR") +IS_FINALIZE_PHASE=false +if [[ "$STATE_FILE_TO_PARSE" == *"/finalize-state.md" ]]; then + IS_FINALIZE_PHASE=true +fi # Parse state file using strict validation (fail closed on malformed state) if ! 
parse_state_file_strict "$STATE_FILE_TO_PARSE" 2>/dev/null; then @@ -124,6 +129,11 @@ if is_state_file_path "$FILE_PATH_LOWER"; then exit 2 fi +if [[ "$IS_FINALIZE_PHASE" == "true" ]] && is_round_file_type "$FILE_PATH_LOWER" "contract"; then + finalize_contract_blocked_message "edit" >&2 + exit 2 +fi + # ======================================== # Block Plan Backup Edits # ======================================== @@ -139,20 +149,52 @@ if [[ "$FILENAME" == "plan.md" ]]; then fi # ======================================== -# Block Goal Tracker After Round 0 +# Validate Goal Tracker Edits # ======================================== -if is_goal_tracker_path "$FILE_PATH_LOWER" && [[ "$CURRENT_ROUND" -gt 0 ]]; then - SUMMARY_FILE="$ACTIVE_LOOP_DIR/round-${CURRENT_ROUND}-summary.md" - goal_tracker_blocked_message "$CURRENT_ROUND" "$SUMMARY_FILE" >&2 - exit 2 +if is_goal_tracker_path "$FILE_PATH_LOWER"; then + GOAL_TRACKER_PATH="$ACTIVE_LOOP_DIR/goal-tracker.md" + NORMALIZED_FILE_PATH=$(_normalize_path "$FILE_PATH") + NORMALIZED_GOAL_TRACKER_PATH=$(_normalize_path "$GOAL_TRACKER_PATH") + + if [[ "$NORMALIZED_FILE_PATH" != "$NORMALIZED_GOAL_TRACKER_PATH" ]]; then + goal_tracker_blocked_message "$CURRENT_ROUND" "$GOAL_TRACKER_PATH" >&2 + exit 2 + fi + + if [[ "$CURRENT_ROUND" -gt 0 ]]; then + if ! echo "$HOOK_INPUT" | jq -e '.tool_input | has("old_string") and has("new_string")' >/dev/null 2>&1; then + echo "Error: Missing required field: tool_input.old_string or tool_input.new_string" >&2 + exit 1 + fi + OLD_STRING=$(echo "$HOOK_INPUT" | jq -r '.tool_input.old_string // ""') + if [[ -z "$OLD_STRING" ]]; then + echo "Error: Missing required field: tool_input.old_string" >&2 + exit 1 + fi + + NEW_STRING=$(echo "$HOOK_INPUT" | jq -r '.tool_input.new_string // ""') + REPLACE_ALL=$(echo "$HOOK_INPUT" | jq -r '.tool_input.replace_all // false') + + if ! 
UPDATED_CONTENT=$(preview_edit_result "$GOAL_TRACKER_PATH" "$OLD_STRING" "$NEW_STRING" "$REPLACE_ALL" 2>/dev/null); then + goal_tracker_blocked_message "$CURRENT_ROUND" "$GOAL_TRACKER_PATH" >&2 + exit 2 + fi + + if ! goal_tracker_mutable_update_allowed "$GOAL_TRACKER_PATH" "$UPDATED_CONTENT"; then + goal_tracker_blocked_message "$CURRENT_ROUND" "$GOAL_TRACKER_PATH" >&2 + exit 2 + fi + fi + + exit 0 fi # ======================================== -# Validate Summary File Round Number +# Validate Summary/Contract File Round Number # ======================================== -if is_round_file_type "$FILE_PATH_LOWER" "summary"; then +if is_round_file_type "$FILE_PATH_LOWER" "summary" || is_round_file_type "$FILE_PATH_LOWER" "contract"; then # Extract filename from path (portable - works in bash and zsh) CLAUDE_FILENAME=$(echo "$FILE_PATH" | sed -n 's|.*\.humanize/rlcr/[^/]*/\(.*\)$|\1|p') if [[ -z "$CLAUDE_FILENAME" ]]; then @@ -161,9 +203,10 @@ if is_round_file_type "$FILE_PATH_LOWER" "summary"; then if [[ -n "$CLAUDE_FILENAME" ]]; then CLAUDE_ROUND=$(extract_round_number "$CLAUDE_FILENAME") + FILE_TYPE=$([[ "$FILE_PATH_LOWER" == *"-contract.md" ]] && echo "contract" || echo "summary") if [[ -n "$CLAUDE_ROUND" ]] && [[ "$CLAUDE_ROUND" != "$CURRENT_ROUND" ]] && ! is_allowlisted_file "$FILE_PATH" "$ACTIVE_LOOP_DIR"; then - CORRECT_PATH="$ACTIVE_LOOP_DIR/round-${CURRENT_ROUND}-summary.md" + CORRECT_PATH="$ACTIVE_LOOP_DIR/round-${CURRENT_ROUND}-${FILE_TYPE}.md" FALLBACK="# Wrong Round Number You tried to {{ACTION}} round-{{CLAUDE_ROUND}}-{{FILE_TYPE}}.md but current round is **{{CURRENT_ROUND}}**. 
@@ -172,7 +215,7 @@ Edit: {{CORRECT_PATH}}" load_and_render_safe "$TEMPLATE_DIR" "block/wrong-round-number.md" "$FALLBACK" \ "ACTION=edit" \ "CLAUDE_ROUND=$CLAUDE_ROUND" \ - "FILE_TYPE=summary" \ + "FILE_TYPE=$FILE_TYPE" \ "CURRENT_ROUND=$CURRENT_ROUND" \ "CORRECT_PATH=$CORRECT_PATH" >&2 exit 2 diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index f0b6f71f..02d15202 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -3,10 +3,11 @@ # PreToolUse Hook: Validate Read access for RLCR loop and PR loop files # # Blocks Claude from reading: -# - Wrong round's prompt/summary files (outdated information) +# - Wrong round's prompt/summary/contract files (outdated information) # - Round files from wrong locations (not in .humanize/rlcr/) # - Round files from old session directories # - Todos files (should use native Task tools instead) +# - goal-tracker.md from old RLCR sessions # # PR loop files (.humanize/pr-loop/) are generally allowed to read # to give Claude access to comments, prompts, and feedback. @@ -66,15 +67,26 @@ if is_round_file_type "$FILE_PATH_LOWER" "todos"; then fi # ======================================== -# Check for Round Files (summary/prompt) +# Check for Restricted RLCR Files # ======================================== -if ! is_round_file_type "$FILE_PATH_LOWER" "summary" && ! is_round_file_type "$FILE_PATH_LOWER" "prompt"; then +IS_GOAL_TRACKER=$(is_goal_tracker_path "$FILE_PATH_LOWER" && echo "true" || echo "false") +IS_ROUND_FILE=$( + if is_round_file_type "$FILE_PATH_LOWER" "summary" || \ + is_round_file_type "$FILE_PATH_LOWER" "prompt" || \ + is_round_file_type "$FILE_PATH_LOWER" "contract"; then + echo "true" + else + echo "false" + fi +) + +IN_HUMANIZE_LOOP_DIR=$(is_in_humanize_loop_dir "$FILE_PATH" && echo "true" || echo "false") +if [[ "$IS_ROUND_FILE" != "true" ]] && ! 
{ [[ "$IS_GOAL_TRACKER" == "true" ]] && [[ "$IN_HUMANIZE_LOOP_DIR" == "true" ]]; }; then exit 0 fi CLAUDE_FILENAME=$(basename "$FILE_PATH") -IN_HUMANIZE_LOOP_DIR=$(is_in_humanize_loop_dir "$FILE_PATH" && echo "true" || echo "false") # ======================================== # Find Active Loop and Current Round @@ -90,6 +102,10 @@ fi # Detect if we're in Finalize Phase (finalize-state.md exists) STATE_FILE_TO_PARSE=$(resolve_active_state_file "$ACTIVE_LOOP_DIR") +IS_FINALIZE_PHASE=false +if [[ "$STATE_FILE_TO_PARSE" == *"/finalize-state.md" ]]; then + IS_FINALIZE_PHASE=true +fi # Parse state file using strict validation (fail closed on malformed state) if ! parse_state_file_strict "$STATE_FILE_TO_PARSE" 2>/dev/null; then @@ -98,6 +114,35 @@ if ! parse_state_file_strict "$STATE_FILE_TO_PARSE" 2>/dev/null; then fi CURRENT_ROUND="$STATE_CURRENT_ROUND" +if [[ "$IS_FINALIZE_PHASE" == "true" ]] && is_round_file_type "$FILE_PATH_LOWER" "contract"; then + finalize_contract_blocked_message "read" >&2 + exit 2 +fi + +# ======================================== +# Validate Goal Tracker Path +# ======================================== + +if [[ "$IS_GOAL_TRACKER" == "true" ]] && [[ "$IN_HUMANIZE_LOOP_DIR" == "true" ]]; then + CORRECT_PATH="$ACTIVE_LOOP_DIR/goal-tracker.md" + NORMALIZED_FILE_PATH=$(_normalize_path "$FILE_PATH") + NORMALIZED_CORRECT_PATH=$(_normalize_path "$CORRECT_PATH") + + if [[ "$NORMALIZED_FILE_PATH" != "$NORMALIZED_CORRECT_PATH" ]]; then + FALLBACK="# Wrong Goal Tracker Path + +Read the active loop goal tracker instead: {{CORRECT_PATH}}" + load_and_render_safe "$TEMPLATE_DIR" "block/wrong-file-location.md" "$FALLBACK" \ + "FILE_PATH=$FILE_PATH" \ + "ACTIVE_LOOP_DIR=$ACTIVE_LOOP_DIR" \ + "CURRENT_ROUND=$CURRENT_ROUND" \ + "CORRECT_PATH=$CORRECT_PATH" >&2 + exit 2 + fi + + exit 0 +fi + # ======================================== # Extract Round Number and File Type # ======================================== @@ -113,6 +158,8 @@ if is_round_file_type 
"$FILE_PATH_LOWER" "summary"; then FILE_TYPE="summary" elif is_round_file_type "$FILE_PATH_LOWER" "prompt"; then FILE_TYPE="prompt" +elif is_round_file_type "$FILE_PATH_LOWER" "contract"; then + FILE_TYPE="contract" fi # ======================================== diff --git a/hooks/loop-write-validator.sh b/hooks/loop-write-validator.sh index 02090265..9c6bdc4b 100755 --- a/hooks/loop-write-validator.sh +++ b/hooks/loop-write-validator.sh @@ -6,8 +6,9 @@ # - Todos files (should use native Task tools instead) # - Prompt files (read-only, generated by Codex) # - Wrong round number summary files +# - Wrong round number contract files # - Summary files outside .humanize/rlcr/ -# - Goal tracker after Round 0 +# - Goal tracker writes outside the active loop or that alter the immutable section # - PR loop state files (.humanize/pr-loop/) # - PR loop read-only files (pr-comment, prompt, codex-prompt, pr-check, pr-feedback) # @@ -101,19 +102,20 @@ fi # ======================================== IS_SUMMARY_FILE=$(is_round_file_type "$FILE_PATH_LOWER" "summary" && echo "true" || echo "false") +IS_CONTRACT_FILE=$(is_round_file_type "$FILE_PATH_LOWER" "contract" && echo "true" || echo "false") IS_FINALIZE_SUMMARY=$(is_finalize_summary_path "$FILE_PATH_LOWER" && echo "true" || echo "false") IN_HUMANIZE_LOOP_DIR=$(is_in_humanize_loop_dir "$FILE_PATH" && echo "true" || echo "false") -# If not a summary file, not a finalize summary, and not in .humanize/rlcr, allow normally -if [[ "$IS_SUMMARY_FILE" == "false" ]] && [[ "$IS_FINALIZE_SUMMARY" == "false" ]] && [[ "$IN_HUMANIZE_LOOP_DIR" == "false" ]]; then +# If not a summary file, not a contract file, not a finalize summary, and not in .humanize/rlcr, allow normally +if [[ "$IS_SUMMARY_FILE" == "false" ]] && [[ "$IS_CONTRACT_FILE" == "false" ]] && [[ "$IS_FINALIZE_SUMMARY" == "false" ]] && [[ "$IN_HUMANIZE_LOOP_DIR" == "false" ]]; then exit 0 fi # For state.md, finalize-state.md, goal-tracker.md, and plan.md in .humanize/rlcr, we need 
further validation -# For other files in .humanize/rlcr that aren't summaries, allow them +# For other files in .humanize/rlcr that aren't summaries/contracts, allow them FILENAME=$(basename "$FILE_PATH") IS_PLAN_BACKUP=$([[ "$FILENAME" == "plan.md" ]] && echo "true" || echo "false") -if [[ "$IN_HUMANIZE_LOOP_DIR" == "true" ]] && [[ "$IS_SUMMARY_FILE" == "false" ]] && [[ "$IS_FINALIZE_SUMMARY" == "false" ]]; then +if [[ "$IN_HUMANIZE_LOOP_DIR" == "true" ]] && [[ "$IS_SUMMARY_FILE" == "false" ]] && [[ "$IS_CONTRACT_FILE" == "false" ]] && [[ "$IS_FINALIZE_SUMMARY" == "false" ]]; then if ! is_state_file_path "$FILE_PATH_LOWER" && ! is_finalize_state_file_path "$FILE_PATH_LOWER" && ! is_goal_tracker_path "$FILE_PATH_LOWER" && [[ "$IS_PLAN_BACKUP" != "true" ]]; then exit 0 fi @@ -174,6 +176,12 @@ if [[ "$IS_FINALIZE_SUMMARY" == "true" ]] && [[ "$IN_HUMANIZE_LOOP_DIR" == "true fi fi +# There is no active round contract once the loop has entered Finalize Phase. +if [[ "$IS_FINALIZE_PHASE" == "true" ]] && [[ "$IS_CONTRACT_FILE" == "true" ]]; then + finalize_contract_blocked_message "write to" >&2 + exit 2 +fi + # ======================================== # Block Plan Backup Writes # ======================================== @@ -188,26 +196,54 @@ if [[ "$IS_PLAN_BACKUP" == "true" ]]; then fi # ======================================== -# Block Goal Tracker After Round 0 +# Validate Goal Tracker Writes # ======================================== -if is_goal_tracker_path "$FILE_PATH_LOWER" && [[ "$CURRENT_ROUND" -gt 0 ]]; then - SUMMARY_FILE="$ACTIVE_LOOP_DIR/round-${CURRENT_ROUND}-summary.md" - goal_tracker_blocked_message "$CURRENT_ROUND" "$SUMMARY_FILE" >&2 - exit 2 +if is_goal_tracker_path "$FILE_PATH_LOWER"; then + GOAL_TRACKER_PATH="$ACTIVE_LOOP_DIR/goal-tracker.md" + NORMALIZED_FILE_PATH=$(_normalize_path "$FILE_PATH") + NORMALIZED_GOAL_TRACKER_PATH=$(_normalize_path "$GOAL_TRACKER_PATH") + + if [[ "$NORMALIZED_FILE_PATH" != "$NORMALIZED_GOAL_TRACKER_PATH" ]]; then + 
goal_tracker_blocked_message "$CURRENT_ROUND" "$GOAL_TRACKER_PATH" >&2 + exit 2 + fi + + if [[ "$CURRENT_ROUND" -gt 0 ]]; then + if ! require_tool_input_field "$HOOK_INPUT" "content"; then + exit 1 + fi + + UPDATED_CONTENT=$(echo "$HOOK_INPUT" | jq -r '.tool_input.content // ""') + if ! goal_tracker_mutable_update_allowed "$GOAL_TRACKER_PATH" "$UPDATED_CONTENT"; then + goal_tracker_blocked_message "$CURRENT_ROUND" "$GOAL_TRACKER_PATH" >&2 + exit 2 + fi + fi + + exit 0 fi # ======================================== -# Block Summary Files Outside .humanize/rlcr +# Block Summary/Contract Files Outside .humanize/rlcr # ======================================== -if [[ "$IS_SUMMARY_FILE" == "true" ]] && [[ "$IN_HUMANIZE_LOOP_DIR" == "false" ]]; then - CORRECT_PATH="$ACTIVE_LOOP_DIR/round-${CURRENT_ROUND}-summary.md" - FALLBACK="# Wrong Summary Location +if [[ "$IS_SUMMARY_FILE" == "true" || "$IS_CONTRACT_FILE" == "true" ]] && [[ "$IN_HUMANIZE_LOOP_DIR" == "false" ]]; then + if [[ "$IS_CONTRACT_FILE" == "true" ]]; then + CORRECT_PATH="$ACTIVE_LOOP_DIR/round-${CURRENT_ROUND}-contract.md" + FALLBACK="# Wrong Round Contract Location + +Write the round contract to the correct path: {{CORRECT_PATH}}" + load_and_render_safe "$TEMPLATE_DIR" "block/wrong-contract-location.md" "$FALLBACK" \ + "CORRECT_PATH=$CORRECT_PATH" >&2 + else + CORRECT_PATH="$ACTIVE_LOOP_DIR/round-${CURRENT_ROUND}-summary.md" + FALLBACK="# Wrong Summary Location Write summary to the correct path: {{CORRECT_PATH}}" - load_and_render_safe "$TEMPLATE_DIR" "block/wrong-summary-location.md" "$FALLBACK" \ - "CORRECT_PATH=$CORRECT_PATH" >&2 + load_and_render_safe "$TEMPLATE_DIR" "block/wrong-summary-location.md" "$FALLBACK" \ + "CORRECT_PATH=$CORRECT_PATH" >&2 + fi exit 2 fi @@ -224,14 +260,15 @@ if [[ -z "$CLAUDE_FILENAME" ]]; then fi # ======================================== -# Validate Round Number (for summary files) +# Validate Round Number (for summary/contract files) # ======================================== 
-if [[ "$IS_SUMMARY_FILE" == "true" ]]; then +if [[ "$IS_SUMMARY_FILE" == "true" || "$IS_CONTRACT_FILE" == "true" ]]; then CLAUDE_ROUND=$(extract_round_number "$CLAUDE_FILENAME") + FILE_TYPE=$([[ "$IS_CONTRACT_FILE" == "true" ]] && echo "contract" || echo "summary") if [[ -n "$CLAUDE_ROUND" ]] && [[ "$CLAUDE_ROUND" != "$CURRENT_ROUND" ]] && ! is_allowlisted_file "$FILE_PATH" "$ACTIVE_LOOP_DIR"; then - CORRECT_PATH="$ACTIVE_LOOP_DIR/round-${CURRENT_ROUND}-summary.md" + CORRECT_PATH="$ACTIVE_LOOP_DIR/round-${CURRENT_ROUND}-${FILE_TYPE}.md" FALLBACK="# Wrong Round Number You tried to {{ACTION}} round-{{CLAUDE_ROUND}}-{{FILE_TYPE}}.md but current round is **{{CURRENT_ROUND}}**. @@ -240,7 +277,7 @@ Write to: {{CORRECT_PATH}}" load_and_render_safe "$TEMPLATE_DIR" "block/wrong-round-number.md" "$FALLBACK" \ "ACTION=write to" \ "CLAUDE_ROUND=$CLAUDE_ROUND" \ - "FILE_TYPE=summary" \ + "FILE_TYPE=$FILE_TYPE" \ "CURRENT_ROUND=$CURRENT_ROUND" \ "CORRECT_PATH=$CORRECT_PATH" >&2 exit 2 diff --git a/prompt-template/block/finalize-contract-access.md b/prompt-template/block/finalize-contract-access.md new file mode 100644 index 00000000..7b757d9d --- /dev/null +++ b/prompt-template/block/finalize-contract-access.md @@ -0,0 +1,7 @@ +# Finalize Contract Access Blocked + +There is no active `round-N-contract.md` during the Finalize Phase. + +- Do not {{ACTION}} historical round contract files. +- Use `finalize-summary.md` for finalize-only notes. +- Use `goal-tracker.md` if you need the current mainline/backlog state. diff --git a/prompt-template/block/goal-tracker-modification.md b/prompt-template/block/goal-tracker-modification.md index f7f26384..30c77835 100644 --- a/prompt-template/block/goal-tracker-modification.md +++ b/prompt-template/block/goal-tracker-modification.md @@ -1,25 +1,14 @@ -# Goal Tracker Modification Blocked (Round {{CURRENT_ROUND}}) +# Goal Tracker Update Blocked (Round {{CURRENT_ROUND}}) -After Round 0, **only Codex can modify the Goal Tracker**. 
+After Round 0, you may update only the **MUTABLE SECTION** of the active goal tracker. -You CANNOT directly modify `goal-tracker.md` via Write, Edit, or Bash commands. +Use Write or Edit on: +`{{CORRECT_PATH}}` -## How to Request Changes +## Rules -Include a **"Goal Tracker Update Request"** section in your summary file: -`{{SUMMARY_FILE}}` +- Keep the **IMMUTABLE SECTION** unchanged +- Do not modify `goal-tracker.md` via Bash +- Do not write to an old loop session's tracker -Use this format: -```markdown -## Goal Tracker Update Request - -### Requested Changes: -- [E.g., "Mark Task X as completed with evidence: tests pass"] -- [E.g., "Add to Open Issues: discovered Y needs addressing"] -- [E.g., "Plan Evolution: changed approach from A to B because..."] - -### Justification: -[Explain why these changes are needed and how they serve the Ultimate Goal] -``` - -Codex will review your request and update the Goal Tracker if the changes are justified. +If you need Codex to correct tracker drift that you could not safely resolve yourself, include an optional `Goal Tracker Update Request` in your summary. diff --git a/prompt-template/block/mainline-drift-stop.md b/prompt-template/block/mainline-drift-stop.md new file mode 100644 index 00000000..2b7cc53d --- /dev/null +++ b/prompt-template/block/mainline-drift-stop.md @@ -0,0 +1,14 @@ +# Mainline Drift Circuit Breaker + +The RLCR loop has been stopped because the implementation failed to advance the mainline for **{{STALL_COUNT}} consecutive rounds**. + +- Last mainline verdict: `{{LAST_VERDICT}}` +- Plan anchor: `{{PLAN_FILE}}` +- Drift status: `replan_required` + +This loop should not continue automatically. + +Next action: +1. Re-read the original plan +2. Identify why recent rounds kept stalling or regressing +3. 
Start a fresh RLCR loop with a narrower recovered mainline objective diff --git a/prompt-template/block/mainline-verdict-missing.md b/prompt-template/block/mainline-verdict-missing.md new file mode 100644 index 00000000..bf822e53 --- /dev/null +++ b/prompt-template/block/mainline-verdict-missing.md @@ -0,0 +1,13 @@ +# Mainline Verdict Missing + +The implementation review output is missing the required line: + +`Mainline Progress Verdict: ADVANCED / STALLED / REGRESSED` + +Humanize cannot safely update the drift state or choose the correct next-round prompt without this verdict. + +Retry the exit so Codex reruns the implementation review. + +Files: +- Review result: {{REVIEW_RESULT_FILE}} +- Review prompt: {{REVIEW_PROMPT_FILE}} diff --git a/prompt-template/block/round-contract-bash-write.md b/prompt-template/block/round-contract-bash-write.md new file mode 100644 index 00000000..bc012ffd --- /dev/null +++ b/prompt-template/block/round-contract-bash-write.md @@ -0,0 +1,7 @@ +# Round Contract Bash Write Blocked + +Do not use Bash commands to modify round contract files. + +Use the `Write` or `Edit` tool instead: + +`{{CORRECT_PATH}}` diff --git a/prompt-template/block/round-contract-missing.md b/prompt-template/block/round-contract-missing.md new file mode 100644 index 00000000..35a822b1 --- /dev/null +++ b/prompt-template/block/round-contract-missing.md @@ -0,0 +1,13 @@ +# Round Contract Missing + +Before you try to exit this round, write the current round contract to: +`{{ROUND_CONTRACT_FILE}}` + +The round contract must restate: +- The single **mainline objective** for this round +- The target ACs +- Which issues are truly **blocking** +- Which issues are **queued** and out of scope +- The concrete success criteria for this round + +Do not continue without a round contract. The loop uses it to prevent goal drift. 
diff --git a/prompt-template/block/wrong-contract-location.md b/prompt-template/block/wrong-contract-location.md new file mode 100644 index 00000000..04060c5f --- /dev/null +++ b/prompt-template/block/wrong-contract-location.md @@ -0,0 +1,5 @@ +# Wrong Round Contract Location + +Round contract files MUST be in the active loop directory. + +**Correct path**: `{{CORRECT_PATH}}` diff --git a/prompt-template/claude/drift-replan-prompt.md b/prompt-template/claude/drift-replan-prompt.md new file mode 100644 index 00000000..a5970c59 --- /dev/null +++ b/prompt-template/claude/drift-replan-prompt.md @@ -0,0 +1,68 @@ +Your work is not finished. Read and execute the below with ultrathink. + +## Drift Recovery Mode + +Codex judged the recent implementation rounds as failing to advance the mainline. + +- Consecutive stalled/regressed rounds: {{STALL_COUNT}} +- Last mainline verdict: {{LAST_MAINLINE_VERDICT}} + +This round is a **drift recovery round**. Do not continue with normal issue-clearing behavior. + +## Original Implementation Plan + +**IMPORTANT**: Re-anchor on the original plan first: +@{{PLAN_FILE}} + +## Required Recovery Re-anchor + +Before changing code: +- Re-read @{{PLAN_FILE}} +- Re-read @{{GOAL_TRACKER_FILE}} +- Re-read the recent round summaries and review results that led here +- Rewrite the round contract at @{{ROUND_CONTRACT_FILE}} + +Your recovery contract must contain: +- Exactly one recovered **mainline objective** +- The 1-2 target ACs that prove mainline progress this round +- The root cause of recent drift or stagnation +- Which issues are truly **blocking** the recovered mainline objective +- Which issues remain **queued** and explicitly out of scope +- Concrete success criteria that would change the verdict back to `ADVANCED` + +Do not start implementation until the recovery contract exists. 
+ +## Task Lane Rules + +Use the Task system (TaskCreate, TaskUpdate, TaskList) with one required tag per task: +- `[mainline]` for plan-derived work that directly advances the recovered objective +- `[blocking]` for issues that prevent the recovered mainline objective from succeeding safely +- `[queued]` for non-blocking bugs, cleanup, or follow-up work + +Rules: +- This round must prove mainline movement, not just reduce noise +- `[blocking]` work is allowed only when it directly unblocks the recovered mainline objective +- `[queued]` work must stay documented but must NOT replace the recovered objective +- If a new issue does not block the recovered objective, tag it `[queued]` and keep moving on mainline work + +--- +Below is Codex's review result: + +{{REVIEW_CONTENT}} + +--- + +## Goal Tracker Reference + +Before starting work, **read and update** @{{GOAL_TRACKER_FILE}} as needed: +- Keep the immutable section unchanged +- Record the drift/stagnation cause in the mutable section if it changed planning +- Keep blocking vs queued issue classification accurate +- Ensure the tracker and contract now describe the same recovered mainline objective + +## Recovery Guardrails + +- Do not spend this round mostly on queued cleanup +- Do not broaden scope to compensate for previous stalls +- If the original approach was flawed, log the plan evolution explicitly instead of silently changing direction +- If you cannot produce a credible recovered mainline objective, say so in the summary with concrete blockers diff --git a/prompt-template/claude/finalize-phase-prompt.md b/prompt-template/claude/finalize-phase-prompt.md index 4d1c584b..2ee14176 100644 --- a/prompt-template/claude/finalize-phase-prompt.md +++ b/prompt-template/claude/finalize-phase-prompt.md @@ -40,9 +40,10 @@ The code-simplifier agent should focus on: ## Before Exiting -1. Complete all tasks (mark them as completed using TaskUpdate with status "completed") -2. 
Commit your changes with a descriptive message -3. Write your finalize summary to: **{{FINALIZE_SUMMARY_FILE}}** +1. Complete all `[mainline]` and `[blocking]` tasks (mark them as completed using TaskUpdate with status "completed") +2. `[queued]` tasks may remain only if they are documented as non-blocking follow-up work +3. Commit your changes with a descriptive message +4. Write your finalize summary to: **{{FINALIZE_SUMMARY_FILE}}** Your summary should include: - What simplifications were made diff --git a/prompt-template/claude/finalize-phase-skipped-prompt.md b/prompt-template/claude/finalize-phase-skipped-prompt.md index 654fabdb..5cb01c3c 100644 --- a/prompt-template/claude/finalize-phase-skipped-prompt.md +++ b/prompt-template/claude/finalize-phase-skipped-prompt.md @@ -39,9 +39,10 @@ These constraints are **non-negotiable**: ## Before Exiting -1. Complete all tasks (mark them as completed using TaskUpdate with status "completed") -2. Commit your changes with a descriptive message -3. Write your finalize summary to: **{{FINALIZE_SUMMARY_FILE}}** +1. Complete all `[mainline]` and `[blocking]` tasks (mark them as completed using TaskUpdate with status "completed") +2. `[queued]` tasks may remain only if they are documented as non-blocking follow-up work +3. Commit your changes with a descriptive message +4. 
Write your finalize summary to: **{{FINALIZE_SUMMARY_FILE}}** Your summary should include: - What work was done diff --git a/prompt-template/claude/goal-tracker-update-request.md b/prompt-template/claude/goal-tracker-update-request.md index 4c00d483..b685fd51 100644 --- a/prompt-template/claude/goal-tracker-update-request.md +++ b/prompt-template/claude/goal-tracker-update-request.md @@ -1,11 +1,12 @@ -**If Goal Tracker needs updates**, include this section in your summary: +**Optional fallback**: if you could not safely update the mutable section of `goal-tracker.md` directly, include this section in your summary: ```markdown ## Goal Tracker Update Request ### Requested Changes: - [E.g., "Mark Task X as completed with evidence: tests pass"] -- [E.g., "Add to Open Issues: discovered Y needs addressing"] +- [E.g., "Add to Blocking Side Issues: bug Y blocks AC-2"] +- [E.g., "Add to Queued Side Issues: cleanup Z is non-blocking"] - [E.g., "Plan Evolution: changed approach from A to B because..."] - [E.g., "Defer Task Z because... (impact on AC: none/minimal)"] @@ -13,4 +14,4 @@ [Explain why these changes are needed and how they serve the Ultimate Goal] ``` -Codex will review your request and update the Goal Tracker if justified. +Codex will review your request and reconcile the Goal Tracker if justified. diff --git a/prompt-template/claude/next-round-prompt.md b/prompt-template/claude/next-round-prompt.md index b3aaff01..fd1b1cfe 100644 --- a/prompt-template/claude/next-round-prompt.md +++ b/prompt-template/claude/next-round-prompt.md @@ -9,8 +9,35 @@ This plan contains the full scope of work and requirements. Ensure your work ali --- -For all tasks that need to be completed, please use the Task system (TaskCreate, TaskUpdate, TaskList) to track each item in order of importance. -You are strictly prohibited from only addressing the most important issues - you MUST create Tasks for ALL discovered issues and attempt to resolve each one. 
+## Round Re-anchor (REQUIRED FIRST STEP) + +Before writing code: +- Re-read @{{PLAN_FILE}} +- Re-read @{{GOAL_TRACKER_FILE}} +- Re-read the most recent round summaries/reviews that led to this round +- Write the current round contract to @{{ROUND_CONTRACT_FILE}} + +Your round contract must contain: +- Exactly one **mainline objective** +- The 1-2 target ACs for this round +- Which issues are truly **blocking** that mainline objective +- Which issues are **queued** and explicitly out of scope +- Concrete success criteria for this round + +Do not start implementation until the round contract exists. + +## Task Lane Rules + +Use the Task system (TaskCreate, TaskUpdate, TaskList) with one required tag per task: +- `[mainline]` for plan-derived work that directly advances this round's objective +- `[blocking]` for issues that prevent the mainline objective from succeeding safely +- `[queued]` for non-blocking bugs, cleanup, or follow-up work + +Rules: +- `[mainline]` work is the round's primary success condition +- `[blocking]` work is allowed only when it truly blocks the mainline objective +- `[queued]` work must be documented but must NOT replace the round objective +- If a new bug does not block the current objective, tag it `[queued]` and keep moving on mainline work Before executing each task in this round: 1. Read @{{BITLESSON_FILE}} @@ -24,13 +51,25 @@ Below is Codex's review result: --- -## Goal Tracker Reference (READ-ONLY after Round 0) +## Goal Tracker Reference Before starting work, **read** @{{GOAL_TRACKER_FILE}} to understand: - The Ultimate Goal and Acceptance Criteria you're working toward - Which tasks are Active, Completed, or Deferred +- Which side issues are blocking vs queued - Any Plan Evolution that has occurred -- Open Issues that need attention +- The latest side-issue state that needs attention + +**IMPORTANT**: Keep the mutable section of `goal-tracker.md` up to date during the round. +Do NOT change the immutable section after Round 0. 
+If you cannot safely reconcile the tracker yourself, include an optional "Goal Tracker Update Request" section in your summary (see below). + +## Mainline Guardrails -**IMPORTANT**: You CANNOT directly modify goal-tracker.md after Round 0. -If you need to update the Goal Tracker, include a "Goal Tracker Update Request" section in your summary (see below). +- Keep the mainline objective from @{{ROUND_CONTRACT_FILE}} stable for this round +- Do not let queued issues take over the round +- If Codex reported several findings, classify them into: + - mainline gaps + - blocking side issues + - queued side issues +- Only mainline gaps and blocking side issues should drive the next code changes diff --git a/prompt-template/claude/post-alignment-action-items.md b/prompt-template/claude/post-alignment-action-items.md index 28611ec0..c78e95d0 100644 --- a/prompt-template/claude/post-alignment-action-items.md +++ b/prompt-template/claude/post-alignment-action-items.md @@ -5,3 +5,4 @@ This round follows a Full Goal Alignment Check. Pay special attention to: - **Forgotten Items**: Codex may have identified tasks that were being ignored. Address them. - **AC Status**: If any Acceptance Criteria were marked NOT MET, prioritize work toward those. - **Deferred Items**: If any deferrals were flagged as unjustified, un-defer them now. +- **Queued Issues**: Keep non-blocking follow-up work queued unless it now clearly blocks mainline progress. diff --git a/prompt-template/claude/review-phase-prompt.md b/prompt-template/claude/review-phase-prompt.md index 158ca0f0..e180e418 100644 --- a/prompt-template/claude/review-phase-prompt.md +++ b/prompt-template/claude/review-phase-prompt.md @@ -2,14 +2,39 @@ You are in the **Review Phase**. Codex has performed a code review and found issues that need to be addressed. 
+## Required Re-anchor + +Before touching code: +- Re-read the original plan at @{{PLAN_FILE}} +- Re-read the goal tracker at @{{GOAL_TRACKER_FILE}} +- Refresh the current round contract at @{{ROUND_CONTRACT_FILE}} + +The round contract must preserve a single mainline objective. Code review findings do NOT automatically become the new round objective. + ## Review Results {{REVIEW_CONTENT}} +## Issue Classification + +Classify each review finding before acting on it: +- **blocking side issue**: prevents the current mainline objective from succeeding safely or prevents review acceptance +- **queued side issue**: valid follow-up, but does not block the current round objective + +Queued issues may be documented, but they must NOT take over the round. + +## Task Rules + +Every task must use one lane tag: +- `[blocking]` for review findings that must be fixed now +- `[queued]` for non-blocking follow-up work + +Do not create new `[mainline]` tasks in review phase unless the review proves the previous mainline objective was incomplete. + ## Instructions -1. **Read `.humanize/bitlesson.md` and run `bitlesson-selector`** for each fix task before coding -2. **Address all issues** marked with `[P0-9]` severity markers +1. **Refresh the round contract** at `{{ROUND_CONTRACT_FILE}}` +2. **Address blocking issues first** and keep the mainline objective stable 3. **Focus on fixes only** - do not add new features or make unrelated changes 4. **Commit your changes** after fixing the issues 5. **Write your summary** to: `{{SUMMARY_FILE}}` @@ -17,9 +42,13 @@ You are in the **Review Phase**. 
Codex has performed a code review and found iss ## Summary Template Your summary should include: -- Which issues were fixed -- How each issue was resolved +- The mainline objective for this round +- Which blocking issues were fixed +- Which issues were reclassified as queued follow-up +- How each fixed issue was resolved - Any issues that could not be resolved (with explanation) +- Confirmation that `goal-tracker.md` was updated if the blocking/queued issue lists changed +- A Goal Tracker Update Request only if tracker reconciliation still needs Codex help ## Important Notes diff --git a/prompt-template/codex/full-alignment-review.md b/prompt-template/codex/full-alignment-review.md index d8ced81b..02997dd8 100644 --- a/prompt-template/codex/full-alignment-review.md +++ b/prompt-template/codex/full-alignment-review.md @@ -47,16 +47,32 @@ Estimated remaining rounds: ? Critical blockers: [list if any] ``` -## Part 2: Implementation Review +## Part 2: Mainline Drift Audit (MANDATORY) + +Determine whether the recent rounds are still serving the original plan: +- Is the current round's mainline objective clear and singular? +- Has Claude been advancing mainline ACs, or mostly clearing side issues? +- Which findings are true **blocking side issues** versus merely **queued side issues**? + +Include a short drift summary: +``` +Mainline Progress Verdict: ADVANCED / STALLED / REGRESSED +Blocking Side Issues: N +Queued Side Issues: N +``` + +The `Mainline Progress Verdict` line is mandatory. If you omit it, the Humanize stop hook will block the round and require the review to be rerun. 
+ +## Part 3: Implementation Review - Conduct a deep critical review of the implementation - Verify Claude's claims match reality - Identify any gaps, bugs, or incomplete work - Reference @{{DOCS_PATH}} for design documents -## Part 3: {{GOAL_TRACKER_UPDATE_SECTION}} +## Part 4: {{GOAL_TRACKER_UPDATE_SECTION}} -## Part 4: Progress Stagnation Check (MANDATORY for Full Alignment Rounds) +## Part 5: Progress Stagnation Check (MANDATORY for Full Alignment Rounds) To implement the original plan at @{{PLAN_FILE}}, we have completed **{{COMPLETED_ITERATIONS}} iterations** (Round 0 to Round {{CURRENT_ROUND}}). @@ -83,10 +99,13 @@ The project's `.humanize/rlcr/{{LOOP_TIMESTAMP}}/` directory contains the histor **If development is stagnating**, write **STOP** (as a single word on its own line) as the last line of your review output @{{REVIEW_RESULT_FILE}} instead of COMPLETE. -## Part 5: Output Requirements +## Part 6: Output Requirements - If issues found OR any AC is NOT MET (including deferred ACs), write your findings to @{{REVIEW_RESULT_FILE}} -- Include specific action items for Claude to address +- Include specific action items for Claude to address, classified into: + - Mainline Gaps + - Blocking Side Issues + - Queued Side Issues -- **If development is stagnating** (see Part 4), write "STOP" as the last line +- **If development is stagnating** (see Part 5), write "STOP" as the last line - **CRITICAL**: Only write "COMPLETE" as the last line if ALL ACs from the original plan are FULLY MET with no deferrals - DEFERRED items are considered INCOMPLETE - do NOT output COMPLETE if any AC is deferred diff --git a/prompt-template/codex/goal-tracker-update-section.md b/prompt-template/codex/goal-tracker-update-section.md index 77cbedca..fb312db8 100644 --- a/prompt-template/codex/goal-tracker-update-section.md +++ b/prompt-template/codex/goal-tracker-update-section.md @@ -1,17 +1,18 @@ ## Goal Tracker Update Requests (YOUR RESPONSIBILITY) -**Important**: Claude cannot directly modify `goal-tracker.md` after Round 0.
If Claude's summary contains a "Goal Tracker Update Request" section, YOU must: +Claude should normally keep the **mutable section** of `goal-tracker.md` up to date directly. If Claude's summary contains a "Goal Tracker Update Request" section, or if you detect tracker drift during review, YOU must: -1. **Evaluate the request**: Is the change justified? Does it serve the Ultimate Goal? -2. **If approved**: Update @{{GOAL_TRACKER_FILE}} yourself with the requested changes: +1. **Evaluate the tracker state**: Is the mutable section still aligned with the Ultimate Goal and current AC progress? +2. **If correction is needed**: Update @{{GOAL_TRACKER_FILE}} yourself with the requested changes: - Move tasks between Active/Completed/Deferred sections as appropriate - Add entries to "Plan Evolution Log" with round number and justification - - Add new issues to "Open Issues" if discovered + - Add new issues to "Blocking Side Issues" or "Queued Side Issues" as appropriate - **NEVER modify the IMMUTABLE SECTION** (Ultimate Goal and Acceptance Criteria) -3. **If rejected**: Include in your review why the request was rejected +3. 
**If you reject a requested tracker change**: Include in your review why it was rejected Common update requests you should handle: - Task completion: Move from "Active Tasks" to "Completed and Verified" -- New issues: Add to "Open Issues" table +- New blocking issues: Add to "Blocking Side Issues" +- New queued issues: Add to "Queued Side Issues" - Plan changes: Add to "Plan Evolution Log" with your assessment - Deferrals: Only allow with strong justification; add to "Explicitly Deferred" diff --git a/prompt-template/codex/regular-review.md b/prompt-template/codex/regular-review.md index 6d0a8671..7db26ea2 100644 --- a/prompt-template/codex/regular-review.md +++ b/prompt-template/codex/regular-review.md @@ -44,11 +44,28 @@ Include a brief Goal Alignment Summary in your review: ACs: X/Y addressed | Forgotten items: N | Unjustified deferrals: N ``` -## Part 3: {{GOAL_TRACKER_UPDATE_SECTION}} +## Part 3: Required Finding Classification -## Part 4: Output Requirements +You MUST classify your findings into these lanes: +- **Mainline Gaps**: plan-derived work or AC progress that is missing, incomplete, or regressing +- **Blocking Side Issues**: bugs or implementation issues that block the current mainline objective from succeeding safely +- **Queued Side Issues**: valid non-blocking follow-up issues that should be documented but must NOT take over the next round + +Also include a one-line verdict: +``` +Mainline Progress Verdict: ADVANCED / STALLED / REGRESSED +``` + +This verdict line is mandatory. If you omit it, the Humanize stop hook will block the round and require the review to be rerun. + +If Claude mostly worked on queued side issues and failed to advance the mainline, say so explicitly. 
+ +## Part 4: {{GOAL_TRACKER_UPDATE_SECTION}} + +## Part 5: Output Requirements - In short, your review comments can include: problems/findings/blockers; claims that don't match reality; implementation plans for deferred work (to be implemented now); implementation plans for unfinished work; goal alignment issues. +- Your output should be structured so Claude can tell which items are mainline gaps, blocking side issues, and queued side issues. - If after your investigation the actual situation does not match what Claude claims to have completed, or there is pending work to be done, output your review comments to @{{REVIEW_RESULT_FILE}}. - **CRITICAL**: Only output "COMPLETE" as the last line if ALL tasks from the original plan are FULLY completed with no deferrals - DEFERRED items are considered INCOMPLETE - do NOT output COMPLETE if any task is deferred diff --git a/scripts/humanize.sh b/scripts/humanize.sh index 1613dd62..a64c18ba 100755 --- a/scripts/humanize.sh +++ b/scripts/humanize.sh @@ -33,6 +33,39 @@ humanize_split_to_array() { fi } +# Parse issue breakdown from goal-tracker.md +# Returns: blocking_issues|queued_issues|open_issues +humanize_parse_goal_tracker_issue_counts() { + local tracker_file="$1" + if [[ ! -f "$tracker_file" ]]; then + echo "0|0|0" + return + fi + + _count_table_data_rows() { + local row_count + row_count=$(sed -n "/$1/,/$2/p" "$tracker_file" | grep -cE '^\|' || true) + row_count=${row_count:-0} + echo $((row_count > 2 ? row_count - 2 : 0)) + } + + local blocking_issues + local queued_issues + local open_issues + + blocking_issues=$(_count_table_data_rows '### Blocking Side Issues' '^###') + queued_issues=$(_count_table_data_rows '### Queued Side Issues' '^###') + open_issues=$((blocking_issues + queued_issues)) + + # Legacy schema only had Open Issues; treat them as blocking for safety. 
+ if [[ "$open_issues" -eq 0 ]]; then + open_issues=$(_count_table_data_rows '### Open Issues' '^###') + blocking_issues="$open_issues" + fi + + echo "${blocking_issues}|${queued_issues}|${open_issues}" +} + # Parse goal-tracker.md and return summary values # Returns: total_acs|completed_acs|active_tasks|completed_tasks|deferred_tasks|open_issues|goal_summary humanize_parse_goal_tracker() { @@ -105,9 +138,10 @@ humanize_parse_goal_tracker() { local deferred_tasks deferred_tasks=$(_count_table_data_rows '### Explicitly Deferred' '^###') - # Count Open Issues - local open_issues - open_issues=$(_count_table_data_rows '### Open Issues' '^###') + # Count Open Issues (new schema prefers Blocking/Queued Side Issues; old schema used Open Issues) + local -a issue_parts + humanize_split_to_array issue_parts "$(humanize_parse_goal_tracker_issue_counts "$tracker_file")" + local open_issues="${issue_parts[2]}" # Extract Ultimate Goal summary (first content line after heading) local goal_summary @@ -364,8 +398,11 @@ _humanize_monitor_codex() { local review_started=$(grep -E "^review_started:" "$state_file" 2>/dev/null | sed 's/review_started: *//' | tr -d ' ') local agent_teams=$(grep -E "^agent_teams:" "$state_file" 2>/dev/null | sed 's/agent_teams: *//' | tr -d ' ') local push_every_round=$(grep -E "^push_every_round:" "$state_file" 2>/dev/null | sed 's/push_every_round: *//' | tr -d ' ') + local mainline_stall_count=$(grep -E "^mainline_stall_count:" "$state_file" 2>/dev/null | sed 's/mainline_stall_count: *//' | tr -d ' ') + local last_mainline_verdict=$(grep -E "^last_mainline_verdict:" "$state_file" 2>/dev/null | sed 's/last_mainline_verdict: *//' | tr -d ' ') + local drift_status=$(grep -E "^drift_status:" "$state_file" 2>/dev/null | sed 's/drift_status: *//' | tr -d ' ') - echo 
"${current_round:-N/A}|${max_iterations:-N/A}|${full_review_round:-N/A}|${codex_model:-N/A}|${codex_effort:-N/A}|${started_at:-N/A}|${plan_file:-N/A}|${ask_codex_question:-false}|${review_started:-false}|${agent_teams:-}|${push_every_round:-}" + echo "${current_round:-N/A}|${max_iterations:-N/A}|${full_review_round:-N/A}|${codex_model:-N/A}|${codex_effort:-N/A}|${started_at:-N/A}|${plan_file:-N/A}|${ask_codex_question:-false}|${review_started:-false}|${agent_teams:-}|${push_every_round:-}|${mainline_stall_count:-0}|${last_mainline_verdict:-unknown}|${drift_status:-normal}" } # Internal wrappers that call top-level functions @@ -405,6 +442,9 @@ _humanize_monitor_codex() { local review_started="${state_parts[8]:-false}" local agent_teams="${state_parts[9]:-}" local push_every_round="${state_parts[10]:-}" + local mainline_stall_count="${state_parts[11]:-0}" + local last_mainline_verdict="${state_parts[12]:-unknown}" + local drift_status="${state_parts[13]:-normal}" # Parse goal-tracker.md local -a goal_parts @@ -416,6 +456,10 @@ _humanize_monitor_codex() { local deferred_tasks="${goal_parts[4]}" local open_issues="${goal_parts[5]}" local goal_summary="${goal_parts[6]}" + local -a issue_parts + _split_to_array issue_parts "$(humanize_parse_goal_tracker_issue_counts "$goal_tracker_file")" + local blocking_issues="${issue_parts[0]}" + local queued_issues="${issue_parts[1]}" # Parse git status local -a git_parts @@ -548,18 +592,35 @@ _humanize_monitor_codex() { fi team_mode_segment=" | Team Mode: ${team_color}${team_display}${reset}" fi - printf "${magenta}Status:${reset} ${status_line} | Codex Ask Question: ${ask_q_color}${ask_q_display}${reset}${team_mode_segment}${clr_eol}\n" + local drift_segment="" + local drift_color="${dim}" + if [[ "$drift_status" == "replan_required" ]]; then + drift_color="${red}" + elif [[ "${mainline_stall_count:-0}" -gt 0 ]]; then + drift_color="${yellow}" + fi + if [[ -n "$drift_status" ]]; then + drift_segment=" | Drift: 
${drift_color}${drift_status}${reset} (${mainline_stall_count}, ${last_mainline_verdict})" + fi + printf "${magenta}Status:${reset} ${status_line} | Codex Ask Question: ${ask_q_color}${ask_q_display}${reset}${team_mode_segment}${drift_segment}${clr_eol}\n" # Progress line (color based on completion status) local ac_color="${green}" [[ "$completed_acs" -lt "$total_acs" ]] && ac_color="${yellow}" - local issue_color="${dim}" - [[ "$open_issues" -gt 0 ]] && issue_color="${red}" + local issue_total_color="${dim}" + [[ "$queued_issues" -gt 0 ]] && issue_total_color="${yellow}" + [[ "$blocking_issues" -gt 0 ]] && issue_total_color="${red}" # Use magenta for Progress and Git labels (status/data lines) printf "${magenta}Progress:${reset} ${ac_color}ACs: ${completed_acs}/${total_acs}${reset} Tasks: ${active_tasks} active, ${completed_tasks} done" [[ "$deferred_tasks" -gt 0 ]] && printf " ${yellow}${deferred_tasks} deferred${reset}" - [[ "$open_issues" -gt 0 ]] && printf " ${issue_color}Issues: ${open_issues}${reset}" + if [[ "$open_issues" -gt 0 ]]; then + printf " ${issue_total_color}Issues: ${open_issues}${reset}" + [[ "$blocking_issues" -gt 0 ]] && printf " (${red}%s blocking${reset}" "$blocking_issues" + [[ "$queued_issues" -gt 0 ]] && printf "%s${yellow}%s queued${reset}" \ + "$([[ "$blocking_issues" -gt 0 ]] && echo ", " || echo "(")" "$queued_issues" + printf ")" + fi printf "${clr_eol}\n" # Git status line (same color as Progress) diff --git a/scripts/lib/monitor-common.sh b/scripts/lib/monitor-common.sh index 26bdaa9b..a6e894ef 100644 --- a/scripts/lib/monitor-common.sh +++ b/scripts/lib/monitor-common.sh @@ -384,6 +384,41 @@ get_pr_loop_phase_display() { # Goal Tracker Parsing # ======================================== +# Parse issue breakdown from goal-tracker.md +# Returns: blocking_issues|queued_issues|open_issues +# Usage: parse_goal_tracker_issue_counts "/path/to/goal-tracker.md" +parse_goal_tracker_issue_counts() { + local tracker_file="$1" + if [[ ! 
-f "$tracker_file" ]]; then + echo "0|0|0" + return + fi + + _count_table_rows() { + local start_pattern="$1" + local end_pattern="$2" + local row_count + row_count=$(sed -n "/${start_pattern}/,/${end_pattern}/p" "$tracker_file" | grep -cE '^\|' || true) + row_count=${row_count:-0} + echo $((row_count > 2 ? row_count - 2 : 0)) + } + + local blocking_issues + local queued_issues + local open_issues + + blocking_issues=$(_count_table_rows '### Blocking Side Issues' '^###') + queued_issues=$(_count_table_rows '### Queued Side Issues' '^###') + open_issues=$((blocking_issues + queued_issues)) + + if [[ "$open_issues" -eq 0 ]]; then + open_issues=$(_count_table_rows '### Open Issues' '^###') + blocking_issues="$open_issues" + fi + + echo "${blocking_issues}|${queued_issues}|${open_issues}" +} + # Parse goal-tracker.md and return summary values # Returns: total_acs|completed_acs|active_tasks|completed_tasks|deferred_tasks|open_issues|goal_summary # Usage: parse_goal_tracker "/path/to/goal-tracker.md" @@ -448,9 +483,19 @@ parse_goal_tracker() { local deferred_tasks deferred_tasks=$(_count_table_rows '### Explicitly Deferred' '^###') - # Count Open Issues + # Count Open Issues (new schema prefers Blocking/Queued Side Issues; old schema used Open Issues) + local issue_parts_raw local open_issues - open_issues=$(_count_table_rows '### Open Issues' '^###') + issue_parts_raw=$(parse_goal_tracker_issue_counts "$tracker_file") + if [[ -n "${ZSH_VERSION:-}" ]]; then + local -a issue_parts + issue_parts=("${(@s:|:)issue_parts_raw}") + open_issues="${issue_parts[3]}" + else + local -a issue_parts + IFS='|' read -r -a issue_parts <<< "$issue_parts_raw" + open_issues="${issue_parts[2]}" + fi # Extract Ultimate Goal summary local goal_summary diff --git a/scripts/setup-rlcr-loop.sh b/scripts/setup-rlcr-loop.sh index 20c1f32a..c5c079d4 100755 --- a/scripts/setup-rlcr-loop.sh +++ b/scripts/setup-rlcr-loop.sh @@ -48,10 +48,45 @@ BASE_BRANCH="" 
FULL_REVIEW_ROUND="$DEFAULT_FULL_REVIEW_ROUND" SKIP_IMPL="false" SKIP_IMPL_NO_PLAN="false" +SKIP_IMPL_PLAN_ANCHORED="false" ASK_CODEX_QUESTION="true" AGENT_TEAMS="false" BITLESSON_ALLOW_EMPTY_NONE="true" +extract_plan_goal_content() { + local plan_path="$1" + local goal_section="" + + goal_section=$({ sed -n '/^##[[:space:]]*[Gg]oal\|^##[[:space:]]*[Oo]bjective\|^##[[:space:]]*[Pp]urpose/,/^##/p' "$plan_path" 2>/dev/null || true; } | head -20 | tail -n +2 | head -10) + if [[ -n "$goal_section" ]]; then + printf '%s\n' "$goal_section" + return + fi + + awk ' + /^[[:space:]]*#/ { next } + /^[[:space:]]*$/ { + if (started) { + exit + } + next + } + { + print + started=1 + lines++ + if (lines >= 5) { + exit + } + } + ' "$plan_path" +} + +extract_plan_ac_content() { + local plan_path="$1" + { sed -n '/^##[[:space:]]*[Aa]cceptance\|^##[[:space:]]*[Cc]riteria\|^##[[:space:]]*[Rr]equirements/,/^##/p' "$plan_path" 2>/dev/null || true; } | head -30 | tail -n +2 | head -25 +} + show_help() { cat < "$GOAL_TRACKER_FILE" << 'GOAL_TRACKER_EOF' + if [[ "$SKIP_IMPL_PLAN_ANCHORED" == "true" ]]; then + PLAN_GOAL_CONTENT=$(extract_plan_goal_content "$FULL_PLAN_PATH") + PLAN_AC_CONTENT=$(extract_plan_ac_content "$FULL_PLAN_PATH") + + if [[ -z "$PLAN_GOAL_CONTENT" ]]; then + PLAN_GOAL_CONTENT="Preserve the original plan scope from $PLAN_FILE while resolving code review findings on the current branch." + fi + + if [[ -z "$PLAN_AC_CONTENT" ]]; then + PLAN_AC_CONTENT=$(cat < "$GOAL_TRACKER_FILE" << EOF +# Goal Tracker (Skip Implementation Mode with Plan Anchor) + +This RLCR loop was started with \`--skip-impl\` flag. The implementation phase was skipped, +but an explicit plan was provided and remains the scope anchor for review-only work. + +This tracker is still used to keep the review loop aligned around one mainline objective +and to separate blocking issues from queued follow-up work. 
+ +## IMMUTABLE SECTION + +### Ultimate Goal + +$PLAN_GOAL_CONTENT + +### Acceptance Criteria + +$PLAN_AC_CONTENT + +--- + +## MUTABLE SECTION + +### Plan Version: Review-Only (Updated: Round 0) + +#### Plan Evolution Log +| Round | Change | Reason | Impact on AC | +|-------|--------|--------|--------------| +| 0 | Skip implementation mode initialized around explicit plan anchor | Loop started with \`--skip-impl\` and retained @$PLAN_FILE as scope anchor | Review stays aligned with original plan | + +#### Active Tasks +| Task | Target AC | Status | Notes | +|------|-----------|--------|-------| +| [mainline] Preserve original plan alignment while resolving blocking review findings | Plan ACs in scope | pending | Review-only mode with explicit plan anchor | + +### Blocking Side Issues +| Issue | Discovered Round | Blocking AC | Resolution Path | +|-------|-----------------|-------------|-----------------| + +### Queued Side Issues +| Issue | Discovered Round | Why Not Blocking | Revisit Trigger | +|-------|-----------------|------------------|-----------------| + +### Completed and Verified +| AC | Task | Completed Round | Verified Round | Evidence | +|----|------|-----------------|----------------|----------| + +### Explicitly Deferred +| Task | Original AC | Deferred Since | Justification | When to Reconsider | +|------|-------------|----------------|---------------|-------------------| + +EOF + else + # Create review-only goal tracker for skip-impl mode without a plan (no placeholder text) + cat > "$GOAL_TRACKER_FILE" << 'GOAL_TRACKER_EOF' # Goal Tracker (Skip Implementation Mode) This RLCR loop was started with `--skip-impl` flag. The implementation phase was skipped, and the loop is running in code review mode only. -## Mode: Code Review Only +This tracker is still used to keep the review loop aligned around one mainline objective +and to separate blocking issues from queued follow-up work. 
+ +## IMMUTABLE SECTION + +### Ultimate Goal + +Pass code review for the current branch without regressing existing behavior. + +### Acceptance Criteria + +- AC-1: All blocking `[P0-9]` code review findings are resolved. +- AC-2: Non-blocking follow-up items are explicitly queued and do not block completion. +- AC-3: Finalize phase can complete without introducing new review regressions. + +--- -The goal tracker is not used in skip-impl mode because: -- There is no implementation plan to track -- The loop focuses solely on code review quality -- No acceptance criteria tracking is needed +## MUTABLE SECTION -## What This Loop Does +### Plan Version: Review-Only (Updated: Round 0) -1. Runs `codex review` on changes between base branch and current branch -2. If issues are found, Claude fixes them iteratively -3. When no issues remain, enters finalize phase for code simplification +#### Plan Evolution Log +| Round | Change | Reason | Impact on AC | +|-------|--------|--------|--------------| +| 0 | Skip implementation mode initialized | Loop started with `--skip-impl` | Focus on review-only objective | + +#### Active Tasks +| Task | Target AC | Status | Notes | +|------|-----------|--------|-------| +| [mainline] Pass code review for current branch | AC-1 | pending | Review-only mode | + +### Blocking Side Issues +| Issue | Discovered Round | Blocking AC | Resolution Path | +|-------|-----------------|-------------|-----------------| + +### Queued Side Issues +| Issue | Discovered Round | Why Not Blocking | Revisit Trigger | +|-------|-----------------|------------------|-----------------| + +### Completed and Verified +| AC | Task | Completed Round | Verified Round | Evidence | +|----|------|-----------------|----------------|----------| + +### Explicitly Deferred +| Task | Original AC | Deferred Since | Justification | When to Reconsider | +|------|-------------|----------------|---------------|-------------------| GOAL_TRACKER_EOF + fi else # Normal mode: create 
full goal tracker @@ -935,11 +1082,8 @@ GOAL_TRACKER_EOF # Extract goal from plan file (look for ## Goal, ## Objective, or first paragraph) # This is a heuristic - Claude will refine it in round 0 # Use ^## without leading whitespace - markdown headers should start at column 0 -GOAL_LINE=$(grep -i -m1 '^##[[:space:]]*\(goal\|objective\|purpose\)' "$FULL_PLAN_PATH" 2>/dev/null || echo "") -if [[ -n "$GOAL_LINE" ]]; then - # Get the content after the heading - # Use || true after sed to ignore SIGPIPE when head closes the pipe early (pipefail mode) - GOAL_SECTION=$({ sed -n '/^##[[:space:]]*[Gg]oal\|^##[[:space:]]*[Oo]bjective\|^##[[:space:]]*[Pp]urpose/,/^##/p' "$FULL_PLAN_PATH" || true; } | head -20 | tail -n +2 | head -10) +GOAL_SECTION=$(extract_plan_goal_content "$FULL_PLAN_PATH") +if [[ -n "$GOAL_SECTION" ]]; then echo "$GOAL_SECTION" >> "$GOAL_TRACKER_FILE" else # Use first non-empty, non-heading paragraph as goal description @@ -959,7 +1103,7 @@ GOAL_TRACKER_EOF # Extract acceptance criteria from plan file (look for ## Acceptance, ## Criteria, ## Requirements) # Use ^## without leading whitespace - markdown headers should start at column 0 # Use || true after sed to ignore SIGPIPE when head closes the pipe early (pipefail mode) -AC_SECTION=$({ sed -n '/^##[[:space:]]*[Aa]cceptance\|^##[[:space:]]*[Cc]riteria\|^##[[:space:]]*[Rr]equirements/,/^##/p' "$FULL_PLAN_PATH" 2>/dev/null || true; } | head -30 | tail -n +2 | head -25) +AC_SECTION=$(extract_plan_ac_content "$FULL_PLAN_PATH") if [[ -n "$AC_SECTION" ]]; then echo "$AC_SECTION" >> "$GOAL_TRACKER_FILE" else @@ -982,10 +1126,20 @@ cat >> "$GOAL_TRACKER_FILE" << 'GOAL_TRACKER_EOF' | 0 | Initial plan | - | - | #### Active Tasks - + | Task | Target AC | Status | Tag | Owner | Notes | |------|-----------|--------|-----|-------|-------| -| [To be populated by Claude based on plan] | - | pending | coding or analyze | claude or codex | - | +| [To be populated by Claude based on plan] | - | pending | coding or 
analyze | claude or codex | mainline task only | + +### Blocking Side Issues + +| Issue | Discovered Round | Blocking AC | Resolution Path | +|-------|-----------------|-------------|-----------------| + +### Queued Side Issues + +| Issue | Discovered Round | Why Not Blocking | Revisit Trigger | +|-------|-----------------|------------------|-----------------| ### Completed and Verified @@ -997,10 +1151,6 @@ cat >> "$GOAL_TRACKER_FILE" << 'GOAL_TRACKER_EOF' | Task | Original AC | Deferred Since | Justification | When to Reconsider | |------|-------------|----------------|---------------|-------------------| -### Open Issues - -| Issue | Discovered Round | Blocking AC | Resolution Path | -|-------|-----------------|-------------|-----------------| GOAL_TRACKER_EOF fi # End of skip-impl goal tracker handling @@ -1043,6 +1193,7 @@ SUMMARY_TMPL_EOF # ======================================== SUMMARY_PATH="$LOOP_DIR/round-0-summary.md" +ROUND_CONTRACT_PATH="$LOOP_DIR/round-0-contract.md" # Create the round-0 summary template with BitLesson Delta section if [[ "$SKIP_IMPL" != "true" ]]; then @@ -1050,6 +1201,28 @@ if [[ "$SKIP_IMPL" != "true" ]]; then fi if [[ "$SKIP_IMPL" == "true" ]]; then + if [[ "$SKIP_IMPL_PLAN_ANCHORED" == "true" ]]; then + cat > "$ROUND_CONTRACT_PATH" << EOF +# Round 0 Contract + +- Mainline Objective: Keep the current branch aligned with @$PLAN_FILE while resolving only review findings that block clean acceptance. +- Target ACs: The original plan acceptance criteria affected by the current branch changes. +- Blocking Side Issues In Scope: Any \`[P0-9]\` findings or regressions that block review acceptance or violate the original plan scope. +- Queued Side Issues Out of Scope: Non-blocking cleanup, follow-up refactors, or future improvements that do not block review acceptance or plan alignment. +- Success Criteria: Code review passes and the current branch still matches the original plan's intended scope. 
+EOF + else + cat > "$ROUND_CONTRACT_PATH" << 'ROUND_CONTRACT_EOF' +# Round 0 Contract + +- Mainline Objective: Run code review for the current branch and resolve only findings that block clean acceptance. +- Target ACs: AC-1, AC-2 +- Blocking Side Issues In Scope: Any `[P0-9]` findings from the active review cycle. +- Queued Side Issues Out of Scope: Non-blocking cleanup, follow-up refactors, or future improvements that do not block review acceptance. +- Success Criteria: Code review passes with no blocking findings, and any remaining non-blocking follow-up is explicitly queued. +ROUND_CONTRACT_EOF + fi + # Skip-impl mode: create a prompt for code review only cat > "$LOOP_DIR/round-0-prompt.md" << EOF # Skip Implementation Mode - Code Review Loop @@ -1066,6 +1239,11 @@ The loop will automatically run \`codex review\` on your changes when you try to If issues are found (marked with [P0-9] priority), you'll need to fix them before the loop ends. Do not try to execute anything to trigger the review - just stop and it will run automatically. +Before requesting review, read: +- @$PLAN_FILE +- @$GOAL_TRACKER_FILE +- @$ROUND_CONTRACT_PATH + ## Your Task 1. Review your current work @@ -1074,10 +1252,32 @@ Do not try to execute anything to trigger the review - just stop and it will run 4. Repeat until no issues remain 5. Enter finalize phase for code simplification -## Note +## Review Objective -Since this is skip-impl mode, there is no implementation plan to follow. -The goal tracker is not used - focus on fixing code review issues. 
+Use the round contract as the current anchor: +- Keep one stable mainline objective and do not let it drift +- Treat review findings as \`[blocking]\` only if they block review acceptance +- Record non-blocking follow-up as \`[queued]\` +- Do not let queued work take over the round + +EOF + if [[ "$SKIP_IMPL_PLAN_ANCHORED" == "true" ]]; then + cat >> "$LOOP_DIR/round-0-prompt.md" << EOF +- Keep review-only work aligned with the original plan at @$PLAN_FILE + +Implementation phase is skipped, but the original plan still defines the intended branch scope. + +EOF + else + cat >> "$LOOP_DIR/round-0-prompt.md" << 'EOF' +There is no explicit implementation plan for this loop, so the review-only contract is the primary anchor. + +EOF + fi + + cat >> "$LOOP_DIR/round-0-prompt.md" << EOF + +Keep @$ROUND_CONTRACT_PATH updated if the blocking/queued split changes materially during review iterations. When you're ready for review, write a brief summary of your changes and try to exit (do not try to execute anything, just stop). @@ -1098,8 +1298,21 @@ Before starting implementation, you MUST initialize the Goal Tracker: 1. Read @$GOAL_TRACKER_FILE 2. If the "Ultimate Goal" section says "[To be extracted...]", extract a clear goal statement from the plan 3. If the "Acceptance Criteria" section says "[To be defined...]", define 3-7 specific, testable criteria -4. Populate the "Active Tasks" table with tasks from the plan, mapping each to an AC and filling Tag/Owner -5. Write the updated goal-tracker.md +4. Populate the "Active Tasks" table with MAINLINE tasks from the plan, mapping each to an AC and filling Tag/Owner +5. Record any already-known side issues in either "Blocking Side Issues" or "Queued Side Issues" +6. Write the updated goal-tracker.md + +## Round Contract Setup (REQUIRED BEFORE CODING) + +Before starting implementation, create @$ROUND_CONTRACT_PATH with: + +1. **One mainline objective** for this round +2. **Target ACs** (1-2 ACs only) +3. 
**Blocking side issues in scope** for this round +4. **Queued side issues out of scope** for this round +5. **Round success criteria** + +Use this contract to keep the round focused. Do NOT let non-blocking bugs or cleanup work replace the mainline objective. **IMPORTANT**: The IMMUTABLE SECTION can only be modified in Round 0. After this round, it becomes read-only. @@ -1107,8 +1320,18 @@ Before starting implementation, you MUST initialize the Goal Tracker: ## Implementation Plan -For all tasks that need to be completed, please use the Task system (TaskCreate, TaskUpdate, TaskList) to track each item in order of importance. -You are strictly prohibited from only addressing the most important issues - you MUST create Tasks for ALL discovered issues and attempt to resolve each one. +For all tasks that need to be completed, please use the Task system (TaskCreate, TaskUpdate, TaskList). + +Every task MUST start with exactly one lane tag: +- \`[mainline]\` for plan-derived work that directly advances the round objective +- \`[blocking]\` for issues that prevent the mainline objective from succeeding safely +- \`[queued]\` for non-blocking bugs, cleanup, or follow-up work + +Rules: +- \`[mainline]\` tasks are the primary success condition for the round +- \`[blocking]\` tasks may be resolved in the round only if they truly block mainline progress +- \`[queued]\` tasks must NOT become the round objective and do NOT need to be cleared before moving on +- If a new issue is not blocking the current objective, tag it \`[queued]\` and keep moving on the mainline ## Task Tag Routing (MUST FOLLOW) @@ -1177,18 +1400,24 @@ cat >> "$LOOP_DIR/round-0-prompt.md" << EOF Throughout your work, you MUST maintain the Goal Tracker: -1. **Before starting a task**: Mark it as "in_progress" in Active Tasks +1. **Before starting a round**: Re-anchor on the original plan and current round contract +2. 
**Before starting a task**: Mark the relevant mainline task as "in_progress" in Active Tasks - Confirm Tag/Owner routing is correct before execution -2. **After completing a task**: Move it to "Completed and Verified" with evidence (but mark as "pending verification") -3. **If you discover the plan has errors**: +3. **Active Tasks** are MAINLINE tasks only - side issues do not belong there +4. **Blocking Side Issues** are reserved for issues that truly stop mainline progress +5. **Queued Side Issues** are non-blocking and must not take over the round +6. **After completing a mainline task**: Move it to "Completed and Verified" with evidence (but mark as "pending verification") +7. **If you discover the plan has errors**: - Do NOT silently change direction - Add entry to "Plan Evolution Log" with justification - Explain how the change still serves the Ultimate Goal -4. **If you need to defer a task**: +8. **If you need to defer a task**: - Move it to "Explicitly Deferred" section - Provide strong justification - Explain impact on Acceptance Criteria -5. **If you discover new issues**: Add to "Open Issues" table +9. **If you discover new issues**: + - Add to "Blocking Side Issues" only if mainline progress is blocked + - Otherwise add to "Queued Side Issues" or keep them as \`[queued]\` tasks/backlog --- @@ -1197,8 +1426,9 @@ Note: You MUST NOT try to exit \`start-rlcr-loop\` loop by lying or edit loop st After completing the work, please: 0. If you have access to the \`code-simplifier\` agent, use it to review and optimize the code you just wrote 1. Finalize @$GOAL_TRACKER_FILE (this is Round 0, so you are initializing it - see "Goal Tracker Setup" above) -2. Commit your changes with a descriptive commit message -3. Write your work summary into @$SUMMARY_PATH +2. Write your round contract into @$ROUND_CONTRACT_PATH +3. Commit your changes with a descriptive commit message +4. 
Write your work summary into @$SUMMARY_PATH EOF # Add push instruction only if push_every_round is true diff --git a/tests/robustness/test-goal-tracker-robustness.sh b/tests/robustness/test-goal-tracker-robustness.sh index fe4c025b..88eda6fd 100755 --- a/tests/robustness/test-goal-tracker-robustness.sh +++ b/tests/robustness/test-goal-tracker-robustness.sh @@ -50,6 +50,16 @@ parse_result() { esac } +parse_issue_result() { + local result="$1" + local field="$2" + case "$field" in + blocking_issues) echo "$result" | cut -d'|' -f1 ;; + queued_issues) echo "$result" | cut -d'|' -f2 ;; + open_issues) echo "$result" | cut -d'|' -f3 ;; + esac +} + # ======================================== # Positive Tests - Valid Goal Tracker # ======================================== @@ -438,6 +448,55 @@ else fail "Deferred tasks count" "2" "$DEFERRED_TASKS" fi +# Test 15b: Distinguish blocking vs queued issues in new schema +echo "" +echo "Test 15b: Distinguish blocking vs queued issues" +cat > "$TEST_DIR/goal-tracker-issue-breakdown.md" << 'EOF' +# Goal Tracker + +### Acceptance Criteria + +- AC-1: Test + +--- + +### Blocking Side Issues + +| Issue | Discovered Round | Blocking AC | Resolution Path | +|-------|-----------------|-------------|-----------------| +| Failing review item | 2 | AC-1 | Fix immediately | + +### Queued Side Issues + +| Issue | Discovered Round | Why Not Blocking | Revisit Trigger | +|-------|-----------------|------------------|-----------------| +| Cleanup follow-up | 2 | Cosmetic only | Next refactor | +| Extra test hardening | 3 | Current AC already met | Regression appears | +EOF + +ISSUE_RESULT=$(humanize_parse_goal_tracker_issue_counts "$TEST_DIR/goal-tracker-issue-breakdown.md") +BLOCKING_ISSUES=$(parse_issue_result "$ISSUE_RESULT" blocking_issues) +QUEUED_ISSUES=$(parse_issue_result "$ISSUE_RESULT" queued_issues) +OPEN_ISSUES=$(parse_issue_result "$ISSUE_RESULT" open_issues) +if [[ "$BLOCKING_ISSUES" == "1" ]] && [[ "$QUEUED_ISSUES" == "2" ]] && [[ 
"$OPEN_ISSUES" == "3" ]]; then + pass "Separates blocking and queued issues in new schema" +else + fail "Issue breakdown" "1 blocking, 2 queued, 3 total" "$ISSUE_RESULT" +fi + +# Test 15c: Legacy open issues fallback maps to blocking count +echo "" +echo "Test 15c: Legacy open issues fallback maps to blocking count" +ISSUE_RESULT=$(humanize_parse_goal_tracker_issue_counts "$TEST_DIR/goal-tracker-issues.md") +BLOCKING_ISSUES=$(parse_issue_result "$ISSUE_RESULT" blocking_issues) +QUEUED_ISSUES=$(parse_issue_result "$ISSUE_RESULT" queued_issues) +OPEN_ISSUES=$(parse_issue_result "$ISSUE_RESULT" open_issues) +if [[ "$BLOCKING_ISSUES" == "2" ]] && [[ "$QUEUED_ISSUES" == "0" ]] && [[ "$OPEN_ISSUES" == "2" ]]; then + pass "Legacy open issues fallback treated as blocking" +else + fail "Legacy issue fallback" "2 blocking, 0 queued, 2 total" "$ISSUE_RESULT" +fi + # Test 16: File with only headers (no content) echo "" echo "Test 16: File with only section headers" diff --git a/tests/robustness/test-hook-system-robustness.sh b/tests/robustness/test-hook-system-robustness.sh index 8f302bb4..5e8413a6 100755 --- a/tests/robustness/test-hook-system-robustness.sh +++ b/tests/robustness/test-hook-system-robustness.sh @@ -317,8 +317,10 @@ echo "" # Test 12: Bash validator blocks state.md modification attempts echo "Test 12: Bash validator blocks state.md modification" # Create RLCR state for the test -mkdir -p "$TEST_DIR/.humanize/rlcr/2026-01-19_12-00-00" -cat > "$TEST_DIR/.humanize/rlcr/2026-01-19_12-00-00/state.md" << 'EOF' +HOOK_LOOP_DIR="$TEST_DIR/.humanize/rlcr/2026-01-19_12-00-00" +OLD_LOOP_DIR="$TEST_DIR/.humanize/rlcr/2026-01-19_11-00-00" +mkdir -p "$HOOK_LOOP_DIR" +cat > "$HOOK_LOOP_DIR/state.md" << 'EOF' --- current_round: 1 max_iterations: 42 @@ -333,6 +335,54 @@ review_started: false plan_tracked: false --- EOF +cat > "$HOOK_LOOP_DIR/goal-tracker.md" << 'EOF' +# Goal Tracker + +## IMMUTABLE SECTION + +### Ultimate Goal +Keep mainline aligned. 
+ +### Acceptance Criteria +- AC-1: Mainline progress is visible every round. + +--- + +## MUTABLE SECTION + +### Plan Version: 1 (Updated: Round 1) + +#### Active Tasks +| Task | Target AC | Status | Notes | +|------|-----------|--------|-------| +| [mainline] Keep AC-1 moving | AC-1 | pending | - | + +### Blocking Side Issues +| Issue | Discovered Round | Blocking AC | Resolution Path | +|-------|-----------------|-------------|-----------------| + +### Queued Side Issues +| Issue | Discovered Round | Why Not Blocking | Revisit Trigger | +|-------|-----------------|------------------|-----------------| +EOF +mkdir -p "$OLD_LOOP_DIR" +cat > "$OLD_LOOP_DIR/goal-tracker.md" << 'EOF' +# Old Goal Tracker + +## IMMUTABLE SECTION + +### Ultimate Goal +Old session tracker. + +### Acceptance Criteria +- AC-1: Old session only. + +--- + +## MUTABLE SECTION + +### Plan Version: 1 (Updated: Round 0) +EOF # Try to modify state.md - this SHOULD be blocked JSON='{"tool_name":"Bash","tool_input":{"command":"echo hacked >> '"$TEST_DIR"'/.humanize/rlcr/2026-01-19_12-00-00/state.md"}}' set +e @@ -366,9 +416,143 @@ else fail "Goal-tracker.md modification" "exit 2 (blocked)" "exit $EXIT_CODE, result: $RESULT" fi -# Test 12c: Unrelated dangerous commands are allowed through (sandbox handles security) +# Test 12c: Write validator allows mutable goal-tracker updates after round 0 +echo "" +echo "Test 12c: Write validator allows mutable goal-tracker updates after round 0" +cat > "$TEST_DIR/goal-tracker-updated.md" << 'EOF' +# Goal Tracker + +## IMMUTABLE SECTION + +### Ultimate Goal +Keep mainline aligned. + +### Acceptance Criteria +- AC-1: Mainline progress is visible every round. 
+ +--- + +## MUTABLE SECTION + +### Plan Version: 1 (Updated: Round 1) + +#### Active Tasks +| Task | Target AC | Status | Notes | +|------|-----------|--------|-------| +| [mainline] Keep AC-1 moving | AC-1 | in_progress | re-anchored | + +### Blocking Side Issues +| Issue | Discovered Round | Blocking AC | Resolution Path | +|-------|-----------------|-------------|-----------------| +| failing test for AC-1 | 1 | AC-1 | fix before exit | + +### Queued Side Issues +| Issue | Discovered Round | Why Not Blocking | Revisit Trigger | +|-------|-----------------|------------------|-----------------| +EOF +UPDATED_CONTENT=$(jq -Rs . < "$TEST_DIR/goal-tracker-updated.md") +JSON='{"tool_name":"Write","tool_input":{"file_path":"'"$HOOK_LOOP_DIR"'/goal-tracker.md","content":'"$UPDATED_CONTENT"'}}' +set +e +RESULT=$(echo "$JSON" | CLAUDE_PROJECT_DIR="$TEST_DIR" bash "$PROJECT_ROOT/hooks/loop-write-validator.sh" 2>&1) +EXIT_CODE=$? +set -e +if [[ $EXIT_CODE -eq 0 ]]; then + pass "Write allows mutable goal-tracker updates after round 0" +else + fail "Goal-tracker mutable write" "exit 0" "exit $EXIT_CODE, result: $RESULT" +fi + +# Test 12d: Write validator blocks immutable goal-tracker changes after round 0 +echo "" +echo "Test 12d: Write validator blocks immutable goal-tracker changes after round 0" +cat > "$TEST_DIR/goal-tracker-bad.md" << 'EOF' +# Goal Tracker + +## IMMUTABLE SECTION + +### Ultimate Goal +Change the goal entirely. + +### Acceptance Criteria +- AC-1: Mainline progress is visible every round. + +--- + +## MUTABLE SECTION + +### Plan Version: 1 (Updated: Round 1) +EOF +UPDATED_CONTENT=$(jq -Rs . < "$TEST_DIR/goal-tracker-bad.md") +JSON='{"tool_name":"Write","tool_input":{"file_path":"'"$HOOK_LOOP_DIR"'/goal-tracker.md","content":'"$UPDATED_CONTENT"'}}' +set +e +RESULT=$(echo "$JSON" | CLAUDE_PROJECT_DIR="$TEST_DIR" bash "$PROJECT_ROOT/hooks/loop-write-validator.sh" 2>&1) +EXIT_CODE=$? 
+set -e +if [[ $EXIT_CODE -eq 2 ]]; then + pass "Write blocks immutable goal-tracker changes after round 0" +else + fail "Goal-tracker immutable write" "exit 2" "exit $EXIT_CODE, result: $RESULT" +fi + +# Test 12e: Edit validator allows mutable goal-tracker edits after round 0 +echo "" +echo "Test 12e: Edit validator allows mutable goal-tracker edits after round 0" +JSON='{"tool_name":"Edit","tool_input":{"file_path":"'"$HOOK_LOOP_DIR"'/goal-tracker.md","old_string":"| [mainline] Keep AC-1 moving | AC-1 | pending | - |","new_string":"| [mainline] Keep AC-1 moving | AC-1 | in_progress | re-anchored |"}}' +set +e +RESULT=$(echo "$JSON" | CLAUDE_PROJECT_DIR="$TEST_DIR" bash "$PROJECT_ROOT/hooks/loop-edit-validator.sh" 2>&1) +EXIT_CODE=$? +set -e +if [[ $EXIT_CODE -eq 0 ]]; then + pass "Edit allows mutable goal-tracker updates after round 0" +else + fail "Goal-tracker mutable edit" "exit 0" "exit $EXIT_CODE, result: $RESULT" +fi + +# Test 12f: Edit validator blocks immutable goal-tracker edits after round 0 +echo "" +echo "Test 12ea: Edit validator allows mutable deletions after round 0" +JSON='{"tool_name":"Edit","tool_input":{"file_path":"'"$HOOK_LOOP_DIR"'/goal-tracker.md","old_string":"| [mainline] Keep AC-1 moving | AC-1 | pending | - |","new_string":""}}' +set +e +RESULT=$(echo "$JSON" | CLAUDE_PROJECT_DIR="$TEST_DIR" bash "$PROJECT_ROOT/hooks/loop-edit-validator.sh" 2>&1) +EXIT_CODE=$? 
+set -e +if [[ $EXIT_CODE -eq 0 ]]; then + pass "Edit allows mutable goal-tracker deletions after round 0" +else + fail "Goal-tracker mutable delete" "exit 0" "exit $EXIT_CODE, result: $RESULT" +fi + +# Test 12f: Edit validator blocks immutable goal-tracker edits after round 0 +echo "" +echo "Test 12f: Edit validator blocks immutable goal-tracker edits after round 0" +JSON='{"tool_name":"Edit","tool_input":{"file_path":"'"$HOOK_LOOP_DIR"'/goal-tracker.md","old_string":"Keep mainline aligned.","new_string":"Change the goal entirely."}}' +set +e +RESULT=$(echo "$JSON" | CLAUDE_PROJECT_DIR="$TEST_DIR" bash "$PROJECT_ROOT/hooks/loop-edit-validator.sh" 2>&1) +EXIT_CODE=$? +set -e +if [[ $EXIT_CODE -eq 2 ]]; then + pass "Edit blocks immutable goal-tracker updates after round 0" +else + fail "Goal-tracker immutable edit" "exit 2" "exit $EXIT_CODE, result: $RESULT" +fi + +# Test 12g: Read validator blocks old-session goal tracker +echo "" +echo "Test 12g: Read validator blocks old-session goal tracker" +JSON='{"tool_name":"Read","tool_input":{"file_path":"'"$OLD_LOOP_DIR"'/goal-tracker.md"}}' +set +e +RESULT=$(echo "$JSON" | CLAUDE_PROJECT_DIR="$TEST_DIR" bash "$PROJECT_ROOT/hooks/loop-read-validator.sh" 2>&1) +EXIT_CODE=$? 
+set -e +if [[ $EXIT_CODE -eq 2 ]]; then + pass "Read blocks old-session goal-tracker.md" +else + fail "Goal-tracker old-session read" "exit 2" "exit $EXIT_CODE, result: $RESULT" +fi + +# Test 12h: Unrelated dangerous commands are allowed through (sandbox handles security) echo "" -echo "Test 12c: Unrelated dangerous commands allowed through (sandbox responsibility)" +echo "Test 12h: Unrelated dangerous commands allowed through (sandbox responsibility)" JSON='{"tool_name":"Bash","tool_input":{"command":"cat /tmp/test; rm -rf /"}}' set +e RESULT=$(echo "$JSON" | CLAUDE_PROJECT_DIR="$TEST_DIR" bash "$PROJECT_ROOT/hooks/loop-bash-validator.sh" 2>&1) diff --git a/tests/robustness/test-setup-scripts-robustness.sh b/tests/robustness/test-setup-scripts-robustness.sh index 13fe21bc..b4ee9299 100755 --- a/tests/robustness/test-setup-scripts-robustness.sh +++ b/tests/robustness/test-setup-scripts-robustness.sh @@ -1046,6 +1046,32 @@ else fail "--skip-impl goal-tracker" "goal-tracker.md exists" "not found" fi +# Test 44b: --skip-impl creates round-0-contract.md +echo "" +echo "Test 44b: --skip-impl creates round-0-contract.md" +if [[ -n "$LOOP_DIR" ]] && [[ -f "$LOOP_DIR/round-0-contract.md" ]]; then + if grep -qi "Mainline Objective" "$LOOP_DIR/round-0-contract.md"; then + pass "--skip-impl creates round-0-contract.md with mainline objective" + else + fail "--skip-impl round contract content" "Mainline Objective text" "$(cat "$LOOP_DIR/round-0-contract.md")" + fi +else + fail "--skip-impl round contract" "round-0-contract.md exists" "not found" +fi + +# Test 44c: --skip-impl prompt references the round contract +echo "" +echo "Test 44c: --skip-impl prompt references round-0-contract.md" +if [[ -n "$LOOP_DIR" ]] && [[ -f "$LOOP_DIR/round-0-prompt.md" ]]; then + if grep -q "round-0-contract.md" "$LOOP_DIR/round-0-prompt.md"; then + pass "--skip-impl prompt references round-0-contract.md" + else + fail "--skip-impl prompt contract reference" "prompt mentions 
round-0-contract.md" "$(cat "$LOOP_DIR/round-0-prompt.md")" + fi +else + fail "--skip-impl prompt contract reference" "round-0-prompt.md exists" "not found" +fi + # Test 45: --skip-impl with plan file still works echo "" echo "Test 45: --skip-impl with plan file still works" @@ -1074,6 +1100,44 @@ else fi fi +LOOP_DIR_45=$(find "$TEST_DIR/repo45/.humanize/rlcr" -maxdepth 1 -type d -name "20*" 2>/dev/null | head -1) + +echo "" +echo "Test 45b: --skip-impl with plan file preserves plan goal in goal-tracker" +if [[ -n "$LOOP_DIR_45" ]] && [[ -f "$LOOP_DIR_45/goal-tracker.md" ]]; then + if grep -q "Test the setup script robustness" "$LOOP_DIR_45/goal-tracker.md"; then + pass "--skip-impl with plan preserves plan goal anchor" + else + fail "--skip-impl plan goal anchor" "goal-tracker contains plan goal" "$(cat "$LOOP_DIR_45/goal-tracker.md")" + fi +else + fail "--skip-impl plan goal anchor" "goal-tracker.md exists" "not found" +fi + +echo "" +echo "Test 45c: --skip-impl with plan file prompt references original plan" +if [[ -n "$LOOP_DIR_45" ]] && [[ -f "$LOOP_DIR_45/round-0-prompt.md" ]]; then + if grep -q "@plan.md" "$LOOP_DIR_45/round-0-prompt.md"; then + pass "--skip-impl with plan prompt references original plan" + else + fail "--skip-impl plan prompt anchor" "round-0-prompt references @plan.md" "$(cat "$LOOP_DIR_45/round-0-prompt.md")" + fi +else + fail "--skip-impl plan prompt anchor" "round-0-prompt.md exists" "not found" +fi + +echo "" +echo "Test 45d: --skip-impl with plan file contract references original plan alignment" +if [[ -n "$LOOP_DIR_45" ]] && [[ -f "$LOOP_DIR_45/round-0-contract.md" ]]; then + if grep -qi "aligned with @plan.md" "$LOOP_DIR_45/round-0-contract.md"; then + pass "--skip-impl with plan contract references original plan" + else + fail "--skip-impl plan contract anchor" "round-0-contract references @plan.md" "$(cat "$LOOP_DIR_45/round-0-contract.md")" + fi +else + fail "--skip-impl plan contract anchor" "round-0-contract.md exists" "not 
found" +fi + # ======================================== # Dependency Check Tests # ======================================== diff --git a/tests/robustness/test-state-file-robustness.sh b/tests/robustness/test-state-file-robustness.sh index ae6d0e7f..83f91824 100755 --- a/tests/robustness/test-state-file-robustness.sh +++ b/tests/robustness/test-state-file-robustness.sh @@ -473,6 +473,55 @@ else fail "Parses state with min full_review_round" "return 0" "returned non-zero" fi +# Test 22: State file with drift-tracking fields +echo "" +echo "Test 22: State file with drift-tracking fields" +cat > "$TEST_DIR/state-drift-fields.md" << 'EOF' +--- +current_round: 4 +max_iterations: 12 +review_started: false +base_branch: main +mainline_stall_count: 2 +last_mainline_verdict: stalled +drift_status: replan_required +--- +EOF + +if parse_state_file "$TEST_DIR/state-drift-fields.md"; then + if [[ "$STATE_MAINLINE_STALL_COUNT" == "2" ]] && [[ "$STATE_LAST_MAINLINE_VERDICT" == "stalled" ]] && [[ "$STATE_DRIFT_STATUS" == "replan_required" ]]; then + pass "Parses drift-tracking fields correctly" + else + fail "Parses drift-tracking fields" "stall=2 verdict=stalled drift=replan_required" \ + "stall=$STATE_MAINLINE_STALL_COUNT verdict=$STATE_LAST_MAINLINE_VERDICT drift=$STATE_DRIFT_STATUS" + fi +else + fail "Parses state with drift-tracking fields" "return 0" "returned non-zero" +fi + +# Test 23: Missing drift-tracking fields use safe defaults +echo "" +echo "Test 23: Missing drift-tracking fields use safe defaults" +cat > "$TEST_DIR/state-no-drift-fields.md" << 'EOF' +--- +current_round: 1 +max_iterations: 8 +review_started: false +base_branch: main +--- +EOF + +if parse_state_file "$TEST_DIR/state-no-drift-fields.md"; then + if [[ "$STATE_MAINLINE_STALL_COUNT" == "0" ]] && [[ "$STATE_LAST_MAINLINE_VERDICT" == "unknown" ]] && [[ "$STATE_DRIFT_STATUS" == "normal" ]]; then + pass "Uses safe defaults for drift-tracking fields" + else + fail "Default drift-tracking fields" "stall=0 
verdict=unknown drift=normal" \ + "stall=$STATE_MAINLINE_STALL_COUNT verdict=$STATE_LAST_MAINLINE_VERDICT drift=$STATE_DRIFT_STATUS" + fi +else + fail "Parses state without drift-tracking fields" "return 0" "returned non-zero" +fi + # ======================================== # Summary # ======================================== diff --git a/tests/test-agent-teams.sh b/tests/test-agent-teams.sh index 1c685109..de52bb6c 100755 --- a/tests/test-agent-teams.sh +++ b/tests/test-agent-teams.sh @@ -455,6 +455,9 @@ ask_codex_question: false full_review_round: 5 session_id: agent_teams: $agent_teams +mainline_stall_count: 0 +last_mainline_verdict: unknown +drift_status: normal --- STATE_EOF @@ -483,6 +486,16 @@ GT_EOF Implemented features as requested. SUM_EOF + cat > "$LOOP_DIR/round-${round}-contract.md" << CONTRACT_EOF +# Round $round Contract + +- Mainline Objective: Continue the requested implementation round +- Target ACs: AC-1 +- Blocking Side Issues In Scope: none +- Queued Side Issues Out of Scope: none +- Success Criteria: advance the mainline objective without drift +CONTRACT_EOF + # Set up isolated cache directory export XDG_CACHE_HOME="$TEST_DIR/.cache" mkdir -p "$XDG_CACHE_HOME" @@ -536,6 +549,8 @@ MOCK_EOF setup_stophook_test 3 "true" "false" setup_mock_codex_impl_feedback "## Review Feedback +Mainline Progress Verdict: ADVANCED + Some issues found: - Issue 1: Missing error handling @@ -566,6 +581,46 @@ else fail "impl phase with agent_teams=true: next-round prompt contains agent-teams continuation" "round-4-prompt.md exists" "not found (hook exit=$HOOK_EXIT)" fi +# ======================================== +# Test: Drift recovery prompt still preserves agent-teams continuation +# ======================================== + +setup_stophook_test 3 "true" "false" +perl -0pi -e 's/mainline_stall_count: 0/mainline_stall_count: 1/' "$LOOP_DIR/state.md" +perl -0pi -e 's/last_mainline_verdict: unknown/last_mainline_verdict: stalled/' "$LOOP_DIR/state.md" 
+setup_mock_codex_impl_feedback "## Review Feedback + +Mainline Progress Verdict: STALLED + +- Mainline gap: AC-1 still has no stable implementation +- Blocking side issue: the team is repeating the same non-advancing fix pattern + +Recover the mainline before trying again. + +CONTINUE" + +HOOK_INPUT='{"stop_hook_active": false, "transcript": [], "session_id": ""}' +set +e +RESULT=$(echo "$HOOK_INPUT" | CLAUDE_PROJECT_DIR="$TEST_DIR" bash "$STOP_HOOK" 2>/dev/null) +HOOK_EXIT=$? +set -e + +NEXT_PROMPT="$LOOP_DIR/round-4-prompt.md" +if [[ -f "$NEXT_PROMPT" ]]; then + if grep -q "Drift Recovery Mode" "$NEXT_PROMPT"; then + pass "drift recovery prompt generated for stalled mainline" + else + fail "drift recovery prompt generated for stalled mainline" "Drift Recovery Mode" "not found" + fi + if grep -qi "Agent Teams" "$NEXT_PROMPT"; then + pass "drift recovery prompt keeps agent-teams continuation" + else + fail "drift recovery prompt keeps agent-teams continuation" "agent-teams text in prompt" "not found" + fi +else + fail "drift recovery prompt keeps agent-teams continuation" "round-4-prompt.md exists" "not found (hook exit=$HOOK_EXIT)" +fi + # ======================================== # Test: Implementation phase with agent_teams=false has no continuation # ======================================== @@ -573,6 +628,8 @@ fi setup_stophook_test 3 "false" "false" setup_mock_codex_impl_feedback "## Review Feedback +Mainline Progress Verdict: ADVANCED + Some issues found: - Issue 1: Missing error handling diff --git a/tests/test-allowlist-validators.sh b/tests/test-allowlist-validators.sh index 6c604965..6c80022f 100755 --- a/tests/test-allowlist-validators.sh +++ b/tests/test-allowlist-validators.sh @@ -4,9 +4,9 @@ # # Tests: # - is_allowlisted_file() function in loop-common.sh -# - Read validator allowlist for todos and summaries -# - Write validator allowlist for todos and summaries -# - Edit validator allowlist for todos and summaries +# - Read validator allowlist for 
todos, summaries, and contracts +# - Write validator allowlist for todos, summaries, and contracts +# - Edit validator allowlist for todos, summaries, and contracts # - Bash validator allowlist for todos files (path-restricted) # @@ -117,6 +117,14 @@ else fail "round-2-summary.md blocked" "false" "true" fi +# Test 6b: Non-allowlisted file - round-0-contract.md +echo "Test 6b: round-0-contract.md is NOT allowlisted" +if ! is_allowlisted_file "$ACTIVE_LOOP_DIR/round-0-contract.md" "$ACTIVE_LOOP_DIR"; then + pass "round-0-contract.md is NOT allowlisted" +else + fail "round-0-contract.md blocked" "false" "true" +fi + # Test 7: Wrong directory - allowlisted filename but wrong path echo "Test 7: round-1-todos.md in wrong directory is NOT allowlisted" if ! is_allowlisted_file "/other/path/round-1-todos.md" "$ACTIVE_LOOP_DIR"; then @@ -158,6 +166,19 @@ else fail "Write validator round-0-summary.md" "exit 0" "exit $EXIT_CODE, output: $RESULT" fi +# Test 9b: Write validator allows current round contract +echo "Test 9b: Write validator allows round-5-contract.md (current round)" +HOOK_INPUT='{"tool_name": "Write", "tool_input": {"file_path": "'$LOOP_DIR'/round-5-contract.md"}}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-write-validator.sh" 2>&1) +EXIT_CODE=$? 
+set -e +if [[ $EXIT_CODE -eq 0 ]]; then + pass "Write validator allows round-5-contract.md" +else + fail "Write validator round-5-contract.md" "exit 0" "exit $EXIT_CODE, output: $RESULT" +fi + # Test 10: Write validator blocks round-3-todos.md (not in allowlist) echo "Test 10: Write validator blocks round-3-todos.md" HOOK_INPUT='{"tool_name": "Write", "tool_input": {"file_path": "'$LOOP_DIR'/round-3-todos.md"}}' @@ -184,6 +205,19 @@ else fail "Write validator round-2-summary.md" "exit 2 with round error" "exit $EXIT_CODE, output: $RESULT" fi +# Test 11b: Write validator blocks stale round contract +echo "Test 11b: Write validator blocks round-3-contract.md" +HOOK_INPUT='{"tool_name": "Write", "tool_input": {"file_path": "'$LOOP_DIR'/round-3-contract.md"}}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-write-validator.sh" 2>&1) +EXIT_CODE=$? +set -e +if [[ $EXIT_CODE -eq 2 ]] && echo "$RESULT" | grep -qi "round"; then + pass "Write validator blocks round-3-contract.md" +else + fail "Write validator round-3-contract.md" "exit 2 with round error" "exit $EXIT_CODE, output: $RESULT" +fi + echo "" echo "=== Test: Edit Validator Allowlist ===" echo "" @@ -214,6 +248,32 @@ else fail "Edit validator round-1-summary.md" "exit 0" "exit $EXIT_CODE, output: $RESULT" fi +# Test 13b: Edit validator allows current round contract +echo "Test 13b: Edit validator allows round-5-contract.md (current round)" +HOOK_INPUT='{"tool_name": "Edit", "tool_input": {"file_path": "'$LOOP_DIR'/round-5-contract.md"}}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-edit-validator.sh" 2>&1) +EXIT_CODE=$? 
+set -e +if [[ $EXIT_CODE -eq 0 ]]; then + pass "Edit validator allows round-5-contract.md" +else + fail "Edit validator round-5-contract.md" "exit 0" "exit $EXIT_CODE, output: $RESULT" +fi + +# Test 13c: Edit validator blocks stale round contract +echo "Test 13c: Edit validator blocks round-0-contract.md" +HOOK_INPUT='{"tool_name": "Edit", "tool_input": {"file_path": "'$LOOP_DIR'/round-0-contract.md"}}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-edit-validator.sh" 2>&1) +EXIT_CODE=$? +set -e +if [[ $EXIT_CODE -eq 2 ]] && echo "$RESULT" | grep -qi "round"; then + pass "Edit validator blocks round-0-contract.md" +else + fail "Edit validator round-0-contract.md" "exit 2 with round error" "exit $EXIT_CODE, output: $RESULT" +fi + # Test 14: Edit validator blocks round-4-todos.md echo "Test 14: Edit validator blocks round-4-todos.md" HOOK_INPUT='{"tool_name": "Edit", "tool_input": {"file_path": "'$LOOP_DIR'/round-4-todos.md"}}' @@ -257,6 +317,19 @@ else fail "Read validator round-0-summary.md" "exit 0" "exit $EXIT_CODE, output: $RESULT" fi +# Test 16b: Read validator allows current round contract +echo "Test 16b: Read validator allows round-5-contract.md (current round)" +HOOK_INPUT='{"tool_name": "Read", "tool_input": {"file_path": "'$LOOP_DIR'/round-5-contract.md"}}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-read-validator.sh" 2>&1) +EXIT_CODE=$? 
+set -e +if [[ $EXIT_CODE -eq 0 ]]; then + pass "Read validator allows round-5-contract.md" +else + fail "Read validator round-5-contract.md" "exit 0" "exit $EXIT_CODE, output: $RESULT" +fi + # Test 17: Read validator blocks round-3-todos.md echo "Test 17: Read validator blocks round-3-todos.md" HOOK_INPUT='{"tool_name": "Read", "tool_input": {"file_path": "'$LOOP_DIR'/round-3-todos.md"}}' @@ -283,6 +356,19 @@ else fail "Read validator round-3-summary.md" "exit 2 with round error" "exit $EXIT_CODE, output: $RESULT" fi +# Test 18b: Read validator blocks stale round contract +echo "Test 18b: Read validator blocks round-3-contract.md" +HOOK_INPUT='{"tool_name": "Read", "tool_input": {"file_path": "'$LOOP_DIR'/round-3-contract.md"}}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-read-validator.sh" 2>&1) +EXIT_CODE=$? +set -e +if [[ $EXIT_CODE -eq 2 ]] && echo "$RESULT" | grep -qi "round"; then + pass "Read validator blocks round-3-contract.md" +else + fail "Read validator round-3-contract.md" "exit 2 with round error" "exit $EXIT_CODE, output: $RESULT" +fi + echo "" echo "=== Test: Bash Validator Allowlist (Path-Restricted) ===" echo "" @@ -313,6 +399,19 @@ else fail "Bash validator round-2-todos.md" "exit 0" "exit $EXIT_CODE, output: $RESULT" fi +# Test 20b: Bash validator blocks round-5-contract.md +echo "Test 20b: Bash validator blocks round-5-contract.md" +HOOK_INPUT='{"tool_name": "Bash", "tool_input": {"command": "echo test > '$LOOP_DIR'/round-5-contract.md"}}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-bash-validator.sh" 2>&1) +EXIT_CODE=$? 
+set -e +if [[ $EXIT_CODE -eq 2 ]] && echo "$RESULT" | grep -qi "contract"; then + pass "Bash validator blocks round-5-contract.md" +else + fail "Bash validator round-5-contract.md" "exit 2 with contract error" "exit $EXIT_CODE, output: $RESULT" +fi + # Test 21: Bash validator blocks round-1-todos.md in wrong directory echo "Test 21: Bash validator blocks round-1-todos.md in wrong directory" HOOK_INPUT='{"tool_name": "Bash", "tool_input": {"command": "echo test > /tmp/round-1-todos.md"}}' diff --git a/tests/test-finalize-phase.sh b/tests/test-finalize-phase.sh index 96890a41..483ae665 100755 --- a/tests/test-finalize-phase.sh +++ b/tests/test-finalize-phase.sh @@ -200,6 +200,9 @@ plan_tracked: false start_branch: $current_branch base_branch: main review_started: false +mainline_stall_count: 0 +last_mainline_verdict: unknown +drift_status: normal started_at: 2024-01-01T12:00:00Z --- EOF @@ -223,6 +226,16 @@ Test finalize phase | Task | Target AC | Status | |------|-----------|--------| | Test | AC-1 | completed | +EOF + + cat > "$LOOP_DIR/round-${round}-contract.md" << EOF +# Round $round Contract + +- Mainline Objective: Verify finalize phase coverage +- Target ACs: AC-1 +- Blocking Side Issues In Scope: none +- Queued Side Issues Out of Scope: none +- Success Criteria: current round artifacts are complete EOF } @@ -366,6 +379,18 @@ else fail "Write validator finalize-state.md" "exit 2 with finalize error" "exit $EXIT_CODE, output: $RESULT" fi +echo "T-NEG-5aa: Write validator blocks round contract during Finalize Phase" +HOOK_INPUT='{"tool_name": "Write", "tool_input": {"file_path": "'$LOOP_DIR'/round-5-contract.md"}}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-write-validator.sh" 2>&1) +EXIT_CODE=$? 
+set -e +if [[ $EXIT_CODE -eq 2 ]] && echo "$RESULT" | grep -qi "contract"; then + pass "Write validator blocks finalize-phase round contract" +else + fail "Write validator finalize-phase contract" "exit 2 with contract error" "exit $EXIT_CODE, output: $RESULT" +fi + echo "T-NEG-5b: Edit validator blocks finalize-state.md" HOOK_INPUT='{"tool_name": "Edit", "tool_input": {"file_path": "'$LOOP_DIR'/finalize-state.md"}}' set +e @@ -378,6 +403,18 @@ else fail "Edit validator finalize-state.md" "exit 2 with finalize error" "exit $EXIT_CODE, output: $RESULT" fi +echo "T-NEG-5bb: Edit validator blocks round contract during Finalize Phase" +HOOK_INPUT='{"tool_name": "Edit", "tool_input": {"file_path": "'$LOOP_DIR'/round-5-contract.md"}}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-edit-validator.sh" 2>&1) +EXIT_CODE=$? +set -e +if [[ $EXIT_CODE -eq 2 ]] && echo "$RESULT" | grep -qi "contract"; then + pass "Edit validator blocks finalize-phase round contract" +else + fail "Edit validator finalize-phase contract" "exit 2 with contract error" "exit $EXIT_CODE, output: $RESULT" +fi + echo "T-NEG-5c: Bash validator blocks finalize-state.md modification" HOOK_INPUT='{"tool_name": "Bash", "tool_input": {"command": "echo test > '$LOOP_DIR'/finalize-state.md"}}' set +e @@ -513,6 +550,8 @@ setup_test_repo setup_loop_dir 3 10 # current_round: 3, max_iterations: 10 setup_mock_codex "All requirements met. +Mainline Progress Verdict: ADVANCED + COMPLETE" # Create summary for current round @@ -571,6 +610,8 @@ setup_test_repo setup_loop_dir 3 10 # current_round: 3, max_iterations: 10 setup_mock_codex_review_failure "All requirements met. +Mainline Progress Verdict: ADVANCED + COMPLETE" 1 # Create summary for current round @@ -630,6 +671,8 @@ setup_test_repo setup_loop_dir 4 10 # current_round: 4, max_iterations: 10 setup_mock_codex_review_empty_stdout "All requirements met. 
+Mainline Progress Verdict: ADVANCED + COMPLETE" # Create summary for current round @@ -752,6 +795,8 @@ setup_loop_dir 3 10 # current_round: 3, max_iterations: 10 # Create a mock Codex that outputs review feedback (not COMPLETE) setup_mock_codex "## Review Feedback +Mainline Progress Verdict: ADVANCED + Some issues need to be addressed: - Issue 1: Fix the bug in function X - Issue 2: Add tests for edge case Y @@ -813,6 +858,158 @@ else fail "Review feedback in output" "output contains 'Issue 1' from Codex review" "output does not contain expected feedback" fi +echo "" +echo "=== T-POS-6 / T-NEG-10: Mainline Drift State Machine ===" +echo "" + +# T-POS-6: Two consecutive stalled rounds trigger drift recovery prompt +rm -rf "$TEST_DIR/.humanize" +setup_test_repo +setup_loop_dir 3 10 +perl -0pi -e 's/mainline_stall_count: 0/mainline_stall_count: 1/' "$LOOP_DIR/state.md" +perl -0pi -e 's/last_mainline_verdict: unknown/last_mainline_verdict: stalled/' "$LOOP_DIR/state.md" + +setup_mock_codex "## Review Feedback + +Mainline Progress Verdict: STALLED + +- Mainline gap: AC-1 still lacks a passing implementation path +- Blocking side issue: current approach keeps looping on the same failing path + +Please recover the mainline before trying again. + +CONTINUE" + +cat > "$LOOP_DIR/round-3-summary.md" << 'EOF' +# Round 3 Summary +Tried another implementation pass, but AC-1 is still not advancing. +EOF + +TRANSCRIPT_FILE="$TEST_DIR/transcript.jsonl" +cat > "$TRANSCRIPT_FILE" << 'EOF' +{"type": "assistant", "message": {"content": [{"type": "tool_use", "name": "TodoWrite", "input": {"todos": [{"content": "[mainline] Recover AC-1", "status": "completed", "activeForm": "Recovering AC-1"}]}}]}} +EOF + +echo "T-POS-6: Two stalled rounds trigger drift recovery prompt" +HOOK_INPUT='{"stop_hook_active": false, "transcript_path": "'$TRANSCRIPT_FILE'"}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-codex-stop-hook.sh" 2>&1) +EXIT_CODE=$? 
+set -e + +if echo "$RESULT" | grep -q '"decision".*block' && [[ -f "$LOOP_DIR/round-4-prompt.md" ]]; then + pass "Drift recovery round blocks exit and creates next prompt" +else + fail "Drift recovery prompt creation" "block with round-4 prompt" "exit $EXIT_CODE, output: $RESULT" +fi + +if grep -q "Drift Recovery Mode" "$LOOP_DIR/round-4-prompt.md"; then + pass "Drift recovery prompt uses special replan template" +else + fail "Drift recovery prompt template" "Drift Recovery Mode in prompt" "$(cat "$LOOP_DIR/round-4-prompt.md")" +fi + +parse_state_file "$LOOP_DIR/state.md" +if [[ "$STATE_CURRENT_ROUND" == "4" ]] && [[ "$STATE_MAINLINE_STALL_COUNT" == "2" ]] && [[ "$STATE_LAST_MAINLINE_VERDICT" == "stalled" ]] && [[ "$STATE_DRIFT_STATUS" == "replan_required" ]]; then + pass "State records drift recovery requirement after second stalled round" +else + fail "Drift recovery state update" "round=4 stall=2 verdict=stalled drift=replan_required" \ + "round=$STATE_CURRENT_ROUND stall=$STATE_MAINLINE_STALL_COUNT verdict=$STATE_LAST_MAINLINE_VERDICT drift=$STATE_DRIFT_STATUS" +fi + +# T-NEG-10a: Missing Mainline Progress Verdict blocks exit and preserves state +rm -rf "$TEST_DIR/.humanize" +setup_test_repo +setup_loop_dir 3 10 +perl -0pi -e 's/mainline_stall_count: 0/mainline_stall_count: 1/' "$LOOP_DIR/state.md" +perl -0pi -e 's/last_mainline_verdict: unknown/last_mainline_verdict: stalled/' "$LOOP_DIR/state.md" + +setup_mock_codex "## Review Feedback + +- Mainline gap: AC-1 still lacks a passing implementation path +- Blocking side issue: current approach keeps looping on the same failing path + +Please restate the mainline more clearly. + +CONTINUE" + +cat > "$LOOP_DIR/round-3-summary.md" << 'EOF' +# Round 3 Summary +Tried another implementation pass, but the review omitted the verdict line. 
+EOF + +echo "T-NEG-10a: Missing Mainline Progress Verdict blocks exit" +HOOK_INPUT='{"stop_hook_active": false, "transcript_path": "'$TRANSCRIPT_FILE'"}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-codex-stop-hook.sh" 2>&1) +EXIT_CODE=$? +set -e + +if echo "$RESULT" | grep -q '"decision".*block' && echo "$RESULT" | grep -qi "verdict"; then + pass "Missing Mainline Progress Verdict blocks exit" +else + fail "Missing Mainline Progress Verdict" "block with verdict error" "exit $EXIT_CODE, output: $RESULT" +fi + +if [[ ! -f "$LOOP_DIR/round-4-prompt.md" ]]; then + pass "Missing verdict does not generate next-round prompt" +else + fail "Missing verdict prompt generation" "no round-4 prompt" "$(cat "$LOOP_DIR/round-4-prompt.md")" +fi + +parse_state_file "$LOOP_DIR/state.md" +if [[ "$STATE_CURRENT_ROUND" == "3" ]] && [[ "$STATE_MAINLINE_STALL_COUNT" == "1" ]] && [[ "$STATE_LAST_MAINLINE_VERDICT" == "stalled" ]] && [[ "$STATE_DRIFT_STATUS" == "normal" ]]; then + pass "Missing verdict preserves prior drift state" +else + fail "Missing verdict state preservation" "round=3 stall=1 verdict=stalled drift=normal" \ + "round=$STATE_CURRENT_ROUND stall=$STATE_MAINLINE_STALL_COUNT verdict=$STATE_LAST_MAINLINE_VERDICT drift=$STATE_DRIFT_STATUS" +fi + +# T-NEG-10: Third consecutive stalled/regressed round stops the loop +rm -rf "$TEST_DIR/.humanize" +setup_test_repo +setup_loop_dir 3 10 +perl -0pi -e 's/mainline_stall_count: 0/mainline_stall_count: 2/' "$LOOP_DIR/state.md" +perl -0pi -e 's/last_mainline_verdict: unknown/last_mainline_verdict: stalled/' "$LOOP_DIR/state.md" +perl -0pi -e 's/drift_status: normal/drift_status: replan_required/' "$LOOP_DIR/state.md" + +setup_mock_codex "## Review Feedback + +Mainline Progress Verdict: REGRESSED + +- Mainline gap: this round moved farther from AC-1 +- Blocking side issue: recent fixes keep undoing the prior mainline path + +Stop and replan. 
+ +CONTINUE" + +cat > "$LOOP_DIR/round-3-summary.md" << 'EOF' +# Round 3 Summary +The latest attempt regressed the mainline objective again. +EOF + +echo "T-NEG-10: Third stalled/regressed round triggers circuit breaker" +HOOK_INPUT='{"stop_hook_active": false, "transcript_path": "'$TRANSCRIPT_FILE'"}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-codex-stop-hook.sh" 2>&1) +EXIT_CODE=$? +set -e + +if [[ -f "$LOOP_DIR/stop-state.md" ]] && echo "$RESULT" | grep -qi "drift"; then + pass "Third stalled/regressed round stops the loop with drift message" +else + fail "Drift circuit breaker" "stop-state.md and drift message" "exit $EXIT_CODE, files: $(ls "$LOOP_DIR"/*state*.md 2>/dev/null || echo 'none'), output: $RESULT" +fi + +parse_state_file "$LOOP_DIR/stop-state.md" +if [[ "$STATE_MAINLINE_STALL_COUNT" == "3" ]] && [[ "$STATE_LAST_MAINLINE_VERDICT" == "regressed" ]] && [[ "$STATE_DRIFT_STATUS" == "replan_required" ]]; then + pass "Stopped loop preserves final drift state" +else + fail "Preserved drift state on stop" "stall=3 verdict=regressed drift=replan_required" \ + "stall=$STATE_MAINLINE_STALL_COUNT verdict=$STATE_LAST_MAINLINE_VERDICT drift=$STATE_DRIFT_STATUS" +fi + echo "" echo "=== Validator Finalize Phase State Parsing Tests ===" echo "" @@ -850,6 +1047,18 @@ else fail "Read validator finalize-state.md parsing" "exit 0" "exit $EXIT_CODE, output: $RESULT" fi +echo "Test: Read validator blocks round contract during Finalize Phase" +HOOK_INPUT='{"tool_name": "Read", "tool_input": {"file_path": "'$LOOP_DIR'/round-5-contract.md"}}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-read-validator.sh" 2>&1) +EXIT_CODE=$? 
+set -e +if [[ $EXIT_CODE -eq 2 ]] && echo "$RESULT" | grep -qi "contract"; then + pass "Read validator blocks finalize-phase round contract" +else + fail "Read validator finalize-phase contract" "exit 2 with contract error" "exit $EXIT_CODE, output: $RESULT" +fi + echo "Test: Plan-file validator parses finalize-state.md correctly" # The plan-file validator should not error when only finalize-state.md exists HOOK_INPUT='{"prompt": "test prompt"}' diff --git a/tests/test-plan-file-hooks.sh b/tests/test-plan-file-hooks.sh index d2e8af6f..c345d944 100755 --- a/tests/test-plan-file-hooks.sh +++ b/tests/test-plan-file-hooks.sh @@ -60,6 +60,21 @@ setup_mock_codex # Default branch name (set after first git init) DEFAULT_BRANCH="" +create_round_contract() { + local loop_dir="$1" + local round="$2" + + cat > "$loop_dir/round-${round}-contract.md" << EOF +# Round $round Contract + +- Mainline Objective: Keep plan-file integrity checks aligned +- Target ACs: AC-1 +- Blocking Side Issues In Scope: none +- Queued Side Issues Out of Scope: none +- Success Criteria: current round artifacts are present and coherent +EOF +} + setup_test_loop() { cd "$TEST_DIR" @@ -80,6 +95,7 @@ setup_test_loop() { # Create loop directory structure LOOP_DIR="$TEST_DIR/.humanize/rlcr/2024-01-01_12-00-00" + rm -rf "$LOOP_DIR" mkdir -p "$LOOP_DIR" # Create plan file (gitignored) @@ -91,7 +107,12 @@ Test the RLCR loop ## Requirements - Requirement 1 EOF - echo "plans/" >> .gitignore + cat >> .gitignore << 'EOF' +plans/ +.humanize* +.cache/ +bin/ +EOF git add .gitignore git -c commit.gpgsign=false commit -q -m "Add gitignore" @@ -111,6 +132,8 @@ base_branch: $CURRENT_BRANCH review_started: false --- EOF + + create_round_contract "$LOOP_DIR" 0 } echo "=== Test: UserPromptSubmit Hook ===" @@ -466,6 +489,38 @@ else fail "Stop hook YAML parsing" "no YAML parse errors" "output: $RESULT" fi +# Test 8.8b: Stop hook blocks when round contract is missing +echo "Test 8.8b: Stop hook blocks when round contract is 
missing" +setup_test_loop +rm -f "$LOOP_DIR/round-0-contract.md" +cat > "$LOOP_DIR/round-0-summary.md" << 'EOF' +# Summary +Work done. +EOF +cat > "$LOOP_DIR/goal-tracker.md" << 'EOF' +# Goal Tracker +## IMMUTABLE SECTION +### Ultimate Goal +Test goal +### Acceptance Criteria +- Criterion 1 +## MUTABLE SECTION +### Plan Version: 1 (Updated: Round 0) +#### Active Tasks +| Task | Target AC | Status | Notes | +|------|-----------|--------|-------| +| Task 1 | AC1 | done | - | +EOF +set +e +RESULT=$(echo '{}' | "$PROJECT_ROOT/hooks/loop-codex-stop-hook.sh" 2>&1) +EXIT_CODE=$? +set -e +if echo "$RESULT" | grep -q '"decision"' && echo "$RESULT" | grep -qi "contract"; then + pass "Stop hook blocks when round contract is missing" +else + fail "Stop hook missing round contract" "block with contract error" "exit $EXIT_CODE, output: $RESULT" +fi + # Test 8.9: Hook handles plan_file path with hyphens correctly echo "Test 8.9: Hook handles plan_file with hyphens in path" setup_test_loop @@ -642,6 +697,7 @@ cat > "$TRACKED_LOOP_DIR/round-0-summary.md" << 'EOF' # Summary Work done. EOF +create_round_contract "$TRACKED_LOOP_DIR" 0 cat > "$TRACKED_LOOP_DIR/goal-tracker.md" << 'EOF' # Goal Tracker ## IMMUTABLE SECTION @@ -738,6 +794,7 @@ cat > "$TRACKED_LOOP_DIR/round-0-summary.md" << 'EOF' # Summary Work done. EOF +create_round_contract "$TRACKED_LOOP_DIR" 0 cat > "$TRACKED_LOOP_DIR/goal-tracker.md" << 'EOF' # Goal Tracker ## IMMUTABLE SECTION @@ -822,6 +879,7 @@ cat > "$LOOP_DIR_14_1/round-0-summary.md" << 'EOF' # Summary Work done. EOF +create_round_contract "$LOOP_DIR_14_1" 0 # Goal tracker with ONLY Ultimate Goal placeholder (AC and Tasks are filled) cat > "$LOOP_DIR_14_1/goal-tracker.md" << 'EOF' # Goal Tracker @@ -893,6 +951,7 @@ cat > "$LOOP_DIR_14_2/round-0-summary.md" << 'EOF' # Summary Work done. 
EOF +create_round_contract "$LOOP_DIR_14_2" 0 # Goal tracker with ONLY AC placeholder (Goal and Tasks are filled) cat > "$LOOP_DIR_14_2/goal-tracker.md" << 'EOF' # Goal Tracker @@ -964,6 +1023,7 @@ cat > "$LOOP_DIR_14_3/round-0-summary.md" << 'EOF' # Summary Work done. EOF +create_round_contract "$LOOP_DIR_14_3" 0 # Goal tracker with ONLY Active Tasks placeholder (Goal and AC are filled) cat > "$LOOP_DIR_14_3/goal-tracker.md" << 'EOF' # Goal Tracker @@ -1033,6 +1093,7 @@ cat > "$LOOP_DIR_14_4/round-0-summary.md" << 'EOF' # Summary Work done. EOF +create_round_contract "$LOOP_DIR_14_4" 0 # Goal tracker with ALL placeholders cat > "$LOOP_DIR_14_4/goal-tracker.md" << 'EOF' # Goal Tracker diff --git a/tests/test-task-tag-routing.sh b/tests/test-task-tag-routing.sh index ae9365f7..3d4bc0fe 100755 --- a/tests/test-task-tag-routing.sh +++ b/tests/test-task-tag-routing.sh @@ -180,6 +180,15 @@ Keep routing behavior stable. | Task | Target AC | Status | Tag | Owner | Notes | |------|-----------|--------|-----|-------|-------| | Keep routing note | AC-1 | in_progress | analyze | codex | - +EOF + cat > "$loop_dir/round-0-contract.md" << 'EOF' +# Round 0 Contract + +- Mainline Objective: Keep routing behavior stable while addressing the current review feedback. +- Target ACs: AC-1 +- Blocking Side Issues In Scope: none +- Queued Side Issues Out of Scope: none +- Success Criteria: Follow-up prompt is generated with routing guidance intact. EOF cat > "$loop_dir/round-0-summary.md" << 'EOF' # Round 0 Summary @@ -197,6 +206,8 @@ setup_test_dir setup_stophook_repo "$TEST_DIR/hook-routing" create_mock_codex "$TEST_DIR/hook-routing/bin" "## Review Feedback +Mainline Progress Verdict: STALLED + Issue remains unresolved. 
CONTINUE" diff --git a/tests/test-todo-checker.sh b/tests/test-todo-checker.sh index b3e7b072..18b076a2 100755 --- a/tests/test-todo-checker.sh +++ b/tests/test-todo-checker.sh @@ -157,6 +157,36 @@ else fail "In-progress status" "exit 1" "exit $EXIT_CODE" fi +# Test 8b: Queued TodoWrite item does NOT block exit +echo "Test 8b: Queued TodoWrite item" +cat > "$TEST_DIR/transcript-queued.jsonl" << 'EOF' +{"type": "assistant", "message": {"content": [{"type": "tool_use", "name": "TodoWrite", "input": {"todos": [{"content": "[queued] Cleanup follow-up", "status": "pending"}]}}]}} +EOF +set +e +RESULT=$(echo "{\"transcript_path\": \"$TEST_DIR/transcript-queued.jsonl\"}" | python3 "$TODO_CHECKER" 2>&1) +EXIT_CODE=$? +set -e +if [[ $EXIT_CODE -eq 0 ]]; then + pass "Queued TodoWrite item exits 0" +else + fail "Queued TodoWrite item" "exit 0" "exit $EXIT_CODE, output: $RESULT" +fi + +# Test 8c: Lane tags in the middle of TodoWrite content do NOT downgrade blocking tasks +echo "Test 8c: Inline queued tag does not bypass TodoWrite blocker" +cat > "$TEST_DIR/transcript-inline-tag.jsonl" << 'EOF' +{"type": "assistant", "message": {"content": [{"type": "tool_use", "name": "TodoWrite", "input": {"todos": [{"content": "Fix docs mentioning [queued] follow-ups", "status": "pending"}]}}]}} +EOF +set +e +RESULT=$(echo "{\"transcript_path\": \"$TEST_DIR/transcript-inline-tag.jsonl\"}" | python3 "$TODO_CHECKER" 2>&1) +EXIT_CODE=$? 
+set -e +if [[ $EXIT_CODE -eq 1 ]] && echo "$RESULT" | grep -q '\[blocking\]'; then + pass "Inline queued tag still blocks TodoWrite item" +else + fail "Inline queued TodoWrite item" "exit 1 with [blocking] output" "exit $EXIT_CODE, output: $RESULT" +fi + # ======================================== # Test Group 3: Transcript Format Variations # ======================================== @@ -357,6 +387,57 @@ else fail "Task with in_progress status" "exit 1" "exit $EXIT_CODE, output: $RESULT" fi +# Test 19b: Queued file-based task does NOT block exit +echo "Test 19b: Queued task does not block" +MOCK_SESSION_19B="session-19b" +mkdir -p "$MOCK_TASKS_BASE/$MOCK_SESSION_19B" +cat > "$MOCK_TASKS_BASE/$MOCK_SESSION_19B/task-1.json" << 'EOF' +{"subject": "[queued] Follow-up cleanup", "status": "pending"} +EOF +set +e +RESULT=$(echo "{\"session_id\": \"$MOCK_SESSION_19B\", \"tasks_base_dir\": \"$MOCK_TASKS_BASE\"}" | python3 "$TODO_CHECKER" 2>&1) +EXIT_CODE=$? +set -e +if [[ $EXIT_CODE -eq 0 ]]; then + pass "Queued task exits 0" +else + fail "Queued task" "exit 0" "exit $EXIT_CODE, output: $RESULT" +fi + +# Test 19c: Explicit blocking tag still blocks +echo "Test 19c: Blocking task still blocks" +MOCK_SESSION_19C="session-19c" +mkdir -p "$MOCK_TASKS_BASE/$MOCK_SESSION_19C" +cat > "$MOCK_TASKS_BASE/$MOCK_SESSION_19C/task-1.json" << 'EOF' +{"subject": "[blocking] Fix failing test", "status": "pending"} +EOF +set +e +RESULT=$(echo "{\"session_id\": \"$MOCK_SESSION_19C\", \"tasks_base_dir\": \"$MOCK_TASKS_BASE\"}" | python3 "$TODO_CHECKER" 2>&1) +EXIT_CODE=$? 
+set -e +if [[ $EXIT_CODE -eq 1 ]] && echo "$RESULT" | grep -q '\[blocking\]'; then + pass "Blocking task exits 1 with lane marker" +else + fail "Blocking task" "exit 1 with [blocking] output" "exit $EXIT_CODE, output: $RESULT" +fi + +# Test 19d: Inline queued tag in task body does NOT downgrade blocking tasks +echo "Test 19d: Inline queued tag in task body does not bypass blocker" +MOCK_SESSION_19D="session-19d" +mkdir -p "$MOCK_TASKS_BASE/$MOCK_SESSION_19D" +cat > "$MOCK_TASKS_BASE/$MOCK_SESSION_19D/task-1.json" << 'EOF' +{"subject": "Triage review fallout", "description": "Notes mention [queued] cleanup but this task is still active", "status": "pending"} +EOF +set +e +RESULT=$(echo "{\"session_id\": \"$MOCK_SESSION_19D\", \"tasks_base_dir\": \"$MOCK_TASKS_BASE\"}" | python3 "$TODO_CHECKER" 2>&1) +EXIT_CODE=$? +set -e +if [[ $EXIT_CODE -eq 1 ]] && echo "$RESULT" | grep -q '\[blocking\]'; then + pass "Inline queued tag still blocks file-based task" +else + fail "Inline queued file-based task" "exit 1 with [blocking] output" "exit $EXIT_CODE, output: $RESULT" +fi + # Test 20: Multiple tasks, one incomplete echo "Test 20: Multiple tasks, one incomplete" MOCK_SESSION_20="session-20" From cfc46d626e32e832e3ef2d6ec0fb9d2744bee33f Mon Sep 17 00:00:00 2001 From: Emin Date: Tue, 17 Mar 2026 15:43:41 +0800 Subject: [PATCH 20/50] Use portable shebang across all shell scripts Replace hardcoded #!/bin/bash with #!/usr/bin/env bash for better portability across different Unix systems where bash may be installed in non-standard locations. 
Co-Authored-By: Claude Opus 4.6 --- hooks/lib/loop-common.sh | 2 +- hooks/lib/template-loader.sh | 2 +- hooks/loop-bash-validator.sh | 2 +- hooks/loop-codex-stop-hook.sh | 2 +- hooks/loop-edit-validator.sh | 2 +- hooks/loop-plan-file-validator.sh | 2 +- hooks/loop-post-bash-hook.sh | 2 +- hooks/loop-read-validator.sh | 2 +- hooks/loop-write-validator.sh | 2 +- hooks/pr-loop-stop-hook.sh | 2 +- scripts/ask-codex.sh | 2 +- scripts/cancel-pr-loop.sh | 2 +- scripts/cancel-rlcr-loop.sh | 2 +- scripts/check-bot-reactions.sh | 2 +- scripts/check-pr-reviewer-status.sh | 2 +- scripts/fetch-pr-comments.sh | 2 +- scripts/humanize.sh | 2 +- scripts/install-skill.sh | 2 +- scripts/install-skills-codex.sh | 2 +- scripts/install-skills-kimi.sh | 2 +- scripts/lib/monitor-common.sh | 2 +- scripts/lib/monitor-skill.sh | 2 +- scripts/poll-pr-reviews.sh | 2 +- scripts/portable-timeout.sh | 2 +- scripts/rlcr-stop-gate.sh | 2 +- scripts/setup-pr-loop.sh | 2 +- scripts/setup-rlcr-loop.sh | 2 +- scripts/validate-gen-plan-io.sh | 2 +- tests/manual-monitor-test.sh | 2 +- tests/mocks/gh | 2 +- .../robustness/test-base-branch-detection.sh | 2 +- .../test-cancel-security-robustness.sh | 2 +- .../test-concurrent-state-robustness.sh | 2 +- .../test-git-operations-robustness.sh | 2 +- .../test-goal-tracker-robustness.sh | 2 +- .../robustness/test-hook-input-robustness.sh | 8 +-- .../robustness/test-hook-system-robustness.sh | 4 +- .../test-path-validation-robustness.sh | 4 +- tests/robustness/test-plan-file-robustness.sh | 4 +- tests/robustness/test-pr-loop-api-fetch.sh | 2 +- tests/robustness/test-pr-loop-api-poll.sh | 2 +- .../robustness/test-pr-loop-api-robustness.sh | 8 +-- tests/robustness/test-session-robustness.sh | 2 +- .../test-setup-scripts-robustness.sh | 39 +++++++++---- .../robustness/test-state-file-robustness.sh | 2 +- .../test-state-transition-robustness.sh | 2 +- .../test-template-error-robustness.sh | 2 +- .../test-template-stress-robustness.sh | 2 +- 
tests/robustness/test-timeout-robustness.sh | 2 +- tests/run-all-tests.sh | 4 +- tests/setup-fixture-mock-gh.sh | 4 +- tests/setup-monitor-test-env.sh | 2 +- tests/test-agent-teams.sh | 6 +- tests/test-allowlist-validators.sh | 2 +- tests/test-ansi-parsing.sh | 2 +- tests/test-ask-codex.sh | 4 +- tests/test-bash-validator-patterns.sh | 2 +- tests/test-cancel-signal-file.sh | 2 +- tests/test-codex-review-merge.sh | 2 +- tests/test-error-scenarios.sh | 2 +- tests/test-finalize-phase.sh | 10 ++-- tests/test-gen-plan.sh | 2 +- tests/test-helpers.sh | 2 +- tests/test-humanize-escape.sh | 2 +- tests/test-monitor-e2e-deletion.sh | 2 +- tests/test-monitor-e2e-real.sh | 10 ++-- tests/test-monitor-e2e-sigint.sh | 2 +- tests/test-monitor-runtime.sh | 12 ++-- tests/test-plan-file-hooks.sh | 4 +- tests/test-plan-file-validation.sh | 4 +- tests/test-pr-loop-1-scripts.sh | 2 +- tests/test-pr-loop-2-hooks.sh | 2 +- tests/test-pr-loop-3-stophook.sh | 2 +- tests/test-pr-loop-hooks.sh | 22 ++++---- tests/test-pr-loop-lib.sh | 6 +- tests/test-pr-loop-scripts.sh | 2 +- tests/test-pr-loop-stophook.sh | 56 +++++++++---------- tests/test-pr-loop-system.sh | 10 ++-- tests/test-pr-loop.sh | 2 +- tests/test-session-id.sh | 2 +- tests/test-skill-monitor.sh | 2 +- tests/test-state-exit-naming.sh | 2 +- tests/test-stop-gate.sh | 2 +- tests/test-template-loader.sh | 2 +- tests/test-template-references.sh | 2 +- tests/test-templates-comprehensive.sh | 2 +- tests/test-todo-checker.sh | 2 +- 87 files changed, 187 insertions(+), 168 deletions(-) diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index 2e7014c2..17bd1c3e 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Common functions for RLCR loop hooks # diff --git a/hooks/lib/template-loader.sh b/hooks/lib/template-loader.sh index 7f46853d..fdb488a6 100644 --- a/hooks/lib/template-loader.sh +++ b/hooks/lib/template-loader.sh @@ -1,4 +1,4 @@ -#!/bin/bash 
+#!/usr/bin/env bash # # Template loading functions for RLCR loop hooks # diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index a9c394f3..65f3ca56 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # PreToolUse Hook: Validate Bash commands for RLCR loop and PR loop # diff --git a/hooks/loop-codex-stop-hook.sh b/hooks/loop-codex-stop-hook.sh index 0c1bd7b9..cad09969 100755 --- a/hooks/loop-codex-stop-hook.sh +++ b/hooks/loop-codex-stop-hook.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Stop Hook for RLCR loop # diff --git a/hooks/loop-edit-validator.sh b/hooks/loop-edit-validator.sh index 76cf9c03..e59821ff 100755 --- a/hooks/loop-edit-validator.sh +++ b/hooks/loop-edit-validator.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # PreToolUse Hook: Validate Edit paths for RLCR loop and PR loop # diff --git a/hooks/loop-plan-file-validator.sh b/hooks/loop-plan-file-validator.sh index 595d408a..4f336abc 100755 --- a/hooks/loop-plan-file-validator.sh +++ b/hooks/loop-plan-file-validator.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # UserPromptSubmit hook for plan file validation during RLCR loop # diff --git a/hooks/loop-post-bash-hook.sh b/hooks/loop-post-bash-hook.sh index eeed7bde..a0cc8268 100755 --- a/hooks/loop-post-bash-hook.sh +++ b/hooks/loop-post-bash-hook.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # PostToolUse Bash Hook for RLCR loop # diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index f0b6f71f..421a2143 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # PreToolUse Hook: Validate Read access for RLCR loop and PR loop files # diff --git a/hooks/loop-write-validator.sh b/hooks/loop-write-validator.sh index 02090265..7c12b8a0 100755 --- a/hooks/loop-write-validator.sh +++ b/hooks/loop-write-validator.sh @@ -1,4 +1,4 
@@ -#!/bin/bash +#!/usr/bin/env bash # # PreToolUse Hook: Validate Write paths for RLCR loop and PR loop # diff --git a/hooks/pr-loop-stop-hook.sh b/hooks/pr-loop-stop-hook.sh index 86c8a7e0..18361001 100755 --- a/hooks/pr-loop-stop-hook.sh +++ b/hooks/pr-loop-stop-hook.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Stop Hook for PR loop # diff --git a/scripts/ask-codex.sh b/scripts/ask-codex.sh index 3a07ab43..c9927162 100755 --- a/scripts/ask-codex.sh +++ b/scripts/ask-codex.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Ask Codex - One-shot consultation with Codex # diff --git a/scripts/cancel-pr-loop.sh b/scripts/cancel-pr-loop.sh index f01c606b..388b536f 100755 --- a/scripts/cancel-pr-loop.sh +++ b/scripts/cancel-pr-loop.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Cancel script for cancel-pr-loop # diff --git a/scripts/cancel-rlcr-loop.sh b/scripts/cancel-rlcr-loop.sh index 907b051e..bc0f2511 100755 --- a/scripts/cancel-rlcr-loop.sh +++ b/scripts/cancel-rlcr-loop.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Cancel script for cancel-rlcr-loop # diff --git a/scripts/check-bot-reactions.sh b/scripts/check-bot-reactions.sh index 74a3372c..e14861a6 100755 --- a/scripts/check-bot-reactions.sh +++ b/scripts/check-bot-reactions.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Check bot reactions on PR or comments # diff --git a/scripts/check-pr-reviewer-status.sh b/scripts/check-pr-reviewer-status.sh index cb3e31fd..e4915c99 100755 --- a/scripts/check-pr-reviewer-status.sh +++ b/scripts/check-pr-reviewer-status.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Check PR reviewer status for startup case determination # diff --git a/scripts/fetch-pr-comments.sh b/scripts/fetch-pr-comments.sh index 2dc996b8..b4e892b1 100755 --- a/scripts/fetch-pr-comments.sh +++ b/scripts/fetch-pr-comments.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Fetch PR comments from GitHub # diff --git a/scripts/humanize.sh 
b/scripts/humanize.sh index 3c6b4546..01c6e1b6 100755 --- a/scripts/humanize.sh +++ b/scripts/humanize.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # humanize.sh - Humanize shell utilities # Part of rc.d configuration # Compatible with both bash and zsh diff --git a/scripts/install-skill.sh b/scripts/install-skill.sh index 3e891b02..1c1f817d 100755 --- a/scripts/install-skill.sh +++ b/scripts/install-skill.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Install/upgrade Humanize skills for Kimi and/or Codex. # diff --git a/scripts/install-skills-codex.sh b/scripts/install-skills-codex.sh index 16b0375f..45d7476e 100755 --- a/scripts/install-skills-codex.sh +++ b/scripts/install-skills-codex.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Convenience wrapper: install Humanize skills for Codex target. # diff --git a/scripts/install-skills-kimi.sh b/scripts/install-skills-kimi.sh index 7bcbb775..15a94dd4 100755 --- a/scripts/install-skills-kimi.sh +++ b/scripts/install-skills-kimi.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Convenience wrapper: install Humanize skills for Kimi target. 
# diff --git a/scripts/lib/monitor-common.sh b/scripts/lib/monitor-common.sh index 26bdaa9b..0e6af09f 100644 --- a/scripts/lib/monitor-common.sh +++ b/scripts/lib/monitor-common.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # monitor-common.sh - Shared utilities for humanize monitor functions # diff --git a/scripts/lib/monitor-skill.sh b/scripts/lib/monitor-skill.sh index 16fd4936..bd839d59 100644 --- a/scripts/lib/monitor-skill.sh +++ b/scripts/lib/monitor-skill.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # monitor-skill.sh - Skill monitor for humanize # diff --git a/scripts/poll-pr-reviews.sh b/scripts/poll-pr-reviews.sh index 4292dd2d..282899bd 100755 --- a/scripts/poll-pr-reviews.sh +++ b/scripts/poll-pr-reviews.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Poll for new PR reviews from specified bots # diff --git a/scripts/portable-timeout.sh b/scripts/portable-timeout.sh index 318ef72e..2dcd9308 100755 --- a/scripts/portable-timeout.sh +++ b/scripts/portable-timeout.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Portable timeout wrapper for macOS/Linux compatibility # Usage: source portable-timeout.sh; run_with_timeout [args...] diff --git a/scripts/rlcr-stop-gate.sh b/scripts/rlcr-stop-gate.sh index 306f875c..31616423 100755 --- a/scripts/rlcr-stop-gate.sh +++ b/scripts/rlcr-stop-gate.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Run RLCR stop-hook logic from non-hook environments (e.g. skill workflows). 
# diff --git a/scripts/setup-pr-loop.sh b/scripts/setup-pr-loop.sh index 85fabb9d..56ffeb2b 100755 --- a/scripts/setup-pr-loop.sh +++ b/scripts/setup-pr-loop.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Setup script for start-pr-loop # diff --git a/scripts/setup-rlcr-loop.sh b/scripts/setup-rlcr-loop.sh index 27bb439c..1974c58e 100755 --- a/scripts/setup-rlcr-loop.sh +++ b/scripts/setup-rlcr-loop.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Setup script for start-rlcr-loop # diff --git a/scripts/validate-gen-plan-io.sh b/scripts/validate-gen-plan-io.sh index e024df8a..a566b18b 100755 --- a/scripts/validate-gen-plan-io.sh +++ b/scripts/validate-gen-plan-io.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # validate-gen-plan-io.sh # Validates input and output paths for the gen-plan command # Exit codes: diff --git a/tests/manual-monitor-test.sh b/tests/manual-monitor-test.sh index f8d17e41..332b9548 100644 --- a/tests/manual-monitor-test.sh +++ b/tests/manual-monitor-test.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Manual Test Script for tests # diff --git a/tests/mocks/gh b/tests/mocks/gh index 5cf767d1..d91a58c9 100755 --- a/tests/mocks/gh +++ b/tests/mocks/gh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Mock gh CLI for testing PR loop functionality # diff --git a/tests/robustness/test-base-branch-detection.sh b/tests/robustness/test-base-branch-detection.sh index 92ce1825..bb31cd3a 100755 --- a/tests/robustness/test-base-branch-detection.sh +++ b/tests/robustness/test-base-branch-detection.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for base branch auto-detection # diff --git a/tests/robustness/test-cancel-security-robustness.sh b/tests/robustness/test-cancel-security-robustness.sh index 524cc19e..7ecc3c3c 100755 --- a/tests/robustness/test-cancel-security-robustness.sh +++ b/tests/robustness/test-cancel-security-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # 
Robustness tests for cancel operation security # diff --git a/tests/robustness/test-concurrent-state-robustness.sh b/tests/robustness/test-concurrent-state-robustness.sh index 74ae84f6..ad72b3fc 100755 --- a/tests/robustness/test-concurrent-state-robustness.sh +++ b/tests/robustness/test-concurrent-state-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for concurrent state access # diff --git a/tests/robustness/test-git-operations-robustness.sh b/tests/robustness/test-git-operations-robustness.sh index d409c0ed..a80114c8 100755 --- a/tests/robustness/test-git-operations-robustness.sh +++ b/tests/robustness/test-git-operations-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for git operation scripts # diff --git a/tests/robustness/test-goal-tracker-robustness.sh b/tests/robustness/test-goal-tracker-robustness.sh index fe4c025b..0a9bc358 100755 --- a/tests/robustness/test-goal-tracker-robustness.sh +++ b/tests/robustness/test-goal-tracker-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for goal tracker parsing # diff --git a/tests/robustness/test-hook-input-robustness.sh b/tests/robustness/test-hook-input-robustness.sh index 6a6c77dd..675ff887 100755 --- a/tests/robustness/test-hook-input-robustness.sh +++ b/tests/robustness/test-hook-input-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for hook input parsing and monitor edge cases # @@ -453,7 +453,7 @@ cd "$MONITOR_TEST_DIR/project" # Create monitor runner script cat > "$MONITOR_TEST_DIR/run_monitor.sh" << 'MONITOR_EOF' -#!/bin/bash +#!/usr/bin/env bash PROJECT_DIR="$1" PROJECT_ROOT="$2" FAKE_HOME="$3" @@ -512,7 +512,7 @@ echo "Test log" > "$FAKE_HOME_MONITOR/.cache/humanize/$SANITIZED/2026-01-17_10-0 # Create narrow terminal runner - calls _humanize_monitor_codex directly in same shell cat > "$MONITOR_TEST_DIR/run_narrow.sh" << 'NARROW_EOF' -#!/bin/bash +#!/usr/bin/env 
bash PROJECT_DIR="$1" PROJECT_ROOT="$2" FAKE_HOME="$3" @@ -603,7 +603,7 @@ mkdir -p "$FAKE_HOME_MONITOR/.cache/humanize/$SANITIZED3/2026-01-17_11-00-00" printf '\033[31mRed text\033[0m\n\033[1;32mBold green\033[0m\n' > "$FAKE_HOME_MONITOR/.cache/humanize/$SANITIZED3/2026-01-17_11-00-00/round-1-codex-run.log" cat > "$MONITOR_TEST_DIR/run_ansi.sh" << 'ANSI_EOF' -#!/bin/bash +#!/usr/bin/env bash PROJECT_DIR="$1" PROJECT_ROOT="$2" FAKE_HOME="$3" diff --git a/tests/robustness/test-hook-system-robustness.sh b/tests/robustness/test-hook-system-robustness.sh index 8f302bb4..bfccf7f0 100755 --- a/tests/robustness/test-hook-system-robustness.sh +++ b/tests/robustness/test-hook-system-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for all hook scripts # @@ -592,7 +592,7 @@ cd - > /dev/null # Create mock codex to avoid real API calls (review_started: false triggers codex exec) mkdir -p "$TEST_DIR/mock-bin" cat > "$TEST_DIR/mock-bin/codex" << 'MOCKEOF' -#!/bin/bash +#!/usr/bin/env bash # Mock codex that returns review output indicating work continues echo "Review: Code looks good but more testing needed." echo "No COMPLETE or STOP markers - work should continue." 
diff --git a/tests/robustness/test-path-validation-robustness.sh b/tests/robustness/test-path-validation-robustness.sh index be7a916e..56b98528 100755 --- a/tests/robustness/test-path-validation-robustness.sh +++ b/tests/robustness/test-path-validation-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for path validation # @@ -22,7 +22,7 @@ setup_test_dir setup_mock_codex() { mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << 'MOCKEOF' -#!/bin/bash +#!/usr/bin/env bash # Mock codex for test-path-validation-robustness.sh echo "Mock codex output" exit 0 diff --git a/tests/robustness/test-plan-file-robustness.sh b/tests/robustness/test-plan-file-robustness.sh index 7ee5f186..d2f5ee7f 100755 --- a/tests/robustness/test-plan-file-robustness.sh +++ b/tests/robustness/test-plan-file-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for plan file validation # @@ -27,7 +27,7 @@ mkdir -p "$XDG_CACHE_HOME" setup_mock_codex() { mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << 'MOCKEOF' -#!/bin/bash +#!/usr/bin/env bash # Mock codex for test-plan-file-robustness.sh echo "Mock codex output" exit 0 diff --git a/tests/robustness/test-pr-loop-api-fetch.sh b/tests/robustness/test-pr-loop-api-fetch.sh index e549be08..489a1cee 100755 --- a/tests/robustness/test-pr-loop-api-fetch.sh +++ b/tests/robustness/test-pr-loop-api-fetch.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # PR Loop API fetch/state tests (parallel split 1/2) # diff --git a/tests/robustness/test-pr-loop-api-poll.sh b/tests/robustness/test-pr-loop-api-poll.sh index 234130a6..f56d5594 100755 --- a/tests/robustness/test-pr-loop-api-poll.sh +++ b/tests/robustness/test-pr-loop-api-poll.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # PR Loop API poll/stop-hook tests (parallel split 2/2) # diff --git a/tests/robustness/test-pr-loop-api-robustness.sh b/tests/robustness/test-pr-loop-api-robustness.sh index 20b5c165..d8e5097a 
100755 --- a/tests/robustness/test-pr-loop-api-robustness.sh +++ b/tests/robustness/test-pr-loop-api-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for PR loop API handling # @@ -41,7 +41,7 @@ create_mock_gh() { # fetch-pr-comments.sh uses: gh repo view --json owner,name -q '...' # gh pr view PR --repo REPO --json number -q .number cat > "$dir/bin/gh" << 'GHEOF_START' -#!/bin/bash +#!/usr/bin/env bash # Mock gh command for testing # Check for -q flag anywhere in args (jq query) @@ -694,7 +694,7 @@ run_poll_tests() { # Create a mock gh that sleeps briefly but responds mkdir -p "$TEST_DIR/poll2/bin" cat > "$TEST_DIR/poll2/bin/gh" << 'GHEOF' -#!/bin/bash +#!/usr/bin/env bash # Handle repo view if [[ "$1" == "repo" && "$2" == "view" ]]; then if [[ "$*" == *"--json"* ]]; then @@ -761,7 +761,7 @@ GHEOF # Create a mock gh that fails on API calls mkdir -p "$TEST_DIR/poll3/bin" cat > "$TEST_DIR/poll3/bin/gh" << 'GHEOF' -#!/bin/bash +#!/usr/bin/env bash # Check for -q flag anywhere in args (jq query) HAS_Q_FLAG=false for arg in "$@"; do diff --git a/tests/robustness/test-session-robustness.sh b/tests/robustness/test-session-robustness.sh index 7b232e9f..4bc636af 100755 --- a/tests/robustness/test-session-robustness.sh +++ b/tests/robustness/test-session-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for concurrent session handling # diff --git a/tests/robustness/test-setup-scripts-robustness.sh b/tests/robustness/test-setup-scripts-robustness.sh index 13fe21bc..94fecad2 100755 --- a/tests/robustness/test-setup-scripts-robustness.sh +++ b/tests/robustness/test-setup-scripts-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for setup scripts # @@ -62,6 +62,23 @@ init_basic_git_repo() { cd - > /dev/null } +# Create a minimal PATH toolset in a test bin directory so scripts using +# '/usr/bin/env bash' still run even in restricted PATH scenarios. 
+prepare_runtime_bin() { + local bin_dir="$1" + local tool + local tool_path + + mkdir -p "$bin_dir" + + for tool in bash env git dirname cat sed awk grep mkdir date head od tr wc sort ls rm cp mv chmod ln readlink printf timeout gtimeout; do + tool_path=$(command -v "$tool" 2>/dev/null || true) + if [[ -n "$tool_path" && -x "$tool_path" && ! -e "$bin_dir/$tool" ]]; then + ln -s "$tool_path" "$bin_dir/$tool" + fi + done +} + # Run setup-rlcr-loop.sh with proper isolation from real RLCR loop # Usage: run_rlcr_setup [args...] run_rlcr_setup() { @@ -720,7 +737,7 @@ init_basic_git_repo "$TEST_DIR/repo30" # Create mock gh that fails auth check (to test dependency handling) mkdir -p "$TEST_DIR/repo30/bin" cat > "$TEST_DIR/repo30/bin/gh" << 'EOF' -#!/bin/bash +#!/usr/bin/env bash if [[ "$1" == "auth" && "$2" == "status" ]]; then echo "Not logged in" >&2 exit 1 @@ -816,7 +833,7 @@ REAL_GIT=$(command -v git) # Mock timeout that returns 124 for git rev-parse (first check in setup script) cat > "$TEST_DIR/repo34/bin/timeout" << TIMEOUTEOF -#!/bin/bash +#!/usr/bin/env bash # Mock timeout that returns 124 for git rev-parse to simulate timeout if [[ "\$*" == *"git"*"rev-parse"* ]]; then exit 124 @@ -833,7 +850,7 @@ chmod +x "$TEST_DIR/repo34/bin/gtimeout" # Create mock codex cat > "$TEST_DIR/repo34/bin/codex" << 'CODEXEOF' -#!/bin/bash +#!/usr/bin/env bash exit 0 CODEXEOF chmod +x "$TEST_DIR/repo34/bin/codex" @@ -1092,13 +1109,14 @@ git -C "$TEST_DIR/repo46" add .gitignore && git -C "$TEST_DIR/repo46" commit -q # Create bin dir with jq but no codex mkdir -p "$TEST_DIR/repo46/bin" +prepare_runtime_bin "$TEST_DIR/repo46/bin" cat > "$TEST_DIR/repo46/bin/jq" << 'EOF' -#!/bin/bash +#!/usr/bin/env bash exit 0 EOF chmod +x "$TEST_DIR/repo46/bin/jq" -# Hide system codex by making the only codex on PATH our empty bin dir -OUTPUT=$(PATH="$TEST_DIR/repo46/bin:/usr/bin:/bin" run_rlcr_setup "$TEST_DIR/repo46" plan.md 2>&1) || EXIT_CODE=$? 
+# Hide system codex by making the only codex on PATH our test bin dir +OUTPUT=$(PATH="$TEST_DIR/repo46/bin" run_rlcr_setup "$TEST_DIR/repo46" plan.md 2>&1) || EXIT_CODE=$? EXIT_CODE=${EXIT_CODE:-0} if [[ $EXIT_CODE -ne 0 ]] && echo "$OUTPUT" | grep -qi "Missing required dependencies" && echo "$OUTPUT" | grep -q "codex"; then pass "Missing codex detected in dependency check" @@ -1121,13 +1139,14 @@ git -C "$TEST_DIR/repo47" add .gitignore && git -C "$TEST_DIR/repo47" commit -q # Create bin dir with codex but no jq mkdir -p "$TEST_DIR/repo47/bin" +prepare_runtime_bin "$TEST_DIR/repo47/bin" cat > "$TEST_DIR/repo47/bin/codex" << 'EOF' -#!/bin/bash +#!/usr/bin/env bash exit 0 EOF chmod +x "$TEST_DIR/repo47/bin/codex" -# Use a restricted PATH that has git but no jq -OUTPUT=$(PATH="$TEST_DIR/repo47/bin:/usr/bin:/bin" run_rlcr_setup "$TEST_DIR/repo47" plan.md 2>&1) || EXIT_CODE=$? +# Use a restricted PATH with required runtime tools but no jq +OUTPUT=$(PATH="$TEST_DIR/repo47/bin" run_rlcr_setup "$TEST_DIR/repo47" plan.md 2>&1) || EXIT_CODE=$? 
EXIT_CODE=${EXIT_CODE:-0} if [[ $EXIT_CODE -ne 0 ]] && echo "$OUTPUT" | grep -qi "Missing required dependencies" && echo "$OUTPUT" | grep -q "jq"; then pass "Missing jq detected in dependency check" diff --git a/tests/robustness/test-state-file-robustness.sh b/tests/robustness/test-state-file-robustness.sh index b16171f9..9865a9ee 100755 --- a/tests/robustness/test-state-file-robustness.sh +++ b/tests/robustness/test-state-file-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for state file parsing # diff --git a/tests/robustness/test-state-transition-robustness.sh b/tests/robustness/test-state-transition-robustness.sh index 96eeb7da..6f6bdc62 100755 --- a/tests/robustness/test-state-transition-robustness.sh +++ b/tests/robustness/test-state-transition-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for state transition logic # diff --git a/tests/robustness/test-template-error-robustness.sh b/tests/robustness/test-template-error-robustness.sh index ccb3faac..7d9ae47a 100755 --- a/tests/robustness/test-template-error-robustness.sh +++ b/tests/robustness/test-template-error-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for template system error handling # diff --git a/tests/robustness/test-template-stress-robustness.sh b/tests/robustness/test-template-stress-robustness.sh index b599903b..0719c4ef 100755 --- a/tests/robustness/test-template-stress-robustness.sh +++ b/tests/robustness/test-template-stress-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for template system stress conditions # diff --git a/tests/robustness/test-timeout-robustness.sh b/tests/robustness/test-timeout-robustness.sh index 7d9276c9..03de1b92 100755 --- a/tests/robustness/test-timeout-robustness.sh +++ b/tests/robustness/test-timeout-robustness.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Robustness tests for timeout implementation # 
diff --git a/tests/run-all-tests.sh b/tests/run-all-tests.sh index d5cb5caf..1afafbee 100755 --- a/tests/run-all-tests.sh +++ b/tests/run-all-tests.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Run all test suites for the Humanize plugin (parallel execution) # @@ -98,7 +98,7 @@ trap "rm -rf $OUTPUT_DIR" EXIT if ! command -v codex &>/dev/null; then mkdir -p "$OUTPUT_DIR/mock-bin" cat > "$OUTPUT_DIR/mock-bin/codex" << 'MOCK_CODEX' -#!/bin/bash +#!/usr/bin/env bash exit 0 MOCK_CODEX chmod +x "$OUTPUT_DIR/mock-bin/codex" diff --git a/tests/setup-fixture-mock-gh.sh b/tests/setup-fixture-mock-gh.sh index eac3a8e3..f522bf58 100755 --- a/tests/setup-fixture-mock-gh.sh +++ b/tests/setup-fixture-mock-gh.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Create a mock gh CLI that returns fixture data for testing # fetch-pr-comments.sh and poll-pr-reviews.sh @@ -25,7 +25,7 @@ mkdir -p "$MOCK_BIN_DIR" # Create mock gh that returns fixtures cat > "$MOCK_BIN_DIR/gh" << MOCK_GH_EOF -#!/bin/bash +#!/usr/bin/env bash # Fixture-backed mock gh CLI for testing fetch/poll scripts FIXTURES_DIR="$FIXTURES_DIR" diff --git a/tests/setup-monitor-test-env.sh b/tests/setup-monitor-test-env.sh index 14673c6c..92fa9431 100755 --- a/tests/setup-monitor-test-env.sh +++ b/tests/setup-monitor-test-env.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Helper script to set up monitor test environment # This script creates the necessary directory structure and state files diff --git a/tests/test-agent-teams.sh b/tests/test-agent-teams.sh index 33c23df3..b55bc97b 100755 --- a/tests/test-agent-teams.sh +++ b/tests/test-agent-teams.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for --agent-teams feature in RLCR loop # @@ -498,7 +498,7 @@ setup_mock_codex_impl_feedback() { local feedback="$1" mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << MOCK_EOF -#!/bin/bash +#!/usr/bin/env bash if [[ "\$1" == "exec" ]]; then cat << 'REVIEW' $feedback @@ -516,7 +516,7 @@ 
setup_mock_codex_review_issues() { local review_output="$1" mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << MOCK_EOF -#!/bin/bash +#!/usr/bin/env bash if [[ "\$1" == "exec" ]]; then echo "Should not be called in review phase" elif [[ "\$1" == "review" ]]; then diff --git a/tests/test-allowlist-validators.sh b/tests/test-allowlist-validators.sh index 6c604965..53255928 100755 --- a/tests/test-allowlist-validators.sh +++ b/tests/test-allowlist-validators.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for allowlist behavior in RLCR loop validators # diff --git a/tests/test-ansi-parsing.sh b/tests/test-ansi-parsing.sh index 4394a8d7..8f70847f 100755 --- a/tests/test-ansi-parsing.sh +++ b/tests/test-ansi-parsing.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Test ANSI escape code handling in test runner output parsing # diff --git a/tests/test-ask-codex.sh b/tests/test-ask-codex.sh index 17405f69..319ab383 100755 --- a/tests/test-ask-codex.sh +++ b/tests/test-ask-codex.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for ask-codex.sh - one-shot consultation with mock Codex # @@ -36,7 +36,7 @@ MOCK_BIN_DIR="$TEST_DIR/mock-bin" mkdir -p "$MOCK_BIN_DIR" cat > "$MOCK_BIN_DIR/codex" << 'MOCK_EOF' -#!/bin/bash +#!/usr/bin/env bash # Mock codex binary for testing ask-codex.sh # Controlled via environment variables. 
if [[ -n "${MOCK_CODEX_STDERR:-}" ]]; then diff --git a/tests/test-bash-validator-patterns.sh b/tests/test-bash-validator-patterns.sh index 62a491f7..bd7f07f4 100755 --- a/tests/test-bash-validator-patterns.sh +++ b/tests/test-bash-validator-patterns.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Test script for command_modifies_file function in loop-common.sh # diff --git a/tests/test-cancel-signal-file.sh b/tests/test-cancel-signal-file.sh index 4d23eef3..9eb78d56 100755 --- a/tests/test-cancel-signal-file.sh +++ b/tests/test-cancel-signal-file.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for cancel-rlcr-loop signal file mechanism # diff --git a/tests/test-codex-review-merge.sh b/tests/test-codex-review-merge.sh index 8761bde7..ed1082bf 100755 --- a/tests/test-codex-review-merge.sh +++ b/tests/test-codex-review-merge.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for Code Review log file analysis behavior # diff --git a/tests/test-error-scenarios.sh b/tests/test-error-scenarios.sh index c05c1d8a..65930b1b 100755 --- a/tests/test-error-scenarios.sh +++ b/tests/test-error-scenarios.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Test error scenarios for template-loader.sh # diff --git a/tests/test-finalize-phase.sh b/tests/test-finalize-phase.sh index f33fe1c1..4efc4220 100755 --- a/tests/test-finalize-phase.sh +++ b/tests/test-finalize-phase.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for Finalize Phase feature # @@ -55,7 +55,7 @@ setup_mock_codex() { local review_output="${2:-No issues found.}" mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << EOF -#!/bin/bash +#!/usr/bin/env bash # Mock codex - outputs the provided content if [[ "\$1" == "exec" ]]; then cat << 'REVIEW' @@ -79,7 +79,7 @@ setup_mock_codex_with_tracking() { local review_output="${2:-No issues found.}" mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << EOF -#!/bin/bash +#!/usr/bin/env bash # Track that codex was 
called echo "CODEX_WAS_CALLED" > "$TEST_DIR/codex_called.marker" if [[ "\$1" == "exec" ]]; then @@ -104,7 +104,7 @@ setup_mock_codex_review_failure() { local review_exit_code="${2:-1}" mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << EOF -#!/bin/bash +#!/usr/bin/env bash # Mock codex - fails on review command if [[ "\$1" == "exec" ]]; then cat << 'REVIEW' @@ -126,7 +126,7 @@ setup_mock_codex_review_empty_stdout() { local exec_output="$1" mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << EOF -#!/bin/bash +#!/usr/bin/env bash # Mock codex - produces empty stdout on review if [[ "\$1" == "exec" ]]; then cat << 'REVIEW' diff --git a/tests/test-gen-plan.sh b/tests/test-gen-plan.sh index 51abab50..66e346fa 100755 --- a/tests/test-gen-plan.sh +++ b/tests/test-gen-plan.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Test script for gen-plan command structure validation # diff --git a/tests/test-helpers.sh b/tests/test-helpers.sh index a93a2329..2fafff3b 100644 --- a/tests/test-helpers.sh +++ b/tests/test-helpers.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Shared test helper functions for all test scripts # diff --git a/tests/test-humanize-escape.sh b/tests/test-humanize-escape.sh index 8731cc98..644a349f 100755 --- a/tests/test-humanize-escape.sh +++ b/tests/test-humanize-escape.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Test script for humanize-escape fixes # diff --git a/tests/test-monitor-e2e-deletion.sh b/tests/test-monitor-e2e-deletion.sh index bbca2c6d..afd738e6 100755 --- a/tests/test-monitor-e2e-deletion.sh +++ b/tests/test-monitor-e2e-deletion.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Monitor e2e deletion tests (parallel split 1/3) set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" diff --git a/tests/test-monitor-e2e-real.sh b/tests/test-monitor-e2e-real.sh index f0c8d0b0..1ef8d409 100755 --- a/tests/test-monitor-e2e-real.sh +++ b/tests/test-monitor-e2e-real.sh @@ -1,4 +1,4 @@ 
-#!/bin/bash +#!/usr/bin/env bash # # TRUE End-to-End Monitor Tests for monitor tests # @@ -105,7 +105,7 @@ GOALTRACKER_EOF1 # Create the test runner script # This script runs the REAL _humanize_monitor_codex function cat > "$TEST_PROJECT/run_real_monitor.sh" << 'MONITOR_SCRIPT' -#!/bin/bash +#!/usr/bin/env bash # Run the REAL _humanize_monitor_codex function PROJECT_DIR="$1" @@ -426,7 +426,7 @@ GOALTRACKER_SIGINT # Create the test runner script for SIGINT test cat > "$TEST_PROJECT_SIGINT/run_real_monitor_sigint.sh" << 'SIGINT_SCRIPT_EOF' -#!/bin/bash +#!/usr/bin/env bash # Run the REAL _humanize_monitor_codex function for SIGINT testing PROJECT_DIR="$1" @@ -747,7 +747,7 @@ GOALTRACKER_EOF # Create bash test runner script for PR monitor cat > "$TEST_PROJECT_PR/run_real_monitor_pr.sh" << 'MONITOR_SCRIPT' -#!/bin/bash +#!/usr/bin/env bash # Run the REAL _humanize_monitor_pr function PROJECT_DIR="$1" @@ -890,7 +890,7 @@ PR_GOAL_EOF # Create bash test runner script for PR monitor without --once cat > "$TEST_PROJECT_PR_NO_ONCE/run_real_monitor_pr_no_once.sh" << 'PR_NO_ONCE_EOF' -#!/bin/bash +#!/usr/bin/env bash # Run the REAL _humanize_monitor_pr function WITHOUT --once flag PROJECT_DIR="$1" diff --git a/tests/test-monitor-e2e-sigint.sh b/tests/test-monitor-e2e-sigint.sh index 9a354a2b..a6bfe20a 100755 --- a/tests/test-monitor-e2e-sigint.sh +++ b/tests/test-monitor-e2e-sigint.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Monitor e2e SIGINT tests (parallel split 2/3) set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" diff --git a/tests/test-monitor-runtime.sh b/tests/test-monitor-runtime.sh index f73256c0..e146adaf 100755 --- a/tests/test-monitor-runtime.sh +++ b/tests/test-monitor-runtime.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Runtime Verification Tests for tests # @@ -63,7 +63,7 @@ echo "current_round: 1" > .humanize/rlcr/2026-01-16_10-00-00/state.md # Create a test script that sources humanize.sh and tests the 
graceful stop behavior cat > test_graceful_stop.sh << 'TESTSCRIPT' -#!/bin/bash +#!/usr/bin/env bash cd "$1" # Source the monitor script @@ -141,7 +141,7 @@ echo "Test 2: Verify cleanup prevents double execution" echo "" cat > test_double_cleanup.sh << 'TESTSCRIPT' -#!/bin/bash +#!/usr/bin/env bash cleanup_done=false call_count=0 @@ -184,7 +184,7 @@ echo "Test 3: Main loop directory deletion detection" echo "" cat > test_loop_detection.sh << 'TESTSCRIPT' -#!/bin/bash +#!/usr/bin/env bash cd "$1" loop_dir=".humanize/rlcr" @@ -261,7 +261,7 @@ echo "" # and would reset the scroll region cat > test_terminal_restore.sh << 'TESTSCRIPT' -#!/bin/bash +#!/usr/bin/env bash # Test that _restore_terminal is defined and callable cd "$1" @@ -331,7 +331,7 @@ echo "Test 6: SIGINT triggers cleanup in bash" echo "" cat > test_sigint_bash.sh << 'TESTSCRIPT' -#!/bin/bash +#!/usr/bin/env bash # Test that SIGINT triggers cleanup in bash mode cleanup_done=false diff --git a/tests/test-plan-file-hooks.sh b/tests/test-plan-file-hooks.sh index d2e8af6f..00bd7138 100755 --- a/tests/test-plan-file-hooks.sh +++ b/tests/test-plan-file-hooks.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for plan file hooks during RLCR loop # @@ -40,7 +40,7 @@ mkdir -p "$XDG_CACHE_HOME" setup_mock_codex() { mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << 'MOCKEOF' -#!/bin/bash +#!/usr/bin/env bash # Mock codex for test-plan-file-hooks.sh if [[ "$1" == "exec" ]]; then echo "Mock review output" diff --git a/tests/test-plan-file-validation.sh b/tests/test-plan-file-validation.sh index 411c71f9..1fb10553 100755 --- a/tests/test-plan-file-validation.sh +++ b/tests/test-plan-file-validation.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for plan file validation in setup-rlcr-loop.sh # @@ -74,7 +74,7 @@ EOF mock_codex() { mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << 'EOF' -#!/bin/bash +#!/usr/bin/env bash # Mock codex for test-plan-file-validation.sh echo "mock codex" 
EOF diff --git a/tests/test-pr-loop-1-scripts.sh b/tests/test-pr-loop-1-scripts.sh index a4088b71..38ccd846 100755 --- a/tests/test-pr-loop-1-scripts.sh +++ b/tests/test-pr-loop-1-scripts.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # PR Loop Script Tests Runner (parallel split 1/3) # diff --git a/tests/test-pr-loop-2-hooks.sh b/tests/test-pr-loop-2-hooks.sh index 254bdbdb..56f6219e 100755 --- a/tests/test-pr-loop-2-hooks.sh +++ b/tests/test-pr-loop-2-hooks.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # PR Loop Hook Tests Runner (parallel split 2/3) # diff --git a/tests/test-pr-loop-3-stophook.sh b/tests/test-pr-loop-3-stophook.sh index a17c27a8..6a9149f5 100755 --- a/tests/test-pr-loop-3-stophook.sh +++ b/tests/test-pr-loop-3-stophook.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # PR Loop Stop Hook Tests Runner (parallel split 3/3) # diff --git a/tests/test-pr-loop-hooks.sh b/tests/test-pr-loop-hooks.sh index 1e8c6ca1..de4d09f7 100644 --- a/tests/test-pr-loop-hooks.sh +++ b/tests/test-pr-loop-hooks.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # PR Loop Hook Tests # @@ -482,7 +482,7 @@ create_enhanced_mock_gh() { local trigger_timestamp="${3:-2026-01-18T12:00:00Z}" cat > "$mock_dir/gh" << MOCK_GH -#!/bin/bash +#!/usr/bin/env bash # Enhanced mock gh CLI for stop hook testing case "\$1" in @@ -542,7 +542,7 @@ test_trigger_user_filter() { # Create mock that returns comments from different users cat > "$test_subdir/gh" << 'MOCK_GH' -#!/bin/bash +#!/usr/bin/env bash case "$1" in api) if [[ "$2" == "user" ]]; then @@ -794,7 +794,7 @@ EOF mkdir -p "$mock_bin" cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash +#!/usr/bin/env bash case "$1" in api) if [[ "$2" == "user" ]]; then @@ -816,7 +816,7 @@ MOCK_GH chmod +x "$mock_bin/gh" cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash +#!/usr/bin/env bash case "$1" in rev-parse) echo "/tmp/git" ;; status) echo "" ;; @@ -877,7 +877,7 @@ EOF # Mock gh that properly returns jq-parsed user and 
trigger comments cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash +#!/usr/bin/env bash case "$1" in api) if [[ "$2" == "user" ]]; then @@ -916,7 +916,7 @@ MOCK_GH chmod +x "$mock_bin/gh" cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash +#!/usr/bin/env bash case "$1" in rev-parse) echo "/tmp/git" ;; status) echo "" ;; @@ -979,7 +979,7 @@ EOF # Mock gh that simulates paginated response (returns multiple JSON arrays) # The trigger comment is on page 2 (second array) - only visible if pagination works cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash +#!/usr/bin/env bash case "$1" in api) if [[ "$2" == "user" ]]; then @@ -1023,7 +1023,7 @@ MOCK_GH chmod +x "$mock_bin/gh" cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash +#!/usr/bin/env bash case "$1" in rev-parse) echo "/tmp/git" ;; status) echo "" ;; @@ -1084,7 +1084,7 @@ EOF mkdir -p "$mock_bin" cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash +#!/usr/bin/env bash case "$1" in api) if [[ "$2" == "user" ]]; then @@ -1112,7 +1112,7 @@ MOCK_GH chmod +x "$mock_bin/gh" cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash +#!/usr/bin/env bash case "$1" in rev-parse) echo "/tmp/git" ;; status) echo "" ;; diff --git a/tests/test-pr-loop-lib.sh b/tests/test-pr-loop-lib.sh index 3d7693cb..a619e052 100644 --- a/tests/test-pr-loop-lib.sh +++ b/tests/test-pr-loop-lib.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Common library for PR loop tests # @@ -30,7 +30,7 @@ if [[ -z "${TEST_PR_LOOP_LIB_LOADED:-}" ]]; then mkdir -p "$mock_dir" cat > "$mock_dir/gh" << 'MOCK_GH' -#!/bin/bash +#!/usr/bin/env bash # Mock gh CLI for testing case "$1" in @@ -92,7 +92,7 @@ MOCK_GH local mock_dir="$1" cat > "$mock_dir/codex" << 'MOCK_CODEX' -#!/bin/bash +#!/usr/bin/env bash # Mock codex CLI for testing echo "Mock codex output" exit 0 diff --git a/tests/test-pr-loop-scripts.sh b/tests/test-pr-loop-scripts.sh index b1ce5a42..d77b9067 100644 --- a/tests/test-pr-loop-scripts.sh +++ b/tests/test-pr-loop-scripts.sh @@ -1,4 +1,4 @@ -#!/bin/bash 
+#!/usr/bin/env bash # # PR Loop Script Tests # diff --git a/tests/test-pr-loop-stophook.sh b/tests/test-pr-loop-stophook.sh index 1e71dcdf..a73f8a4b 100644 --- a/tests/test-pr-loop-stophook.sh +++ b/tests/test-pr-loop-stophook.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # PR Loop Stop Hook Tests # @@ -56,7 +56,7 @@ EOF # Mock gh that returns OLD trigger comment (BEFORE latest_commit_at) cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash +#!/usr/bin/env bash # Check if --jq is in arguments (for transformed format) HAS_JQ=false for arg in "$@"; do @@ -103,7 +103,7 @@ MOCK_GH chmod +x "$mock_bin/gh" cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash +#!/usr/bin/env bash case "$1" in rev-parse) if [[ "$2" == "HEAD" ]]; then @@ -175,7 +175,7 @@ EOF # Mock gh that returns no trigger comments, but has codex +1 cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash +#!/usr/bin/env bash case "$1" in api) if [[ "$2" == "user" ]]; then @@ -206,7 +206,7 @@ MOCK_GH chmod +x "$mock_bin/gh" cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash +#!/usr/bin/env bash case "$1" in rev-parse) if [[ "$2" == "HEAD" ]]; then @@ -281,13 +281,13 @@ EOF mkdir -p "$mock_bin" cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash +#!/usr/bin/env bash exit 0 MOCK_GH chmod +x "$mock_bin/gh" cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash +#!/usr/bin/env bash case "$1" in rev-parse) if [[ "$2" == "HEAD" ]]; then @@ -363,7 +363,7 @@ EOF # Mock gh that returns bot comments (simulating comments arriving) cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash +#!/usr/bin/env bash case "$1" in api) if [[ "$2" == "user" ]]; then @@ -411,7 +411,7 @@ MOCK_GH chmod +x "$mock_bin/gh" cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash +#!/usr/bin/env bash case "$1" in rev-parse) if [[ "$2" == "HEAD" ]]; then @@ -485,14 +485,14 @@ EOF mkdir -p "$mock_bin" cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash +#!/usr/bin/env bash exit 0 MOCK_GH chmod +x "$mock_bin/gh" # Mock git that reports unpushed commits cat > "$mock_bin/git" << 
'MOCK_GIT' -#!/bin/bash +#!/usr/bin/env bash case "$1" in rev-parse) if [[ "$2" == "HEAD" ]]; then @@ -571,7 +571,7 @@ EOF mkdir -p "$mock_bin" cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash +#!/usr/bin/env bash case "$1" in pr) if [[ "$*" == *"commits"* ]] && [[ "$*" == *"--jq"* ]]; then @@ -595,7 +595,7 @@ MOCK_GH # Mock git that simulates force push: old commit is NOT ancestor of current HEAD cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash +#!/usr/bin/env bash case "$1" in rev-parse) if [[ "$2" == "HEAD" ]]; then @@ -676,7 +676,7 @@ EOF # Mock gh that returns no trigger comments cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash +#!/usr/bin/env bash case "$1" in api) if [[ "$2" == "user" ]]; then @@ -711,7 +711,7 @@ MOCK_GH chmod +x "$mock_bin/gh" cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash +#!/usr/bin/env bash case "$1" in rev-parse) if [[ "$2" == "HEAD" ]]; then @@ -788,7 +788,7 @@ EOF # Mock gh that returns NO bot comments (simulates bot not responding) cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash +#!/usr/bin/env bash case "$1" in api) if [[ "$2" == "user" ]]; then @@ -820,7 +820,7 @@ MOCK_GH chmod +x "$mock_bin/gh" cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash +#!/usr/bin/env bash case "$1" in rev-parse) if [[ "$2" == "HEAD" ]]; then @@ -930,7 +930,7 @@ EOF # Mock gh that returns +1 reaction from codex cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash +#!/usr/bin/env bash case "$1" in api) if [[ "$2" == "user" ]]; then @@ -966,7 +966,7 @@ MOCK_GH chmod +x "$mock_bin/gh" cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash +#!/usr/bin/env bash case "$1" in rev-parse) if [[ "$2" == "HEAD" ]]; then @@ -1045,7 +1045,7 @@ EOF # Mock gh that returns NO eyes reaction (simulates claude bot not configured) cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash +#!/usr/bin/env bash # Check if --jq is in arguments (for transformed format) HAS_JQ=false for arg in "$@"; do @@ -1101,7 +1101,7 @@ MOCK_GH chmod +x "$mock_bin/gh" cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash 
+#!/usr/bin/env bash case "$1" in rev-parse) if [[ "$2" == "HEAD" ]]; then @@ -1193,7 +1193,7 @@ EOF # check-pr-reviewer-status.sh uses --jq so needs transformed format # Use COMMENT_TS environment variable for dynamic timestamp cat > "$mock_bin/gh" << MOCK_GH -#!/bin/bash +#!/usr/bin/env bash # Dynamic comment timestamp from test setup COMMENT_TS="$comment_ts" COMMIT_TS="$commit_ts" @@ -1307,7 +1307,7 @@ MOCK_GH chmod +x "$mock_bin/gh" cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash +#!/usr/bin/env bash case "$1" in rev-parse) if [[ "$2" == "HEAD" ]]; then @@ -1425,7 +1425,7 @@ EOF # - Current repo (fork) doesn't have PR 456 # - Parent repo (upstream) has PR 456 cat > "$mock_bin/gh" << 'MOCK_GH' -#!/bin/bash +#!/usr/bin/env bash # Track which repo we're querying FORK_REPO="forkuser/forkrepo" UPSTREAM_REPO="upstreamowner/upstreamrepo" @@ -1475,7 +1475,7 @@ MOCK_GH chmod +x "$mock_bin/gh" cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash +#!/usr/bin/env bash case "$1" in rev-parse) if [[ "$2" == "HEAD" ]]; then @@ -1583,7 +1583,7 @@ EOF # - claude: APPROVE (LGTM) # - codex: ISSUES (has issues) cat > "$mock_bin/gh" << MOCK_GH -#!/bin/bash +#!/usr/bin/env bash # Dynamic timestamps from test setup CLAUDE_TS="$claude_ts" CODEX_TS="$codex_ts" @@ -1653,7 +1653,7 @@ MOCK_GH chmod +x "$mock_bin/gh" cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash +#!/usr/bin/env bash case "$1" in rev-parse) if [[ "$2" == "HEAD" ]]; then @@ -1677,7 +1677,7 @@ MOCK_GIT # Mock codex that outputs mixed approval cat > "$mock_bin/codex" << 'MOCK_CODEX' -#!/bin/bash +#!/usr/bin/env bash # Mock codex output: claude approves, codex has issues cat << 'CODEX_OUTPUT' # PR Review Validation diff --git a/tests/test-pr-loop-system.sh b/tests/test-pr-loop-system.sh index e124cb6f..05cf3b87 100755 --- a/tests/test-pr-loop-system.sh +++ b/tests/test-pr-loop-system.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Test runner for PR loop system # @@ -910,7 +910,7 @@ run_monitor_once_capture_output() { 
# Create wrapper script that runs monitor and captures output local wrapper="$project_dir/run_monitor_test.sh" cat > "$wrapper" << 'WRAPPER_EOF' -#!/bin/bash +#!/usr/bin/env bash PROJECT_DIR="$1" PROJECT_ROOT="$2" @@ -1597,7 +1597,7 @@ EOF mkdir -p "$mock_bin" cat > "$mock_bin/gh" << MOCK_GH -#!/bin/bash +#!/usr/bin/env bash COMMENT_TS="$comment_ts" COMMIT_TS="$commit_ts" @@ -1669,7 +1669,7 @@ MOCK_GH chmod +x "$mock_bin/gh" cat > "$mock_bin/git" << 'MOCK_GIT' -#!/bin/bash +#!/usr/bin/env bash case "$1" in rev-parse) if [[ "$2" == "HEAD" ]]; then @@ -1691,7 +1691,7 @@ MOCK_GIT # Mock codex command - returns ISSUES_REMAINING to trigger goal tracker update cat > "$mock_bin/codex" << 'MOCK_CODEX' -#!/bin/bash +#!/usr/bin/env bash # Mock codex for testing - output review analysis cat << 'CODEX_OUTPUT' ## Bot Review Analysis diff --git a/tests/test-pr-loop.sh b/tests/test-pr-loop.sh index 54af3829..0bb615b8 100755 --- a/tests/test-pr-loop.sh +++ b/tests/test-pr-loop.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for PR loop feature # diff --git a/tests/test-session-id.sh b/tests/test-session-id.sh index fa28b9de..0d2656a0 100755 --- a/tests/test-session-id.sh +++ b/tests/test-session-id.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for session_id feature in RLCR loop # diff --git a/tests/test-skill-monitor.sh b/tests/test-skill-monitor.sh index f50babdf..3ccebce5 100755 --- a/tests/test-skill-monitor.sh +++ b/tests/test-skill-monitor.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for _humanize_monitor_skill (humanize monitor skill) # diff --git a/tests/test-state-exit-naming.sh b/tests/test-state-exit-naming.sh index d48c8a95..7982baac 100755 --- a/tests/test-state-exit-naming.sh +++ b/tests/test-state-exit-naming.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for state.md rename on exit # diff --git a/tests/test-stop-gate.sh b/tests/test-stop-gate.sh index 7682c853..d3238c49 100755 --- 
a/tests/test-stop-gate.sh +++ b/tests/test-stop-gate.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Tests for rlcr-stop-gate wrapper project root detection # diff --git a/tests/test-template-loader.sh b/tests/test-template-loader.sh index 74bbd784..e9d48639 100755 --- a/tests/test-template-loader.sh +++ b/tests/test-template-loader.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Test script for template-loader.sh # diff --git a/tests/test-template-references.sh b/tests/test-template-references.sh index a302ad19..13d4c31b 100755 --- a/tests/test-template-references.sh +++ b/tests/test-template-references.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Template Reference Validation # diff --git a/tests/test-templates-comprehensive.sh b/tests/test-templates-comprehensive.sh index 0e7e8f3c..bf8c38af 100755 --- a/tests/test-templates-comprehensive.sh +++ b/tests/test-templates-comprehensive.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Comprehensive template validation tests for CI/CD # diff --git a/tests/test-todo-checker.sh b/tests/test-todo-checker.sh index b3e7b072..8588bf3b 100755 --- a/tests/test-todo-checker.sh +++ b/tests/test-todo-checker.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Test script for check-todos-from-transcript.py # From 43032491dcb2c35fc9179f99a3fe446758ba857e Mon Sep 17 00:00:00 2001 From: Qiming Chu Date: Tue, 17 Mar 2026 22:34:35 +0800 Subject: [PATCH 21/50] Bump version to 1.14.1 for branch CI validation Co-Authored-By: Claude Opus 4.6 --- .claude-plugin/marketplace.json | 2 +- .claude-plugin/plugin.json | 2 +- README.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index d433af17..af4c0ce6 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -8,7 +8,7 @@ "name": "humanize", "source": "./", "description": "Humanize - An iterative development plugin that uses Codex to 
review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.14.0" + "version": "1.14.1" } ] } diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index d53f704d..8a69a6c7 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "humanize", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.14.0", + "version": "1.14.1", "author": { "name": "humania-org" }, diff --git a/README.md b/README.md index dac0f88f..d864489a 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Humanize -**Current Version: 1.14.0** +**Current Version: 1.14.1** > Derived from the [GAAC (GitHub-as-a-Context)](https://github.com/SihaoLiu/gaac) project. 
From 4deb1db636e01586c4e687034e93635999e184e1 Mon Sep 17 00:00:00 2001 From: Chao Liu Date: Wed, 18 Mar 2026 21:48:29 +0800 Subject: [PATCH 22/50] Harden template-loader error handling and reduce code duplication - Remove spurious blank stderr line from load_template on missing files - Add awk exit code propagation in render_template to surface failures - Extract _emit_fallback helper to deduplicate load_and_render_safe - Prevent append_template from appending empty content on missing templates - Extend validate_template_dir to check all required subdirs (plan, pr-loop) - Bump version to 1.14.1 Signed-off-by: Chao Liu --- .claude-plugin/marketplace.json | 2 +- .claude-plugin/plugin.json | 2 +- README.md | 2 +- hooks/lib/template-loader.sh | 58 +++++++++++++++++++++------------ 4 files changed, 40 insertions(+), 24 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index d433af17..af4c0ce6 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -8,7 +8,7 @@ "name": "humanize", "source": "./", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.14.0" + "version": "1.14.1" } ] } diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index d53f704d..8a69a6c7 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "humanize", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. 
Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.14.0", + "version": "1.14.1", "author": { "name": "humania-org" }, diff --git a/README.md b/README.md index dac0f88f..d864489a 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Humanize -**Current Version: 1.14.0** +**Current Version: 1.14.1** > Derived from the [GAAC (GitHub-as-a-Context)](https://github.com/SihaoLiu/gaac) project. diff --git a/hooks/lib/template-loader.sh b/hooks/lib/template-loader.sh index 7f46853d..028a66b3 100644 --- a/hooks/lib/template-loader.sh +++ b/hooks/lib/template-loader.sh @@ -41,9 +41,7 @@ load_template() { if [[ -f "$template_path" ]]; then cat "$template_path" else - echo "" >&2 echo "Warning: Template not found: $template_path" >&2 - echo "" fi } @@ -71,6 +69,7 @@ render_template() { # Single-pass replacement using awk # Scans for {{VAR}} patterns and replaces them with values from environment # Replaced content goes directly to output without re-scanning + local awk_exit=0 content=$(env "${env_vars[@]}" awk ' BEGIN { # Build lookup table from environment variables with TMPL_VAR_ prefix @@ -126,7 +125,12 @@ render_template() { } print result - }' <<< "$content") + }' <<< "$content") || awk_exit=$? + + if [[ $awk_exit -ne 0 ]]; then + echo "Error: Template rendering failed (awk exit code: $awk_exit)" >&2 + return 1 + fi echo "$content" } @@ -148,22 +152,36 @@ load_and_render() { # Append content from another template file # Usage: append_template "$base_content" "$TEMPLATE_DIR" "claude/post-alignment.md" +# Only appends if the template exists and is non-empty. 
append_template() { local base_content="$1" local template_dir="$2" local template_name="$3" local additional_content - additional_content=$(load_template "$template_dir" "$template_name") + additional_content=$(load_template "$template_dir" "$template_name" 2>/dev/null) || true echo "$base_content" - echo "$additional_content" + if [[ -n "$additional_content" ]]; then + echo "$additional_content" + fi } # ======================================== # Safe versions with fallback messages # ======================================== +# Emit a fallback message, optionally rendering template variables. +_emit_fallback() { + local fallback_msg="$1" + shift + if [[ $# -gt 0 ]]; then + render_template "$fallback_msg" "$@" + else + echo "$fallback_msg" + fi +} + # Load and render with a fallback message if template fails # Usage: load_and_render_safe "$TEMPLATE_DIR" "block/message.md" "fallback message" "VAR=value" ... # Returns fallback message if template is missing or empty @@ -174,28 +192,18 @@ load_and_render_safe() { shift 3 local content - content=$(load_template "$template_dir" "$template_name" 2>/dev/null) + content=$(load_template "$template_dir" "$template_name" 2>/dev/null) || true if [[ -z "$content" ]]; then - # Template missing - use fallback with variable substitution - if [[ $# -gt 0 ]]; then - render_template "$fallback_msg" "$@" - else - echo "$fallback_msg" - fi + _emit_fallback "$fallback_msg" "$@" return fi local result - result=$(render_template "$content" "$@") + result=$(render_template "$content" "$@") || true if [[ -z "$result" ]]; then - # Rendering produced empty result - use fallback - if [[ $# -gt 0 ]]; then - render_template "$fallback_msg" "$@" - else - echo "$fallback_msg" - fi + _emit_fallback "$fallback_msg" "$@" return fi @@ -213,8 +221,16 @@ validate_template_dir() { return 1 fi - if [[ ! -d "$template_dir/block" ]] || [[ ! -d "$template_dir/codex" ]] || [[ ! 
-d "$template_dir/claude" ]]; then - echo "ERROR: Template directory missing subdirectories: $template_dir" >&2 + local required_subdirs=("block" "codex" "claude" "plan" "pr-loop") + local missing=() + local subdir + for subdir in "${required_subdirs[@]}"; do + if [[ ! -d "$template_dir/$subdir" ]]; then + missing+=("$subdir") + fi + done + if [[ ${#missing[@]} -gt 0 ]]; then + echo "ERROR: Template directory missing subdirectories (${missing[*]}): $template_dir" >&2 return 1 fi From 084464851a2c0a6c0e56358e49d14a1bd6c53938 Mon Sep 17 00:00:00 2001 From: Zhou Yaoyang Date: Sun, 15 Mar 2026 17:04:31 +0800 Subject: [PATCH 23/50] Add native Codex hook support and harden install/test flows - add native Codex hook config and installer, wire RLCR/PR stop hooks, and document Codex setup and usage - fix BitLesson selector routing and update related skills/docs - simplify install-skill target handling and harden RLCR test mock argument parsing --- .gitignore | 2 + config/codex-hooks.json | 23 ++ docs/bitlesson.md | 4 + docs/install-for-codex.md | 33 ++- docs/usage.md | 5 + hooks/loop-codex-stop-hook.sh | 11 +- hooks/pr-loop-stop-hook.sh | 5 +- scripts/bitlesson-select.sh | 77 +++-- scripts/install-codex-hooks.sh | 197 +++++++++++++ scripts/install-skill.sh | 226 ++++++++++++++- scripts/rlcr-stop-gate.sh | 7 + skills/humanize-rlcr/SKILL.md | 40 +-- skills/humanize/SKILL.md | 9 +- tests/run-all-tests.sh | 1 + tests/test-agent-teams.sh | 22 +- tests/test-bitlesson-select-routing.sh | 131 ++++++++- tests/test-codex-hook-install.sh | 340 +++++++++++++++++++++++ tests/test-disable-nested-codex-hooks.sh | 213 ++++++++++++++ tests/test-finalize-phase.sh | 44 ++- tests/test-task-tag-routing.sh | 13 +- 20 files changed, 1301 insertions(+), 102 deletions(-) create mode 100644 config/codex-hooks.json create mode 100755 scripts/install-codex-hooks.sh create mode 100755 tests/test-codex-hook-install.sh create mode 100644 tests/test-disable-nested-codex-hooks.sh diff --git a/.gitignore 
b/.gitignore index 8ef0d573..2308545e 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,8 @@ temp # Humanize state directories (runtime-generated, project-local) .humanize/ +.claude-flow/ +.swarm/ # Python cache __pycache__/ diff --git a/config/codex-hooks.json b/config/codex-hooks.json new file mode 100644 index 00000000..7a04402a --- /dev/null +++ b/config/codex-hooks.json @@ -0,0 +1,23 @@ +{ + "description": "Humanize Codex Hooks - Native Stop hooks for RLCR and PR loops", + "hooks": { + "Stop": [ + { + "hooks": [ + { + "type": "command", + "command": "{{HUMANIZE_RUNTIME_ROOT}}/hooks/loop-codex-stop-hook.sh", + "timeout": 7200, + "statusMessage": "humanize RLCR stop hook" + }, + { + "type": "command", + "command": "{{HUMANIZE_RUNTIME_ROOT}}/hooks/pr-loop-stop-hook.sh", + "timeout": 7200, + "statusMessage": "humanize PR stop hook" + } + ] + } + ] + } +} diff --git a/docs/bitlesson.md b/docs/bitlesson.md index bb2c3bac..01bb32e5 100644 --- a/docs/bitlesson.md +++ b/docs/bitlesson.md @@ -18,6 +18,10 @@ Provider routing is automatic: If the configured provider binary is missing, the selector falls back to the default Codex model so the loop can still proceed. +On Codex-only installs, Humanize writes `provider_mode: "codex-only"` into the user config. +When that mode is present, the selector forces BitLesson selection onto the Codex/OpenAI path +before provider resolution, even if an older default such as `haiku` would otherwise route to Claude. + ## Workflow Each project keeps its BitLesson knowledge base at `.humanize/bitlesson.md`. diff --git a/docs/install-for-codex.md b/docs/install-for-codex.md index a0c5dac2..8698d001 100644 --- a/docs/install-for-codex.md +++ b/docs/install-for-codex.md @@ -1,6 +1,6 @@ # Install Humanize Skills for Codex -This guide explains how to install the Humanize skills for Codex skill runtime (`$CODEX_HOME/skills`). 
+This guide explains how to install Humanize for Codex CLI, including the skill runtime (`$CODEX_HOME/skills`) and the native Codex `Stop` hook (`$CODEX_HOME/hooks.json`). ## Quick Install (Recommended) @@ -25,8 +25,14 @@ Or use the unified installer directly: This will: - Sync `humanize`, `humanize-gen-plan`, `humanize-refine-plan`, and `humanize-rlcr` into `${CODEX_HOME:-~/.codex}/skills` - Copy runtime dependencies into `${CODEX_HOME:-~/.codex}/skills/humanize` +- Install/update native Humanize Stop hooks in `${CODEX_HOME:-~/.codex}/hooks.json` +- Enable the experimental `codex_hooks` feature in `${CODEX_HOME:-~/.codex}/config.toml` when `codex` is available +- Seed `~/.config/humanize/config.json` with a Codex/OpenAI `bitlesson_model` when that key is not already set +- Mark the install as `provider_mode: "codex-only"` when using `--target codex` - Use RLCR defaults: `codex exec` with `gpt-5.4:high`, `codex review` with `gpt-5.4:high` +Requires Codex CLI `0.114.0` or newer for native hooks. Older Codex builds are not supported by the Codex install path. 
+ ## Verify ```bash @@ -58,6 +64,21 @@ Installed files/directories: - `${CODEX_HOME:-~/.codex}/skills/humanize/templates/` - `${CODEX_HOME:-~/.codex}/skills/humanize/config/` - `${CODEX_HOME:-~/.codex}/skills/humanize/agents/` +- `${CODEX_HOME:-~/.codex}/hooks.json` +- `${XDG_CONFIG_HOME:-~/.config}/humanize/config.json` (created or updated only when Humanize config keys are unset) + +Verify native hooks: + +```bash +codex features list | rg codex_hooks +sed -n '1,220p' "${CODEX_HOME:-$HOME/.codex}/hooks.json" +``` + +Expected: +- `codex_hooks` is `true` +- `hooks.json` contains `loop-codex-stop-hook.sh` and `pr-loop-stop-hook.sh` +- `${XDG_CONFIG_HOME:-~/.config}/humanize/config.json` contains `bitlesson_model` set to a Codex/OpenAI model such as `gpt-5.4` +- for `--target codex`, `${XDG_CONFIG_HOME:-~/.config}/humanize/config.json` also contains `provider_mode: "codex-only"` ## Optional: Install for Both Codex and Kimi @@ -73,6 +94,9 @@ Installed files/directories: # Custom Codex skills dir ./scripts/install-skills-codex.sh --codex-skills-dir /custom/codex/skills + +# Reinstall only the native hooks/config +./scripts/install-codex-hooks.sh ``` ## Troubleshooting @@ -82,3 +106,10 @@ If scripts are not found from installed skills: ```bash ls -la "${CODEX_HOME:-$HOME/.codex}/skills/humanize/scripts" ``` + +If native exit gating does not trigger: + +```bash +codex features enable codex_hooks +sed -n '1,220p' "${CODEX_HOME:-$HOME/.codex}/hooks.json" +``` diff --git a/docs/usage.md b/docs/usage.md index e12d45b9..b5625bec 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -277,6 +277,7 @@ Current built-in keys: | `codex_model` | `gpt-5.4` | Shared default model for Codex-backed review and analysis | | `codex_effort` | `high` | Shared default reasoning effort (`xhigh`, `high`, `medium`, `low`) | | `bitlesson_model` | `haiku` | Model used by the BitLesson selector agent | +| `provider_mode` | unset | Optional runtime mode hint such as `codex-only` | | `agent_teams` | 
`false` | Project-level default for agent teams workflow | | `alternative_plan_language` | `""` | Optional translated plan variant language; supported values include `Chinese`, `Korean`, `Japanese`, `Spanish`, `French`, `German`, `Portuguese`, `Russian`, `Arabic`, or ISO codes like `zh` | | `gen_plan_mode` | `discussion` | Default plan-generation mode | @@ -300,6 +301,10 @@ To override, add to `.humanize/config.json`: } ``` +On Codex installs, Humanize also seeds `${XDG_CONFIG_HOME:-~/.config}/humanize/config.json` +with a Codex/OpenAI `bitlesson_model` and `provider_mode: "codex-only"` when those keys +are unset, so BitLesson selection stays on the Codex/OpenAI path without probing Claude. + Codex model is resolved with this precedence: 1. CLI `--codex-model` flag (highest priority) 2. Feature-specific defaults (e.g., PR loop defaults to `medium` effort) diff --git a/hooks/loop-codex-stop-hook.sh b/hooks/loop-codex-stop-hook.sh index 4d397a19..ae703d30 100755 --- a/hooks/loop-codex-stop-hook.sh +++ b/hooks/loop-codex-stop-hook.sh @@ -980,6 +980,9 @@ mkdir -p "$CACHE_DIR" # portable-timeout.sh already sourced above +# Disable native hooks for nested Codex reviewer calls to prevent Stop-hook recursion. +CODEX_DISABLE_HOOKS_ARGS=(--disable codex_hooks) + # Build command arguments for summary review (codex exec) CODEX_EXEC_ARGS=("-m" "$CODEX_EXEC_MODEL") if [[ -n "$CODEX_EXEC_EFFORT" ]]; then @@ -1056,14 +1059,14 @@ Provider: codex echo "# Review base ($review_base_type): $review_base" echo "# Timeout: $CODEX_TIMEOUT seconds" echo "" - echo "codex review --base $review_base ${CODEX_REVIEW_ARGS[*]}" + echo "codex ${CODEX_DISABLE_HOOKS_ARGS[*]} review --base $review_base ${CODEX_REVIEW_ARGS[*]}" } > "$CODEX_REVIEW_CMD_FILE" echo "Code review command saved to: $CODEX_REVIEW_CMD_FILE" >&2 echo "Running codex review with timeout ${CODEX_TIMEOUT}s in $PROJECT_ROOT (base: $review_base)..." 
>&2 CODEX_REVIEW_EXIT_CODE=0 - (cd "$PROJECT_ROOT" && run_with_timeout "$CODEX_TIMEOUT" codex review --base "$review_base" "${CODEX_REVIEW_ARGS[@]}") \ + (cd "$PROJECT_ROOT" && run_with_timeout "$CODEX_TIMEOUT" codex "${CODEX_DISABLE_HOOKS_ARGS[@]}" review --base "$review_base" "${CODEX_REVIEW_ARGS[@]}") \ > "$CODEX_REVIEW_LOG_FILE" 2>&1 || CODEX_REVIEW_EXIT_CODE=$? echo "Code review exit code: $CODEX_REVIEW_EXIT_CODE" >&2 @@ -1387,7 +1390,7 @@ CODEX_PROMPT_CONTENT=$(cat "$REVIEW_PROMPT_FILE") echo "# Working directory: $PROJECT_ROOT" echo "# Timeout: $CODEX_TIMEOUT seconds" echo "" - echo "codex exec ${CODEX_EXEC_ARGS[*]} \"\"" + echo "codex ${CODEX_DISABLE_HOOKS_ARGS[*]} exec ${CODEX_EXEC_ARGS[*]} \"\"" echo "" echo "# Prompt content:" echo "$CODEX_PROMPT_CONTENT" @@ -1397,7 +1400,7 @@ echo "Codex command saved to: $CODEX_CMD_FILE" >&2 echo "Running summary review with timeout ${CODEX_TIMEOUT}s..." >&2 CODEX_EXIT_CODE=0 -printf '%s' "$CODEX_PROMPT_CONTENT" | run_with_timeout "$CODEX_TIMEOUT" codex exec "${CODEX_EXEC_ARGS[@]}" - \ +printf '%s' "$CODEX_PROMPT_CONTENT" | run_with_timeout "$CODEX_TIMEOUT" codex "${CODEX_DISABLE_HOOKS_ARGS[@]}" exec "${CODEX_EXEC_ARGS[@]}" - \ > "$CODEX_STDOUT_FILE" 2> "$CODEX_STDERR_FILE" || CODEX_EXIT_CODE=$? echo "Codex exit code: $CODEX_EXIT_CODE" >&2 diff --git a/hooks/pr-loop-stop-hook.sh b/hooks/pr-loop-stop-hook.sh index f02710e2..8dedd8c0 100755 --- a/hooks/pr-loop-stop-hook.sh +++ b/hooks/pr-loop-stop-hook.sh @@ -1334,12 +1334,15 @@ if [[ "${HUMANIZE_CODEX_BYPASS_SANDBOX:-}" == "true" ]] || [[ "${HUMANIZE_CODEX_ CODEX_AUTO_FLAG="--dangerously-bypass-approvals-and-sandbox" fi +# Disable native hooks for nested Codex reviewer calls to prevent Stop-hook recursion. 
+CODEX_DISABLE_HOOKS_ARGS=(--disable codex_hooks) + CODEX_ARGS+=("$CODEX_AUTO_FLAG" "-C" "$PROJECT_ROOT") CODEX_PROMPT_CONTENT=$(cat "$CODEX_PROMPT_FILE") CODEX_EXIT_CODE=0 -printf '%s' "$CODEX_PROMPT_CONTENT" | run_with_timeout "$PR_CODEX_TIMEOUT" codex exec "${CODEX_ARGS[@]}" - \ +printf '%s' "$CODEX_PROMPT_CONTENT" | run_with_timeout "$PR_CODEX_TIMEOUT" codex "${CODEX_DISABLE_HOOKS_ARGS[@]}" exec "${CODEX_ARGS[@]}" - \ > "$CHECK_FILE" 2>/dev/null || CODEX_EXIT_CODE=$? if [[ $CODEX_EXIT_CODE -ne 0 ]]; then diff --git a/scripts/bitlesson-select.sh b/scripts/bitlesson-select.sh index 9399b06c..4d2b668d 100755 --- a/scripts/bitlesson-select.sh +++ b/scripts/bitlesson-select.sh @@ -15,6 +15,10 @@ PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(git rev-parse --show-toplevel 2>/dev/null MERGED_CONFIG="$(load_merged_config "$PLUGIN_ROOT" "$PROJECT_ROOT")" BITLESSON_MODEL="$(get_config_value "$MERGED_CONFIG" "bitlesson_model")" BITLESSON_MODEL="${BITLESSON_MODEL:-haiku}" +CODEX_FALLBACK_MODEL="$(get_config_value "$MERGED_CONFIG" "codex_model")" +CODEX_FALLBACK_MODEL="${CODEX_FALLBACK_MODEL:-$DEFAULT_CODEX_MODEL}" +PROVIDER_MODE="$(get_config_value "$MERGED_CONFIG" "provider_mode")" +PROVIDER_MODE="${PROVIDER_MODE:-auto}" # Source portable timeout wrapper source "$SCRIPT_DIR/portable-timeout.sh" @@ -82,12 +86,34 @@ if [[ -z "$BITLESSON_FILE" ]]; then exit 1 fi +if [[ ! -f "$BITLESSON_FILE" ]]; then + echo "Error: BitLesson file not found: $BITLESSON_FILE" >&2 + exit 1 +fi + +BITLESSON_CONTENT="$(cat "$BITLESSON_FILE")" +if [[ -z "$(printf '%s' "$BITLESSON_CONTENT" | tr -d ' \t\n\r')" ]]; then + echo "Error: BitLesson file is empty (whitespace only): $BITLESSON_FILE" >&2 + exit 1 +fi + +if ! 
printf '%s\n' "$BITLESSON_CONTENT" | grep -Eq '^[[:space:]]*##[[:space:]]+Lesson:'; then + printf 'LESSON_IDS: NONE\n' + printf 'RATIONALE: The BitLesson file has no recorded lessons yet.\n' + exit 0 +fi + # ======================================== # Determine Provider from BITLESSON_MODEL # ======================================== BITLESSON_PROVIDER="$(detect_provider "$BITLESSON_MODEL")" +if [[ "$PROVIDER_MODE" == "codex-only" ]] && [[ "$BITLESSON_PROVIDER" == "claude" ]]; then + BITLESSON_MODEL="$CODEX_FALLBACK_MODEL" + BITLESSON_PROVIDER="codex" +fi + # ======================================== # Conditional Dependency Check (with fallback) # ======================================== @@ -99,17 +125,6 @@ if ! check_provider_dependency "$BITLESSON_PROVIDER" 2>/dev/null; then check_provider_dependency "$BITLESSON_PROVIDER" fi -if [[ ! -f "$BITLESSON_FILE" ]]; then - echo "Error: BitLesson file not found: $BITLESSON_FILE" >&2 - exit 1 -fi - -BITLESSON_CONTENT="$(cat "$BITLESSON_FILE")" -if [[ -z "$(printf '%s' "$BITLESSON_CONTENT" | tr -d ' \t\n\r')" ]]; then - echo "Error: BitLesson file is empty (whitespace only): $BITLESSON_FILE" >&2 - exit 1 -fi - # ======================================== # Detect Project Root (for -C) # ======================================== @@ -148,6 +163,7 @@ $BITLESSON_CONTENT 1. Match only lessons that are directly relevant to the sub-task scope and failure mode. 2. Prefer precision over recall: do not include weakly related lessons. 3. If nothing is relevant, return \`NONE\`. +4. Use only the information in this prompt. Do not use tools, shell commands, browser access, MCP servers, or repository inspection. 
## Output Format (Stable) @@ -164,21 +180,35 @@ EOF SELECTOR_TIMEOUT=120 -CODEX_EXIT_CODE=0 -if [[ "$BITLESSON_PROVIDER" == "codex" ]]; then - CODEX_EXEC_ARGS=("-m" "$BITLESSON_MODEL" "-c" "model_reasoning_effort=high") +run_selector() { + local provider="$1" + local model="$2" + + if [[ "$provider" == "codex" ]]; then + local codex_exec_args=( + "--disable" "codex_hooks" + "--skip-git-repo-check" + "--ephemeral" + "-s" "read-only" + "-m" "$model" + "-c" "model_reasoning_effort=low" + "-C" "$CODEX_PROJECT_ROOT" + ) + printf '%s' "$PROMPT" | run_with_timeout "$SELECTOR_TIMEOUT" codex exec "${codex_exec_args[@]}" - + return $? + fi - # Determine automation flag based on environment variable (same as ask-codex.sh) - CODEX_AUTO_FLAG="--full-auto" - if [[ "${HUMANIZE_CODEX_BYPASS_SANDBOX:-}" == "true" ]] || [[ "${HUMANIZE_CODEX_BYPASS_SANDBOX:-}" == "1" ]]; then - CODEX_AUTO_FLAG="--dangerously-bypass-approvals-and-sandbox" + if [[ "$provider" == "claude" ]]; then + printf '%s' "$PROMPT" | run_with_timeout "$SELECTOR_TIMEOUT" claude --print --model "$model" - + return $? fi - CODEX_EXEC_ARGS+=("$CODEX_AUTO_FLAG" "-C" "$CODEX_PROJECT_ROOT") - RAW_OUTPUT="$(printf '%s' "$PROMPT" | run_with_timeout "$SELECTOR_TIMEOUT" codex exec "${CODEX_EXEC_ARGS[@]}" -)" || CODEX_EXIT_CODE=$? -elif [[ "$BITLESSON_PROVIDER" == "claude" ]]; then - RAW_OUTPUT="$(printf '%s' "$PROMPT" | run_with_timeout "$SELECTOR_TIMEOUT" claude --print --model "$BITLESSON_MODEL" -)" || CODEX_EXIT_CODE=$? -fi + echo "Error: Unsupported BitLesson provider '$provider'" >&2 + return 1 +} + +CODEX_EXIT_CODE=0 +RAW_OUTPUT="$(run_selector "$BITLESSON_PROVIDER" "$BITLESSON_MODEL" 2>&1)" || CODEX_EXIT_CODE=$? 
if [[ $CODEX_EXIT_CODE -eq 124 ]]; then echo "Error: BitLesson selector timed out after ${SELECTOR_TIMEOUT} seconds" >&2 @@ -187,6 +217,7 @@ fi if [[ $CODEX_EXIT_CODE -ne 0 ]]; then echo "Error: BitLesson selector failed (exit code $CODEX_EXIT_CODE)" >&2 + printf '%s\n' "$RAW_OUTPUT" >&2 exit "$CODEX_EXIT_CODE" fi diff --git a/scripts/install-codex-hooks.sh b/scripts/install-codex-hooks.sh new file mode 100755 index 00000000..362b822f --- /dev/null +++ b/scripts/install-codex-hooks.sh @@ -0,0 +1,197 @@ +#!/bin/bash +# +# Install/update Humanize native Codex hooks in CODEX_HOME/hooks.json. +# + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +CODEX_CONFIG_DIR="${CODEX_HOME:-${HOME}/.codex}" +RUNTIME_ROOT="$CODEX_CONFIG_DIR/skills/humanize" +DRY_RUN="false" +ENABLE_FEATURE="true" +HOOKS_TEMPLATE="$REPO_ROOT/config/codex-hooks.json" + +usage() { + cat <<'EOF' +Install/update Humanize native Codex hooks. + +Usage: + scripts/install-codex-hooks.sh [options] + +Options: + --codex-config-dir PATH Codex config dir (default: ${CODEX_HOME:-~/.codex}) + --runtime-root PATH Installed Humanize runtime root (default: /skills/humanize) + --skip-enable-feature Do not run `codex features enable codex_hooks` + --dry-run Print actions without writing + -h, --help Show help +EOF +} + +log() { + printf '[install-codex-hooks] %s\n' "$*" +} + +die() { + printf '[install-codex-hooks] Error: %s\n' "$*" >&2 + exit 1 +} + +while [[ $# -gt 0 ]]; do + case "$1" in + --codex-config-dir) + [[ -n "${2:-}" ]] || die "--codex-config-dir requires a value" + CODEX_CONFIG_DIR="$2" + shift 2 + ;; + --runtime-root) + [[ -n "${2:-}" ]] || die "--runtime-root requires a value" + RUNTIME_ROOT="$2" + shift 2 + ;; + --skip-enable-feature) + ENABLE_FEATURE="false" + shift + ;; + --dry-run) + DRY_RUN="true" + shift + ;; + -h|--help) + usage + exit 0 + ;; + *) + die "unknown option: $1" + ;; + esac +done + +[[ -f "$HOOKS_TEMPLATE" 
]] || die "hook template not found: $HOOKS_TEMPLATE" + +HOOKS_FILE="$CODEX_CONFIG_DIR/hooks.json" + +require_codex_hooks_support() { + if ! command -v codex >/dev/null 2>&1; then + die "Codex CLI with native hooks support is required. Install Codex 0.114.0+ first." + fi + + if ! codex features list 2>/dev/null | grep -qE '^codex_hooks[[:space:]]'; then + die "Installed Codex CLI does not expose the codex_hooks feature. Humanize Codex install requires Codex 0.114.0+." + fi +} + +merge_hooks_json() { + local hooks_file="$1" + local template_file="$2" + local runtime_root="$3" + + if ! command -v python3 >/dev/null 2>&1; then + die "python3 is required to merge Codex hooks" + fi + + python3 - "$hooks_file" "$template_file" "$runtime_root" <<'PY' +import json +import pathlib +import re +import sys + +hooks_file = pathlib.Path(sys.argv[1]) +template_file = pathlib.Path(sys.argv[2]) +runtime_root = sys.argv[3] + +template_text = template_file.read_text(encoding="utf-8") +template_text = template_text.replace("{{HUMANIZE_RUNTIME_ROOT}}", runtime_root) +template = json.loads(template_text) + +existing = {} +if hooks_file.exists(): + with hooks_file.open("r", encoding="utf-8") as fh: + existing = json.load(fh) + +if not isinstance(existing, dict): + raise SystemExit(f"existing hooks config must be a JSON object: {hooks_file}") + +hooks = existing.setdefault("hooks", {}) +if not isinstance(hooks, dict): + raise SystemExit(f"existing hooks config has invalid 'hooks' object: {hooks_file}") + +stop_groups = hooks.get("Stop", []) +if stop_groups is None: + stop_groups = [] +if not isinstance(stop_groups, list): + raise SystemExit(f"existing hooks config has invalid Stop array: {hooks_file}") + +managed_pattern = re.compile(r"(^|/)humanize/hooks/(loop-codex-stop-hook\.sh|pr-loop-stop-hook\.sh)$") + +filtered_groups = [] +for group in stop_groups: + if not isinstance(group, dict): + filtered_groups.append(group) + continue + group_hooks = group.get("hooks") + if not 
isinstance(group_hooks, list): + filtered_groups.append(group) + continue + kept_hooks = [] + for hook in group_hooks: + if not isinstance(hook, dict): + kept_hooks.append(hook) + continue + command = hook.get("command") + if isinstance(command, str) and managed_pattern.search(command): + continue + kept_hooks.append(hook) + if kept_hooks: + new_group = dict(group) + new_group["hooks"] = kept_hooks + filtered_groups.append(new_group) + +managed_stop_groups = template.get("hooks", {}).get("Stop", []) +filtered_groups.extend(managed_stop_groups) +hooks["Stop"] = filtered_groups + +if not existing.get("description"): + existing["description"] = template.get("description", "Humanize Codex Hooks") + +hooks_file.parent.mkdir(parents=True, exist_ok=True) +hooks_file.write_text(json.dumps(existing, indent=2) + "\n", encoding="utf-8") +PY +} + +enable_feature() { + local config_dir="$1" + + [[ "$ENABLE_FEATURE" == "true" ]] || return 0 + + if CODEX_HOME="$config_dir" codex features enable codex_hooks >/dev/null 2>&1; then + log "enabled codex_hooks feature in $config_dir/config.toml" + else + die "failed to enable codex_hooks feature automatically in $config_dir/config.toml" + fi +} + +log "codex config dir: $CODEX_CONFIG_DIR" +log "runtime root: $RUNTIME_ROOT" +log "hooks file: $HOOKS_FILE" + +require_codex_hooks_support + +if [[ "$DRY_RUN" == "true" ]]; then + log "DRY-RUN merge $HOOKS_TEMPLATE -> $HOOKS_FILE" + if [[ "$ENABLE_FEATURE" == "true" ]]; then + log "DRY-RUN enable codex_hooks feature in $CODEX_CONFIG_DIR/config.toml" + fi + exit 0 +fi + +merge_hooks_json "$HOOKS_FILE" "$HOOKS_TEMPLATE" "$RUNTIME_ROOT" +enable_feature "$CODEX_CONFIG_DIR" + +cat </skills//SKILL.md + # /scripts + if [[ -d "$candidate_root/skills" ]] && [[ -d "$candidate_root/scripts" ]]; then + SKILLS_SOURCE_ROOT="$candidate_root/skills" + RUNTIME_SOURCE_ROOT="$candidate_root" + return 0 + fi + + # Installed runtime layout: + # /humanize/scripts/install-skill.sh + # /humanize-gen-plan/SKILL.md + 
# /humanize-rlcr/SKILL.md + if [[ -d "$runtime_root/scripts" ]] && [[ -d "$runtime_root/hooks" ]] && [[ -d "$runtime_root/prompt-template" ]]; then + skills_root="$(cd "$runtime_root/.." && pwd)" + if [[ -f "$skills_root/humanize/SKILL.md" ]] && [[ -f "$skills_root/humanize-gen-plan/SKILL.md" ]] && [[ -f "$skills_root/humanize-refine-plan/SKILL.md" ]] && [[ -f "$skills_root/humanize-rlcr/SKILL.md" ]]; then + SKILLS_SOURCE_ROOT="$skills_root" + RUNTIME_SOURCE_ROOT="$runtime_root" + return 0 + fi + fi + + die "could not resolve Humanize source layout from: $candidate_root" +} + sync_dir() { local src="$1" local dst="$2" @@ -107,7 +147,7 @@ sync_dir() { sync_one_skill() { local skill="$1" local target_dir="$2" - local src="$REPO_ROOT/skills/$skill" + local src="$SKILLS_SOURCE_ROOT/$skill" local dst="$target_dir/$skill" sync_dir "$src" "$dst" } @@ -120,7 +160,7 @@ install_runtime_bundle() { log "syncing runtime bundle into: $runtime_root" for component in scripts hooks prompt-template templates config agents; do - sync_dir "$REPO_ROOT/$component" "$runtime_root/$component" + sync_dir "$RUNTIME_SOURCE_ROOT/$component" "$runtime_root/$component" done } @@ -192,6 +232,7 @@ strip_claude_specific_frontmatter() { sync_target() { local label="$1" local target_dir="$2" + local selected_skills=("${SKILL_NAMES[@]}") log "target: $label" log "skills dir: $target_dir" @@ -200,7 +241,7 @@ sync_target() { mkdir -p "$target_dir" fi - for skill in "${SKILL_NAMES[@]}"; do + for skill in "${selected_skills[@]}"; do log "syncing [$label] skill: $skill" sync_one_skill "$skill" "$target_dir" done @@ -209,6 +250,140 @@ sync_target() { strip_claude_specific_frontmatter "$target_dir" } +install_codex_native_hooks() { + local target_dir="$1" + local runtime_root="$target_dir/humanize" + local hooks_installer="$REPO_ROOT/scripts/install-codex-hooks.sh" + local args=( + --codex-config-dir "$CODEX_CONFIG_DIR" + --runtime-root "$runtime_root" + ) + + [[ -x "$hooks_installer" ]] || die "missing 
Codex hooks installer: $hooks_installer" + [[ "$DRY_RUN" == "true" ]] && args+=(--dry-run) + + log "installing native Codex hooks into: $CODEX_CONFIG_DIR" + "$hooks_installer" "${args[@]}" +} + +install_codex_user_config() { + local runtime_root="$1" + local install_target="$2" + local user_config_dir="${HUMANIZE_USER_CONFIG_DIR}" + local user_config_file="$user_config_dir/config.json" + local default_config_file="$runtime_root/config/default_config.json" + + [[ -f "$default_config_file" ]] || die "missing default config: $default_config_file" + + if ! command -v python3 >/dev/null 2>&1; then + die "python3 is required to update Humanize user config for Codex installs" + fi + + if [[ "$DRY_RUN" == "true" ]]; then + log "DRY-RUN seed Codex-friendly BitLesson config in $user_config_file" + return + fi + + mkdir -p "$user_config_dir" + + python3 - "$default_config_file" "$user_config_file" "$install_target" <<'PY' +import json +import pathlib +import sys + +default_config = pathlib.Path(sys.argv[1]) +user_config = pathlib.Path(sys.argv[2]) +install_target = sys.argv[3] + +defaults = json.loads(default_config.read_text(encoding="utf-8")) +default_codex_model = defaults.get("codex_model") or "gpt-5.4" + +if user_config.exists(): + try: + data = json.loads(user_config.read_text(encoding="utf-8")) + except json.JSONDecodeError as exc: + print(f"malformed existing user config: {user_config}: {exc}", file=sys.stderr) + sys.exit(2) + if not isinstance(data, dict): + print(f"existing user config is not a JSON object: {user_config}", file=sys.stderr) + sys.exit(2) +else: + data = {} + +if not data.get("bitlesson_model"): + data["bitlesson_model"] = data.get("codex_model") or default_codex_model + +if install_target == "codex" and not data.get("provider_mode"): + data["provider_mode"] = "codex-only" + +user_config.write_text(json.dumps(data, indent=2, sort_keys=True) + "\n", encoding="utf-8") +PY + case "$?" 
in + 0) + log "ensured BitLesson uses a Codex/OpenAI model in $user_config_file" + ;; + 2) + die "failed to update $user_config_file because it is malformed; fix it manually and rerun install" + ;; + *) + die "failed to update Humanize user config at $user_config_file" + ;; + esac +} + +install_bitlesson_selector_shim() { + local primary_runtime_root="$1" + local secondary_runtime_root="${2:-}" + local shim_path="$COMMAND_BIN_DIR/bitlesson-selector" + + if [[ "$DRY_RUN" == "true" ]]; then + log "DRY-RUN install bitlesson-selector shim into $shim_path" + return + fi + + mkdir -p "$COMMAND_BIN_DIR" + + cat > "$shim_path" <> "$shim_path" <> "$shim_path" <<'EOF' +) + +for candidate in "${candidate_paths[@]}"; do + if [[ -x "$candidate" ]]; then + exec "$candidate" "$@" + fi +done + +echo "Error: Humanize bitlesson selector runtime not found. Re-run install-skill.sh." >&2 +exit 1 +EOF + + chmod +x "$shim_path" + log "installed bitlesson-selector shim into: $shim_path" +} + +install_kimi_target() { + sync_target "kimi" "$KIMI_SKILLS_DIR" +} + +install_codex_target() { + sync_target "codex" "$CODEX_SKILLS_DIR" + install_codex_user_config "$CODEX_SKILLS_DIR/humanize" "$TARGET" + install_codex_native_hooks "$CODEX_SKILLS_DIR" +} + while [[ $# -gt 0 ]]; do case "$1" in --target) @@ -239,6 +414,16 @@ while [[ $# -gt 0 ]]; do CODEX_SKILLS_DIR="$2" shift 2 ;; + --codex-config-dir) + [[ -n "${2:-}" ]] || die "--codex-config-dir requires a value" + CODEX_CONFIG_DIR="$2" + shift 2 + ;; + --command-bin-dir) + [[ -n "${2:-}" ]] || die "--command-bin-dir requires a value" + COMMAND_BIN_DIR="$2" + shift 2 + ;; --dry-run) DRY_RUN="true" shift @@ -253,6 +438,7 @@ while [[ $# -gt 0 ]]; do esac done +resolve_source_layout "$REPO_ROOT" validate_repo if [[ -n "$LEGACY_SKILLS_DIR" ]]; then @@ -273,18 +459,23 @@ if [[ "$TARGET" == "kimi" || "$TARGET" == "both" ]]; then fi if [[ "$TARGET" == "codex" || "$TARGET" == "both" ]]; then log "codex skills dir: $CODEX_SKILLS_DIR" + log "codex config 
dir: $CODEX_CONFIG_DIR" fi +log "command bin dir: $COMMAND_BIN_DIR" case "$TARGET" in kimi) - sync_target "kimi" "$KIMI_SKILLS_DIR" + install_kimi_target + install_bitlesson_selector_shim "$KIMI_SKILLS_DIR/humanize" ;; codex) - sync_target "codex" "$CODEX_SKILLS_DIR" + install_codex_target + install_bitlesson_selector_shim "$CODEX_SKILLS_DIR/humanize" "$KIMI_SKILLS_DIR/humanize" ;; both) - sync_target "kimi" "$KIMI_SKILLS_DIR" - sync_target "codex" "$CODEX_SKILLS_DIR" + install_kimi_target + install_codex_target + install_bitlesson_selector_shim "$CODEX_SKILLS_DIR/humanize" "$KIMI_SKILLS_DIR/humanize" ;; esac @@ -304,6 +495,7 @@ fi if [[ "$TARGET" == "codex" || "$TARGET" == "both" ]]; then cat </humanize +Codex installs also update native hook/config state in: + $CODEX_CONFIG_DIR + No shell profile changes were made. +If $COMMAND_BIN_DIR is on PATH, the bitlesson-selector shim is now available there. EOF diff --git a/scripts/rlcr-stop-gate.sh b/scripts/rlcr-stop-gate.sh index 306f875c..c707941c 100755 --- a/scripts/rlcr-stop-gate.sh +++ b/scripts/rlcr-stop-gate.sh @@ -24,6 +24,8 @@ HOOK_SCRIPT="$HUMANIZE_ROOT/hooks/loop-codex-stop-hook.sh" SESSION_ID="${CLAUDE_SESSION_ID:-}" TRANSCRIPT_PATH="${CLAUDE_TRANSCRIPT_PATH:-}" PRINT_JSON="false" +HOOK_MODEL="${CODEX_MODEL:-humanize-skill-gate}" +HOOK_PERMISSION_MODE="${CODEX_PERMISSION_MODE:-default}" usage() { cat <<'EOF' @@ -88,10 +90,15 @@ HOOK_INPUT=$(jq -n \ --arg session_id "$SESSION_ID" \ --arg transcript_path "$TRANSCRIPT_PATH" \ --arg cwd "$PROJECT_ROOT" \ + --arg model "$HOOK_MODEL" \ + --arg permission_mode "$HOOK_PERMISSION_MODE" \ '{ hook_event_name: "Stop", stop_hook_active: false, cwd: $cwd, + model: $model, + permission_mode: $permission_mode, + last_assistant_message: null, session_id: ($session_id | select(length > 0)), transcript_path: ($transcript_path | select(length > 0)) }') diff --git a/skills/humanize-rlcr/SKILL.md b/skills/humanize-rlcr/SKILL.md index e65a05b6..d9873b47 100644 --- 
a/skills/humanize-rlcr/SKILL.md +++ b/skills/humanize-rlcr/SKILL.md @@ -1,21 +1,15 @@ --- name: humanize-rlcr -description: Start RLCR (Ralph-Loop with Codex Review) with hook-equivalent enforcement from skill mode by reusing the existing stop-hook logic. +description: Start RLCR (Ralph-Loop with Codex Review) on Codex using the native Stop hook. type: flow user-invocable: false disable-model-invocation: true --- -# Humanize RLCR Loop (Hook-Equivalent) +# Humanize RLCR Loop -Use this flow to run RLCR in environments without native hooks. -Do not re-implement review logic manually. Always call the RLCR stop gate wrapper: - -```bash -"{{HUMANIZE_RUNTIME_ROOT}}/scripts/rlcr-stop-gate.sh" -``` - -The wrapper executes `hooks/loop-codex-stop-hook.sh`, so skill-mode behavior stays aligned with hook-mode behavior. +Use this flow as the Codex entrypoint for RLCR. +Codex installs of Humanize require native hooks support and install the Humanize `Stop` hooks automatically. ## Runtime Root @@ -49,24 +43,13 @@ For each round: 4. Write required summary file: - Normal phase: `.humanize/rlcr//round--summary.md` - Finalize phase: `.humanize/rlcr//finalize-summary.md` -5. Run gate command: - -```bash -GATE_CMD=("{{HUMANIZE_RUNTIME_ROOT}}/scripts/rlcr-stop-gate.sh") -[[ -n "${CLAUDE_SESSION_ID:-}" ]] && GATE_CMD+=(--session-id "$CLAUDE_SESSION_ID") -[[ -n "${CLAUDE_TRANSCRIPT_PATH:-}" ]] && GATE_CMD+=(--transcript-path "$CLAUDE_TRANSCRIPT_PATH") -"${GATE_CMD[@]}" -GATE_EXIT=$? -``` - -6. Handle gate result: - - `0`: loop is allowed to exit (done). - - `10`: blocked by RLCR logic. Follow returned instructions exactly, continue next round. - - `20`: infrastructure error (wrapper/hook/runtime). Report error, do not fake completion. +5. Stop or exit normally. +6. Let the native Humanize `Stop` hook run automatically. +7. If the hook blocks exit, follow the returned instructions exactly and continue the next round. 
## What This Enforces -By routing through the stop-hook logic, this skill enforces: +The native Stop-hook path enforces: - state/schema validation (`current_round`, `max_iterations`, `review_started`, `base_branch`, etc.) - branch consistency checks @@ -86,8 +69,8 @@ By routing through the stop-hook logic, this skill enforces: ## Critical Rules 1. Never manually edit `state.md` or `finalize-state.md`. -2. Never skip a blocked gate result by declaring completion manually. -3. Never run ad-hoc `codex exec` / `codex review` in place of the gate for phase transitions. +2. Never skip a blocked hook result by declaring completion manually. +3. Never run ad-hoc `codex exec` / `codex review` in place of the hook-managed phase transitions. 4. Always use files generated by the loop (`round-*-prompt.md`, `round-*-review-result.md`) as source of truth. ## Options @@ -121,9 +104,6 @@ Review phase `codex review` runs with `gpt-5.4:high`. # Review-only mode /flow:humanize-rlcr --skip-impl - -# Load skill without auto-execution -/skill:humanize-rlcr ``` ## Cancel diff --git a/skills/humanize/SKILL.md b/skills/humanize/SKILL.md index 1b916306..b9a6ccd5 100644 --- a/skills/humanize/SKILL.md +++ b/skills/humanize/SKILL.md @@ -45,7 +45,7 @@ The RLCR (Ralph-Loop with Codex Review) loop has two phases: - Issues marked with `[P0-9]` severity markers - If issues found → AI fixes them and continues - If no issues → loop completes with Finalize Phase -- In skill mode, always run `{{HUMANIZE_RUNTIME_ROOT}}/scripts/rlcr-stop-gate.sh` to enforce hook-equivalent transitions and blocking +- On Codex CLI `0.114.0+` with `codex_hooks` enabled, Humanize installs a native `Stop` hook so exit gating runs automatically ### 2. 
PR Loop - Automated PR Review Handling @@ -80,10 +80,7 @@ Transforms a rough draft document into a structured implementation plan with: "{{HUMANIZE_RUNTIME_ROOT}}/scripts/setup-rlcr-loop.sh" --skip-impl ``` -```bash -# For each round, run the RLCR gate (required) -"{{HUMANIZE_RUNTIME_ROOT}}/scripts/rlcr-stop-gate.sh" -``` +After each round, write the required summary and stop/exit normally. Humanize's native Codex `Stop` hook handles review gating automatically. **Common Options:** - `--max N` - Maximum iterations before auto-stop (default: 42) @@ -207,7 +204,7 @@ The RLCR loop uses a Goal Tracker to prevent goal drift: 2. **Maintain Goal Tracker**: Keep goal-tracker.md up-to-date with progress 3. **Be thorough**: Include details about implementation, files changed, tests added 4. **No cheating**: Don't try to exit by editing state files or running cancel commands -5. **Run stop gate each round**: Use `scripts/rlcr-stop-gate.sh` instead of manual phase control +5. **Use the native Stop hook on Codex**: After writing the required summary, stop/exit normally so Codex runs the Humanize Stop hook 6. 
**Trust the process**: External review helps improve implementation quality ## Prerequisites diff --git a/tests/run-all-tests.sh b/tests/run-all-tests.sh index cd3fb58a..8c1a4d67 100755 --- a/tests/run-all-tests.sh +++ b/tests/run-all-tests.sh @@ -84,6 +84,7 @@ TEST_SUITES=( "test-task-tag-routing.sh" "test-config-merge.sh" "test-config-error-handling.sh" + "test-codex-hook-install.sh" "test-unified-codex-config.sh" "test-pr-loop-1-scripts.sh" "test-pr-loop-2-hooks.sh" diff --git a/tests/test-agent-teams.sh b/tests/test-agent-teams.sh index 27285561..9f5b3663 100755 --- a/tests/test-agent-teams.sh +++ b/tests/test-agent-teams.sh @@ -532,11 +532,18 @@ setup_mock_codex_impl_feedback() { mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << MOCK_EOF #!/bin/bash -if [[ "\$1" == "exec" ]]; then +subcommand="" +for arg in "\$@"; do + if [[ "\$arg" == "exec" || "\$arg" == "review" ]]; then + subcommand="\$arg" + break + fi +done +if [[ "\$subcommand" == "exec" ]]; then cat << 'REVIEW' $feedback REVIEW -elif [[ "\$1" == "review" ]]; then +elif [[ "\$subcommand" == "review" ]]; then echo "No issues found." 
fi MOCK_EOF @@ -550,9 +557,16 @@ setup_mock_codex_review_issues() { mkdir -p "$TEST_DIR/bin" cat > "$TEST_DIR/bin/codex" << MOCK_EOF #!/bin/bash -if [[ "\$1" == "exec" ]]; then +subcommand="" +for arg in "\$@"; do + if [[ "\$arg" == "exec" || "\$arg" == "review" ]]; then + subcommand="\$arg" + break + fi +done +if [[ "\$subcommand" == "exec" ]]; then echo "Should not be called in review phase" -elif [[ "\$1" == "review" ]]; then +elif [[ "\$subcommand" == "review" ]]; then cat << 'REVIEWOUT' $review_output REVIEWOUT diff --git a/tests/test-bitlesson-select-routing.sh b/tests/test-bitlesson-select-routing.sh index d3c205c3..113c92a5 100755 --- a/tests/test-bitlesson-select-routing.sh +++ b/tests/test-bitlesson-select-routing.sh @@ -26,6 +26,25 @@ create_mock_bitlesson() { EOF } +create_real_bitlesson() { + local dir="$1" + mkdir -p "$dir" + cat > "$dir/bitlesson.md" <<'EOF' +# BitLesson Knowledge Base +## Entries + +## Lesson: Avoid tracker drift +Lesson ID: BL-20260315-tracker-drift +Scope: goal-tracker.md +Problem Description: Tracker diverges from actual task status. +Root Cause: Status rows are not updated after verification. +Solution: Update tracker rows immediately after each verification step. +Constraints: Keep tracker edits minimal. +Validation Evidence: Verified in test fixture. 
+Source Rounds: 0 +EOF +} + # Helper: create a mock codex binary that outputs valid bitlesson-selector format create_mock_codex() { local bin_dir="$1" @@ -102,7 +121,7 @@ echo "--- Test 1: gpt-* model routes to codex ---" echo "" setup_test_dir -create_mock_bitlesson "$TEST_DIR" +create_real_bitlesson "$TEST_DIR" BIN_DIR="$TEST_DIR/bin" create_mock_codex "$BIN_DIR" mkdir -p "$TEST_DIR/.humanize" @@ -166,7 +185,7 @@ echo "--- Test 2: haiku model routes to claude ---" echo "" setup_test_dir -create_mock_bitlesson "$TEST_DIR" +create_real_bitlesson "$TEST_DIR" BIN_DIR="$TEST_DIR/bin" create_mock_claude "$BIN_DIR" mkdir -p "$TEST_DIR/.humanize" @@ -195,7 +214,7 @@ echo "--- Test 3: sonnet model routes to claude ---" echo "" setup_test_dir -create_mock_bitlesson "$TEST_DIR" +create_real_bitlesson "$TEST_DIR" BIN_DIR="$TEST_DIR/bin" create_mock_claude "$BIN_DIR" mkdir -p "$TEST_DIR/.humanize" @@ -224,7 +243,7 @@ echo "--- Test 4: OPUS (uppercase) model routes to claude ---" echo "" setup_test_dir -create_mock_bitlesson "$TEST_DIR" +create_real_bitlesson "$TEST_DIR" BIN_DIR="$TEST_DIR/bin" create_mock_claude "$BIN_DIR" mkdir -p "$TEST_DIR/.humanize" @@ -253,7 +272,7 @@ echo "--- Test 5: Unknown model exits non-zero with error ---" echo "" setup_test_dir -create_mock_bitlesson "$TEST_DIR" +create_real_bitlesson "$TEST_DIR" mkdir -p "$TEST_DIR/.humanize" printf '{"bitlesson_model": "unknown-xyz-model"}' > "$TEST_DIR/.humanize/config.json" @@ -279,7 +298,7 @@ echo "--- Test 6: gpt-* model with missing codex binary exits non-zero ---" echo "" setup_test_dir -create_mock_bitlesson "$TEST_DIR" +create_real_bitlesson "$TEST_DIR" mkdir -p "$TEST_DIR/.humanize" printf '{"bitlesson_model": "gpt-4o"}' > "$TEST_DIR/.humanize/config.json" # Use a bin dir that contains a stub claude but NOT codex. 
@@ -315,7 +334,7 @@ echo "--- Test 7: haiku model falls back to codex when claude binary is missing echo "" setup_test_dir -create_mock_bitlesson "$TEST_DIR" +create_real_bitlesson "$TEST_DIR" mkdir -p "$TEST_DIR/.humanize" printf '{"bitlesson_model": "haiku"}' > "$TEST_DIR/.humanize/config.json" # Use a bin dir that contains a stub codex but NOT claude. @@ -348,4 +367,102 @@ fi # Summary # ======================================== +echo "" +echo "--- Test 8: codex-only provider mode forces codex routing ---" +echo "" + +setup_test_dir +create_real_bitlesson "$TEST_DIR" +mkdir -p "$TEST_DIR/.humanize" +printf '{"bitlesson_model": "haiku", "codex_model": "gpt-5.4", "provider_mode": "codex-only"}' > "$TEST_DIR/.humanize/config.json" +FALLBACK_BIN="$TEST_DIR/fallback-bin" +create_mock_codex "$FALLBACK_BIN" + +exit_code=0 +stdout_out="" +stdout_out=$(CLAUDE_PROJECT_DIR="$TEST_DIR" XDG_CONFIG_HOME="$TEST_DIR/no-user" \ + PATH="$FALLBACK_BIN:$PATH" \ + bash "$BITLESSON_SELECT" \ + --task "Initialize tracker" \ + --paths "plans/plan.md" \ + --bitlesson-file "$TEST_DIR/bitlesson.md" 2>/dev/null) || exit_code=$? + +if [[ $exit_code -eq 0 ]] && echo "$stdout_out" | grep -q "mock codex"; then + pass "codex-only provider mode forces codex routing" +else + fail "codex-only provider mode forces codex routing" "exit=0 + mock codex rationale" "exit=$exit_code, stdout=$stdout_out" +fi + +echo "" +echo "--- Test 9: Placeholder BitLesson file short-circuits to NONE ---" +echo "" + +setup_test_dir +create_mock_bitlesson "$TEST_DIR" +mkdir -p "$TEST_DIR/.humanize" +printf '{"bitlesson_model": "gpt-5.4"}' > "$TEST_DIR/.humanize/config.json" + +exit_code=0 +stdout_out="" +stdout_out=$(CLAUDE_PROJECT_DIR="$TEST_DIR" XDG_CONFIG_HOME="$TEST_DIR/no-user" \ + PATH="$SAFE_BASE_PATH" \ + bash "$BITLESSON_SELECT" \ + --task "Any task" \ + --paths "README.md" \ + --bitlesson-file "$TEST_DIR/bitlesson.md" 2>/dev/null) || exit_code=$? 
+ +if [[ $exit_code -eq 0 ]] && echo "$stdout_out" | grep -q "LESSON_IDS: NONE" && echo "$stdout_out" | grep -q "no recorded lessons"; then + pass "Placeholder BitLesson file returns NONE without invoking a model" +else + fail "Placeholder BitLesson file returns NONE without invoking a model" "exit=0 + NONE rationale" "exit=$exit_code, stdout=$stdout_out" +fi + +echo "" +echo "--- Test 10: Codex selector disables hooks and avoids full-auto ---" +echo "" + +setup_test_dir +create_real_bitlesson "$TEST_DIR" +mkdir -p "$TEST_DIR/.humanize" +printf '{"bitlesson_model": "gpt-5.4"}' > "$TEST_DIR/.humanize/config.json" +CAPTURE_BIN="$TEST_DIR/capture-bin" +mkdir -p "$CAPTURE_BIN" +cat > "$CAPTURE_BIN/codex" <<'EOF' +#!/bin/bash +printf '%s\n' "$@" > "${TEST_CAPTURE_ARGS:?}" +cat > /dev/null +cat <<'OUT' +LESSON_IDS: BL-20260315-tracker-drift +RATIONALE: The tracker lesson directly matches the task. +OUT +EOF +chmod +x "$CAPTURE_BIN/codex" + +CAPTURE_ARGS="$TEST_DIR/codex-args.txt" +exit_code=0 +stdout_out="" +stdout_out=$(TEST_CAPTURE_ARGS="$CAPTURE_ARGS" CLAUDE_PROJECT_DIR="$TEST_DIR" XDG_CONFIG_HOME="$TEST_DIR/no-user" \ + PATH="$CAPTURE_BIN:$SAFE_BASE_PATH" \ + bash "$BITLESSON_SELECT" \ + --task "Update the goal tracker after verification" \ + --paths "goal-tracker.md" \ + --bitlesson-file "$TEST_DIR/bitlesson.md" 2>/dev/null) || exit_code=$? + +captured_args="$(cat "$CAPTURE_ARGS")" + +if [[ $exit_code -eq 0 ]] \ + && echo "$stdout_out" | grep -q "BL-20260315-tracker-drift" \ + && echo "$captured_args" | grep -q -- '--disable' \ + && echo "$captured_args" | grep -q -- 'codex_hooks' \ + && echo "$captured_args" | grep -q -- '--skip-git-repo-check' \ + && echo "$captured_args" | grep -q -- '--ephemeral' \ + && echo "$captured_args" | grep -q -- 'read-only' \ + && ! 
echo "$captured_args" | grep -q -- '--full-auto'; then + pass "Codex selector runs as a direct helper without hooks or full-auto" +else + fail "Codex selector runs as a direct helper without hooks or full-auto" \ + "exit=0 + direct-helper args" \ + "exit=$exit_code, stdout=$stdout_out, args=$captured_args" +fi + print_test_summary "Bitlesson Select Routing Test Summary" diff --git a/tests/test-codex-hook-install.sh b/tests/test-codex-hook-install.sh new file mode 100755 index 00000000..55fc71d3 --- /dev/null +++ b/tests/test-codex-hook-install.sh @@ -0,0 +1,340 @@ +#!/bin/bash +# +# Tests for Codex-native hook installation and merge behavior. +# + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +source "$SCRIPT_DIR/test-helpers.sh" + +INSTALL_SCRIPT="$PROJECT_ROOT/scripts/install-skill.sh" + +echo "==========================================" +echo "Codex Hook Install Tests" +echo "==========================================" +echo "" + +if [[ ! -x "$INSTALL_SCRIPT" ]]; then + echo "FATAL: install-skill.sh not found at $INSTALL_SCRIPT" >&2 + exit 1 +fi + +if ! 
command -v python3 >/dev/null 2>&1; then + echo "FATAL: python3 is required for this test" >&2 + exit 1 +fi + +setup_test_dir + +FAKE_BIN="$TEST_DIR/bin" +CODEX_HOME_DIR="$TEST_DIR/codex-home" +HOOKS_FILE="$CODEX_HOME_DIR/hooks.json" +FEATURE_LOG="$TEST_DIR/codex-features.log" +XDG_CONFIG_HOME_DIR="$TEST_DIR/xdg-config" +HUMANIZE_USER_CONFIG="$XDG_CONFIG_HOME_DIR/humanize/config.json" +COMMAND_BIN_DIR="$TEST_DIR/command-bin" +mkdir -p "$FAKE_BIN" "$CODEX_HOME_DIR" "$COMMAND_BIN_DIR" + +cat > "$FAKE_BIN/codex" <<'EOF' +#!/bin/bash +set -euo pipefail + +if [[ "${1:-}" == "features" && "${2:-}" == "list" ]]; then + cat <<'LIST' +codex_hooks under development false +LIST + exit 0 +fi + +if [[ "${1:-}" == "features" && "${2:-}" == "enable" && "${3:-}" == "codex_hooks" ]]; then + printf 'CODEX_HOME=%s\n' "${CODEX_HOME:-}" >> "${TEST_CODEX_FEATURE_LOG:?}" + mkdir -p "${CODEX_HOME:?}" + : > "${CODEX_HOME}/.codex-hooks-enabled" + exit 0 +fi + +if [[ "${1:-}" == "exec" ]]; then + cat <<'OUT' +LESSON_IDS: NONE +RATIONALE: No matching lessons found (fake codex exec). 
+OUT + exit 0 +fi + +echo "unexpected fake codex invocation: $*" >&2 +exit 1 +EOF +chmod +x "$FAKE_BIN/codex" + +cat > "$HOOKS_FILE" <<'EOF' +{ + "description": "Existing hooks", + "hooks": { + "SessionStart": [ + { + "hooks": [ + { + "type": "command", + "command": "/custom/session-start.sh", + "timeout": 15 + } + ] + } + ], + "Stop": [ + { + "hooks": [ + { + "type": "command", + "command": "/tmp/old/skills/humanize/hooks/loop-codex-stop-hook.sh", + "timeout": 30 + }, + { + "type": "command", + "command": "/tmp/old/skills/humanize/hooks/pr-loop-stop-hook.sh", + "timeout": 30 + } + ] + }, + { + "hooks": [ + { + "type": "command", + "command": "/custom/keep-me.sh", + "timeout": 5 + } + ] + } + ] + } +} +EOF + +PATH="$FAKE_BIN:$PATH" TEST_CODEX_FEATURE_LOG="$FEATURE_LOG" XDG_CONFIG_HOME="$XDG_CONFIG_HOME_DIR" \ + "$INSTALL_SCRIPT" \ + --target codex \ + --codex-config-dir "$CODEX_HOME_DIR" \ + --codex-skills-dir "$CODEX_HOME_DIR/skills" \ + --command-bin-dir "$COMMAND_BIN_DIR" \ + > "$TEST_DIR/install.log" 2>&1 + +if [[ -f "$CODEX_HOME_DIR/skills/humanize/SKILL.md" ]]; then + pass "Codex install syncs Humanize skill bundle" +else + fail "Codex install syncs Humanize skill bundle" "skills/humanize/SKILL.md exists" "missing" +fi + +if [[ -f "$CODEX_HOME_DIR/skills/humanize-rlcr/SKILL.md" ]]; then + pass "Codex install keeps humanize-rlcr entrypoint skill" +else + fail "Codex install keeps humanize-rlcr entrypoint skill" "skills/humanize-rlcr/SKILL.md exists" "missing" +fi + +if [[ -f "$HOOKS_FILE" ]]; then + pass "Codex install writes hooks.json" +else + fail "Codex install writes hooks.json" "$HOOKS_FILE exists" "missing" +fi + +if [[ -f "$CODEX_HOME_DIR/.codex-hooks-enabled" ]]; then + pass "Codex install enables codex_hooks feature" +else + fail "Codex install enables codex_hooks feature" ".codex-hooks-enabled marker exists" "missing" +fi + +if [[ -f "$HUMANIZE_USER_CONFIG" ]]; then + pass "Codex install writes Humanize user config" +else + fail "Codex install 
writes Humanize user config" "$HUMANIZE_USER_CONFIG exists" "missing" +fi + +if [[ -x "$COMMAND_BIN_DIR/bitlesson-selector" ]]; then + pass "Codex install writes a PATH-ready bitlesson-selector shim" +else + fail "Codex install writes a PATH-ready bitlesson-selector shim" "$COMMAND_BIN_DIR/bitlesson-selector exists" "missing" +fi + +if [[ "$(jq -r '.bitlesson_model // empty' "$HUMANIZE_USER_CONFIG")" == "gpt-5.4" ]]; then + pass "Codex install seeds bitlesson_model with a Codex/OpenAI model" +else + fail "Codex install seeds bitlesson_model with a Codex/OpenAI model" \ + "gpt-5.4" "$(jq -c '.' "$HUMANIZE_USER_CONFIG" 2>/dev/null || echo MISSING)" +fi + +if [[ "$(jq -r '.provider_mode // empty' "$HUMANIZE_USER_CONFIG")" == "codex-only" ]]; then + pass "Codex install marks Humanize user config as codex-only" +else + fail "Codex install marks Humanize user config as codex-only" \ + "codex-only" "$(jq -c '.' "$HUMANIZE_USER_CONFIG" 2>/dev/null || echo MISSING)" +fi + +runtime_root="$CODEX_HOME_DIR/skills/humanize" +PY_OUTPUT="$( + python3 - "$HOOKS_FILE" "$runtime_root" <<'PY' +import json +import pathlib +import sys + +hooks_file = pathlib.Path(sys.argv[1]) +runtime_root = sys.argv[2] +data = json.loads(hooks_file.read_text(encoding="utf-8")) + +commands = [] +for group in data["hooks"]["Stop"]: + for hook in group.get("hooks", []): + command = hook.get("command") + if isinstance(command, str): + commands.append(command) + +expected = { + f"{runtime_root}/hooks/loop-codex-stop-hook.sh", + f"{runtime_root}/hooks/pr-loop-stop-hook.sh", +} + +print("FOUND=" + ("1" if expected.issubset(set(commands)) else "0")) +print("KEEP=" + ("1" if "/custom/keep-me.sh" in commands else "0")) +print("OLD=" + ("1" if any("/tmp/old/skills/humanize/hooks/" in cmd for cmd in commands) else "0")) +print("SESSION=" + ("1" if data["hooks"]["SessionStart"][0]["hooks"][0]["command"] == "/custom/session-start.sh" else "0")) +print("COUNT=" + str(sum(1 for cmd in commands if "/humanize/hooks/" in 
cmd))) +PY +)" + +if grep -q '^FOUND=1$' <<<"$PY_OUTPUT"; then + pass "Codex install adds managed Humanize Stop hook commands" +else + fail "Codex install adds managed Humanize Stop hook commands" "FOUND=1" "$PY_OUTPUT" +fi + +if grep -q '^KEEP=1$' <<<"$PY_OUTPUT"; then + pass "Codex install preserves unrelated Stop hooks" +else + fail "Codex install preserves unrelated Stop hooks" "KEEP=1" "$PY_OUTPUT" +fi + +if grep -q '^OLD=0$' <<<"$PY_OUTPUT"; then + pass "Codex install removes stale Humanize hook commands" +else + fail "Codex install removes stale Humanize hook commands" "OLD=0" "$PY_OUTPUT" +fi + +if grep -q '^SESSION=1$' <<<"$PY_OUTPUT"; then + pass "Codex install preserves SessionStart hooks" +else + fail "Codex install preserves SessionStart hooks" "SESSION=1" "$PY_OUTPUT" +fi + +if grep -q '^COUNT=2$' <<<"$PY_OUTPUT"; then + pass "Codex install writes exactly two managed Humanize Stop hooks" +else + fail "Codex install writes exactly two managed Humanize Stop hooks" "COUNT=2" "$PY_OUTPUT" +fi + +mkdir -p "$TEST_DIR/project" +cat > "$TEST_DIR/project/bitlesson.md" <<'EOF' +# BitLesson Knowledge Base +## Entries + +EOF + +shim_output="$( + CLAUDE_PROJECT_DIR="$TEST_DIR/project" \ + XDG_CONFIG_HOME="$XDG_CONFIG_HOME_DIR" \ + PATH="$COMMAND_BIN_DIR:$FAKE_BIN:$PATH" \ + "$COMMAND_BIN_DIR/bitlesson-selector" \ + --task "Verify the shim dispatches into the installed runtime" \ + --paths "README.md" \ + --bitlesson-file "$TEST_DIR/project/bitlesson.md" +)" + +if grep -q '^LESSON_IDS: NONE$' <<<"$shim_output"; then + pass "bitlesson-selector shim dispatches into installed runtime" +else + fail "bitlesson-selector shim dispatches into installed runtime" "LESSON_IDS: NONE" "$shim_output" +fi + +PATH="$FAKE_BIN:$PATH" TEST_CODEX_FEATURE_LOG="$FEATURE_LOG" XDG_CONFIG_HOME="$XDG_CONFIG_HOME_DIR" \ + "$INSTALL_SCRIPT" \ + --target codex \ + --codex-config-dir "$CODEX_HOME_DIR" \ + --codex-skills-dir "$CODEX_HOME_DIR/skills" \ + > "$TEST_DIR/install-2.log" 2>&1 + 
+PY_OUTPUT_2="$( + python3 - "$HOOKS_FILE" <<'PY' +import json +import pathlib +import sys + +hooks_file = pathlib.Path(sys.argv[1]) +data = json.loads(hooks_file.read_text(encoding="utf-8")) + +commands = [] +for group in data["hooks"]["Stop"]: + for hook in group.get("hooks", []): + command = hook.get("command") + if isinstance(command, str): + commands.append(command) + +print(sum(1 for cmd in commands if "/humanize/hooks/" in cmd)) +PY +)" + +if [[ "$PY_OUTPUT_2" == "2" ]]; then + pass "Codex install is idempotent for managed hook commands" +else + fail "Codex install is idempotent for managed hook commands" "2" "$PY_OUTPUT_2" +fi + +if [[ "$(wc -l < "$FEATURE_LOG" | tr -d ' ')" == "2" ]]; then + pass "Codex feature enable runs on each Codex install/update" +else + fail "Codex feature enable runs on each Codex install/update" "2 log entries" "$(cat "$FEATURE_LOG")" +fi + +UNSUPPORTED_BIN="$TEST_DIR/bin-unsupported" +UNSUPPORTED_HOME="$TEST_DIR/codex-home-unsupported" +mkdir -p "$UNSUPPORTED_BIN" "$UNSUPPORTED_HOME" + +cat > "$UNSUPPORTED_BIN/codex" <<'EOF' +#!/bin/bash +set -euo pipefail + +if [[ "${1:-}" == "features" && "${2:-}" == "list" ]]; then + cat <<'LIST' +apply_patch_freeform under development false +LIST + exit 0 +fi + +echo "unexpected fake codex invocation: $*" >&2 +exit 1 +EOF +chmod +x "$UNSUPPORTED_BIN/codex" + +set +e +PATH="$UNSUPPORTED_BIN:$PATH" \ + "$INSTALL_SCRIPT" \ + --target codex \ + --codex-config-dir "$UNSUPPORTED_HOME" \ + --codex-skills-dir "$UNSUPPORTED_HOME/skills" \ + > "$TEST_DIR/install-unsupported.log" 2>&1 +UNSUPPORTED_EXIT=$? 
+set -e + +if [[ "$UNSUPPORTED_EXIT" -ne 0 ]]; then + pass "Codex install rejects builds without native hooks support" +else + fail "Codex install rejects builds without native hooks support" "non-zero exit" "exit 0" +fi + +if grep -q "codex_hooks feature" "$TEST_DIR/install-unsupported.log"; then + pass "Unsupported Codex failure explains missing codex_hooks feature" +else + fail "Unsupported Codex failure explains missing codex_hooks feature" \ + "error mentioning codex_hooks feature" \ + "$(cat "$TEST_DIR/install-unsupported.log")" +fi + +print_test_summary "Codex Hook Install Tests" diff --git a/tests/test-disable-nested-codex-hooks.sh b/tests/test-disable-nested-codex-hooks.sh new file mode 100644 index 00000000..ae0f8bba --- /dev/null +++ b/tests/test-disable-nested-codex-hooks.sh @@ -0,0 +1,213 @@ +#!/bin/bash +# +# Ensure Humanize's nested Codex reviewer calls disable native hooks to avoid recursion. +# + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" + +GREEN='\033[0;32m' +RED='\033[0;31m' +NC='\033[0m' +TESTS_PASSED=0 +TESTS_FAILED=0 + +pass() { + echo -e "${GREEN}PASS${NC}: $1" + TESTS_PASSED=$((TESTS_PASSED + 1)) +} + +fail() { + echo -e "${RED}FAIL${NC}: $1" + echo " Expected: $2" + echo " Got: $3" + TESTS_FAILED=$((TESTS_FAILED + 1)) +} + +echo "==========================================" +echo "Disable Nested Codex Hooks Tests" +echo "==========================================" +echo "" + +TEST_DIR="$(mktemp -d)" +trap 'rm -rf "$TEST_DIR"' EXIT + +export XDG_CACHE_HOME="$TEST_DIR/.cache" +mkdir -p "$XDG_CACHE_HOME" + +STOP_HOOK="$PROJECT_ROOT/hooks/loop-codex-stop-hook.sh" +PR_STOP_HOOK="$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" + +setup_repo() { + local repo_dir="$1" + + mkdir -p "$repo_dir" + cd "$repo_dir" + git init -q + git config user.email "test@test.com" + git config user.name "Test User" + git config commit.gpgsign false + + cat > .gitignore <<'EOF' +.humanize/ +plans/ +.cache/ +EOF + mkdir -p plans + cat > plans/test-plan.md <<'EOF' +# Test Plan +EOF + echo "init" > init.txt + git add .gitignore init.txt + git -c commit.gpgsign=false commit -q -m "initial" +} + +setup_mock_codex() { + local bin_dir="$1" + local args_file="$2" + + mkdir -p "$bin_dir" + cat > "$bin_dir/codex" <<EOF +#!/bin/bash +printf '%s\n' "\$*" > "$args_file" + +subcommand="" +for arg in "\$@"; do + if [[ "\$arg" == "exec" || "\$arg" == "review" ]]; then + subcommand="\$arg" + break + fi +done + +if [[ "\$subcommand" == "exec" ]]; then + echo "Review: keep iterating." + exit 0 +fi + +if [[ "\$subcommand" == "review" ]]; then + echo "No issues found." 
+ exit 0 +fi + +echo "unexpected codex args: \$*" >&2 +exit 1 +EOF + chmod +x "$bin_dir/codex" +} + +setup_loop_dir() { + local repo_dir="$1" + local review_started="$2" + local loop_dir="$repo_dir/.humanize/rlcr/2026-03-14_12-00-00" + local current_branch + local base_commit + + current_branch="$(git -C "$repo_dir" rev-parse --abbrev-ref HEAD)" + base_commit="$(git -C "$repo_dir" rev-parse HEAD)" + + mkdir -p "$loop_dir" + cat > "$loop_dir/state.md" < "$loop_dir/goal-tracker.md" <<'EOF' +# Goal Tracker +## IMMUTABLE SECTION +### Ultimate Goal +Test nested codex disable +### Acceptance Criteria +- AC-1: Hook can run + +## MUTABLE SECTION +### Active Tasks +- Verify hook argv +EOF + + cat > "$loop_dir/round-1-summary.md" <<'EOF' +# Round Summary +Implemented initial changes. +EOF + + if [[ "$review_started" == "true" ]]; then + echo "build_finish_round=1" > "$loop_dir/.review-phase-started" + fi +} + +run_loop_hook() { + local repo_dir="$1" + local args_file="$2" + local review_started="$3" + local bin_dir="$TEST_DIR/bin-${review_started}" + + setup_mock_codex "$bin_dir" "$args_file" + setup_loop_dir "$repo_dir" "$review_started" + + set +e + OUTPUT=$(echo '{}' | PATH="$bin_dir:$PATH" CLAUDE_PROJECT_DIR="$repo_dir" bash "$STOP_HOOK" 2>&1) + EXIT_CODE=$? 
+ set -e + + if [[ $EXIT_CODE -ne 0 ]]; then + fail "loop hook completes in $review_started mode" "exit 0" "exit=$EXIT_CODE output=$OUTPUT" + return + fi +} + +REPO_IMPL="$TEST_DIR/repo-impl" +setup_repo "$REPO_IMPL" +run_loop_hook "$REPO_IMPL" "$TEST_DIR/impl.args" "false" + +if grep -q -- '--disable codex_hooks exec' "$TEST_DIR/impl.args"; then + pass "implementation-phase stop hook disables codex_hooks for codex exec" +else + fail "implementation-phase stop hook disables codex_hooks for codex exec" \ + "--disable codex_hooks exec" "$(cat "$TEST_DIR/impl.args" 2>/dev/null || echo missing)" +fi + +REPO_REVIEW="$TEST_DIR/repo-review" +setup_repo "$REPO_REVIEW" +run_loop_hook "$REPO_REVIEW" "$TEST_DIR/review.args" "true" + +if grep -q -- '--disable codex_hooks review' "$TEST_DIR/review.args"; then + pass "review-phase stop hook disables codex_hooks for codex review" +else + fail "review-phase stop hook disables codex_hooks for codex review" \ + "--disable codex_hooks review" "$(cat "$TEST_DIR/review.args" 2>/dev/null || echo missing)" +fi + +if grep -q 'codex "\${CODEX_DISABLE_HOOKS_ARGS\[@\]}" exec' "$PR_STOP_HOOK"; then + pass "PR stop hook disables codex_hooks for nested codex exec" +else + fail "PR stop hook disables codex_hooks for nested codex exec" \ + 'codex "${CODEX_DISABLE_HOOKS_ARGS[@]}" exec' "not found" +fi + +echo "" +echo "========================================" +echo "Disable Nested Codex Hooks Tests" +echo "========================================" +echo "Passed: $TESTS_PASSED" +echo "Failed: $TESTS_FAILED" + +if [[ $TESTS_FAILED -ne 0 ]]; then + exit 1 +fi diff --git a/tests/test-finalize-phase.sh b/tests/test-finalize-phase.sh index 96890a41..4eaef4b6 100755 --- a/tests/test-finalize-phase.sh +++ b/tests/test-finalize-phase.sh @@ -57,11 +57,18 @@ setup_mock_codex() { cat > "$TEST_DIR/bin/codex" << EOF #!/bin/bash # Mock codex - outputs the provided content -if [[ "\$1" == "exec" ]]; then +subcommand="" +for arg in "\$@"; do + if [[ "\$arg" == 
"exec" || "\$arg" == "review" ]]; then + subcommand="\$arg" + break + fi +done +if [[ "\$subcommand" == "exec" ]]; then cat << 'REVIEW' $output REVIEW -elif [[ "\$1" == "review" ]]; then +elif [[ "\$subcommand" == "review" ]]; then # Handle codex review command cat << 'REVIEWOUT' $review_output @@ -82,11 +89,18 @@ setup_mock_codex_with_tracking() { #!/bin/bash # Track that codex was called echo "CODEX_WAS_CALLED" > "$TEST_DIR/codex_called.marker" -if [[ "\$1" == "exec" ]]; then +subcommand="" +for arg in "\$@"; do + if [[ "\$arg" == "exec" || "\$arg" == "review" ]]; then + subcommand="\$arg" + break + fi +done +if [[ "\$subcommand" == "exec" ]]; then cat << 'REVIEW' $output REVIEW -elif [[ "\$1" == "review" ]]; then +elif [[ "\$subcommand" == "review" ]]; then cat << 'REVIEWOUT' $review_output REVIEWOUT @@ -106,11 +120,18 @@ setup_mock_codex_review_failure() { cat > "$TEST_DIR/bin/codex" << EOF #!/bin/bash # Mock codex - fails on review command -if [[ "\$1" == "exec" ]]; then +subcommand="" +for arg in "\$@"; do + if [[ "\$arg" == "exec" || "\$arg" == "review" ]]; then + subcommand="\$arg" + break + fi +done +if [[ "\$subcommand" == "exec" ]]; then cat << 'REVIEW' $exec_output REVIEW -elif [[ "\$1" == "review" ]]; then +elif [[ "\$subcommand" == "review" ]]; then # Simulate failure with non-zero exit echo "Error: Codex review failed" >&2 exit $review_exit_code @@ -128,11 +149,18 @@ setup_mock_codex_review_empty_stdout() { cat > "$TEST_DIR/bin/codex" << EOF #!/bin/bash # Mock codex - produces empty stdout on review -if [[ "\$1" == "exec" ]]; then +subcommand="" +for arg in "\$@"; do + if [[ "\$arg" == "exec" || "\$arg" == "review" ]]; then + subcommand="\$arg" + break + fi +done +if [[ "\$subcommand" == "exec" ]]; then cat << 'REVIEW' $exec_output REVIEW -elif [[ "\$1" == "review" ]]; then +elif [[ "\$subcommand" == "review" ]]; then # Exit successfully but produce no output exit 0 fi diff --git a/tests/test-task-tag-routing.sh b/tests/test-task-tag-routing.sh index 
ae9365f7..24871e00 100755 --- a/tests/test-task-tag-routing.sh +++ b/tests/test-task-tag-routing.sh @@ -28,14 +28,21 @@ create_mock_codex() { mkdir -p "$bin_dir" cat > "$bin_dir/codex" << MOCK_EOF #!/bin/bash -if [[ "\$1" == "exec" ]]; then +subcommand="" +for arg in "\$@"; do + if [[ "\$arg" == "exec" || "\$arg" == "review" ]]; then + subcommand="\$arg" + break + fi +done +if [[ "\$subcommand" == "exec" ]]; then cat << 'OUT' $exec_output OUT -elif [[ "\$1" == "review" ]]; then +elif [[ "\$subcommand" == "review" ]]; then echo "No issues found." else - echo "mock-codex: unsupported command \$1" >&2 + echo "mock-codex: unsupported command \$*" >&2 exit 1 fi MOCK_EOF From dbb88bcadee1713be65529c88534836541408b85 Mon Sep 17 00:00:00 2001 From: Zhou Yaoyang Date: Fri, 27 Mar 2026 00:56:46 +0800 Subject: [PATCH 24/50] fixed tests for bitlesson-select-routing --- tests/test-bitlesson-select-routing.sh | 37 +++++++++++++++++++------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/tests/test-bitlesson-select-routing.sh b/tests/test-bitlesson-select-routing.sh index 113c92a5..dee42a5f 100755 --- a/tests/test-bitlesson-select-routing.sh +++ b/tests/test-bitlesson-select-routing.sh @@ -45,6 +45,25 @@ Source Rounds: 0 EOF } +create_real_humanize_bitlesson() { + local dir="$1" + mkdir -p "$dir/.humanize" + cat > "$dir/.humanize/bitlesson.md" <<'EOF' +# BitLesson Knowledge Base +## Entries + +## Lesson: Avoid tracker drift +Lesson ID: BL-20260315-tracker-drift +Scope: goal-tracker.md +Problem Description: Tracker diverges from actual task status. +Root Cause: Status rows are not updated after verification. +Solution: Update tracker rows immediately after each verification step. +Constraints: Keep tracker edits minimal. +Validation Evidence: Verified in test fixture. 
+Source Rounds: 0 +EOF +} + # Helper: create a mock codex binary that outputs valid bitlesson-selector format create_mock_codex() { local bin_dir="$1" @@ -121,7 +140,7 @@ echo "--- Test 1: gpt-* model routes to codex ---" echo "" setup_test_dir -create_real_bitlesson "$TEST_DIR" +create_real_humanize_bitlesson "$TEST_DIR" BIN_DIR="$TEST_DIR/bin" create_mock_codex "$BIN_DIR" mkdir -p "$TEST_DIR/.humanize" @@ -150,7 +169,7 @@ echo "--- Test 1b: gpt-* codex path passes stdin prompt via trailing '-' ---" echo "" setup_test_dir -create_mock_bitlesson "$TEST_DIR" +create_real_humanize_bitlesson "$TEST_DIR" BIN_DIR="$TEST_DIR/bin" STDIN_FILE="$TEST_DIR/codex-stdin.txt" create_recording_mock_codex "$BIN_DIR" "$STDIN_FILE" @@ -185,7 +204,7 @@ echo "--- Test 2: haiku model routes to claude ---" echo "" setup_test_dir -create_real_bitlesson "$TEST_DIR" +create_real_humanize_bitlesson "$TEST_DIR" BIN_DIR="$TEST_DIR/bin" create_mock_claude "$BIN_DIR" mkdir -p "$TEST_DIR/.humanize" @@ -214,7 +233,7 @@ echo "--- Test 3: sonnet model routes to claude ---" echo "" setup_test_dir -create_real_bitlesson "$TEST_DIR" +create_real_humanize_bitlesson "$TEST_DIR" BIN_DIR="$TEST_DIR/bin" create_mock_claude "$BIN_DIR" mkdir -p "$TEST_DIR/.humanize" @@ -243,7 +262,7 @@ echo "--- Test 4: OPUS (uppercase) model routes to claude ---" echo "" setup_test_dir -create_real_bitlesson "$TEST_DIR" +create_real_humanize_bitlesson "$TEST_DIR" BIN_DIR="$TEST_DIR/bin" create_mock_claude "$BIN_DIR" mkdir -p "$TEST_DIR/.humanize" @@ -272,7 +291,7 @@ echo "--- Test 5: Unknown model exits non-zero with error ---" echo "" setup_test_dir -create_real_bitlesson "$TEST_DIR" +create_real_humanize_bitlesson "$TEST_DIR" mkdir -p "$TEST_DIR/.humanize" printf '{"bitlesson_model": "unknown-xyz-model"}' > "$TEST_DIR/.humanize/config.json" @@ -298,7 +317,7 @@ echo "--- Test 6: gpt-* model with missing codex binary exits non-zero ---" echo "" setup_test_dir -create_real_bitlesson "$TEST_DIR" 
+create_real_humanize_bitlesson "$TEST_DIR" mkdir -p "$TEST_DIR/.humanize" printf '{"bitlesson_model": "gpt-4o"}' > "$TEST_DIR/.humanize/config.json" # Use a bin dir that contains a stub claude but NOT codex. @@ -334,7 +353,7 @@ echo "--- Test 7: haiku model falls back to codex when claude binary is missing echo "" setup_test_dir -create_real_bitlesson "$TEST_DIR" +create_real_humanize_bitlesson "$TEST_DIR" mkdir -p "$TEST_DIR/.humanize" printf '{"bitlesson_model": "haiku"}' > "$TEST_DIR/.humanize/config.json" # Use a bin dir that contains a stub codex but NOT claude. @@ -409,7 +428,7 @@ stdout_out=$(CLAUDE_PROJECT_DIR="$TEST_DIR" XDG_CONFIG_HOME="$TEST_DIR/no-user" bash "$BITLESSON_SELECT" \ --task "Any task" \ --paths "README.md" \ - --bitlesson-file "$TEST_DIR/bitlesson.md" 2>/dev/null) || exit_code=$? + --bitlesson-file "$TEST_DIR/.humanize/bitlesson.md" 2>/dev/null) || exit_code=$? if [[ $exit_code -eq 0 ]] && echo "$stdout_out" | grep -q "LESSON_IDS: NONE" && echo "$stdout_out" | grep -q "no recorded lessons"; then pass "Placeholder BitLesson file returns NONE without invoking a model" From 9ed3e1a953bf6761bb97fb21daa8ec5476b83587 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Sat, 28 Mar 2026 16:37:58 -0700 Subject: [PATCH 25/50] bump version of humanize --- .claude-plugin/marketplace.json | 2 +- .claude-plugin/plugin.json | 2 +- README.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 7c5e9bc4..588e3b1d 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -8,7 +8,7 @@ "name": "humanize", "source": "./", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. 
Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.15.0" + "version": "1.16.0" } ] } diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index ef503495..0f9ab32b 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "humanize", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.15.0", + "version": "1.16.0", "author": { "name": "humania-org" }, diff --git a/README.md b/README.md index 1d959f31..dd961c9a 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Humanize -**Current Version: 1.15.0** +**Current Version: 1.16.0** > Derived from the [GAAC (GitHub-as-a-Context)](https://github.com/SihaoLiu/gaac) project. 
From 611a437ae3026ab1ec16fbd8f1d97899f35b793d Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Sat, 28 Mar 2026 17:01:18 -0700 Subject: [PATCH 26/50] Fix codex_hooks flag probe, managed-hook regex, and legacy compat - Guard --disable codex_hooks behind a feature probe so older Codex builds that lack this flag do not fail with an unknown-argument error - Widen managed-hook cleanup regex to match any runtime root, not just the hardcoded humanize/hooks/ path, preserving idempotent installs - Make round contract enforcement conditional on drift_status presence so legacy loops without anti-drift fields are not blocked - Fix pre-existing test-stop-hook-legacy-compat Test 1b by adding a .humanize-old directory (global gitignore covers .humanize/ itself) - Add drift_status fields to test-plan-file-hooks fixture for contract enforcement coverage --- hooks/loop-codex-stop-hook.sh | 12 ++++++++++-- hooks/pr-loop-stop-hook.sh | 7 ++++++- scripts/install-codex-hooks.sh | 2 +- tests/test-plan-file-hooks.sh | 3 +++ tests/test-stop-hook-legacy-compat.sh | 5 +++++ 5 files changed, 25 insertions(+), 4 deletions(-) diff --git a/hooks/loop-codex-stop-hook.sh b/hooks/loop-codex-stop-hook.sh index 87a7f060..5f40a1c9 100755 --- a/hooks/loop-codex-stop-hook.sh +++ b/hooks/loop-codex-stop-hook.sh @@ -757,7 +757,10 @@ fi # Check Round Contract Exists # ======================================== -if [[ "$IS_FINALIZE_PHASE" != "true" ]]; then +# Only enforce round contract when anti-drift is active (drift_status present in raw state). +# Legacy loops that pre-date the anti-drift feature will not have this field. +RAW_DRIFT_STATUS=$(echo "$RAW_FRONTMATTER" | grep "^drift_status:" || true) +if [[ "$IS_FINALIZE_PHASE" != "true" ]] && [[ -n "$RAW_DRIFT_STATUS" ]]; then if [[ ! 
-f "$ROUND_CONTRACT_FILE" ]]; then FALLBACK="# Round Contract Missing @@ -1060,7 +1063,12 @@ mkdir -p "$CACHE_DIR" # portable-timeout.sh already sourced above # Disable native hooks for nested Codex reviewer calls to prevent Stop-hook recursion. -CODEX_DISABLE_HOOKS_ARGS=(--disable codex_hooks) +# Probe whether the installed Codex CLI supports --disable; fall back to empty args +# so older builds do not fail with an unknown-argument error. +CODEX_DISABLE_HOOKS_ARGS=() +if codex --help 2>&1 | grep -q -- '--disable'; then + CODEX_DISABLE_HOOKS_ARGS=(--disable codex_hooks) +fi # Build command arguments for summary review (codex exec) CODEX_EXEC_ARGS=("-m" "$CODEX_EXEC_MODEL") diff --git a/hooks/pr-loop-stop-hook.sh b/hooks/pr-loop-stop-hook.sh index 93eac614..c87abee3 100755 --- a/hooks/pr-loop-stop-hook.sh +++ b/hooks/pr-loop-stop-hook.sh @@ -1335,7 +1335,12 @@ if [[ "${HUMANIZE_CODEX_BYPASS_SANDBOX:-}" == "true" ]] || [[ "${HUMANIZE_CODEX_ fi # Disable native hooks for nested Codex reviewer calls to prevent Stop-hook recursion. -CODEX_DISABLE_HOOKS_ARGS=(--disable codex_hooks) +# Probe whether the installed Codex CLI supports --disable; fall back to empty args +# so older builds do not fail with an unknown-argument error. 
+CODEX_DISABLE_HOOKS_ARGS=() +if codex --help 2>&1 | grep -q -- '--disable'; then + CODEX_DISABLE_HOOKS_ARGS=(--disable codex_hooks) +fi CODEX_ARGS+=("$CODEX_AUTO_FLAG" "-C" "$PROJECT_ROOT") diff --git a/scripts/install-codex-hooks.sh b/scripts/install-codex-hooks.sh index b6c32804..665eff46 100755 --- a/scripts/install-codex-hooks.sh +++ b/scripts/install-codex-hooks.sh @@ -123,7 +123,7 @@ if stop_groups is None: if not isinstance(stop_groups, list): raise SystemExit(f"existing hooks config has invalid Stop array: {hooks_file}") -managed_pattern = re.compile(r"(^|/)humanize/hooks/(loop-codex-stop-hook\.sh|pr-loop-stop-hook\.sh)$") +managed_pattern = re.compile(r"(^|/)hooks/(loop-codex-stop-hook\.sh|pr-loop-stop-hook\.sh)(\s|$)") filtered_groups = [] for group in stop_groups: diff --git a/tests/test-plan-file-hooks.sh b/tests/test-plan-file-hooks.sh index b15ca134..ec3eac06 100755 --- a/tests/test-plan-file-hooks.sh +++ b/tests/test-plan-file-hooks.sh @@ -130,6 +130,9 @@ plan_tracked: false start_branch: $CURRENT_BRANCH base_branch: $CURRENT_BRANCH review_started: false +mainline_stall_count: 0 +last_mainline_verdict: unknown +drift_status: normal --- EOF diff --git a/tests/test-stop-hook-legacy-compat.sh b/tests/test-stop-hook-legacy-compat.sh index 3527f75b..7fa1f449 100755 --- a/tests/test-stop-hook-legacy-compat.sh +++ b/tests/test-stop-hook-legacy-compat.sh @@ -163,6 +163,11 @@ echo "Test 1b: Untracked .humanizeconfig still blocks dirty checks" TEST1B_REPO="$TEST_DIR/test1b" create_stop_hook_fixture "$TEST1B_REPO" touch "$TEST1B_REPO/.humanizeconfig" +# Also create a .humanize-old directory to trigger the "Special Case" note. +# The .humanize/ directory itself may be covered by a global gitignore +# so it might not appear as untracked; .humanize-old/ is never globally ignored. +mkdir -p "$TEST1B_REPO/.humanize-old" +echo "legacy" > "$TEST1B_REPO/.humanize-old/legacy.txt" run_stop_hook "$TEST1B_REPO" if [[ "$RUN_EXIT_CODE" -eq 0 ]] && [[ ! 
-f "$RUN_MARKER" ]] && \ From 2a0faf6e3cec0db56e10fc126f8e9416d594a173 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Sat, 28 Mar 2026 17:10:55 -0700 Subject: [PATCH 27/50] Fix BSD sed portability, awk field splitting, and strict parser defaults - Replace GNU-only sed /I flag with portable grep -oEi for verdict extraction in extract_mainline_progress_verdict - Fix upsert_state_fields awk to split on first = only using index/substr, preventing silent value truncation on values containing = - Add STATE_PRIVACY_MODE default to parse_state_file_strict for parity with the tolerant parser - Cache codex --disable feature probe per loop to avoid running codex --help on every stop-hook invocation --- hooks/lib/loop-common.sh | 13 +++++++++---- hooks/loop-codex-stop-hook.sh | 10 ++++++++-- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index 797f6e31..a3283a02 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -532,6 +532,7 @@ parse_state_file_strict() { STATE_FULL_REVIEW_ROUND="${STATE_FULL_REVIEW_ROUND:-5}" STATE_ASK_CODEX_QUESTION="${STATE_ASK_CODEX_QUESTION:-true}" STATE_AGENT_TEAMS="${STATE_AGENT_TEAMS:-false}" + STATE_PRIVACY_MODE="${STATE_PRIVACY_MODE:-true}" STATE_MAINLINE_STALL_COUNT="${STATE_MAINLINE_STALL_COUNT:-0}" STATE_LAST_MAINLINE_VERDICT="${STATE_LAST_MAINLINE_VERDICT:-$MAINLINE_VERDICT_UNKNOWN}" STATE_DRIFT_STATUS="${STATE_DRIFT_STATUS:-$DRIFT_STATUS_NORMAL}" @@ -585,7 +586,9 @@ extract_mainline_progress_verdict() { return fi - verdict_value=$(printf '%s\n' "$verdict_line" | sed -E 's/.*Mainline Progress Verdict:[[:space:]]*(ADVANCED|STALLED|REGRESSED).*/\1/I') + # Extract the verdict word using grep -oEi (portable) instead of sed /I (GNU-only). + # The preceding grep -Ei already ensures the line contains one of the three verdicts. 
+ verdict_value=$(printf '%s\n' "$verdict_line" | grep -oEi 'ADVANCED|STALLED|REGRESSED' | tail -1) normalize_mainline_progress_verdict "$verdict_value" } @@ -602,9 +605,11 @@ upsert_state_fields() { BEGIN { count = split(assignments, pairs, " "); for (i = 1; i <= count; i++) { - split(pairs[i], kv, "="); - keys[kv[1]] = kv[2]; - order[i] = kv[1]; + eq = index(pairs[i], "="); + key = substr(pairs[i], 1, eq - 1); + val = substr(pairs[i], eq + 1); + keys[key] = val; + order[i] = key; } separator_count = 0; } diff --git a/hooks/loop-codex-stop-hook.sh b/hooks/loop-codex-stop-hook.sh index 5f40a1c9..0682ff6e 100755 --- a/hooks/loop-codex-stop-hook.sh +++ b/hooks/loop-codex-stop-hook.sh @@ -1063,11 +1063,17 @@ mkdir -p "$CACHE_DIR" # portable-timeout.sh already sourced above # Disable native hooks for nested Codex reviewer calls to prevent Stop-hook recursion. -# Probe whether the installed Codex CLI supports --disable; fall back to empty args +# Probe whether the installed Codex CLI supports --disable; cache the result per loop # so older builds do not fail with an unknown-argument error. 
CODEX_DISABLE_HOOKS_ARGS=() -if codex --help 2>&1 | grep -q -- '--disable'; then +_CODEX_FEATURE_CACHE="$CACHE_DIR/.codex-disable-hooks-supported" +if [[ -f "$_CODEX_FEATURE_CACHE" ]]; then + [[ "$(cat "$_CODEX_FEATURE_CACHE")" == "yes" ]] && CODEX_DISABLE_HOOKS_ARGS=(--disable codex_hooks) +elif codex --help 2>&1 | grep -q -- '--disable'; then CODEX_DISABLE_HOOKS_ARGS=(--disable codex_hooks) + echo "yes" > "$_CODEX_FEATURE_CACHE" 2>/dev/null +else + echo "no" > "$_CODEX_FEATURE_CACHE" 2>/dev/null fi # Build command arguments for summary review (codex exec) From d122c82591023cda401d29221b71581ed5e9f1c4 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Sat, 28 Mar 2026 17:17:22 -0700 Subject: [PATCH 28/50] Fix path injection in shim generation, JSON escaping, and legacy tracker compat - Escape runtime paths in bitlesson-selector shim using single-quoted strings to prevent command injection via paths containing shell metacharacters - JSON-escape runtime_root before template substitution in install-codex-hooks.sh to prevent JSON corruption from paths containing quotes or backslashes - Allow goal-tracker edits on legacy trackers that lack the IMMUTABLE SECTION header instead of blocking all writes --- hooks/lib/loop-common.sh | 3 ++- scripts/install-codex-hooks.sh | 5 ++++- scripts/install-skill.sh | 17 +++++++++++------ 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index a3283a02..30dcbb11 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -888,7 +888,8 @@ goal_tracker_mutable_update_allowed() { current_immutable=$(extract_goal_tracker_immutable_from_file "$tracker_file" 2>/dev/null || true) updated_immutable=$(extract_goal_tracker_immutable_from_text "$updated_content" 2>/dev/null || true) - [[ -n "$current_immutable" ]] || return 1 + # Legacy trackers without IMMUTABLE SECTION: allow edits unconditionally. 
+ [[ -n "$current_immutable" ]] || return 0 [[ "$current_immutable" == "$updated_immutable" ]] } diff --git a/scripts/install-codex-hooks.sh b/scripts/install-codex-hooks.sh index 665eff46..cd920690 100755 --- a/scripts/install-codex-hooks.sh +++ b/scripts/install-codex-hooks.sh @@ -102,7 +102,10 @@ template_file = pathlib.Path(sys.argv[2]) runtime_root = sys.argv[3] template_text = template_file.read_text(encoding="utf-8") -template_text = template_text.replace("{{HUMANIZE_RUNTIME_ROOT}}", runtime_root) +# JSON-escape the runtime root so metacharacters (quotes, backslashes) do not +# corrupt the template before json.loads parses it. +escaped_root = json.dumps(runtime_root)[1:-1] # strip outer quotes from dumps output +template_text = template_text.replace("{{HUMANIZE_RUNTIME_ROOT}}", escaped_root) template = json.loads(template_text) existing = {} diff --git a/scripts/install-skill.sh b/scripts/install-skill.sh index 1e85b743..fa546618 100755 --- a/scripts/install-skill.sh +++ b/scripts/install-skill.sh @@ -343,18 +343,23 @@ install_bitlesson_selector_shim() { mkdir -p "$COMMAND_BIN_DIR" - cat > "$shim_path" < "$shim_path" <> "$shim_path" <> "$shim_path" <> "$shim_path" <<'EOF' From febbc00e46477bfb67dff67e4469ab311535af44 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Sat, 28 Mar 2026 17:46:57 -0700 Subject: [PATCH 29/50] Fix relative path bypass in methodology analysis read guard When realpath is unavailable, the fallback assigned FILE_PATH verbatim which could remain relative. The project-root prefix check only matched absolute paths, allowing relative reads to slip through during the methodology analysis phase. Now normalize relative paths to absolute by prepending PROJECT_ROOT before the guard checks. 
--- .claude-plugin/marketplace.json | 2 +- .claude-plugin/plugin.json | 2 +- README.md | 2 +- hooks/loop-read-validator.sh | 17 +++++++++++++++-- 4 files changed, 18 insertions(+), 5 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 588e3b1d..8cb47b76 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -8,7 +8,7 @@ "name": "humanize", "source": "./", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.16.0" + "version": "1.16.1" } ] } diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index 0f9ab32b..5f6a99dd 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "humanize", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.16.0", + "version": "1.16.1", "author": { "name": "humania-org" }, diff --git a/README.md b/README.md index dd961c9a..1c5c88b8 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Humanize -**Current Version: 1.16.0** +**Current Version: 1.16.1** > Derived from the [GAAC (GitHub-as-a-Context)](https://github.com/SihaoLiu/gaac) project. 
diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index 5cb32a17..9832b4e7 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -97,8 +97,21 @@ if [[ -n "$_MA_CHECK_DIR" ]]; then fi _ma_real_loop=$(realpath "$_MA_CHECK_DIR" 2>/dev/null || echo "") # Fallback to raw paths when realpath is unavailable (older macOS/BSD) - [[ -z "$_ma_real_path" ]] && _ma_real_path="$FILE_PATH" - [[ -z "$_ma_real_loop" ]] && _ma_real_loop="$_MA_CHECK_DIR" + # Ensure paths are absolute so prefix guards cannot be bypassed + if [[ -z "$_ma_real_path" ]]; then + if [[ "$FILE_PATH" == /* ]]; then + _ma_real_path="$FILE_PATH" + else + _ma_real_path="$PROJECT_ROOT/$FILE_PATH" + fi + fi + if [[ -z "$_ma_real_loop" ]]; then + if [[ "$_MA_CHECK_DIR" == /* ]]; then + _ma_real_loop="$_MA_CHECK_DIR" + else + _ma_real_loop="$PROJECT_ROOT/$_MA_CHECK_DIR" + fi + fi if [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then _ma_basename=$(basename "$_ma_real_path") # Allowlist: only methodology artifacts (not raw development records). From 17fabf7b22be9d9373517f7dfec42308e9d49656 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Sat, 28 Mar 2026 17:57:44 -0700 Subject: [PATCH 30/50] Revert version back to 1.16.0 on dev branch --- .claude-plugin/marketplace.json | 2 +- .claude-plugin/plugin.json | 2 +- README.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 8cb47b76..588e3b1d 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -8,7 +8,7 @@ "name": "humanize", "source": "./", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. 
Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.16.1" + "version": "1.16.0" } ] } diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index 5f6a99dd..0f9ab32b 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "humanize", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.16.1", + "version": "1.16.0", "author": { "name": "humania-org" }, diff --git a/README.md b/README.md index 1c5c88b8..dd961c9a 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Humanize -**Current Version: 1.16.1** +**Current Version: 1.16.0** > Derived from the [GAAC (GitHub-as-a-Context)](https://github.com/SihaoLiu/gaac) project. From 2ab5361b366c4d1b6607815ac04da54ae3df0600 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Sat, 28 Mar 2026 18:05:15 -0700 Subject: [PATCH 31/50] Fix relative path bypass in write validator and add Codex flag probes in bitlesson selector Write validator had the same relative-path fallback issue as the read validator: when realpath is unavailable, relative FILE_PATH bypassed the absolute prefix guard during methodology analysis. Bitlesson selector unconditionally passed --disable codex_hooks, --skip-git-repo-check, and --ephemeral to codex exec without checking if the CLI supports them. Added capability probes matching the pattern already used in loop-codex-stop-hook.sh. 
--- hooks/loop-write-validator.sh | 17 +++++++++++++++-- scripts/bitlesson-select.sh | 17 +++++++++++++---- tests/test-bitlesson-select-routing.sh | 9 +++++++++ 3 files changed, 37 insertions(+), 6 deletions(-) diff --git a/hooks/loop-write-validator.sh b/hooks/loop-write-validator.sh index 5dabb7ec..8abc2e8c 100755 --- a/hooks/loop-write-validator.sh +++ b/hooks/loop-write-validator.sh @@ -121,8 +121,21 @@ if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.m fi _ma_real_loop=$(realpath "$_MA_LOOP_DIR" 2>/dev/null || echo "") # Fallback to raw paths when realpath is unavailable (older macOS/BSD) - [[ -z "$_ma_real_path" ]] && _ma_real_path="$FILE_PATH" - [[ -z "$_ma_real_loop" ]] && _ma_real_loop="$_MA_LOOP_DIR" + # Ensure paths are absolute so prefix guards cannot be bypassed + if [[ -z "$_ma_real_path" ]]; then + if [[ "$FILE_PATH" == /* ]]; then + _ma_real_path="$FILE_PATH" + else + _ma_real_path="$PROJECT_ROOT/$FILE_PATH" + fi + fi + if [[ -z "$_ma_real_loop" ]]; then + if [[ "$_MA_LOOP_DIR" == /* ]]; then + _ma_real_loop="$_MA_LOOP_DIR" + else + _ma_real_loop="$PROJECT_ROOT/$_MA_LOOP_DIR" + fi + fi if [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then _ma_basename=$(basename "$_ma_real_path") case "$_ma_basename" in diff --git a/scripts/bitlesson-select.sh b/scripts/bitlesson-select.sh index cd002063..d30ef319 100755 --- a/scripts/bitlesson-select.sh +++ b/scripts/bitlesson-select.sh @@ -185,10 +185,19 @@ run_selector() { local model="$2" if [[ "$provider" == "codex" ]]; then - local codex_exec_args=( - "--disable" "codex_hooks" - "--skip-git-repo-check" - "--ephemeral" + local codex_exec_args=() + # Probe whether the installed Codex CLI supports --disable flag + if codex --help 2>&1 | grep -q -- '--disable'; then + codex_exec_args+=("--disable" "codex_hooks") + fi + # Probe for --skip-git-repo-check and --ephemeral support + if codex exec --help 2>&1 | grep -q -- '--skip-git-repo-check'; then + 
codex_exec_args+=("--skip-git-repo-check") + fi + if codex exec --help 2>&1 | grep -q -- '--ephemeral'; then + codex_exec_args+=("--ephemeral") + fi + codex_exec_args+=( "-s" "read-only" "-m" "$model" "-c" "model_reasoning_effort=low" diff --git a/tests/test-bitlesson-select-routing.sh b/tests/test-bitlesson-select-routing.sh index acf657d2..bd23ab45 100755 --- a/tests/test-bitlesson-select-routing.sh +++ b/tests/test-bitlesson-select-routing.sh @@ -448,6 +448,15 @@ CAPTURE_BIN="$TEST_DIR/capture-bin" mkdir -p "$CAPTURE_BIN" cat > "$CAPTURE_BIN/codex" <<'EOF' #!/usr/bin/env bash +# Respond to help probes with supported flags +for arg in "$@"; do + if [[ "$arg" == "--help" ]]; then + echo " --disable Disable a feature" + echo " --skip-git-repo-check Skip git repo check" + echo " --ephemeral Ephemeral mode" + exit 0 + fi +done printf '%s\n' "$@" > "${TEST_CAPTURE_ARGS:?}" cat > /dev/null cat <<'OUT' From 4326860a5484202753b2c8822ad1bbe8b1985ad4 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Sat, 28 Mar 2026 18:22:01 -0700 Subject: [PATCH 32/50] Fix mainline verdict parser picking last keyword instead of first When the verdict line contains multiple keywords (e.g. reviewer echoes all options), grep -oEi | tail -1 selected the last match which could be REGRESSED even when ADVANCED was the actual verdict. Changed to head -1 to pick the first keyword after the label. --- hooks/lib/loop-common.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index 30dcbb11..480a7ab0 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -588,7 +588,7 @@ extract_mainline_progress_verdict() { # Extract the verdict word using grep -oEi (portable) instead of sed /I (GNU-only). # The preceding grep -Ei already ensures the line contains one of the three verdicts. 
- verdict_value=$(printf '%s\n' "$verdict_line" | grep -oEi 'ADVANCED|STALLED|REGRESSED' | tail -1) + verdict_value=$(printf '%s\n' "$verdict_line" | grep -oEi 'ADVANCED|STALLED|REGRESSED' | head -1) normalize_mainline_progress_verdict "$verdict_value" } From 016caca398c5b450db14c928f69b6927a6ac557f Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Sat, 28 Mar 2026 18:28:04 -0700 Subject: [PATCH 33/50] Add ask-gemini skill and tool-filtered monitor subcommands Introduce ask-gemini skill that wraps the Gemini CLI in non-interactive mode with built-in web-search instruction, defaulting to gemini-3.1-pro-preview. Supports --gemini-model and --gemini-timeout flags. Add tool metadata tagging (tool: codex / tool: gemini) to both ask-codex and ask-gemini invocations so the monitor can distinguish them. Extend humanize monitor with codex and gemini subcommands that filter skill invocations by tool type. The existing skill subcommand continues to show all invocations. Bump version to 1.17.0. --- .claude-plugin/marketplace.json | 2 +- .claude-plugin/plugin.json | 2 +- README.md | 14 +- scripts/ask-codex.sh | 5 + scripts/ask-gemini.sh | 385 ++++++++++++++++++++++++++++++++ scripts/humanize.sh | 16 +- scripts/lib/monitor-skill.sh | 108 +++++++-- skills/ask-gemini/SKILL.md | 61 +++++ 8 files changed, 571 insertions(+), 22 deletions(-) create mode 100755 scripts/ask-gemini.sh create mode 100644 skills/ask-gemini/SKILL.md diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 588e3b1d..e4e1adf3 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -8,7 +8,7 @@ "name": "humanize", "source": "./", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. 
Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.16.0" + "version": "1.17.0" } ] } diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index 0f9ab32b..bbab412d 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "humanize", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.16.0", + "version": "1.17.0", "author": { "name": "humania-org" }, diff --git a/README.md b/README.md index dd961c9a..0a93dd30 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Humanize -**Current Version: 1.16.0** +**Current Version: 1.17.0** > Derived from the [GAAC (GitHub-as-a-Context)](https://github.com/SihaoLiu/gaac) project. @@ -55,10 +55,18 @@ Requires [codex CLI](https://github.com/openai/codex) for review. See the full [ /humanize:start-rlcr-loop docs/plan.md ``` -4. **Monitor progress**: +4. **Consult Gemini** for deep web research (requires Gemini CLI): + ```bash + /humanize:ask-gemini What are the latest best practices for X? + ``` + +5. 
**Monitor progress**: ```bash source /scripts/humanize.sh - humanize monitor rlcr + humanize monitor rlcr # RLCR loop + humanize monitor skill # All skill invocations (codex + gemini) + humanize monitor codex # Codex invocations only + humanize monitor gemini # Gemini invocations only ``` ## Monitor Dashboard diff --git a/scripts/ask-codex.sh b/scripts/ask-codex.sh index ac26fc32..bea40b4e 100755 --- a/scripts/ask-codex.sh +++ b/scripts/ask-codex.sh @@ -234,6 +234,7 @@ $QUESTION - Effort: $CODEX_EFFORT - Timeout: ${CODEX_TIMEOUT}s - Timestamp: $TIMESTAMP +- Tool: codex EOF # ======================================== @@ -317,6 +318,7 @@ if [[ $CODEX_EXIT_CODE -eq 124 ]]; then # Save metadata even on timeout cat > "$SKILL_DIR/metadata.md" << EOF --- +tool: codex model: $CODEX_MODEL effort: $CODEX_EFFORT timeout: $CODEX_TIMEOUT @@ -343,6 +345,7 @@ if [[ $CODEX_EXIT_CODE -ne 0 ]]; then # Save metadata cat > "$SKILL_DIR/metadata.md" << EOF --- +tool: codex model: $CODEX_MODEL effort: $CODEX_EFFORT timeout: $CODEX_TIMEOUT @@ -368,6 +371,7 @@ if [[ ! -s "$CODEX_STDOUT_FILE" ]]; then cat > "$SKILL_DIR/metadata.md" << EOF --- +tool: codex model: $CODEX_MODEL effort: $CODEX_EFFORT timeout: $CODEX_TIMEOUT @@ -390,6 +394,7 @@ cp "$CODEX_STDOUT_FILE" "$SKILL_DIR/output.md" # Save metadata cat > "$SKILL_DIR/metadata.md" << EOF --- +tool: codex model: $CODEX_MODEL effort: $CODEX_EFFORT timeout: $CODEX_TIMEOUT diff --git a/scripts/ask-gemini.sh b/scripts/ask-gemini.sh new file mode 100755 index 00000000..489bc4db --- /dev/null +++ b/scripts/ask-gemini.sh @@ -0,0 +1,385 @@ +#!/usr/bin/env bash +# +# Ask Gemini - One-shot consultation with Gemini CLI +# +# Sends a question or task to gemini in non-interactive mode and returns +# the response. Gemini is always instructed to leverage Google Search +# for deep web research. +# +# Usage: +# ask-gemini.sh [--gemini-model MODEL] [--gemini-timeout SECONDS] [question...] 
+# +# Output: +# stdout: Gemini's response (for Claude to read) +# stderr: Status/debug info (model, log paths) +# +# Storage: +# Project-local: .humanize/skill/<unique-id>/{input,output,metadata}.md +# Cache: ~/.cache/humanize/<sanitized-project-path>/skill-<unique-id>/gemini-run.{cmd,out,log} +# + +set -euo pipefail + +# ======================================== +# Source Shared Libraries +# ======================================== + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" + +# Source portable timeout wrapper +source "$SCRIPT_DIR/portable-timeout.sh" + +# ======================================== +# Default Configuration +# ======================================== + +DEFAULT_GEMINI_MODEL="gemini-3.1-pro-preview" +DEFAULT_ASK_GEMINI_TIMEOUT=3600 + +GEMINI_MODEL="$DEFAULT_GEMINI_MODEL" +GEMINI_TIMEOUT="$DEFAULT_ASK_GEMINI_TIMEOUT" + +# ======================================== +# Help +# ======================================== + +show_help() { + cat << 'HELP_EOF' +ask-gemini - One-shot deep-research consultation with Gemini + +USAGE: + /humanize:ask-gemini [OPTIONS] <question or task> + +OPTIONS: + --gemini-model <MODEL> + Gemini model name (default: gemini-3.1-pro-preview) + --gemini-timeout <SECONDS> + Timeout for the Gemini query in seconds (default: 3600) + -h, --help Show this help message + +DESCRIPTION: + Sends a one-shot question or task to the Gemini CLI in non-interactive + mode (-p). The prompt is augmented with an instruction to perform web + research via Google Search, making this ideal for deep-research tasks + that benefit from up-to-date internet information. + + The response is saved to .humanize/skill/<unique-id>/output.md for reference. + +EXAMPLES: + /humanize:ask-gemini What are the latest best practices for Rust error handling? + /humanize:ask-gemini --gemini-model gemini-2.5-pro Review recent CVEs for OpenSSL 3.x + /humanize:ask-gemini --gemini-timeout 600 Compare React Server Components vs Astro Islands + +ENVIRONMENT: + HUMANIZE_GEMINI_YOLO + Set to "true" or "1" to auto-approve all Gemini tool calls (--yolo). 
+ Default behaviour uses --sandbox mode. +HELP_EOF + exit 0 +} + +# ======================================== +# Parse Arguments +# ======================================== + +QUESTION_PARTS=() +OPTIONS_DONE=false + +while [[ $# -gt 0 ]]; do + if [[ "$OPTIONS_DONE" == "true" ]]; then + QUESTION_PARTS+=("$1") + shift + continue + fi + case $1 in + -h|--help) + show_help + ;; + --) + OPTIONS_DONE=true + shift + ;; + --gemini-model) + if [[ -z "${2:-}" ]]; then + echo "Error: --gemini-model requires a MODEL argument" >&2 + exit 1 + fi + GEMINI_MODEL="$2" + shift 2 + ;; + --gemini-timeout) + if [[ -z "${2:-}" ]]; then + echo "Error: --gemini-timeout requires a number argument (seconds)" >&2 + exit 1 + fi + if ! [[ "$2" =~ ^[0-9]+$ ]]; then + echo "Error: --gemini-timeout must be a positive integer (seconds), got: $2" >&2 + exit 1 + fi + GEMINI_TIMEOUT="$2" + shift 2 + ;; + -*) + echo "Error: Unknown option: $1" >&2 + echo "Use --help for usage information" >&2 + exit 1 + ;; + *) + QUESTION_PARTS+=("$1") + OPTIONS_DONE=true + shift + ;; + esac +done + +# Join question parts into a single string +QUESTION="${QUESTION_PARTS[*]}" + +# ======================================== +# Validate Prerequisites +# ======================================== + +if ! command -v gemini &>/dev/null; then + echo "Error: 'gemini' command is not installed or not in PATH" >&2 + echo "" >&2 + echo "Please install Gemini CLI: npm install -g @google/gemini-cli or https://github.com/google-gemini/gemini-cli" >&2 + echo "Then retry: /humanize:ask-gemini <your question>" >&2 + exit 1 +fi + +if [[ -z "$QUESTION" ]]; then + echo "Error: No question or task provided" >&2 + echo "" >&2 + echo "Usage: /humanize:ask-gemini [OPTIONS] <question or task>" >&2 + echo "" >&2 + echo "For help: /humanize:ask-gemini --help" >&2 + exit 1 +fi + +# Validate model name for safety (alphanumeric, hyphen, underscore, dot) +if [[ ! 
"$GEMINI_MODEL" =~ ^[a-zA-Z0-9._-]+$ ]]; then + echo "Error: Gemini model contains invalid characters" >&2 + echo " Model: $GEMINI_MODEL" >&2 + echo " Only alphanumeric, hyphen, underscore, dot allowed" >&2 + exit 1 +fi + +# ======================================== +# Detect Project Root +# ======================================== + +if git rev-parse --show-toplevel &>/dev/null; then + PROJECT_ROOT=$(git rev-parse --show-toplevel) +else + PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" +fi + +# ======================================== +# Create Storage Directories +# ======================================== + +TIMESTAMP=$(date +%Y-%m-%d_%H-%M-%S) +UNIQUE_ID="${TIMESTAMP}-$$-$(head -c 4 /dev/urandom | od -An -tx1 | tr -d ' \n')" + +# Project-local storage: .humanize/skill// +SKILL_DIR="$PROJECT_ROOT/.humanize/skill/$UNIQUE_ID" +mkdir -p "$SKILL_DIR" + +# Cache storage: ~/.cache/humanize//skill-/ +SANITIZED_PROJECT_PATH=$(echo "$PROJECT_ROOT" | sed 's/[^a-zA-Z0-9._-]/-/g' | sed 's/--*/-/g') +CACHE_BASE="${XDG_CACHE_HOME:-$HOME/.cache}" +CACHE_DIR="$CACHE_BASE/humanize/$SANITIZED_PROJECT_PATH/skill-$UNIQUE_ID" +if ! 
mkdir -p "$CACHE_DIR" 2>/dev/null; then + CACHE_DIR="$SKILL_DIR/cache" + mkdir -p "$CACHE_DIR" + echo "ask-gemini: warning: home cache not writable, using $CACHE_DIR" >&2 +fi + +# ======================================== +# Save Input +# ======================================== + +cat > "$SKILL_DIR/input.md" << EOF +# Ask Gemini Input + +## Question + +$QUESTION + +## Configuration + +- Model: $GEMINI_MODEL +- Timeout: ${GEMINI_TIMEOUT}s +- Timestamp: $TIMESTAMP +- Tool: gemini +EOF + +# ======================================== +# Build Gemini Command +# ======================================== + +GEMINI_ARGS=("-m" "$GEMINI_MODEL") + +# Determine approval mode +if [[ "${HUMANIZE_GEMINI_YOLO:-}" == "true" ]] || [[ "${HUMANIZE_GEMINI_YOLO:-}" == "1" ]]; then + GEMINI_ARGS+=("--yolo") +else + GEMINI_ARGS+=("--sandbox") +fi + +# Use text output format for clean stdout +GEMINI_ARGS+=("-o" "text") + +# Build the augmented prompt with web-search instruction +AUGMENTED_PROMPT="You MUST use Google Search to find the most up-to-date and accurate information before answering. Perform thorough web research. Cite sources where possible. 
+ +--- + +$QUESTION" + +# ======================================== +# Save Debug Command +# ======================================== + +GEMINI_CMD_FILE="$CACHE_DIR/gemini-run.cmd" +GEMINI_STDOUT_FILE="$CACHE_DIR/gemini-run.out" +GEMINI_STDERR_FILE="$CACHE_DIR/gemini-run.log" + +{ + echo "# Gemini ask-gemini invocation debug info" + echo "# Timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" + echo "# Working directory: $PROJECT_ROOT" + echo "# Timeout: $GEMINI_TIMEOUT seconds" + echo "" + echo "gemini ${GEMINI_ARGS[*]} -p \"\"" + echo "" + echo "# Prompt content:" + echo "$AUGMENTED_PROMPT" +} > "$GEMINI_CMD_FILE" + +# ======================================== +# Run Gemini +# ======================================== + +echo "ask-gemini: model=$GEMINI_MODEL timeout=${GEMINI_TIMEOUT}s" >&2 +echo "ask-gemini: cache=$CACHE_DIR" >&2 +echo "ask-gemini: running gemini -p ..." >&2 + +# Portable epoch-to-ISO8601 formatter +epoch_to_iso() { + local epoch="$1" + date -u -d "@$epoch" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || + date -u -r "$epoch" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || + echo "unknown" +} + +START_TIME=$(date +%s) + +GEMINI_EXIT_CODE=0 +run_with_timeout "$GEMINI_TIMEOUT" gemini "${GEMINI_ARGS[@]}" -p "$AUGMENTED_PROMPT" \ + > "$GEMINI_STDOUT_FILE" 2> "$GEMINI_STDERR_FILE" || GEMINI_EXIT_CODE=$? 
+ +END_TIME=$(date +%s) +DURATION=$((END_TIME - START_TIME)) + +echo "ask-gemini: exit_code=$GEMINI_EXIT_CODE duration=${DURATION}s" >&2 + +# ======================================== +# Handle Results +# ======================================== + +if [[ $GEMINI_EXIT_CODE -eq 124 ]]; then + echo "Error: Gemini timed out after ${GEMINI_TIMEOUT} seconds" >&2 + echo "" >&2 + echo "Try increasing the timeout:" >&2 + echo " /humanize:ask-gemini --gemini-timeout $((GEMINI_TIMEOUT * 2)) " >&2 + echo "" >&2 + echo "Debug logs: $CACHE_DIR" >&2 + + cat > "$SKILL_DIR/metadata.md" << EOF +--- +tool: gemini +model: $GEMINI_MODEL +timeout: $GEMINI_TIMEOUT +exit_code: 124 +duration: ${DURATION}s +status: timeout +started_at: $(epoch_to_iso "$START_TIME") +--- +EOF + exit 124 +fi + +if [[ $GEMINI_EXIT_CODE -ne 0 ]]; then + echo "Error: Gemini exited with code $GEMINI_EXIT_CODE" >&2 + if [[ -s "$GEMINI_STDERR_FILE" ]]; then + echo "" >&2 + echo "Gemini stderr (last 20 lines):" >&2 + tail -20 "$GEMINI_STDERR_FILE" >&2 + fi + echo "" >&2 + echo "Debug logs: $CACHE_DIR" >&2 + + cat > "$SKILL_DIR/metadata.md" << EOF +--- +tool: gemini +model: $GEMINI_MODEL +timeout: $GEMINI_TIMEOUT +exit_code: $GEMINI_EXIT_CODE +duration: ${DURATION}s +status: error +started_at: $(epoch_to_iso "$START_TIME") +--- +EOF + exit "$GEMINI_EXIT_CODE" +fi + +if [[ ! 
-s "$GEMINI_STDOUT_FILE" ]]; then + echo "Error: Gemini returned empty response" >&2 + if [[ -s "$GEMINI_STDERR_FILE" ]]; then + echo "" >&2 + echo "Gemini stderr (last 20 lines):" >&2 + tail -20 "$GEMINI_STDERR_FILE" >&2 + fi + echo "" >&2 + echo "Debug logs: $CACHE_DIR" >&2 + + cat > "$SKILL_DIR/metadata.md" << EOF +--- +tool: gemini +model: $GEMINI_MODEL +timeout: $GEMINI_TIMEOUT +exit_code: 0 +duration: ${DURATION}s +status: empty_response +started_at: $(epoch_to_iso "$START_TIME") +--- +EOF + exit 1 +fi + +# ======================================== +# Save Output and Metadata +# ======================================== + +cp "$GEMINI_STDOUT_FILE" "$SKILL_DIR/output.md" + +cat > "$SKILL_DIR/metadata.md" << EOF +--- +tool: gemini +model: $GEMINI_MODEL +timeout: $GEMINI_TIMEOUT +exit_code: 0 +duration: ${DURATION}s +status: success +started_at: $(epoch_to_iso "$START_TIME") +--- +EOF + +echo "ask-gemini: response saved to $SKILL_DIR/output.md" >&2 + +# ======================================== +# Output Response +# ======================================== + +cat "$GEMINI_STDOUT_FILE" diff --git a/scripts/humanize.sh b/scripts/humanize.sh index a3492844..c5ac3f20 100755 --- a/scripts/humanize.sh +++ b/scripts/humanize.sh @@ -1182,13 +1182,21 @@ humanize() { skill) _humanize_monitor_skill "$@" ;; + codex) + _humanize_monitor_skill --tool-filter codex "$@" + ;; + gemini) + _humanize_monitor_skill --tool-filter gemini "$@" + ;; *) - echo "Usage: humanize monitor " + echo "Usage: humanize monitor " echo "" echo "Subcommands:" echo " rlcr Monitor the latest RLCR loop log from .humanize/rlcr" echo " pr Monitor the latest PR loop from .humanize/pr-loop" - echo " skill Monitor ask-codex skill invocations from .humanize/skill" + echo " skill Monitor all skill invocations (codex + gemini)" + echo " codex Monitor ask-codex skill invocations only" + echo " gemini Monitor ask-gemini skill invocations only" echo "" echo "Features:" echo " - Fixed status bar showing session info, 
round progress, model config" @@ -1205,7 +1213,9 @@ humanize() { echo "Commands:" echo " monitor rlcr Monitor the latest RLCR loop log" echo " monitor pr Monitor the latest PR loop" - echo " monitor skill Monitor ask-codex skill invocations" + echo " monitor skill Monitor all skill invocations (codex + gemini)" + echo " monitor codex Monitor ask-codex skill invocations only" + echo " monitor gemini Monitor ask-gemini skill invocations only" return 1 ;; esac diff --git a/scripts/lib/monitor-skill.sh b/scripts/lib/monitor-skill.sh index 218fab92..8803f139 100644 --- a/scripts/lib/monitor-skill.sh +++ b/scripts/lib/monitor-skill.sh @@ -3,15 +3,18 @@ # monitor-skill.sh - Skill monitor for humanize # # Provides the _humanize_monitor_skill function for monitoring -# ask-codex skill invocations from .humanize/skill directory. +# skill invocations (ask-codex, ask-gemini) from .humanize/skill directory. # # This file is sourced by humanize.sh and depends on: # - monitor-common.sh (monitor_get_yaml_value, monitor_format_timestamp, etc.) # - humanize.sh (humanize_split_to_array) -# Monitor ask-codex skill invocations from .humanize/skill +# Monitor skill invocations from .humanize/skill # Shows a fixed status bar with aggregate stats and latest invocation details, # with live output display in the scrollable area below. +# +# Accepts --tool-filter to show only invocations from a +# specific tool. Without the filter, all invocations are shown. 
_humanize_monitor_skill() { # Enable 0-indexed arrays in zsh for bash compatibility # no_monitor suppresses background job notifications ([1] PID) @@ -23,11 +26,16 @@ _humanize_monitor_skill() { local check_interval=2 local status_bar_height=9 local once_mode=false + local tool_filter="" # Parse arguments while [[ $# -gt 0 ]]; do case "$1" in --once) once_mode=true; shift ;; + --tool-filter) + tool_filter="${2:-}" + shift $(( $# > 1 ? 2 : 1 )) # value may be absent; an over-large shift count leaves the args untouched and this loop would never end + ;; *) shift ;; esac done @@ -35,10 +43,37 @@ _humanize_monitor_skill() { # Check if .humanize/skill exists if [[ ! -d "$skill_dir" ]]; then echo "Error: $skill_dir directory not found in current directory" - echo "Run /humanize:ask-codex first to create skill invocations" + echo "Run /humanize:ask-codex or /humanize:ask-gemini first to create skill invocations" return 1 fi + # Determine the tool for a given invocation directory. + # Reads metadata.md first (completed), falls back to input.md (running). + # Returns: codex, gemini, or unknown + _skill_get_tool() { + local dir="$1" + if [[ -f "$dir/metadata.md" ]]; then + local t=$(monitor_get_yaml_value "tool" "$dir/metadata.md") + [[ -n "$t" ]] && { echo "$t"; return; } + fi + if [[ -f "$dir/input.md" ]]; then + local t=$(grep -E '^- Tool:' "$dir/input.md" 2>/dev/null | sed 's/- Tool: //') + [[ -n "$t" ]] && { echo "$t"; return; } + fi + echo "unknown" + } + + # Check whether a directory passes the current tool filter. + # Returns 0 (pass) or 1 (skip). + _skill_passes_filter() { + [[ -z "$tool_filter" ]] && return 0 + local t=$(_skill_get_tool "$1") + [[ "$t" == "$tool_filter" ]] && return 0 + # Legacy invocations without a tool tag are treated as codex + [[ "$t" == "unknown" && "$tool_filter" == "codex" ]] && return 0 + return 1 + } + # List all valid skill invocation directories sorted newest-first # Skill dirs use YYYY-MM-DD_HH-MM-SS or YYYY-MM-DD_HH-MM-SS-PID-RANDOM naming _skill_list_dirs_sorted() { @@ -47,7 +82,9 @@ _humanize_monitor_skill() { [[ -z "$d" ]] && continue [[ !
-d "$d" ]] && continue local name=$(basename "$d") - [[ "$name" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}_[0-9]{2}-[0-9]{2}-[0-9]{2} ]] && dirs+=("$d") + [[ "$name" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}_[0-9]{2}-[0-9]{2}-[0-9]{2} ]] || continue + _skill_passes_filter "$d" || continue + dirs+=("$d") done < <(find "$skill_dir" -mindepth 1 -maxdepth 1 -type d 2>/dev/null) printf '%s\n' "${dirs[@]}" | sort -r } @@ -88,6 +125,7 @@ _humanize_monitor_skill() { [[ ! -d "$d" ]] && continue local name=$(basename "$d") [[ ! "$name" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}_[0-9]{2}-[0-9]{2}-[0-9]{2} ]] && continue + _skill_passes_filter "$d" || continue ((total++)) if [[ -f "$d/metadata.md" ]]; then local st=$(monitor_get_yaml_value "status" "$d/metadata.md") @@ -127,6 +165,7 @@ _humanize_monitor_skill() { # Find the best file to monitor for a skill invocation # Searches both global cache (~/.cache/humanize/), local cache ($dir/cache/), # and project-local files (.humanize/skill/) for the best content. + # Supports both codex (codex-run.*) and gemini (gemini-run.*) cache files. _skill_find_monitored_file() { local dir="$1" local gcache=$(_skill_find_cache_dir "$dir") @@ -134,18 +173,29 @@ _humanize_monitor_skill() { local is_running=false [[ ! -f "$dir/metadata.md" ]] && is_running=true + # Determine which tool produced this invocation for cache file naming + local inv_tool=$(_skill_get_tool "$dir") + local run_prefix="codex-run" + [[ "$inv_tool" == "gemini" ]] && run_prefix="gemini-run" + # Helper: check a cache directory for best file # Args: cache_dir, prefer_log (true for running, false for completed) _check_cache_files() { local c="$1" prefer_log="$2" [[ ! 
-d "$c" ]] && return if [[ "$prefer_log" == "true" ]]; then + [[ -f "$c/${run_prefix}.log" && -s "$c/${run_prefix}.log" ]] && { echo "$c/${run_prefix}.log"; return; } + [[ -f "$c/${run_prefix}.out" && -s "$c/${run_prefix}.out" ]] && { echo "$c/${run_prefix}.out"; return; } + [[ -f "$c/${run_prefix}.log" ]] && { echo "$c/${run_prefix}.log"; return; } + # Fallback: try the other prefix for legacy/mixed invocations [[ -f "$c/codex-run.log" && -s "$c/codex-run.log" ]] && { echo "$c/codex-run.log"; return; } - [[ -f "$c/codex-run.out" && -s "$c/codex-run.out" ]] && { echo "$c/codex-run.out"; return; } - [[ -f "$c/codex-run.log" ]] && { echo "$c/codex-run.log"; return; } + [[ -f "$c/gemini-run.log" && -s "$c/gemini-run.log" ]] && { echo "$c/gemini-run.log"; return; } else + [[ -f "$c/${run_prefix}.out" && -s "$c/${run_prefix}.out" ]] && { echo "$c/${run_prefix}.out"; return; } + [[ -f "$c/${run_prefix}.log" && -s "$c/${run_prefix}.log" ]] && { echo "$c/${run_prefix}.log"; return; } + # Fallback [[ -f "$c/codex-run.out" && -s "$c/codex-run.out" ]] && { echo "$c/codex-run.out"; return; } - [[ -f "$c/codex-run.log" && -s "$c/codex-run.log" ]] && { echo "$c/codex-run.log"; return; } + [[ -f "$c/gemini-run.out" && -s "$c/gemini-run.out" ]] && { echo "$c/gemini-run.out"; return; } fi } @@ -166,6 +216,15 @@ _humanize_monitor_skill() { echo "" } + # Build the monitor title based on filter + _skill_monitor_title() { + case "$tool_filter" in + codex) echo " Humanize Skill Monitor [codex]" ;; + gemini) echo " Humanize Skill Monitor [gemini]" ;; + *) echo " Humanize Skill Monitor" ;; + esac + } + # Draw the status bar at the top _skill_draw_status_bar() { local latest_dir="$1" @@ -186,17 +245,21 @@ _humanize_monitor_skill() { # Parse latest invocation metadata local inv_status="running" model="N/A" effort="N/A" duration="N/A" started_at="N/A" + local inv_tool="unknown" if [[ -n "$latest_dir" && -f "$latest_dir/metadata.md" ]]; then inv_status=$(monitor_get_yaml_value "status" 
"$latest_dir/metadata.md") model=$(monitor_get_yaml_value "model" "$latest_dir/metadata.md") effort=$(monitor_get_yaml_value "effort" "$latest_dir/metadata.md") duration=$(monitor_get_yaml_value "duration" "$latest_dir/metadata.md") started_at=$(monitor_get_yaml_value "started_at" "$latest_dir/metadata.md") + inv_tool=$(monitor_get_yaml_value "tool" "$latest_dir/metadata.md") elif [[ -n "$latest_dir" && -f "$latest_dir/input.md" ]]; then model=$(grep -E '^- Model:' "$latest_dir/input.md" 2>/dev/null | sed 's/- Model: //') effort=$(grep -E '^- Effort:' "$latest_dir/input.md" 2>/dev/null | sed 's/- Effort: //') + inv_tool=$(grep -E '^- Tool:' "$latest_dir/input.md" 2>/dev/null | sed 's/- Tool: //') fi inv_status="${inv_status:-unknown}"; model="${model:-N/A}"; effort="${effort:-N/A}" + inv_tool="${inv_tool:-unknown}" # Status color local status_color="$dim" @@ -235,11 +298,19 @@ _humanize_monitor_skill() { cache_display="...${cache_display: -$csuffix_len}" fi + # Model display: for gemini, no effort; for codex, show (effort) + local model_display="$model" + if [[ "$inv_tool" == "gemini" ]] || [[ "$effort" == "N/A" ]]; then + model_display="$model" + else + model_display="$model ($effort)" + fi + tput sc tput cup 0 0 # Line 1: Title - printf "${bg}${bold}%-${term_width}s${reset}${clr_eol}\n" " Humanize Skill Monitor" + printf "${bg}${bold}%-${term_width}s${reset}${clr_eol}\n" "$(_skill_monitor_title)" # Line 2: Aggregate stats printf "${cyan}Total:${reset} ${bold}${total}${reset} invocations" [[ "$success" -gt 0 ]] && printf " | ${green}${success} success${reset}" @@ -248,8 +319,8 @@ _humanize_monitor_skill() { [[ "$empty" -gt 0 ]] && printf " | ${yellow}${empty} empty${reset}" [[ "$running" -gt 0 ]] && printf " | ${yellow}${running} running${reset}" printf "${clr_eol}\n" - # Line 3: Focused invocation status + model + duration - printf "${magenta}Focused:${reset} ${status_color}%s${reset} | ${yellow}Model:${reset} %s (%s) | ${cyan}Duration:${reset} %s${clr_eol}\n" 
"$inv_status" "$model" "$effort" "${duration:-N/A}" + # Line 3: Focused invocation status + tool + model + duration + printf "${magenta}Focused:${reset} ${status_color}%s${reset} | ${dim}[%s]${reset} ${yellow}Model:${reset} %s | ${cyan}Duration:${reset} %s${clr_eol}\n" "$inv_status" "$inv_tool" "$model_display" "${duration:-N/A}" # Line 4: Started at printf "${cyan}Started:${reset} %s${clr_eol}\n" "$start_display" # Line 5: Question @@ -269,7 +340,9 @@ _humanize_monitor_skill() { if [[ "$once_mode" == "true" ]]; then local latest=$(_skill_find_latest_dir) if [[ -z "$latest" ]]; then - echo "No skill invocations found in $skill_dir" + local filter_msg="" + [[ -n "$tool_filter" ]] && filter_msg=" (filter: $tool_filter)" + echo "No skill invocations found in $skill_dir$filter_msg" return 1 fi @@ -283,24 +356,29 @@ _humanize_monitor_skill() { local -a stats humanize_split_to_array stats "$(_skill_count_stats)" local inv_status="running" model="N/A" effort="N/A" duration="N/A" started_at="N/A" + local inv_tool="unknown" if [[ -f "$focus_dir/metadata.md" ]]; then inv_status=$(monitor_get_yaml_value "status" "$focus_dir/metadata.md") model=$(monitor_get_yaml_value "model" "$focus_dir/metadata.md") effort=$(monitor_get_yaml_value "effort" "$focus_dir/metadata.md") duration=$(monitor_get_yaml_value "duration" "$focus_dir/metadata.md") started_at=$(monitor_get_yaml_value "started_at" "$focus_dir/metadata.md") + inv_tool=$(monitor_get_yaml_value "tool" "$focus_dir/metadata.md") fi + inv_tool="${inv_tool:-unknown}" local question=$(_skill_get_question "$focus_dir") local cache_dir=$(_skill_find_cache_dir "$focus_dir") + local title=$(_skill_monitor_title) echo "==========================================" - echo " Humanize Skill Monitor" + echo "$title" echo "==========================================" echo "" echo "Total Invocations: ${stats[0]}" echo " Success: ${stats[1]} Error: ${stats[2]} Timeout: ${stats[3]} Empty: ${stats[4]} Running: ${stats[5]}" echo "" echo "Focused: 
$(basename "$focus_dir")" + echo " Tool: ${inv_tool}" echo " Status: ${inv_status:-unknown}" echo " Model: ${model:-N/A} (${effort:-N/A})" echo " Duration: ${duration:-N/A}" @@ -329,14 +407,16 @@ _humanize_monitor_skill() { while IFS= read -r d; do [[ -z "$d" ]] && continue local name=$(basename "$d") - local st="running" dur="" + local st="running" dur="" t="?" if [[ -f "$d/metadata.md" ]]; then st=$(monitor_get_yaml_value "status" "$d/metadata.md") dur=$(monitor_get_yaml_value "duration" "$d/metadata.md") + t=$(monitor_get_yaml_value "tool" "$d/metadata.md") fi + t="${t:-?}" local q=$(_skill_get_question "$d") [[ ${#q} -gt 50 ]] && q="${q:0:47}..." - printf " %-38s %-14s %-6s %s\n" "$name" "$st" "$dur" "$q" + printf " %-38s %-7s %-14s %-6s %s\n" "$name" "[$t]" "$st" "$dur" "$q" ((count++)) [[ $count -ge 10 ]] && break done < <(_skill_list_dirs_sorted) diff --git a/skills/ask-gemini/SKILL.md b/skills/ask-gemini/SKILL.md new file mode 100644 index 00000000..e31cdd57 --- /dev/null +++ b/skills/ask-gemini/SKILL.md @@ -0,0 +1,61 @@ +--- +name: ask-gemini +description: Consult Gemini as an independent expert with deep web research. Sends a question or task to Gemini CLI and returns a research-backed response. +argument-hint: "[--gemini-model MODEL] [--gemini-timeout SECONDS] [question or task]" +allowed-tools: "Bash(${CLAUDE_PLUGIN_ROOT}/scripts/ask-gemini.sh:*)" +--- + +# Ask Gemini + +Send a question or task to Gemini and return a research-backed response. +Gemini is always instructed to perform web research via Google Search, +making this ideal for deep-research tasks that benefit from up-to-date +internet information. + +## How to Use + +Do not pass free-form user text to the shell unquoted. The question or task may contain spaces or shell metacharacters such as `(`, `)`, `;`, `#`, `*`, or `[`. 
+ +If the user only supplied a question or task, execute: + +```bash +"${CLAUDE_PLUGIN_ROOT}/scripts/ask-gemini.sh" "$ARGUMENTS" +``` + +If the user supplied flags such as `--gemini-model` or `--gemini-timeout`, reconstruct the command so those flags remain separate shell arguments and the remaining free-form question is passed as one quoted final argument. + +Example: + +```bash +"${CLAUDE_PLUGIN_ROOT}/scripts/ask-gemini.sh" --gemini-model gemini-2.5-pro "What are the latest Rust async runtime benchmarks?" +``` + +Never run this unsafe form: + +```bash +"${CLAUDE_PLUGIN_ROOT}/scripts/ask-gemini.sh" $ARGUMENTS +``` + +because the shell will re-parse the question text and can fail before `ask-gemini.sh` starts. + +## Interpreting Output + +- The script outputs Gemini's response to **stdout** and status info to **stderr** +- Read the stdout output carefully and incorporate Gemini's response into your answer +- Gemini's responses are research-backed with web sources; relay source citations when available +- If the script exits with a non-zero code, report the error to the user + +## Error Handling + +| Exit Code | Meaning | +|-----------|---------| +| 0 | Success - Gemini response is in stdout | +| 1 | Validation error (missing gemini, empty question, invalid flags) | +| 124 | Timeout - suggest using `--gemini-timeout` with a larger value | +| Other | Gemini process error - report the exit code and any stderr output | + +## Notes + +- The response is saved to `.humanize/skill/<timestamp>/output.md` for reference +- Default model is `gemini-3.1-pro-preview` with a 3600-second timeout +- Gemini is always instructed to perform Google Search for up-to-date information From 883e3f5bb8106cea4153d9f5e469b2fa7a8d6849 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Sat, 28 Mar 2026 18:29:34 -0700 Subject: [PATCH 34/50] Revert version back to 1.16.0 to match dev branch --- .claude-plugin/marketplace.json | 2 +- .claude-plugin/plugin.json | 2 +- README.md | 2 +- 3 files changed, 3
insertions(+), 3 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index e4e1adf3..588e3b1d 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -8,7 +8,7 @@ "name": "humanize", "source": "./", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.17.0" + "version": "1.16.0" } ] } diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index bbab412d..0f9ab32b 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "humanize", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.17.0", + "version": "1.16.0", "author": { "name": "humania-org" }, diff --git a/README.md b/README.md index 0a93dd30..bd7f9145 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Humanize -**Current Version: 1.17.0** +**Current Version: 1.16.0** > Derived from the [GAAC (GitHub-as-a-Context)](https://github.com/SihaoLiu/gaac) project. From 916fc5086ead2c89808efa7c06d390773d7a213e Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Sat, 28 Mar 2026 18:32:10 -0700 Subject: [PATCH 35/50] Reject path traversal segments in methodology analysis fallback guards When realpath is unavailable, paths containing ".." could bypass the prefix-based allowlist check by matching the loop directory prefix as a raw string while resolving to a location outside it. Now reject any path with ".." segments in the fallback code path (fail closed). 
--- hooks/loop-read-validator.sh | 10 +++++++++- hooks/loop-write-validator.sh | 10 +++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index 9832b4e7..3188c1a4 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -97,8 +97,16 @@ if [[ -n "$_MA_CHECK_DIR" ]]; then fi _ma_real_loop=$(realpath "$_MA_CHECK_DIR" 2>/dev/null || echo "") # Fallback to raw paths when realpath is unavailable (older macOS/BSD) - # Ensure paths are absolute so prefix guards cannot be bypassed + # Ensure paths are absolute so prefix guards cannot be bypassed. + # Reject paths with ".." segments to prevent traversal bypasses + # when we cannot canonicalize (fail closed). if [[ -z "$_ma_real_path" ]]; then + if [[ "$FILE_PATH" == *".."* ]]; then + echo "# Read Blocked During Methodology Analysis + +Path contains traversal segments that cannot be resolved without realpath." >&2 + exit 2 + fi if [[ "$FILE_PATH" == /* ]]; then _ma_real_path="$FILE_PATH" else diff --git a/hooks/loop-write-validator.sh b/hooks/loop-write-validator.sh index 8abc2e8c..b7314ed9 100755 --- a/hooks/loop-write-validator.sh +++ b/hooks/loop-write-validator.sh @@ -121,8 +121,16 @@ if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.m fi _ma_real_loop=$(realpath "$_MA_LOOP_DIR" 2>/dev/null || echo "") # Fallback to raw paths when realpath is unavailable (older macOS/BSD) - # Ensure paths are absolute so prefix guards cannot be bypassed + # Ensure paths are absolute so prefix guards cannot be bypassed. + # Reject paths with ".." segments to prevent traversal bypasses + # when we cannot canonicalize (fail closed). if [[ -z "$_ma_real_path" ]]; then + if [[ "$FILE_PATH" == *".."* ]]; then + echo "# Write Blocked During Methodology Analysis + +Path contains traversal segments that cannot be resolved without realpath." 
>&2 + exit 2 + fi if [[ "$FILE_PATH" == /* ]]; then _ma_real_path="$FILE_PATH" else From 95a6b7b3ee48d5928758bf738e3e689f83e333d2 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Sat, 28 Mar 2026 18:43:04 -0700 Subject: [PATCH 36/50] Fix edit validator path fallback and reject whitespace-only completion markers Edit validator had the same relative-path and traversal fallback issue as the read/write validators. Applied the same normalization and ".." rejection pattern. Methodology completion checker now trims whitespace before emptiness validation, preventing whitespace-only done markers or report files from passing the content gate. --- hooks/lib/methodology-analysis.sh | 3 +++ hooks/loop-edit-validator.sh | 23 +++++++++++++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/hooks/lib/methodology-analysis.sh b/hooks/lib/methodology-analysis.sh index fb654b68..a95e81af 100644 --- a/hooks/lib/methodology-analysis.sh +++ b/hooks/lib/methodology-analysis.sh @@ -122,6 +122,8 @@ complete_methodology_analysis() { local done_content done_content=$(cat "$done_file" 2>/dev/null || echo "") + # Trim whitespace to reject whitespace-only markers + done_content="${done_content#"${done_content%%[![:space:]]*}"}" if [[ -z "$done_content" ]]; then return 1 fi @@ -134,6 +136,7 @@ complete_methodology_analysis() { fi local report_content report_content=$(cat "$report_file" 2>/dev/null || echo "") + report_content="${report_content#"${report_content%%[![:space:]]*}"}" if [[ -z "$report_content" ]]; then echo "Warning: methodology-analysis-report.md is empty, blocking completion" >&2 return 1 diff --git a/hooks/loop-edit-validator.sh b/hooks/loop-edit-validator.sh index 64e43795..32ba3a9d 100755 --- a/hooks/loop-edit-validator.sh +++ b/hooks/loop-edit-validator.sh @@ -104,8 +104,27 @@ if [[ -n "$_MA_LOOP_DIR" ]] && [[ -f "$_MA_LOOP_DIR/methodology-analysis-state.m fi _ma_real_loop=$(realpath "$_MA_LOOP_DIR" 2>/dev/null || echo "") # Fallback to raw paths when 
realpath is unavailable (older macOS/BSD) - [[ -z "$_ma_real_path" ]] && _ma_real_path="$FILE_PATH" - [[ -z "$_ma_real_loop" ]] && _ma_real_loop="$_MA_LOOP_DIR" + # Ensure paths are absolute and reject ".." to prevent traversal bypasses. + if [[ -z "$_ma_real_path" ]]; then + if [[ "$FILE_PATH" == *".."* ]]; then + echo "# Edit Blocked During Methodology Analysis + +Path contains traversal segments that cannot be resolved without realpath." >&2 + exit 2 + fi + if [[ "$FILE_PATH" == /* ]]; then + _ma_real_path="$FILE_PATH" + else + _ma_real_path="$PROJECT_ROOT/$FILE_PATH" + fi + fi + if [[ -z "$_ma_real_loop" ]]; then + if [[ "$_MA_LOOP_DIR" == /* ]]; then + _ma_real_loop="$_MA_LOOP_DIR" + else + _ma_real_loop="$PROJECT_ROOT/$_MA_LOOP_DIR" + fi + fi if [[ "$_ma_real_path" == "$_ma_real_loop/"* ]]; then _ma_basename=$(basename "$_ma_real_path") case "$_ma_basename" in From bd506f2233d1c535f6204fdbe0fe916334b057c6 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Sat, 28 Mar 2026 18:52:54 -0700 Subject: [PATCH 37/50] Reject ambiguous multi-keyword verdict lines and fix Gemini CLI install path Mainline verdict parser now rejects lines containing multiple verdict keywords (e.g. template placeholders listing all options) as unknown, preventing silent false-positive ADVANCED readings that reset drift counters. Fixed Gemini CLI install guidance to point to the correct package (@google/gemini-cli) and repository (google-gemini/gemini-cli). --- hooks/lib/loop-common.sh | 12 +++++++++++- scripts/ask-gemini.sh | 2 +- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index 480a7ab0..f349931a 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -588,7 +588,17 @@ extract_mainline_progress_verdict() { # Extract the verdict word using grep -oEi (portable) instead of sed /I (GNU-only). # The preceding grep -Ei already ensures the line contains one of the three verdicts. 
- verdict_value=$(printf '%s\n' "$verdict_line" | grep -oEi 'ADVANCED|STALLED|REGRESSED' | head -1) + # Reject lines with multiple verdict keywords (e.g. placeholder template formats) + # to avoid silently accepting an ambiguous verdict. + local _verdict_matches + _verdict_matches=$(printf '%s\n' "$verdict_line" | grep -oEi 'ADVANCED|STALLED|REGRESSED') + local _match_count + _match_count=$(printf '%s\n' "$_verdict_matches" | wc -l) + if [[ "$_match_count" -gt 1 ]]; then + echo "$MAINLINE_VERDICT_UNKNOWN" + return + fi + verdict_value=$(printf '%s\n' "$_verdict_matches" | head -1) normalize_mainline_progress_verdict "$verdict_value" } diff --git a/scripts/ask-gemini.sh b/scripts/ask-gemini.sh index 489bc4db..9f59b804 100755 --- a/scripts/ask-gemini.sh +++ b/scripts/ask-gemini.sh @@ -142,7 +142,7 @@ QUESTION="${QUESTION_PARTS[*]}" if ! command -v gemini &>/dev/null; then echo "Error: 'gemini' command is not installed or not in PATH" >&2 echo "" >&2 - echo "Please install Gemini CLI: npm install -g @anthropic-ai/gemini-cli or https://github.com/anthropics/gemini-cli" >&2 + echo "Please install Gemini CLI: npm install -g @google/gemini-cli or https://github.com/google-gemini/gemini-cli" >&2 echo "Then retry: /humanize:ask-gemini " >&2 exit 1 fi From 4561bc43ecd27304ebfdc50f18d8477af147cbc5 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Sat, 28 Mar 2026 19:23:46 -0700 Subject: [PATCH 38/50] Shell-quote hook command paths and reject ambiguous verdict lines Hook installer now shell-quotes command paths via shlex.quote() so runtime roots containing spaces do not split the command at execution. Mainline verdict parser rejects lines with multiple verdict keywords as unknown to prevent template placeholders from being parsed as a valid verdict. Corrected Gemini CLI install guidance to the actual package and repo. 
--- scripts/install-codex-hooks.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/scripts/install-codex-hooks.sh b/scripts/install-codex-hooks.sh index cd920690..c7907bf3 100755 --- a/scripts/install-codex-hooks.sh +++ b/scripts/install-codex-hooks.sh @@ -95,6 +95,7 @@ merge_hooks_json() { import json import pathlib import re +import shlex import sys hooks_file = pathlib.Path(sys.argv[1]) @@ -108,6 +109,14 @@ escaped_root = json.dumps(runtime_root)[1:-1] # strip outer quotes from dumps o template_text = template_text.replace("{{HUMANIZE_RUNTIME_ROOT}}", escaped_root) template = json.loads(template_text) +# Shell-quote command paths so spaces in runtime_root do not split the command +for group_list in template.get("hooks", {}).values(): + for group in group_list: + if isinstance(group, dict): + for hook in group.get("hooks", []): + if isinstance(hook, dict) and "command" in hook: + hook["command"] = shlex.quote(hook["command"]) + existing = {} if hooks_file.exists(): with hooks_file.open("r", encoding="utf-8") as fh: From 338b4dd9e12c1b2f1c12e6dbf107dc1c5922d2c0 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Sat, 28 Mar 2026 20:21:25 -0700 Subject: [PATCH 39/50] Remove PR loop feature entirely The PR loop workflow is superseded by the /loop command combined with GitHub PR review polling. This removes all PR loop implementation, tests, documentation, templates, and supporting scripts. Deleted PR-loop-only files: commands (start/cancel-pr-loop), hooks (pr-loop-stop-hook), scripts (setup/cancel-pr-loop, check-bot-reactions, check-pr-reviewer-status, fetch-pr-comments, poll-pr-reviews), prompt templates (pr-loop/), test fixtures (setup-fixture-mock-gh), and all PR loop test files. Cleaned PR loop references from mixed files: hook validators, loop library, template loader, bash validator, humanize.sh monitor, monitor-common.sh, codex hooks config, install script, RLCR setup mutual exclusion, SKILL.md, usage docs, and test harnesses. 
The install script retains pr-loop-stop-hook.sh in its managed pattern so upgrading users get stale hooks cleaned from their hooks.json. --- .gitignore | 1 + commands/cancel-pr-loop.md | 25 - commands/start-pr-loop.md | 61 - config/codex-hooks.json | 8 +- docs/install-for-claude.md | 1 - docs/install-for-codex.md | 2 +- docs/usage.md | 42 +- hooks/hooks.json | 7 +- hooks/lib/loop-common.sh | 364 +--- hooks/lib/template-loader.sh | 2 +- hooks/loop-bash-validator.sh | 62 +- hooks/loop-edit-validator.sh | 30 +- hooks/loop-read-validator.sh | 5 +- hooks/loop-write-validator.sh | 30 +- hooks/pr-loop-stop-hook.sh | 1654 -------------- prompt-template/block/force-push-detected.md | 17 - prompt-template/block/no-trigger-comment.md | 17 - prompt-template/block/pr-loop-prompt-write.md | 9 - .../block/pr-loop-state-modification.md | 12 - .../pr-loop/codex-goal-tracker-update.md | 64 - .../critical-requirements-has-comments.md | 24 - .../critical-requirements-no-comments.md | 21 - .../pr-loop/goal-tracker-initial.md | 33 - prompt-template/pr-loop/round-0-header.md | 15 - .../pr-loop/round-0-task-has-comments.md | 43 - .../pr-loop/round-0-task-no-comments.md | 30 - scripts/cancel-pr-loop.sh | 132 -- scripts/check-bot-reactions.sh | 308 --- scripts/check-pr-reviewer-status.sh | 275 --- scripts/fetch-pr-comments.sh | 452 ---- scripts/humanize.sh | 443 +--- scripts/lib/monitor-common.sh | 165 +- scripts/poll-pr-reviews.sh | 328 --- scripts/setup-pr-loop.sh | 945 -------- scripts/setup-rlcr-loop.sh | 15 +- skills/humanize/SKILL.md | 47 +- tests/mocks/gh | 2 +- .../test-concurrent-state-robustness.sh | 55 - .../robustness/test-hook-system-robustness.sh | 17 - tests/robustness/test-pr-loop-api-fetch.sh | 19 - tests/robustness/test-pr-loop-api-poll.sh | 18 - .../robustness/test-pr-loop-api-robustness.sh | 866 -------- .../test-setup-scripts-robustness.sh | 148 +- tests/run-all-tests.sh | 6 - tests/setup-fixture-mock-gh.sh | 101 - tests/setup-monitor-test-env.sh | 67 +- 
tests/test-codex-hook-install.sh | 16 +- tests/test-disable-nested-codex-hooks.sh | 8 - tests/test-monitor-e2e-deletion.sh | 1 - tests/test-monitor-e2e-real.sh | 314 --- tests/test-monitor-e2e-sigint.sh | 1 - tests/test-pr-loop-1-scripts.sh | 24 - tests/test-pr-loop-2-hooks.sh | 24 - tests/test-pr-loop-3-stophook.sh | 31 - tests/test-pr-loop-hooks.sh | 1623 -------------- tests/test-pr-loop-lib.sh | 145 -- tests/test-pr-loop-scripts.sh | 410 ---- tests/test-pr-loop-stophook.sh | 1782 --------------- tests/test-pr-loop-system.sh | 1904 ----------------- tests/test-pr-loop.sh | 53 - tests/test-unified-codex-config.sh | 191 -- 61 files changed, 32 insertions(+), 13483 deletions(-) delete mode 100644 commands/cancel-pr-loop.md delete mode 100644 commands/start-pr-loop.md delete mode 100755 hooks/pr-loop-stop-hook.sh delete mode 100644 prompt-template/block/force-push-detected.md delete mode 100644 prompt-template/block/no-trigger-comment.md delete mode 100644 prompt-template/block/pr-loop-prompt-write.md delete mode 100644 prompt-template/block/pr-loop-state-modification.md delete mode 100644 prompt-template/pr-loop/codex-goal-tracker-update.md delete mode 100644 prompt-template/pr-loop/critical-requirements-has-comments.md delete mode 100644 prompt-template/pr-loop/critical-requirements-no-comments.md delete mode 100644 prompt-template/pr-loop/goal-tracker-initial.md delete mode 100644 prompt-template/pr-loop/round-0-header.md delete mode 100644 prompt-template/pr-loop/round-0-task-has-comments.md delete mode 100644 prompt-template/pr-loop/round-0-task-no-comments.md delete mode 100755 scripts/cancel-pr-loop.sh delete mode 100755 scripts/check-bot-reactions.sh delete mode 100755 scripts/check-pr-reviewer-status.sh delete mode 100755 scripts/fetch-pr-comments.sh delete mode 100755 scripts/poll-pr-reviews.sh delete mode 100755 scripts/setup-pr-loop.sh delete mode 100755 tests/robustness/test-pr-loop-api-fetch.sh delete mode 100755 
tests/robustness/test-pr-loop-api-poll.sh delete mode 100755 tests/robustness/test-pr-loop-api-robustness.sh delete mode 100755 tests/setup-fixture-mock-gh.sh delete mode 100755 tests/test-pr-loop-1-scripts.sh delete mode 100755 tests/test-pr-loop-2-hooks.sh delete mode 100755 tests/test-pr-loop-3-stophook.sh delete mode 100644 tests/test-pr-loop-hooks.sh delete mode 100644 tests/test-pr-loop-lib.sh delete mode 100644 tests/test-pr-loop-scripts.sh delete mode 100644 tests/test-pr-loop-stophook.sh delete mode 100755 tests/test-pr-loop-system.sh delete mode 100755 tests/test-pr-loop.sh diff --git a/.gitignore b/.gitignore index 2308545e..e5bcf34c 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ temp # Local Claude client settings /.claude/settings.json +/.claude/scheduled_tasks.lock # Humanize state directories (runtime-generated, project-local) .humanize/ diff --git a/commands/cancel-pr-loop.md b/commands/cancel-pr-loop.md deleted file mode 100644 index 9f7b5bd4..00000000 --- a/commands/cancel-pr-loop.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -description: "Cancel active PR loop" -allowed-tools: ["Bash(${CLAUDE_PLUGIN_ROOT}/scripts/cancel-pr-loop.sh)", "Bash(${CLAUDE_PLUGIN_ROOT}/scripts/cancel-pr-loop.sh --force)"] -disable-model-invocation: true ---- - -# Cancel PR Loop - -To cancel the active PR loop: - -1. Run the cancel script: - -```bash -"${CLAUDE_PLUGIN_ROOT}/scripts/cancel-pr-loop.sh" -``` - -2. Check the first line of output: - - **NO_LOOP** or **NO_ACTIVE_LOOP**: Say "No active PR loop found." - - **CANCELLED**: Report the cancellation message from the output - -**Key principle**: The script handles all cancellation logic. A PR loop is active if `state.md` exists in the newest PR loop directory (.humanize/pr-loop/). - -The loop directory with comments, resolution summaries, and state information will be preserved for reference. - -**Note**: This command only affects PR loops. RLCR loops (.humanize/rlcr/) are not affected. 
Use `/humanize:cancel-rlcr-loop` to cancel RLCR loops. diff --git a/commands/start-pr-loop.md b/commands/start-pr-loop.md deleted file mode 100644 index aebdf207..00000000 --- a/commands/start-pr-loop.md +++ /dev/null @@ -1,61 +0,0 @@ ---- -description: "Start PR review loop with bot monitoring" -argument-hint: "--claude|--codex [--max N] [--codex-model MODEL:EFFORT] [--codex-timeout SECONDS]" -allowed-tools: ["Bash(${CLAUDE_PLUGIN_ROOT}/scripts/setup-pr-loop.sh:*)"] ---- - -# Start PR Loop - -Execute the setup script to initialize the PR review loop: - -```bash -"${CLAUDE_PLUGIN_ROOT}/scripts/setup-pr-loop.sh" $ARGUMENTS -``` - -This command starts a PR review loop that: - -1. Detects the PR associated with the current branch -2. Fetches review comments from the specified bot(s) -3. You analyze and fix issues identified by the bot(s) -4. Push changes and trigger re-review by commenting @bot -5. Stop Hook polls for new bot reviews (every 30s, 15min timeout) -6. Local Codex validates if remote concerns are valid or approved - -## Bot Flags (Required) - -At least one bot flag is required: -- `--claude` - Monitor reviews from claude[bot] (trigger with @claude) -- `--codex` - Monitor reviews from chatgpt-codex-connector[bot] (trigger with @codex) - -## Comment Prioritization - -Comments are processed in this order: -1. **Human comments first** - They always take precedence over bots -2. **Bot comments** - Newest comments analyzed first - -## Workflow - -1. Analyze PR comments and fix issues -2. Commit and push changes -3. Comment on PR to trigger re-review using the bot mentions shown in the prompt -4. Write resolution summary to the specified file -5. Try to exit - Stop Hook intercepts and polls for bot reviews -6. If issues remain, receive feedback and continue -7. If all bots approve, loop ends - -**Note:** The setup script provides the exact mention string to use (e.g., `@claude @codex`). 
-Use whatever bot mentions are shown in the initial prompt - they match the flags you provided. - -## Important Rules - -1. **Write summaries**: Always write your resolution summary to the specified file before exiting -2. **Push changes**: Your fixes must be pushed for bots to review them -3. **Tag bots**: Use the correct @mention format to trigger bot reviews -4. **No cheating**: Do not try to exit the loop by editing state files or running cancel commands -5. **Trust the process**: The Stop Hook manages polling and Codex validation - -## Stopping the Loop - -- Reach the maximum iteration count -- All monitored bots approve the changes -- User runs `/humanize:cancel-pr-loop` diff --git a/config/codex-hooks.json b/config/codex-hooks.json index 7a04402a..98d31c33 100644 --- a/config/codex-hooks.json +++ b/config/codex-hooks.json @@ -1,5 +1,5 @@ { - "description": "Humanize Codex Hooks - Native Stop hooks for RLCR and PR loops", + "description": "Humanize Codex Hooks - Native Stop hooks for RLCR loops", "hooks": { "Stop": [ { @@ -9,12 +9,6 @@ "command": "{{HUMANIZE_RUNTIME_ROOT}}/hooks/loop-codex-stop-hook.sh", "timeout": 7200, "statusMessage": "humanize RLCR stop hook" - }, - { - "type": "command", - "command": "{{HUMANIZE_RUNTIME_ROOT}}/hooks/pr-loop-stop-hook.sh", - "timeout": 7200, - "statusMessage": "humanize PR stop hook" } ] } diff --git a/docs/install-for-claude.md b/docs/install-for-claude.md index 78481c99..5af9b4ca 100644 --- a/docs/install-for-claude.md +++ b/docs/install-for-claude.md @@ -68,7 +68,6 @@ Then use: ```bash humanize monitor rlcr # Monitor RLCR loop -humanize monitor pr # Monitor PR loop ``` ## Other Install Guides diff --git a/docs/install-for-codex.md b/docs/install-for-codex.md index 8698d001..8a9aecd0 100644 --- a/docs/install-for-codex.md +++ b/docs/install-for-codex.md @@ -76,7 +76,7 @@ sed -n '1,220p' "${CODEX_HOME:-$HOME/.codex}/hooks.json" Expected: - `codex_hooks` is `true` -- `hooks.json` contains `loop-codex-stop-hook.sh` and 
`pr-loop-stop-hook.sh` +- `hooks.json` contains `loop-codex-stop-hook.sh` - `${XDG_CONFIG_HOME:-~/.config}/humanize/config.json` contains `bitlesson_model` set to a Codex/OpenAI model such as `gpt-5.4` - for `--target codex`, `${XDG_CONFIG_HOME:-~/.config}/humanize/config.json` also contains `provider_mode: "codex-only"` diff --git a/docs/usage.md b/docs/usage.md index b5625bec..b7e9738a 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -63,8 +63,6 @@ The quiz is advisory, not a gate. You always have the option to proceed. But tha | `/cancel-rlcr-loop` | Cancel active loop | | `/gen-plan --input --output ` | Generate structured plan from draft | | `/refine-plan --input ` | Refine an annotated plan and generate a QA ledger | -| `/start-pr-loop --claude\|--codex` | Start PR review loop with bot monitoring | -| `/cancel-pr-loop` | Cancel active PR loop | | `/ask-codex [question]` | One-shot consultation with Codex | ## Command Reference @@ -209,39 +207,6 @@ If `--alt-language` is set to a supported non-English language, the command also translated plan and QA variants by inserting `_` before the file extension, such as `plan_zh.md` and `plan-qa_zh.md`. -### start-pr-loop - -``` -/humanize:start-pr-loop --claude|--codex [OPTIONS] - -BOT FLAGS (at least one required): - --claude Monitor reviews from claude[bot] (trigger with @claude) - --codex Monitor reviews from chatgpt-codex-connector[bot] (trigger with @codex) - -OPTIONS: - --max Maximum iterations before auto-stop (default: 42) - --codex-model - Codex model and reasoning effort (default from config, effort: medium) - --codex-timeout - Timeout for each Codex review in seconds (default: 900) - -h, --help Show help message -``` - -The PR loop automates the process of handling GitHub PR reviews from remote bots: - -1. Detects the PR associated with the current branch -2. Fetches review comments from the specified bot(s) -3. Claude analyzes and fixes issues identified by the bot(s) -4. 
Pushes changes and triggers re-review by commenting @bot -5. Stop Hook polls for new bot reviews (every 30s, 15min timeout per bot) -6. Local Codex validates if remote concerns are approved or have issues -7. Loop continues until all bots approve or max iterations reached - -**Prerequisites:** -- GitHub CLI (`gh`) must be installed and authenticated -- Codex CLI must be installed -- Current branch must have an associated open PR - ### ask-codex ``` @@ -284,7 +249,7 @@ Current built-in keys: ### Codex Model Configuration -All Codex-using features (RLCR loop, PR loop, ask-codex) share the same model configuration: +All Codex-using features (RLCR loop, ask-codex) share the same model configuration: | Key | Default | Description | |-----|---------|-------------| @@ -307,7 +272,7 @@ are unset, so BitLesson selection stays on the Codex/OpenAI path without probing Codex model is resolved with this precedence: 1. CLI `--codex-model` flag (highest priority) -2. Feature-specific defaults (e.g., PR loop defaults to `medium` effort) +2. Feature-specific defaults 3. Config-backed defaults from the 4-layer hierarchy above 4. Hardcoded fallback (`gpt-5.4:high`) @@ -327,8 +292,6 @@ source ~/.claude/plugins/cache/humania/humanize//scripts/humaniz # Monitor RLCR loop progress humanize monitor rlcr -# Monitor PR loop progress -humanize monitor pr ``` Progress data is stored in `.humanize/rlcr//` for each loop session. @@ -336,7 +299,6 @@ Progress data is stored in `.humanize/rlcr//` for each loop session. 
## Cancellation - **RLCR loop**: `/humanize:cancel-rlcr-loop` -- **PR loop**: `/humanize:cancel-pr-loop` ## Environment Variables diff --git a/hooks/hooks.json b/hooks/hooks.json index e25ebe30..dcb99266 100644 --- a/hooks/hooks.json +++ b/hooks/hooks.json @@ -1,5 +1,5 @@ { - "description": "Humanize Plugin Hooks - Validation hooks and Stop hooks for /start-rlcr-loop and /start-pr-loop", + "description": "Humanize Plugin Hooks - Validation hooks and Stop hooks for /start-rlcr-loop", "hooks": { "UserPromptSubmit": [ { @@ -67,11 +67,6 @@ "type": "command", "command": "${CLAUDE_PLUGIN_ROOT}/hooks/loop-codex-stop-hook.sh", "timeout": 7200 - }, - { - "type": "command", - "command": "${CLAUDE_PLUGIN_ROOT}/hooks/pr-loop-stop-hook.sh", - "timeout": 7200 } ] } diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index f349931a..2425449b 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -193,8 +193,8 @@ DEFAULT_BITLESSON_MODEL="$(get_config_value "$_LOOP_COMMON_CONFIG" "bitlesson_mo DEFAULT_BITLESSON_MODEL="${DEFAULT_BITLESSON_MODEL:-haiku}" # Load codex model/effort from merged config so .humanize/config.json can set persistent -# defaults for all Codex-using features (RLCR, PR loop, ask-codex). -# Precedence: pre-set by caller (e.g. PR loop) > config value > hardcoded fallback (gpt-5.4/high) +# defaults for all Codex-using features (RLCR, ask-codex). +# Precedence: pre-set by caller > config value > hardcoded fallback (gpt-5.4/high) _cfg_codex_model="$(get_config_value "$_LOOP_COMMON_CONFIG" "codex_model" 2>/dev/null || true)" if [[ -n "$_cfg_codex_model" && ! 
"$_cfg_codex_model" =~ ^[a-zA-Z0-9._-]+$ ]]; then echo "Warning: Invalid codex_model in merged config: $_cfg_codex_model" >&2 @@ -1159,196 +1159,6 @@ is_in_humanize_loop_dir() { echo "$path" | grep -q '\.humanize/rlcr/' } -# ======================================== -# PR Loop Bot Name Mapping -# ======================================== - -# Map bot names to GitHub comment author names: -# - claude -> claude[bot] -# - codex -> chatgpt-codex-connector[bot] -# -# Usage: author=$(map_bot_to_author "codex") -map_bot_to_author() { - local bot="$1" - case "$bot" in - codex) echo "chatgpt-codex-connector[bot]" ;; - *) echo "${bot}[bot]" ;; - esac -} - -# Reverse mapping: author name to bot name -# - chatgpt-codex-connector[bot] -> codex -# - chatgpt-codex-connector -> codex -# - claude[bot] -> claude -# -# Usage: bot=$(map_author_to_bot "chatgpt-codex-connector[bot]") -map_author_to_bot() { - local author="$1" - # Remove [bot] suffix if present - local author_clean="${author%\[bot\]}" - case "$author_clean" in - chatgpt-codex-connector) echo "codex" ;; - *) echo "$author_clean" ;; - esac -} - -# Build a YAML list string from an array of values -# Returns multiline string with " - value" for each item -# -# Usage: yaml_list=$(build_yaml_list "${array[@]}") -build_yaml_list() { - local result="" - for item in "$@"; do - result="${result} - - ${item}" - done - echo "$result" -} - -# Build a mention string from bot names (e.g., "@claude @codex") -# -# Usage: mentions=$(build_bot_mention_string "${bots[@]}") -build_bot_mention_string() { - local result="" - for bot in "$@"; do - if [[ -n "$result" ]]; then - result="${result} @${bot}" - else - result="@${bot}" - fi - done - echo "$result" -} - -# ======================================== -# PR Loop Directory Functions -# ======================================== - -# Check if a path is inside .humanize/pr-loop directory -is_in_pr_loop_dir() { - local path="$1" - echo "$path" | grep -q '\.humanize/pr-loop/' -} - -# Check if a path 
is inside any loop directory (RLCR or PR loop) -is_in_any_loop_dir() { - local path="$1" - is_in_humanize_loop_dir "$path" || is_in_pr_loop_dir "$path" -} - -# Find the most recent active PR loop directory with state.md -# Similar to find_active_loop but for PR loops -# Outputs the directory path to stdout, or empty string if none found -find_active_pr_loop() { - local loop_base_dir="$1" - - if [[ ! -d "$loop_base_dir" ]]; then - echo "" - return - fi - - local newest_dir - newest_dir=$(ls -1d "$loop_base_dir"/*/ 2>/dev/null | sort -r | head -1) - - if [[ -n "$newest_dir" && -f "${newest_dir}state.md" ]]; then - echo "${newest_dir%/}" - else - echo "" - fi -} - -# Check if a path (lowercase) matches a PR loop round file pattern -# Types: pr-comment, pr-resolve, pr-check, pr-feedback, prompt, codex-prompt -is_pr_round_file_type() { - local path_lower="$1" - local file_type="$2" - - echo "$path_lower" | grep -qE "round-[0-9]+-${file_type}\\.md\$" -} - -# Check if a path matches any PR loop read-only file type -# These files are generated by the system and should not be modified by Claude -is_pr_loop_readonly_file() { - local path_lower="$1" - - is_pr_round_file_type "$path_lower" "pr-comment" || \ - is_pr_round_file_type "$path_lower" "prompt" || \ - is_pr_round_file_type "$path_lower" "codex-prompt" || \ - is_pr_round_file_type "$path_lower" "pr-check" || \ - is_pr_round_file_type "$path_lower" "pr-feedback" -} - -# Validate PR loop pr-resolve file round number -# Returns 0 if valid (correct round or no active loop), exits with error message if wrong round -# Usage: validate_pr_resolve_round "$file_path_lower" "$action_verb" -# Arguments: -# $1 - File path (lowercase) -# $2 - Action verb for error message ("edit" or "write to") -validate_pr_resolve_round() { - local file_path_lower="$1" - local action_verb="$2" - - local project_root="${CLAUDE_PROJECT_DIR:-$(pwd)}" - local pr_loop_base_dir="$project_root/.humanize/pr-loop" - local active_pr_loop_dir - 
active_pr_loop_dir=$(find_active_pr_loop "$pr_loop_base_dir") - - if [[ -z "$active_pr_loop_dir" ]]; then - return 0 - fi - - local pr_state_file="$active_pr_loop_dir/state.md" - if [[ ! -f "$pr_state_file" ]]; then - return 0 - fi - - local pr_current_round - pr_current_round=$(sed -n '/^---$/,/^---$/{ /^current_round:/{ s/current_round: *//; p; } }' "$pr_state_file" | tr -d ' ') - pr_current_round="${pr_current_round:-0}" - - local claude_pr_round - claude_pr_round=$(echo "$file_path_lower" | sed -n 's|.*round-\([0-9]*\)-pr-resolve\.md$|\1|p') - - if [[ -n "$claude_pr_round" ]] && [[ "$claude_pr_round" != "$pr_current_round" ]]; then - local correct_path="$active_pr_loop_dir/round-${pr_current_round}-pr-resolve.md" - # NOTE: Avoid ${var^} (Bash 4+ only) for macOS Bash 3.2 compatibility - # Use tr for portable capitalization of first letter - local action_verb_cap - action_verb_cap=$(echo "$action_verb" | sed 's/^\(.\)/\U\1/') - # Fallback for systems where \U doesn't work (use awk instead) - if [[ "$action_verb_cap" == "$action_verb" ]] || [[ "$action_verb_cap" == *'U'* ]]; then - action_verb_cap=$(echo "$action_verb" | awk '{print toupper(substr($0,1,1)) tolower(substr($0,2))}') - fi - echo "# Wrong Round Number" >&2 - echo "" >&2 - echo "You tried to $action_verb round-${claude_pr_round}-pr-resolve.md but current PR loop round is **${pr_current_round}**." >&2 - echo "" >&2 - echo "$action_verb_cap: \`$correct_path\`" >&2 - return 2 - fi - - return 0 -} - -# Standard message for blocking PR loop state file modifications -pr_loop_state_blocked_message() { - local fallback="# PR Loop State File Modification Blocked - -You cannot modify state.md in .humanize/pr-loop/. This file is managed by the PR loop system." 
- - load_and_render_safe "$TEMPLATE_DIR" "block/pr-loop-state-modification.md" "$fallback" -} - -# Standard message for blocking PR loop prompt/comment file writes -pr_loop_prompt_blocked_message() { - local fallback="# PR Loop File Write Blocked - -You cannot write to round-*-pr-comment.md or round-*-prompt.md files in .humanize/pr-loop/. -These files are generated by the PR loop system and are read-only." - - load_and_render_safe "$TEMPLATE_DIR" "block/pr-loop-prompt-write.md" "$fallback" -} - # Check if a git add command would add .humanize files to version control # Usage: git_adds_humanize "$command_lower" # Returns 0 if the command would add .humanize files, 1 otherwise @@ -1608,173 +1418,3 @@ end_loop() { return 1 fi } - -# ======================================== -# PR Loop Goal Tracker Functions -# ======================================== - -# Update the PR goal tracker after Codex analysis -# Usage: update_pr_goal_tracker "$GOAL_TRACKER_FILE" "$ROUND" "$BOT_RESULTS_JSON" -# -# Arguments: -# $1 - Path to goal-tracker.md -# $2 - Current round number -# $3 - JSON containing per-bot analysis results (optional) -# Format: {"bot": "name", "issues": N, "resolved": N} -# -# Updates: -# - Issue Summary table with new row -# - Total Statistics section -# - Issue Log with round entry -# -# Note: This is a helper function for the stop hook. The primary update -# mechanism is through Codex prompt instructions, but this ensures -# consistency when Codex doesn't update correctly. -update_pr_goal_tracker() { - local tracker_file="$1" - local round="$2" - local bot_results="${3:-}" - - if [[ ! 
-f "$tracker_file" ]]; then - echo "Warning: Goal tracker not found: $tracker_file" >&2 - return 1 - fi - - # Extract reviewer early for idempotency check (need to check round+reviewer combo) - local reviewer="Codex" - if [[ -n "$bot_results" && "$bot_results" != "null" ]]; then - reviewer=$(echo "$bot_results" | jq -r '.bot // "Codex"' 2>/dev/null || echo "Codex") - fi - - # IDEMPOTENCY CHECK: Check for BOTH round AND reviewer to support multi-bot rounds - # This allows multiple bots to add their own rows for the same round - local has_summary_row=false - local has_log_entry=false - - # Check if this specific round+reviewer combo already exists in Issue Summary - # Table format: | Round | Reviewer | Issues Found | Issues Resolved | Status | - if grep -qE "^\|[[:space:]]*${round}[[:space:]]*\|[[:space:]]*${reviewer}[[:space:]]*\|" "$tracker_file" 2>/dev/null; then - has_summary_row=true - fi - - # Check if this specific round+reviewer combo already exists in Issue Log - # Log format: "### Round N" followed by "Reviewer: ..." - if awk -v round="$round" -v reviewer="$reviewer" ' - /^### Round / { current_round = $3 } - current_round == round && $1 == reviewer":" { found = 1; exit } - END { exit !found } - ' "$tracker_file" 2>/dev/null; then - has_log_entry=true - fi - - if [[ "$has_summary_row" == "true" && "$has_log_entry" == "true" ]]; then - echo "Goal tracker: Round $round/$reviewer already has both Issue Summary and Issue Log entries, skipping update" >&2 - return 0 - fi - - # Track what we need to add (for partial updates) - local need_summary_row=true - local need_log_entry=true - [[ "$has_summary_row" == "true" ]] && need_summary_row=false - [[ "$has_log_entry" == "true" ]] && need_log_entry=false - - if [[ "$has_summary_row" == "true" || "$has_log_entry" == "true" ]]; then - echo "Goal tracker: Round $round/$reviewer has partial update (summary=$has_summary_row, log=$has_log_entry), completing..." 
>&2 - fi - - # Extract current totals - local current_found - current_found=$(grep -E "^- Total Issues Found:" "$tracker_file" | sed 's/.*: //' | tr -d ' ') - current_found=${current_found:-0} - - local current_resolved - current_resolved=$(grep -E "^- Total Issues Resolved:" "$tracker_file" | sed 's/.*: //' | tr -d ' ') - current_resolved=${current_resolved:-0} - - # Parse bot results if provided (reviewer already extracted above for idempotency check) - local new_issues=0 - local new_resolved=0 - - if [[ -n "$bot_results" && "$bot_results" != "null" ]]; then - new_issues=$(echo "$bot_results" | jq -r '.issues // 0' 2>/dev/null || echo "0") - new_resolved=$(echo "$bot_results" | jq -r '.resolved // 0' 2>/dev/null || echo "0") - fi - - # Calculate new totals - local total_found=$((current_found + new_issues)) - local total_resolved=$((current_resolved + new_resolved)) - local remaining=$((total_found - total_resolved)) - - # Determine status for this round - local status="In Progress" - if [[ $new_issues -eq 0 && $new_resolved -eq 0 ]]; then - status="Approved" - elif [[ $new_issues -gt 0 ]]; then - status="Issues Found" - elif [[ $new_resolved -gt 0 ]]; then - status="Resolved" - fi - - # Create temp file for updates - local temp_file="${tracker_file}.update.$$" - - # Step 1: Update Total Statistics (only if we're adding to totals) - # Only update totals if we're adding a new summary row (to avoid double-counting) - if [[ "$need_summary_row" == "true" ]]; then - sed -e "s/^- Total Issues Found:.*/- Total Issues Found: $total_found/" \ - -e "s/^- Total Issues Resolved:.*/- Total Issues Resolved: $total_resolved/" \ - -e "s/^- Remaining:.*/- Remaining: $remaining/" \ - "$tracker_file" > "$temp_file" - else - cp "$tracker_file" "$temp_file" - fi - - # Step 2: Add row to Issue Summary table (only if needed) - if [[ "$need_summary_row" == "true" ]]; then - # Insert row INSIDE the table (after last table row, before blank line) - local new_row="| $round | $reviewer | 
$new_issues | $new_resolved | $status |" - - # Use awk to find the last row of the Issue Summary table and insert after it - awk -v row="$new_row" ' - BEGIN { in_table = 0; last_row_printed = 0 } - /^## Issue Summary/ { in_table = 1 } - /^## Total Statistics/ { in_table = 0 } - { - # If we hit Total Statistics and havent printed the new row yet, print it first - if (/^## Total Statistics/ && !last_row_printed) { - print row - print "" - last_row_printed = 1 - } - # If in table and this is a table row (starts with |), store it - if (in_table && /^\|/) { - last_table_line = NR - } - # If in table and this is a blank line after table rows, insert new row - if (in_table && /^[[:space:]]*$/ && last_table_line > 0 && !last_row_printed) { - print row - last_row_printed = 1 - } - print - } - ' "$temp_file" > "${temp_file}.2" - mv "${temp_file}.2" "$temp_file" - fi - - # Step 3: Add Issue Log entry for this round (only if needed) - if [[ "$need_log_entry" == "true" ]]; then - local timestamp - timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ") - local log_entry="### Round $round -$reviewer: Found $new_issues issues, Resolved $new_resolved -Updated: $timestamp -" - # Append to Issue Log section - echo "" >> "$temp_file" - echo "$log_entry" >> "$temp_file" - fi - - mv "$temp_file" "$tracker_file" - echo "Goal tracker updated: Round $round, Reviewer=$reviewer, Found=$new_issues, Resolved=$new_resolved" >&2 - return 0 -} diff --git a/hooks/lib/template-loader.sh b/hooks/lib/template-loader.sh index 7ad32657..13d29f6e 100644 --- a/hooks/lib/template-loader.sh +++ b/hooks/lib/template-loader.sh @@ -221,7 +221,7 @@ validate_template_dir() { return 1 fi - local required_subdirs=("block" "codex" "claude" "plan" "pr-loop") + local required_subdirs=("block" "codex" "claude" "plan") local missing=() local subdir for subdir in "${required_subdirs[@]}"; do diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index d3cf6434..547210fc 100755 --- a/hooks/loop-bash-validator.sh 
+++ b/hooks/loop-bash-validator.sh @@ -1,14 +1,12 @@ #!/usr/bin/env bash # -# PreToolUse Hook: Validate Bash commands for RLCR loop and PR loop +# PreToolUse Hook: Validate Bash commands for RLCR loop # # Blocks attempts to bypass Write/Edit hooks using shell commands: # - cat/echo/printf > file.md (redirection) # - tee file.md # - sed -i file.md (in-place edit) # - goal-tracker.md modifications via Bash -# - PR loop state.md modifications -# - PR loop read-only file modifications (pr-comment, prompt, codex-prompt, etc.) # set -euo pipefail @@ -60,10 +58,6 @@ HOOK_SESSION_ID=$(extract_session_id "$HOOK_INPUT") LOOP_BASE_DIR="$PROJECT_ROOT/.humanize/rlcr" ACTIVE_LOOP_DIR=$(find_active_loop "$LOOP_BASE_DIR" "$HOOK_SESSION_ID") -# Check for active PR loop -PR_LOOP_BASE_DIR="$PROJECT_ROOT/.humanize/pr-loop" -ACTIVE_PR_LOOP_DIR=$(find_active_pr_loop "$PR_LOOP_BASE_DIR") - # ======================================== # Methodology Analysis Phase Bash Restriction # ======================================== @@ -158,8 +152,8 @@ File redirection is not allowed during the methodology analysis phase." >&2 fi fi -# If no active loop of either type, allow all commands -if [[ -z "$ACTIVE_LOOP_DIR" ]] && [[ -z "$ACTIVE_PR_LOOP_DIR" ]]; then +# If no active RLCR loop, allow all commands +if [[ -z "$ACTIVE_LOOP_DIR" ]]; then exit 0 fi @@ -169,7 +163,7 @@ fi # Prevents Claude from manually running stop hook or stop gate scripts. # These scripts should only be invoked by the hooks system, not via Bash. 
-BLOCKED_HOOK_SCRIPTS="(loop-codex-stop-hook\.sh|pr-loop-stop-hook\.sh|rlcr-stop-gate\.sh)" +BLOCKED_HOOK_SCRIPTS="(loop-codex-stop-hook\.sh|rlcr-stop-gate\.sh)" HOOK_ASSIGNMENT_PREFIX="[[:alpha:]_][[:alnum:]_]*=[^[:space:];&|]+" HOOK_COMMAND_PREFIX="command([[:space:]]+(-[^[:space:];&|]+|--))*" HOOK_ENV_PREFIX="env([[:space:]]+(-[^[:space:];&|]+|--|${HOOK_ASSIGNMENT_PREFIX}))*" @@ -558,52 +552,4 @@ fi fi # End of RLCR-specific checks -# ======================================== -# PR Loop File Protection -# ======================================== -# Block modifications to PR loop state and read-only files -# Note: ACTIVE_PR_LOOP_DIR was already set at the top of the script - -if [[ -n "$ACTIVE_PR_LOOP_DIR" ]]; then - # Block PR loop state.md modifications - # Check both full path pattern AND bare filename to catch relative path bypass - # (e.g., cd .humanize/pr-loop/timestamp && sed -i state.md) - if command_modifies_file "$COMMAND_LOWER" "\.humanize/pr-loop(/[^/]+)?/state\.md"; then - pr_loop_state_blocked_message >&2 - exit 2 - fi - # Bare filename check for state.md (catches relative path usage) - if command_modifies_file "$COMMAND_LOWER" "state\.md"; then - pr_loop_state_blocked_message >&2 - exit 2 - fi - - # Block PR loop read-only files: - # - round-N-pr-comment.md (fetched comments) - # - round-N-prompt.md (prompts from system) - # - round-N-codex-prompt.md (Codex prompts) - # - round-N-pr-check.md (Codex output) - # - round-N-pr-feedback.md (feedback for next round) - PR_LOOP_READONLY_PATTERNS=( - "round-[0-9]+-pr-comment\.md" - "round-[0-9]+-prompt\.md" - "round-[0-9]+-codex-prompt\.md" - "round-[0-9]+-pr-check\.md" - "round-[0-9]+-pr-feedback\.md" - ) - - for pattern in "${PR_LOOP_READONLY_PATTERNS[@]}"; do - # Check both full path pattern AND bare filename to catch relative path bypass - if command_modifies_file "$COMMAND_LOWER" "\.humanize/pr-loop(/[^/]+)?/${pattern}"; then - pr_loop_prompt_blocked_message >&2 - exit 2 - fi - # Bare filename check 
(catches relative path usage from within loop dir) - if command_modifies_file "$COMMAND_LOWER" "${pattern}"; then - pr_loop_prompt_blocked_message >&2 - exit 2 - fi - done -fi - exit 0 diff --git a/hooks/loop-edit-validator.sh b/hooks/loop-edit-validator.sh index 32ba3a9d..17fae65c 100755 --- a/hooks/loop-edit-validator.sh +++ b/hooks/loop-edit-validator.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# PreToolUse Hook: Validate Edit paths for RLCR loop and PR loop +# PreToolUse Hook: Validate Edit paths for RLCR loop # # Blocks Claude from editing: # - Todos files (should use native Task tools instead) @@ -8,8 +8,6 @@ # - State files (managed by hooks, not Claude) # - Wrong round number contract files # - Goal tracker edits outside the active loop or that alter the immutable section -# - PR loop state files (.humanize/pr-loop/) -# - PR loop read-only files (pr-comment, prompt, codex-prompt, pr-check, pr-feedback) # set -euo pipefail @@ -54,32 +52,6 @@ if is_round_file_type "$FILE_PATH_LOWER" "prompt"; then exit 2 fi -# ======================================== -# PR Loop File Protection -# ======================================== - -IN_PR_LOOP_DIR=$(is_in_pr_loop_dir "$FILE_PATH" && echo "true" || echo "false") - -if [[ "$IN_PR_LOOP_DIR" == "true" ]]; then - # Block state.md edits in PR loop - if is_state_file_path "$FILE_PATH_LOWER"; then - pr_loop_state_blocked_message >&2 - exit 2 - fi - - # Block read-only PR loop files - if is_pr_loop_readonly_file "$FILE_PATH_LOWER"; then - pr_loop_prompt_blocked_message >&2 - exit 2 - fi - - # For round-N-pr-resolve.md (Claude's resolution summary), validate round number - if is_pr_round_file_type "$FILE_PATH_LOWER" "pr-resolve"; then - validate_pr_resolve_round "$FILE_PATH_LOWER" "edit" || exit $? 
- exit 0 - fi -fi - # ======================================== # Methodology Analysis Phase Edit Restriction # ======================================== diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index 3188c1a4..cbd9aa1e 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# PreToolUse Hook: Validate Read access for RLCR loop and PR loop files +# PreToolUse Hook: Validate Read access for RLCR loop files # # Blocks Claude from reading: # - Wrong round's prompt/summary/contract files (outdated information) @@ -9,9 +9,6 @@ # - Todos files (should use native Task tools instead) # - goal-tracker.md from old RLCR sessions # -# PR loop files (.humanize/pr-loop/) are generally allowed to read -# to give Claude access to comments, prompts, and feedback. -# set -euo pipefail diff --git a/hooks/loop-write-validator.sh b/hooks/loop-write-validator.sh index b7314ed9..a8747e9d 100755 --- a/hooks/loop-write-validator.sh +++ b/hooks/loop-write-validator.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# PreToolUse Hook: Validate Write paths for RLCR loop and PR loop +# PreToolUse Hook: Validate Write paths for RLCR loop # # Blocks Claude from writing to: # - Todos files (should use native Task tools instead) @@ -9,8 +9,6 @@ # - Wrong round number contract files # - Summary files outside .humanize/rlcr/ # - Goal tracker writes outside the active loop or that alter the immutable section -# - PR loop state files (.humanize/pr-loop/) -# - PR loop read-only files (pr-comment, prompt, codex-prompt, pr-check, pr-feedback) # set -euo pipefail @@ -71,32 +69,6 @@ if is_round_file_type "$FILE_PATH_LOWER" "prompt"; then exit 2 fi -# ======================================== -# PR Loop File Protection -# ======================================== - -IN_PR_LOOP_DIR=$(is_in_pr_loop_dir "$FILE_PATH" && echo "true" || echo "false") - -if [[ "$IN_PR_LOOP_DIR" == "true" ]]; then - # Block state.md writes in PR loop - 
if is_state_file_path "$FILE_PATH_LOWER"; then - pr_loop_state_blocked_message >&2 - exit 2 - fi - - # Block read-only PR loop files - if is_pr_loop_readonly_file "$FILE_PATH_LOWER"; then - pr_loop_prompt_blocked_message >&2 - exit 2 - fi - - # For round-N-pr-resolve.md (Claude's resolution summary), validate round number - if is_pr_round_file_type "$FILE_PATH_LOWER" "pr-resolve"; then - validate_pr_resolve_round "$FILE_PATH_LOWER" "write to" || exit $? - exit 0 - fi -fi - # ======================================== # Methodology Analysis Phase Write Restriction # ======================================== diff --git a/hooks/pr-loop-stop-hook.sh b/hooks/pr-loop-stop-hook.sh deleted file mode 100755 index c87abee3..00000000 --- a/hooks/pr-loop-stop-hook.sh +++ /dev/null @@ -1,1654 +0,0 @@ -#!/usr/bin/env bash -# -# Stop Hook for PR loop -# -# Intercepts Claude's exit attempts, polls for remote bot reviews, -# and uses local Codex to validate if bot concerns are addressed. -# -# Key features: -# - Polls until ALL active bots respond (per-bot tracking with 15min timeout each) -# - Checks PR state before polling (detects CLOSED/MERGED) -# - Uses APPROVE marker for Codex approval -# - Updates active_bots list based on per-bot approval -# -# State directory: .humanize/pr-loop// -# State file: state.md (current_round, pr_number, active_bots as YAML list, etc.) -# Resolve file: round-N-pr-resolve.md (Claude's resolution summary) -# Comment file: round-N-pr-comment.md (Fetched PR comments) -# Check file: round-N-pr-check.md (Local Codex validation) -# Feedback file: round-N-pr-feedback.md (Feedback for next round) -# - -set -euo pipefail - -# ======================================== -# Default Configuration -# ======================================== - -# Override effort before sourcing loop-common.sh (PR loop defaults to medium effort). -# codex_model is NOT pre-set here so that config-backed values from loop-common.sh apply. 
-DEFAULT_CODEX_EFFORT="medium" -DEFAULT_CODEX_TIMEOUT=900 -DEFAULT_POLL_INTERVAL=30 -DEFAULT_POLL_TIMEOUT=900 # 15 minutes per bot - -# Note: Bot name mapping functions (map_bot_to_author, map_author_to_bot) -# and helper functions (build_yaml_list, build_bot_mention_string) are -# provided by loop-common.sh which is sourced below. - -# ======================================== -# Read Hook Input -# ======================================== - -HOOK_INPUT=$(cat) - -# ======================================== -# Find Active Loop -# ======================================== - -PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" -LOOP_BASE_DIR="$PROJECT_ROOT/.humanize/pr-loop" - -# Source shared loop functions -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" -source "$SCRIPT_DIR/lib/loop-common.sh" - -# Source portable timeout wrapper -PLUGIN_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" -TEMPLATE_DIR="$PLUGIN_ROOT/prompt-template" -source "$PLUGIN_ROOT/scripts/portable-timeout.sh" - -# Default timeout for git/gh operations -GIT_TIMEOUT=30 -GH_TIMEOUT=60 - -# Use shared find_active_pr_loop function from loop-common.sh -LOOP_DIR=$(find_active_pr_loop "$LOOP_BASE_DIR") - -# If no active PR loop, let other hooks handle -if [[ -z "$LOOP_DIR" ]]; then - exit 0 -fi - -STATE_FILE="$LOOP_DIR/state.md" - -if [[ ! 
-f "$STATE_FILE" ]]; then - exit 0 -fi - -# ======================================== -# Parse State File (YAML list format for active_bots) -# ======================================== - -# Declare arrays outside function for macOS Bash 3.2 compatibility -# (declare -g requires Bash 4.2+, which macOS doesn't have by default) -PR_CONFIGURED_BOTS_ARRAY=() -PR_ACTIVE_BOTS_ARRAY=() - -parse_pr_loop_state() { - local state_file="$1" - - STATE_FRONTMATTER=$(sed -n '/^---$/,/^---$/{ /^---$/d; p; }' "$state_file" 2>/dev/null || echo "") - - PR_CURRENT_ROUND=$(echo "$STATE_FRONTMATTER" | grep "^current_round:" | sed "s/current_round: *//" | tr -d ' ' || true) - PR_MAX_ITERATIONS=$(echo "$STATE_FRONTMATTER" | grep "^max_iterations:" | sed "s/max_iterations: *//" | tr -d ' ' || true) - PR_NUMBER=$(echo "$STATE_FRONTMATTER" | grep "^pr_number:" | sed "s/pr_number: *//" | tr -d ' ' || true) - PR_START_BRANCH=$(echo "$STATE_FRONTMATTER" | grep "^start_branch:" | sed "s/start_branch: *//; s/^\"//; s/\"\$//" || true) - PR_CODEX_MODEL=$(echo "$STATE_FRONTMATTER" | grep "^codex_model:" | sed "s/codex_model: *//" | tr -d ' ' || true) - PR_CODEX_EFFORT=$(echo "$STATE_FRONTMATTER" | grep "^codex_effort:" | sed "s/codex_effort: *//" | tr -d ' ' || true) - PR_CODEX_TIMEOUT=$(echo "$STATE_FRONTMATTER" | grep "^codex_timeout:" | sed "s/codex_timeout: *//" | tr -d ' ' || true) - PR_POLL_INTERVAL=$(echo "$STATE_FRONTMATTER" | grep "^poll_interval:" | sed "s/poll_interval: *//" | tr -d ' ' || true) - PR_POLL_TIMEOUT=$(echo "$STATE_FRONTMATTER" | grep "^poll_timeout:" | sed "s/poll_timeout: *//" | tr -d ' ' || true) - PR_STARTED_AT=$(echo "$STATE_FRONTMATTER" | grep "^started_at:" | sed "s/started_at: *//" || true) - PR_LAST_TRIGGER_AT=$(echo "$STATE_FRONTMATTER" | grep "^last_trigger_at:" | sed "s/last_trigger_at: *//" || true) - - # New state fields for Cases 1-5 and force push detection - PR_STARTUP_CASE=$(echo "$STATE_FRONTMATTER" | grep "^startup_case:" | sed "s/startup_case: *//" | tr -d 
' ' || true) - PR_LATEST_COMMIT_SHA=$(echo "$STATE_FRONTMATTER" | grep "^latest_commit_sha:" | sed "s/latest_commit_sha: *//" | tr -d ' ' || true) - PR_LATEST_COMMIT_AT=$(echo "$STATE_FRONTMATTER" | grep "^latest_commit_at:" | sed "s/latest_commit_at: *//" || true) - PR_TRIGGER_COMMENT_ID=$(echo "$STATE_FRONTMATTER" | grep "^trigger_comment_id:" | sed "s/trigger_comment_id: *//" | tr -d ' ' || true) - - # Parse configured_bots and active_bots as YAML lists - # configured_bots: never changes, used for polling all bots (allows re-add) - # active_bots: current bots with issues, shrinks as bots approve - # Arrays are declared outside function for macOS Bash 3.2 compatibility - PR_CONFIGURED_BOTS_ARRAY=() - PR_ACTIVE_BOTS_ARRAY=() - - # Parse YAML list helper function - # NOTE: Avoids 'local -n' (nameref) which requires Bash 4.3+ and fails on macOS Bash 3.2 - # Instead, outputs values to stdout and caller captures into array - parse_yaml_list() { - local field_name="$1" - local in_field=false - - while IFS= read -r line; do - if [[ "$line" =~ ^${field_name}: ]]; then - in_field=true - # Check if it's inline format: field: value - local inline_value="${line#*: }" - if [[ -n "$inline_value" && "$inline_value" != "${field_name}:" ]]; then - # Old comma-separated format for backwards compatibility - echo "$inline_value" | tr ',' '\n' | tr -d ' ' - in_field=false - fi - continue - fi - if [[ "$in_field" == "true" ]]; then - if [[ "$line" =~ ^[[:space:]]+-[[:space:]]+ ]]; then - # Extract bot name from " - botname" - local bot_name="${line#*- }" - bot_name=$(echo "$bot_name" | tr -d ' ') - if [[ -n "$bot_name" ]]; then - echo "$bot_name" - fi - elif [[ "$line" =~ ^[a-zA-Z_] ]]; then - # New field started, stop parsing - in_field=false - fi - fi - done <<< "$STATE_FRONTMATTER" - } - - # Read parsed values into arrays (macOS Bash 3.2 compatible) - while IFS= read -r bot; do - [[ -n "$bot" ]] && PR_CONFIGURED_BOTS_ARRAY+=("$bot") - done < <(parse_yaml_list "configured_bots") - - 
while IFS= read -r bot; do - [[ -n "$bot" ]] && PR_ACTIVE_BOTS_ARRAY+=("$bot") - done < <(parse_yaml_list "active_bots") - - # Backwards compatibility: if configured_bots is empty, use active_bots - if [[ ${#PR_CONFIGURED_BOTS_ARRAY[@]} -eq 0 ]]; then - PR_CONFIGURED_BOTS_ARRAY=("${PR_ACTIVE_BOTS_ARRAY[@]}") - fi - - # Apply defaults - PR_CURRENT_ROUND="${PR_CURRENT_ROUND:-0}" - PR_MAX_ITERATIONS="${PR_MAX_ITERATIONS:-42}" - PR_CODEX_MODEL="${PR_CODEX_MODEL:-$DEFAULT_CODEX_MODEL}" - PR_CODEX_EFFORT="${PR_CODEX_EFFORT:-$DEFAULT_CODEX_EFFORT}" - PR_CODEX_TIMEOUT="${PR_CODEX_TIMEOUT:-$DEFAULT_CODEX_TIMEOUT}" - PR_POLL_INTERVAL="${PR_POLL_INTERVAL:-$DEFAULT_POLL_INTERVAL}" - PR_POLL_TIMEOUT="${PR_POLL_TIMEOUT:-$DEFAULT_POLL_TIMEOUT}" -} - -parse_pr_loop_state "$STATE_FILE" - -# Build display string and mention string from active bots array -PR_ACTIVE_BOTS_DISPLAY=$(IFS=', '; echo "${PR_ACTIVE_BOTS_ARRAY[*]}") -PR_CONFIGURED_BOTS_DISPLAY=$(IFS=', '; echo "${PR_CONFIGURED_BOTS_ARRAY[*]}") - -# Build mention string from configured bots (for detecting trigger comments) -PR_BOT_MENTION_STRING=$(build_bot_mention_string "${PR_CONFIGURED_BOTS_ARRAY[@]}") - -# Validate required fields -if [[ -z "$PR_NUMBER" ]]; then - echo "Error: PR number not found in state file" >&2 - exit 0 -fi - -if [[ ! "$PR_CURRENT_ROUND" =~ ^[0-9]+$ ]]; then - echo "Warning: Invalid current_round in state file" >&2 - exit 0 -fi - -# ======================================== -# Resolve PR Base Repository (for fork PRs) -# ======================================== -# IMPORTANT: For fork PRs, comments are on the base repository, not the fork. -# gh pr view without --repo fails in forks because the PR number doesn't exist there. -# Strategy: First get current repo, check if PR exists there, then try parent repo for forks. -# NOTE: This MUST be done BEFORE PR state checks, which also need --repo for forks. 
- -# Step 1: Get the current repo (works in both forks and base repos) -CURRENT_REPO=$(run_with_timeout "$GH_TIMEOUT" gh repo view --json owner,name \ - -q '.owner.login + "/" + .name' 2>/dev/null) || CURRENT_REPO="" - -# Step 2: Determine the correct repo for PR operations -# Try current repo first - if PR exists there, use it -PR_BASE_REPO="" -PR_LOOKUP_REPO="" # Repo where PR was found (for subsequent lookups) - -if [[ -n "$CURRENT_REPO" ]]; then - if run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$CURRENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$CURRENT_REPO" - PR_LOOKUP_REPO="$CURRENT_REPO" - fi -fi - -if [[ -z "$PR_BASE_REPO" ]]; then - # PR not found in current repo - check if this is a fork and try parent repo - PARENT_REPO=$(run_with_timeout "$GH_TIMEOUT" gh repo view --json parent \ - -q '.parent.owner.login + "/" + .parent.name' 2>/dev/null) || PARENT_REPO="" - if [[ -n "$PARENT_REPO" && "$PARENT_REPO" != "null/" && "$PARENT_REPO" != "/" ]]; then - if run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$PARENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$PARENT_REPO" - PR_LOOKUP_REPO="$PARENT_REPO" - fi - fi -fi - -if [[ -z "$PR_BASE_REPO" ]]; then - echo "Warning: Could not resolve PR base repository, using current repo" >&2 - PR_BASE_REPO="$CURRENT_REPO" - PR_LOOKUP_REPO="$CURRENT_REPO" -fi - -# ======================================== -# Check PR State (detect CLOSED/MERGED before polling) -# ======================================== -# NOTE: Uses PR_LOOKUP_REPO (resolved above) for fork PR support - -PR_STATE=$(run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$PR_LOOKUP_REPO" --json state -q .state 2>/dev/null) || PR_STATE="" - -if [[ "$PR_STATE" == "MERGED" ]]; then - echo "PR #$PR_NUMBER has been merged. Marking loop as complete." 
>&2 - mv "$STATE_FILE" "$LOOP_DIR/merged-state.md" - exit 0 -fi - -if [[ "$PR_STATE" == "CLOSED" ]]; then - echo "PR #$PR_NUMBER has been closed. Marking loop as closed." >&2 - mv "$STATE_FILE" "$LOOP_DIR/closed-state.md" - exit 0 -fi - -# ======================================== -# Check Resolution File Exists -# ======================================== - -RESOLVE_FILE="$LOOP_DIR/round-${PR_CURRENT_ROUND}-pr-resolve.md" - -if [[ ! -f "$RESOLVE_FILE" ]]; then - REASON="# Resolution Summary Missing - -Please write your resolution summary to: $RESOLVE_FILE - -The summary should include: -- Issues addressed -- Files modified -- Tests added (if any)" - - jq -n --arg reason "$REASON" --arg msg "PR Loop: Resolution summary missing for round $PR_CURRENT_ROUND" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 -fi - -# ======================================== -# Check Git Status -# ======================================== - -if command -v git &>/dev/null && run_with_timeout "$GIT_TIMEOUT" git rev-parse --git-dir &>/dev/null 2>&1; then - GIT_STATUS_CACHED=$(run_with_timeout "$GIT_TIMEOUT" git status --porcelain 2>/dev/null) || GIT_EXIT=$? - GIT_EXIT=${GIT_EXIT:-0} - - if [[ $GIT_EXIT -ne 0 ]]; then - REASON="# Git Status Failed - -Git status operation failed. Please check your repository state and try again." - jq -n --arg reason "$REASON" --arg msg "PR Loop: Git status failed" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 - fi - - # Filter out .humanize from status check - NON_HUMANIZE_STATUS=$(echo "$GIT_STATUS_CACHED" | grep -v '\.humanize' || true) - - if [[ -n "$NON_HUMANIZE_STATUS" ]]; then - REASON="# Git Not Clean - -You have uncommitted changes. Please commit all changes before exiting. 
- -Changes detected: -\`\`\` -$NON_HUMANIZE_STATUS -\`\`\`" - jq -n --arg reason "$REASON" --arg msg "PR Loop: Uncommitted changes detected" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 - fi - - # Step 6: Check for unpushed commits (PR loop always requires push) - CURRENT_BRANCH=$(git branch --show-current 2>/dev/null || echo "main") - AHEAD_COUNT=0 - LOCAL_HEAD=$(git rev-parse HEAD 2>/dev/null) || LOCAL_HEAD="" - - # First try: git status -sb works when upstream is configured - GIT_AHEAD=$(run_with_timeout "$GIT_TIMEOUT" git status -sb 2>/dev/null | grep -o 'ahead [0-9]*' || true) - if [[ -n "$GIT_AHEAD" ]]; then - AHEAD_COUNT=$(echo "$GIT_AHEAD" | grep -o '[0-9]*') - else - # Fallback: Check if upstream exists, if not compare with origin/branch or PR head - if ! git rev-parse --abbrev-ref '@{u}' >/dev/null 2>&1; then - # No upstream configured - try origin/branch first - REMOTE_HEAD=$(git rev-parse "origin/$CURRENT_BRANCH" 2>/dev/null) || REMOTE_HEAD="" - if [[ -n "$LOCAL_HEAD" && -n "$REMOTE_HEAD" && "$LOCAL_HEAD" != "$REMOTE_HEAD" ]]; then - # Count commits ahead of remote - AHEAD_COUNT=$(git rev-list --count "origin/$CURRENT_BRANCH..HEAD" 2>/dev/null) || AHEAD_COUNT=0 - elif [[ -z "$REMOTE_HEAD" && -n "$PR_NUMBER" ]]; then - # No origin/branch exists - compare with PR's headRefOid from GitHub - # This handles cases where branch was never pushed or remote ref is missing - # NOTE: Use --repo for fork PR support (PR_BASE_REPO resolved earlier) - PR_HEAD_SHA=$(run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$PR_BASE_REPO" --json headRefOid -q '.headRefOid' 2>/dev/null) || PR_HEAD_SHA="" - if [[ -z "$PR_HEAD_SHA" ]]; then - # Failed to get PR head - fail closed (assume unpushed) for safety - echo "Warning: Could not fetch PR head SHA, assuming unpushed commits" >&2 - AHEAD_COUNT=1 - elif [[ -n "$LOCAL_HEAD" && "$LOCAL_HEAD" != "$PR_HEAD_SHA" ]]; then - # Local differs from PR head - count commits since PR head - 
AHEAD_COUNT=$(git rev-list --count "$PR_HEAD_SHA..HEAD" 2>/dev/null) || { - # PR head not in local history (force push?) - treat as 1 unpushed - AHEAD_COUNT=1 - } - fi - fi - fi - fi - - if [[ "$AHEAD_COUNT" -gt 0 ]]; then - FALLBACK_MSG="# Unpushed Commits Detected - -You have $AHEAD_COUNT unpushed commit(s). PR loop requires pushing changes so bots can review them. - -Please push: git push origin $CURRENT_BRANCH" - REASON=$(load_and_render_safe "$TEMPLATE_DIR" "block/unpushed-commits.md" "$FALLBACK_MSG" \ - "AHEAD_COUNT=$AHEAD_COUNT" "CURRENT_BRANCH=$CURRENT_BRANCH") - jq -n --arg reason "$REASON" --arg msg "PR Loop: $AHEAD_COUNT unpushed commit(s)" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 - fi -fi - -# ======================================== -# Force Push Detection -# ======================================== - -# Detect if the remote branch HEAD has changed in a way that indicates force push -# This happens when previous commits are no longer reachable from current HEAD -if [[ -n "$PR_LATEST_COMMIT_SHA" ]]; then - CURRENT_HEAD=$(run_with_timeout "$GIT_TIMEOUT" git rev-parse HEAD 2>/dev/null) || CURRENT_HEAD="" - - # Check if the stored commit SHA is still reachable from current HEAD - # If not, a force push (history rewrite) has occurred - if [[ -n "$CURRENT_HEAD" && "$CURRENT_HEAD" != "$PR_LATEST_COMMIT_SHA" ]]; then - # Check if old commit is ancestor of current HEAD - IS_ANCESTOR=$(run_with_timeout "$GIT_TIMEOUT" git merge-base --is-ancestor "$PR_LATEST_COMMIT_SHA" "$CURRENT_HEAD" 2>/dev/null && echo "yes" || echo "no") - - if [[ "$IS_ANCESTOR" == "no" ]]; then - echo "Force push detected: $PR_LATEST_COMMIT_SHA is no longer reachable from $CURRENT_HEAD" >&2 - - # Preserve OLD commit SHA before updating state - OLD_COMMIT_SHA="$PR_LATEST_COMMIT_SHA" - - # Get the timestamp of the new HEAD commit for trigger validation - # This ensures detect_trigger_comment only accepts comments AFTER the force push - # NOTE: Uses 
PR_LOOKUP_REPO for fork PR support - NEW_HEAD_COMMIT_AT=$(run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$PR_LOOKUP_REPO" --json commits \ - --jq '.commits | sort_by(.committedDate) | last | .committedDate' 2>/dev/null) || NEW_HEAD_COMMIT_AT="" - - if [[ -z "$NEW_HEAD_COMMIT_AT" ]]; then - # Fallback: use current timestamp - NEW_HEAD_COMMIT_AT=$(date -u +"%Y-%m-%dT%H:%M:%SZ") - fi - - # Update state file with new commit SHA/timestamp and clear trigger state - # Clear BOTH last_trigger_at AND trigger_comment_id to prevent stale eyes checks - TEMP_FILE="${STATE_FILE}.forcepush.$$" - sed -e "s/^latest_commit_sha:.*/latest_commit_sha: $CURRENT_HEAD/" \ - -e "s/^latest_commit_at:.*/latest_commit_at: $NEW_HEAD_COMMIT_AT/" \ - -e "s/^last_trigger_at:.*/last_trigger_at:/" \ - -e "s/^trigger_comment_id:.*/trigger_comment_id:/" \ - "$STATE_FILE" > "$TEMP_FILE" - mv "$TEMP_FILE" "$STATE_FILE" - - # Update local variables to reflect the change - PR_LATEST_COMMIT_SHA="$CURRENT_HEAD" - PR_LATEST_COMMIT_AT="$NEW_HEAD_COMMIT_AT" - PR_LAST_TRIGGER_AT="" - PR_TRIGGER_COMMENT_ID="" - - FALLBACK_MSG="# Force Push Detected - -A force push (history rewrite) has been detected. Post a new @bot trigger comment: $PR_BOT_MENTION_STRING" - REASON=$(load_and_render_safe "$TEMPLATE_DIR" "block/force-push-detected.md" "$FALLBACK_MSG" \ - "OLD_COMMIT=$OLD_COMMIT_SHA" "NEW_COMMIT=$CURRENT_HEAD" "BOT_MENTION_STRING=$PR_BOT_MENTION_STRING" \ - "PR_NUMBER=$PR_NUMBER") - - jq -n --arg reason "$REASON" --arg msg "PR Loop: Force push detected - please re-trigger bots" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 - fi - fi -fi - -# ======================================== -# Check Max Iterations -# ======================================== - -NEXT_ROUND=$((PR_CURRENT_ROUND + 1)) - -if [[ $NEXT_ROUND -gt $PR_MAX_ITERATIONS ]]; then - echo "PR loop reached max iterations ($PR_MAX_ITERATIONS). Exiting." 
>&2 - mv "$STATE_FILE" "$LOOP_DIR/maxiter-state.md" - exit 0 -fi - -# ======================================== -# Check if Active Bots Remain -# ======================================== -# NOTE: Step 8 (Codex +1 check) has been moved to after trigger detection -# to ensure it uses the correct timestamp that accounts for new commits. - -if [[ ${#PR_ACTIVE_BOTS_ARRAY[@]} -eq 0 ]]; then - echo "All bots have approved. PR loop complete!" >&2 - mv "$STATE_FILE" "$LOOP_DIR/approve-state.md" - exit 0 -fi - -# ======================================== -# Detect Trigger Comment and Update last_trigger_at -# ======================================== - -# Get current GitHub user login for trigger comment filtering -get_current_user() { - run_with_timeout "$GH_TIMEOUT" gh api user --jq '.login' 2>/dev/null || echo "" -} - -# Find the most recent PR comment from CURRENT USER that contains bot mentions -# Returns: "timestamp|comment_id" on success -# This timestamp is used for --after filtering to catch fast bot replies -# NOTE: Uses --paginate to handle PRs with >30 comments -# IMPORTANT: If latest_commit_at is set, only accepts comments AFTER that timestamp -# This prevents old triggers from being re-used after force push -# IMPORTANT: Uses PR_BASE_REPO (not {owner}/{repo}) for fork PR support -detect_trigger_comment() { - local pr_num="$1" - local current_user="$2" - local after_timestamp="${3:-}" # Optional: only accept comments after this timestamp - - # Fetch ALL issue comments on the PR (paginated to handle >30 comments) - # Using --paginate ensures we don't miss the latest @mention on large PRs - # IMPORTANT: --jq with --paginate runs per-page, so we output objects (not array) - # and use jq -s to aggregate all pages into a single array before filtering - # IMPORTANT: Use PR_BASE_REPO for fork PRs - comments are on base repo, not fork - local comments_json - comments_json=$(run_with_timeout "$GH_TIMEOUT" gh api "repos/$PR_BASE_REPO/issues/$pr_num/comments" \ - --paginate 
--jq '.[] | {id: .id, author: .user.login, created_at: .created_at, body: .body}' 2>/dev/null \ - | jq -s '.') || return 1 - - if [[ -z "$comments_json" || "$comments_json" == "[]" ]]; then - return 1 - fi - - # Build pattern to match any @bot mention - local bot_pattern="" - for bot in "${PR_CONFIGURED_BOTS_ARRAY[@]}"; do - if [[ -n "$bot_pattern" ]]; then - bot_pattern="${bot_pattern}|@${bot}" - else - bot_pattern="@${bot}" - fi - done - - # Find most recent trigger comment from CURRENT USER (sorted by created_at descending) - # comments_json is already aggregated from all pages into a single array - # If after_timestamp is set, only accept comments created after that timestamp - # Returns both timestamp and comment ID - local trigger_info - if [[ -n "$after_timestamp" ]]; then - # Filter to only comments AFTER the specified timestamp (force push protection) - trigger_info=$(echo "$comments_json" | jq -r \ - --arg pattern "$bot_pattern" \ - --arg user "$current_user" \ - --arg after "$after_timestamp" ' - [.[] | select( - .author == $user and - (.body | test($pattern; "i")) and - (.created_at >= $after) - )] | - sort_by(.created_at) | reverse | .[0] | "\(.created_at)|\(.id)" // empty - ') - else - trigger_info=$(echo "$comments_json" | jq -r --arg pattern "$bot_pattern" --arg user "$current_user" ' - [.[] | select(.author == $user and (.body | test($pattern; "i")))] | - sort_by(.created_at) | reverse | .[0] | "\(.created_at)|\(.id)" // empty - ') - fi - - if [[ -n "$trigger_info" && "$trigger_info" != "null|null" && "$trigger_info" != "|" ]]; then - echo "$trigger_info" - return 0 - fi - - return 1 -} - -# Get current user for trigger comment filtering -CURRENT_USER=$(get_current_user) -if [[ -z "$CURRENT_USER" ]]; then - echo "Warning: Could not determine current GitHub user" >&2 -fi - -# ======================================== -# Refresh latest_commit_at from PR Before Trigger Detection -# ======================================== -# Ensure trigger validation 
uses the CURRENT latest commit timestamp, -# not a stale value from state. This prevents old triggers from being accepted -# after new (non-force) commits are pushed. - -# NOTE: Uses PR_LOOKUP_REPO for fork PR support -CURRENT_LATEST_COMMIT_AT=$(run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$PR_LOOKUP_REPO" --json commits \ - --jq '.commits | sort_by(.committedDate) | last | .committedDate' 2>/dev/null) || CURRENT_LATEST_COMMIT_AT="" - -# Track if new commits were detected (used to override REQUIRE_TRIGGER for cases 2/3) -NEW_COMMITS_DETECTED=false - -if [[ -n "$CURRENT_LATEST_COMMIT_AT" && "$CURRENT_LATEST_COMMIT_AT" != "$PR_LATEST_COMMIT_AT" ]]; then - echo "Updating latest_commit_at: $PR_LATEST_COMMIT_AT -> $CURRENT_LATEST_COMMIT_AT" >&2 - echo " Clearing stale trigger fields (new commits require new @bot mention)" >&2 - - # Persist to state file and clear trigger fields to prevent stale polling - # New commits mean old trigger is invalid - user must post new @bot comment - TEMP_FILE="${STATE_FILE}.commitrefresh.$$" - sed -e "s/^latest_commit_at:.*/latest_commit_at: $CURRENT_LATEST_COMMIT_AT/" \ - -e "s/^last_trigger_at:.*/last_trigger_at:/" \ - -e "s/^trigger_comment_id:.*/trigger_comment_id:/" \ - "$STATE_FILE" > "$TEMP_FILE" - mv "$TEMP_FILE" "$STATE_FILE" - - PR_LATEST_COMMIT_AT="$CURRENT_LATEST_COMMIT_AT" - PR_LAST_TRIGGER_AT="" - PR_TRIGGER_COMMENT_ID="" - NEW_COMMITS_DETECTED=true -fi - -# ALWAYS check for newer trigger comments and update last_trigger_at -# This ensures we use the most recent trigger, not a stale one -# IMPORTANT: Pass latest_commit_at to filter out old triggers (force push protection) -# After a force push, we need a NEW trigger comment, not one from before the push -echo "Detecting trigger comment timestamp from user '$CURRENT_USER'..." 
>&2 -if [[ -n "$PR_LATEST_COMMIT_AT" ]]; then - echo " (Filtering for comments after: $PR_LATEST_COMMIT_AT)" >&2 -fi -DETECTED_TRIGGER_INFO=$(detect_trigger_comment "$PR_NUMBER" "$CURRENT_USER" "$PR_LATEST_COMMIT_AT") || true -DETECTED_TRIGGER_AT="" -DETECTED_TRIGGER_COMMENT_ID="" - -if [[ -n "$DETECTED_TRIGGER_INFO" ]]; then - # Parse timestamp and comment ID from "timestamp|id" format - DETECTED_TRIGGER_AT="${DETECTED_TRIGGER_INFO%%|*}" - DETECTED_TRIGGER_COMMENT_ID="${DETECTED_TRIGGER_INFO##*|}" -fi - -if [[ -n "$DETECTED_TRIGGER_AT" ]]; then - # Check if detected trigger is newer than stored one - if [[ -z "$PR_LAST_TRIGGER_AT" ]] || [[ "$DETECTED_TRIGGER_AT" > "$PR_LAST_TRIGGER_AT" ]]; then - echo "Found trigger comment at: $DETECTED_TRIGGER_AT (ID: $DETECTED_TRIGGER_COMMENT_ID)" >&2 - if [[ -n "$PR_LAST_TRIGGER_AT" ]]; then - echo " (Updating from older trigger: $PR_LAST_TRIGGER_AT)" >&2 - fi - PR_LAST_TRIGGER_AT="$DETECTED_TRIGGER_AT" - PR_TRIGGER_COMMENT_ID="$DETECTED_TRIGGER_COMMENT_ID" - - # Persist to state file - TEMP_FILE="${STATE_FILE}.trigger.$$" - sed -e "s/^last_trigger_at:.*/last_trigger_at: $DETECTED_TRIGGER_AT/" \ - -e "s/^trigger_comment_id:.*/trigger_comment_id: $DETECTED_TRIGGER_COMMENT_ID/" \ - "$STATE_FILE" > "$TEMP_FILE" - mv "$TEMP_FILE" "$STATE_FILE" - - # Note: Claude eyes verification is done in the dedicated section below - # (after trigger detection) to ensure it runs on EVERY exit attempt - else - echo "Using existing trigger timestamp: $PR_LAST_TRIGGER_AT" >&2 - fi -fi - -# ======================================== -# Determine if Trigger is Required (needed for Claude eyes check below) -# ======================================== - -# Trigger requirement logic: -# - Round 0, startup_case 1: No trigger required (waiting for initial auto-reviews) -# - Round 0, startup_case 2/3: No trigger required (process existing comments) -# - Round 0, startup_case 4/5: Trigger required (new commits after reviews) -# - Round > 0: Always require 
trigger -# - NEW: If new commits detected during this poll, require trigger (overrides cases 2/3) - -REQUIRE_TRIGGER=false -if [[ "$PR_CURRENT_ROUND" -gt 0 ]]; then - # Subsequent rounds always require a trigger - REQUIRE_TRIGGER=true -elif [[ "$NEW_COMMITS_DETECTED" == "true" ]]; then - # New commits detected during this poll - require fresh trigger - # This overrides cases 2/3 to prevent reusing stale reviews - REQUIRE_TRIGGER=true -elif [[ "$PR_CURRENT_ROUND" -eq 0 ]]; then - case "${PR_STARTUP_CASE:-1}" in - 1|2|3) - # Case 1: No comments yet - wait for initial auto-reviews - # Case 2/3: Comments exist - process them without requiring new trigger - REQUIRE_TRIGGER=false - ;; - 4|5) - # Case 4/5: All commented but new commits pushed - require re-trigger - REQUIRE_TRIGGER=true - ;; - *) - # Unknown case, default to not requiring trigger - REQUIRE_TRIGGER=false - ;; - esac -fi - -# ======================================== -# Step 8: Check for Codex +1 Reaction (After Trigger Detection) -# ======================================== -# IMPORTANT: This check runs AFTER trigger detection to ensure: -# 1. We use the correct timestamp that accounts for new commits -# 2. If trigger is required but missing, we don't approve based on old +1 - -# Check for codex bot in active bots -CODEX_IN_ACTIVE=false -for bot in "${PR_ACTIVE_BOTS_ARRAY[@]}"; do - if [[ "$bot" == "codex" ]]; then - CODEX_IN_ACTIVE=true - break - fi -done - -if [[ "$CODEX_IN_ACTIVE" == "true" ]]; then - # Skip +1 check if trigger is required but not yet posted - # (User needs to post @codex comment first) - if [[ "$REQUIRE_TRIGGER" == "true" && -z "$PR_LAST_TRIGGER_AT" ]]; then - echo "Skipping Codex +1 check: trigger required but not yet posted" >&2 - else - echo "Round $PR_CURRENT_ROUND: Checking for Codex +1 reaction on PR..." 
>&2 - - # Determine the timestamp for filtering +1 reactions - # Use trigger timestamp if available, otherwise fall back to loop start time - CODEX_REACTION_AFTER="${PR_LAST_TRIGGER_AT:-$PR_STARTED_AT}" - echo " (Checking for +1 after: $CODEX_REACTION_AFTER)" >&2 - - # Check for +1 reaction from Codex - CODEX_REACTION=$("$PLUGIN_ROOT/scripts/check-bot-reactions.sh" codex-thumbsup "$PR_NUMBER" --after "$CODEX_REACTION_AFTER" 2>/dev/null) || CODEX_REACTION="" - - if [[ -n "$CODEX_REACTION" && "$CODEX_REACTION" != "null" ]]; then - REACTION_AT=$(echo "$CODEX_REACTION" | jq -r '.created_at') - echo "Codex +1 detected at $REACTION_AT - removing codex from active_bots" >&2 - - # Remove only codex from active_bots, keep other bots - declare -a NEW_ACTIVE_BOTS_AFTER_THUMBSUP=() - for bot in "${PR_ACTIVE_BOTS_ARRAY[@]}"; do - if [[ "$bot" != "codex" ]]; then - NEW_ACTIVE_BOTS_AFTER_THUMBSUP+=("$bot") - fi - done - - # If no other bots remain, loop is complete - if [[ ${#NEW_ACTIVE_BOTS_AFTER_THUMBSUP[@]} -eq 0 ]]; then - echo "Codex was the only active bot - PR loop approved!" 
>&2 - mv "$STATE_FILE" "$LOOP_DIR/approve-state.md" - exit 0 - fi - - # Update active_bots in state file and continue with other bots - echo "Continuing with remaining bots: ${NEW_ACTIVE_BOTS_AFTER_THUMBSUP[*]}" >&2 - PR_ACTIVE_BOTS_ARRAY=("${NEW_ACTIVE_BOTS_AFTER_THUMBSUP[@]}") - - # Update state file - NEW_ACTIVE_BOTS_YAML=$(build_yaml_list "${PR_ACTIVE_BOTS_ARRAY[@]}") - - TEMP_FILE="${STATE_FILE}.thumbsup.$$" - # Replace active_bots section in state file - awk -v new_bots="$NEW_ACTIVE_BOTS_YAML" ' - /^active_bots:/ { - print "active_bots:" new_bots - in_bots=1 - next - } - in_bots && /^[[:space:]]+-/ { next } - in_bots && /^[a-zA-Z]/ { in_bots=0 } - { print } - ' "$STATE_FILE" > "$TEMP_FILE" - mv "$TEMP_FILE" "$STATE_FILE" - fi - fi -fi - -# ======================================== -# Validate Trigger Comment Exists (Based on startup_case and round) -# ======================================== - -# Validate trigger FIRST, before Claude eyes check -# This ensures we don't waste time checking eyes on a stale trigger_comment_id - -if [[ "$REQUIRE_TRIGGER" == "true" && -z "$PR_LAST_TRIGGER_AT" ]]; then - # Determine startup case description for template - STARTUP_CASE_DESC="requires trigger comment" - case "${PR_STARTUP_CASE:-1}" in - 4) STARTUP_CASE_DESC="New commits after all bots reviewed" ;; - 5) STARTUP_CASE_DESC="New commits after partial bot reviews" ;; - *) STARTUP_CASE_DESC="Subsequent round requires trigger" ;; - esac - - FALLBACK_MSG="# Missing Trigger Comment - -No @bot mention found. 
Please run: gh pr comment $PR_NUMBER --body \"$PR_BOT_MENTION_STRING please review\"" - REASON=$(load_and_render_safe "$TEMPLATE_DIR" "block/no-trigger-comment.md" "$FALLBACK_MSG" \ - "STARTUP_CASE=${PR_STARTUP_CASE:-1}" "STARTUP_CASE_DESC=$STARTUP_CASE_DESC" \ - "CURRENT_ROUND=$PR_CURRENT_ROUND" "BOT_MENTION_STRING=$PR_BOT_MENTION_STRING") - - jq -n --arg reason "$REASON" --arg msg "PR Loop: Missing trigger comment - please @mention bots first" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 -fi - -# ======================================== -# Claude Eyes Verification (AFTER trigger validation) -# ======================================== - -# Verify Claude eyes ONLY AFTER trigger is confirmed to exist -# This prevents checking eyes on a stale trigger_comment_id -# Conditions: -# 1. Claude is configured AND -# 2. A trigger is actually required (REQUIRE_TRIGGER=true) AND -# 3. A trigger comment ID exists (PR_TRIGGER_COMMENT_ID from confirmed detection above) - -CLAUDE_CONFIGURED=false -for bot in "${PR_CONFIGURED_BOTS_ARRAY[@]}"; do - if [[ "$bot" == "claude" ]]; then - CLAUDE_CONFIGURED=true - break - fi -done - -if [[ "$CLAUDE_CONFIGURED" == "true" && "$REQUIRE_TRIGGER" == "true" ]]; then - # Use the confirmed trigger comment ID (updated by detect_trigger_comment above) - TRIGGER_ID_TO_CHECK="${PR_TRIGGER_COMMENT_ID:-}" - - if [[ -n "$TRIGGER_ID_TO_CHECK" ]]; then - echo "Verifying Claude eyes reaction on trigger comment (ID: $TRIGGER_ID_TO_CHECK)..." 
>&2 - - # Check for eyes reaction with 3x5s retry - # Pass --pr for fork PR support (reactions are on base repo) - EYES_REACTION=$("$PLUGIN_ROOT/scripts/check-bot-reactions.sh" claude-eyes "$TRIGGER_ID_TO_CHECK" --pr "$PR_NUMBER" --retry 3 --delay 5 2>/dev/null) || EYES_REACTION="" - - if [[ -z "$EYES_REACTION" || "$EYES_REACTION" == "null" ]]; then - # Claude eyes verification is BLOCKING - error after 3x5s retries - FALLBACK_MSG="# Claude Bot Not Responding - -The Claude bot did not respond with an 'eyes' reaction within 15 seconds (3 x 5s retries). -Please verify the Claude bot is installed and configured for this repository." - REASON=$(load_and_render_safe "$TEMPLATE_DIR" "block/claude-eyes-timeout.md" "$FALLBACK_MSG" \ - "RETRY_COUNT=3" "TOTAL_WAIT_SECONDS=15") - - jq -n --arg reason "$REASON" --arg msg "PR Loop: Claude bot not responding - check bot configuration" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 - else - echo "Claude eyes reaction confirmed!" >&2 - fi - else - # Trigger exists (PR_LAST_TRIGGER_AT is set) but no ID - should not happen normally - echo "Warning: Trigger exists but no comment ID for eyes verification" >&2 - fi -elif [[ "$CLAUDE_CONFIGURED" == "true" ]]; then - echo "Claude is configured but trigger not required (startup_case=${PR_STARTUP_CASE:-1}, round=$PR_CURRENT_ROUND) - skipping eyes verification" >&2 -fi - -# ======================================== -# Poll for New Bot Reviews (per-bot tracking) -# ======================================== - -# Poll ALL configured bots, not just active - allows re-adding approved bots if they post new issues -echo "Polling for new bot reviews on PR #$PR_NUMBER..." 
>&2 -echo "Configured bots: $PR_CONFIGURED_BOTS_DISPLAY" >&2 -echo "Active bots: $PR_ACTIVE_BOTS_DISPLAY" >&2 -echo "Poll interval: ${PR_POLL_INTERVAL}s, Timeout: ${PR_POLL_TIMEOUT}s per bot" >&2 - -POLL_SCRIPT="$PLUGIN_ROOT/scripts/poll-pr-reviews.sh" - -# Consistent file naming: round-N files all refer to round N -COMMENT_FILE="$LOOP_DIR/round-${NEXT_ROUND}-pr-comment.md" - -# Get timestamp for filtering based on startup_case and round -# - With trigger: use trigger timestamp (most accurate) -# - Round 0, Case 1: use started_at (waiting for new auto-reviews) -# - Round 0, Case 2/3: use epoch 0 to collect ALL existing comments -# - Round 0, Case 4/5: should have trigger (blocked above if missing) -AFTER_TIMESTAMP="" -USE_ALL_COMMENTS=false - -if [[ -n "$PR_LAST_TRIGGER_AT" ]]; then - # Always use trigger timestamp when available - AFTER_TIMESTAMP="$PR_LAST_TRIGGER_AT" - echo "Round $PR_CURRENT_ROUND: using trigger timestamp for --after: $AFTER_TIMESTAMP" >&2 -elif [[ "$PR_CURRENT_ROUND" -eq 0 ]]; then - case "${PR_STARTUP_CASE:-1}" in - 1) - # Case 1: No comments yet - filter by started_at to wait for new reviews - AFTER_TIMESTAMP="${PR_STARTED_AT}" - echo "Round 0, Case 1: using started_at for --after: $AFTER_TIMESTAMP" >&2 - ;; - 2|3) - # Case 2/3: Existing comments - collect ALL of them (no timestamp filter) - USE_ALL_COMMENTS=true - AFTER_TIMESTAMP="1970-01-01T00:00:00Z" # Epoch 0 to include all comments - echo "Round 0, Case ${PR_STARTUP_CASE}: collecting ALL existing bot comments" >&2 - ;; - *) - # Case 4/5 should have been blocked above, use started_at as fallback - AFTER_TIMESTAMP="${PR_STARTED_AT}" - echo "Round 0, Case ${PR_STARTUP_CASE}: using started_at for --after: $AFTER_TIMESTAMP" >&2 - ;; - esac -else - # Round N>0 with no trigger - this should have been blocked earlier - # but handle defensively by blocking here too - REASON="# Missing Trigger Comment - -No @bot mention comment found from you on this PR. 
- -Before polling for bot reviews, you must comment on the PR to trigger the bots. - -**Please run:** -\`\`\`bash -gh pr comment $PR_NUMBER --body \"$PR_BOT_MENTION_STRING please review the latest changes\" -\`\`\` - -Then try exiting again." - - jq -n --arg reason "$REASON" --arg msg "PR Loop: Missing trigger comment" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 -fi - -# Convert trigger timestamp to epoch for timeout anchoring -# Per-bot timeouts are measured from the TRIGGER time, not poll start time -# Special case: when USE_ALL_COMMENTS is true (startup cases 2/3), we're looking at -# ALL historical comments. In this case, anchor timeout to NOW (poll start time) -# rather than PR_STARTED_AT, which could be hours old and cause instant timeout. -if [[ "$USE_ALL_COMMENTS" == "true" ]]; then - # Use current time as timeout anchor for historical comment review - TRIGGER_EPOCH=$(date +%s) -else - TRIGGER_EPOCH=$(date -d "$AFTER_TIMESTAMP" +%s 2>/dev/null || date -j -f "%Y-%m-%dT%H:%M:%SZ" "$AFTER_TIMESTAMP" +%s 2>/dev/null || date +%s) -fi - -# Track which bots have responded and their individual timeouts -# IMPORTANT: Poll ALL configured bots (not just active) so we can detect when -# previously approved bots post new issues and re-add them to active_bots -# IMPORTANT: Timeouts are anchored to TRIGGER_EPOCH, not poll start time -# This ensures the 15-minute window is measured from when the @mention was posted -# -# NOTE: Using dynamic variable names instead of associative arrays (declare -A) -# for macOS Bash 3.2 compatibility. Associative arrays require Bash 4.0+. 
-# Helper functions to get/set values: -_sanitize_key() { echo "$1" | tr -c 'a-zA-Z0-9_' '_'; } -_map_get() { local var="$1_$(_sanitize_key "$2")"; echo "${!var}"; } -_map_set() { local var="$1_$(_sanitize_key "$2")"; eval "$var=\"$3\""; } -_map_isset() { local var="$1_$(_sanitize_key "$2")"; [[ -n "${!var+x}" ]]; } - -POLL_START_EPOCH=$(date +%s) -echo "Timeout anchor: trigger at epoch $TRIGGER_EPOCH (poll started at $POLL_START_EPOCH)" >&2 -for bot in "${PR_CONFIGURED_BOTS_ARRAY[@]}"; do - _map_set "BOTS_RESPONDED" "$bot" "false" - _map_set "BOTS_TIMED_OUT" "$bot" "false" - # Use TRIGGER_EPOCH for timeout, not poll start - _map_set "BOTS_TIMEOUT_START" "$bot" "$TRIGGER_EPOCH" -done - -# Collect all new comments with deduplication by id -# Using dynamic variables: SEEN_ID_=1 -ALL_NEW_COMMENTS="[]" - -while true; do - CURRENT_TIME=$(date +%s) - - # Check if all configured bots have responded OR timed out (per-bot 15min timeout) - ALL_DONE=true - WAITING_BOTS="" - TIMED_OUT_BOTS="" - - for bot in "${PR_CONFIGURED_BOTS_ARRAY[@]}"; do - if [[ "$(_map_get BOTS_RESPONDED "$bot")" == "true" ]]; then - continue # Bot already responded - fi - - # Check per-bot timeout (15 minutes each) - auto-remove after timeout - BOT_ELAPSED=$((CURRENT_TIME - $(_map_get BOTS_TIMEOUT_START "$bot"))) - if [[ $BOT_ELAPSED -ge $PR_POLL_TIMEOUT ]]; then - echo "Bot '$bot' timed out after ${PR_POLL_TIMEOUT}s - will be removed from active_bots" >&2 - _map_set "BOTS_TIMED_OUT" "$bot" "true" # Mark as timed out for later removal - if [[ -n "$TIMED_OUT_BOTS" ]]; then - TIMED_OUT_BOTS="${TIMED_OUT_BOTS}, ${bot}" - else - TIMED_OUT_BOTS="$bot" - fi - continue # Mark as done (timed out) - fi - - # Bot still waiting - ALL_DONE=false - if [[ -n "$WAITING_BOTS" ]]; then - WAITING_BOTS="${WAITING_BOTS},${bot}" - else - WAITING_BOTS="$bot" - fi - done - - if [[ "$ALL_DONE" == "true" ]]; then - if [[ -n "$TIMED_OUT_BOTS" ]]; then - echo "Polling complete. 
Timed out bots: $TIMED_OUT_BOTS" >&2 - else - echo "All configured bots have responded!" >&2 - fi - break - fi - - # Check for cancel signal - if [[ -f "$LOOP_DIR/.cancel-requested" ]]; then - echo "Cancel requested, exiting poll loop..." >&2 - exit 0 - fi - - TOTAL_ELAPSED=$((CURRENT_TIME - POLL_START_EPOCH)) - echo "Poll attempt (elapsed: ${TOTAL_ELAPSED}s, waiting for: $WAITING_BOTS)..." >&2 - - # Poll for new comments from bots we're still waiting for - POLL_RESULT=$("$POLL_SCRIPT" "$PR_NUMBER" --after "$AFTER_TIMESTAMP" --bots "$WAITING_BOTS" 2>/dev/null) || { - echo "Warning: Poll script failed, retrying..." >&2 - sleep "$PR_POLL_INTERVAL" - continue - } - - # Check which bots responded (check all configured bots) - # Poll script returns author names (e.g., chatgpt-codex-connector[bot]) - # We need to map them back to bot names (e.g., codex) - RESPONDED_BOTS=$(echo "$POLL_RESULT" | jq -r '.bots_responded[]' 2>/dev/null || true) - for responded_author in $RESPONDED_BOTS; do - # Map author name to bot name (e.g., chatgpt-codex-connector[bot] -> codex) - responded_bot=$(map_author_to_bot "$responded_author") - for bot in "${PR_CONFIGURED_BOTS_ARRAY[@]}"; do - if [[ "$responded_bot" == "$bot" ]]; then - if [[ "$(_map_get BOTS_RESPONDED "$bot")" != "true" ]]; then - _map_set "BOTS_RESPONDED" "$bot" "true" - echo "Bot '$bot' has responded!" 
>&2 - fi - fi - done - done - - # Check for Codex +1 reaction during polling (any round) - # Codex may give +1 instead of commenting if no issues found - if [[ "$(_map_get BOTS_RESPONDED codex)" != "true" ]]; then - # Check if codex is a configured bot - CODEX_CONFIGURED=false - for bot in "${PR_CONFIGURED_BOTS_ARRAY[@]}"; do - [[ "$bot" == "codex" ]] && CODEX_CONFIGURED=true && break - done - - if [[ "$CODEX_CONFIGURED" == "true" ]]; then - # Determine timestamp for filtering - use trigger if available, else loop start - POLL_REACTION_AFTER="${PR_LAST_TRIGGER_AT:-$PR_STARTED_AT}" - - # Check for +1 reaction - THUMBSUP_RESULT=$("$PLUGIN_ROOT/scripts/check-bot-reactions.sh" codex-thumbsup "$PR_NUMBER" --after "$POLL_REACTION_AFTER" 2>/dev/null) || THUMBSUP_RESULT="" - - if [[ -n "$THUMBSUP_RESULT" && "$THUMBSUP_RESULT" != "null" ]]; then - # +1 found - codex approved without issues - echo "Codex +1 reaction detected during polling - treating as approval!" >&2 - _map_set "BOTS_RESPONDED" "codex" "true" - - # Remove codex from active_bots - declare -a NEW_ACTIVE_BOTS_THUMBSUP=() - for bot in "${PR_ACTIVE_BOTS_ARRAY[@]}"; do - if [[ "$bot" != "codex" ]]; then - NEW_ACTIVE_BOTS_THUMBSUP+=("$bot") - else - echo "Removing 'codex' from active_bots (approved via +1)" >&2 - fi - done - PR_ACTIVE_BOTS_ARRAY=("${NEW_ACTIVE_BOTS_THUMBSUP[@]}") - - # Update active_bots in state file - if [[ ${#PR_ACTIVE_BOTS_ARRAY[@]} -eq 0 ]]; then - echo "All bots have approved (codex via +1) - PR loop complete!" 
>&2 - mv "$STATE_FILE" "$LOOP_DIR/approve-state.md" - exit 0 - else - # Update state file with remaining bots - ACTIVE_BOTS_YAML=$(build_yaml_list "${PR_ACTIVE_BOTS_ARRAY[@]}") - # Use awk to replace active_bots section (portable across GNU/BSD) - TEMP_FILE="${STATE_FILE}.thumbsup.$$" - awk -v bots="$ACTIVE_BOTS_YAML" ' - /^active_bots:$/ { - print "active_bots:" bots - skip = 1 - next - } - skip && /^[a-z_]+:/ { skip = 0 } - skip && /^ - / { next } - !skip { print } - ' "$STATE_FILE" > "$TEMP_FILE" - mv "$TEMP_FILE" "$STATE_FILE" - fi - fi - fi - fi - - # Collect new comments WITH DEDUPLICATION by comment id - NEW_COMMENTS=$(echo "$POLL_RESULT" | jq -r '.comments' 2>/dev/null || echo "[]") - if [[ "$NEW_COMMENTS" != "[]" && "$NEW_COMMENTS" != "null" ]]; then - # Deduplicate: only add comments we haven't seen before - UNIQUE_COMMENTS="[]" - while IFS= read -r comment_json; do - [[ -z "$comment_json" || "$comment_json" == "null" ]] && continue - COMMENT_ID=$(echo "$comment_json" | jq -r '.id // empty') - if [[ -n "$COMMENT_ID" ]] && ! _map_isset "SEEN_COMMENT_IDS" "$COMMENT_ID"; then - _map_set "SEEN_COMMENT_IDS" "$COMMENT_ID" "1" - UNIQUE_COMMENTS=$(echo "$UNIQUE_COMMENTS" | jq --argjson c "$comment_json" '. + [$c]') - fi - done < <(echo "$NEW_COMMENTS" | jq -c '.[]') - - if [[ "$UNIQUE_COMMENTS" != "[]" ]]; then - ALL_NEW_COMMENTS=$(echo "$ALL_NEW_COMMENTS $UNIQUE_COMMENTS" | jq -s 'add') - fi - fi - - sleep "$PR_POLL_INTERVAL" -done - -# ======================================== -# Handle No Responses (auto-remove timed-out bots) -# ======================================== - -COMMENT_COUNT=$(echo "$ALL_NEW_COMMENTS" | jq 'length' 2>/dev/null || echo "0") - -if [[ "$COMMENT_COUNT" == "0" ]]; then - echo "No new bot reviews received." 
>&2 - - # Always remove timed-out bots from active_bots (per-bot timeout behavior) - # Don't wait for ALL bots to timeout - remove each bot as it times out - TIMED_OUT_COUNT=0 - WAITING_COUNT=0 - declare -a NEW_ACTIVE_BOTS_TIMEOUT=() - - for bot in "${PR_ACTIVE_BOTS_ARRAY[@]}"; do - if [[ "$(_map_get BOTS_TIMED_OUT "$bot")" == "true" ]]; then - TIMED_OUT_COUNT=$((TIMED_OUT_COUNT + 1)) - echo "Removing '$bot' from active_bots (timed out after ${PR_POLL_TIMEOUT}s)" >&2 - # Don't add to NEW_ACTIVE_BOTS_TIMEOUT - elif [[ "$(_map_get BOTS_RESPONDED "$bot")" != "true" ]]; then - WAITING_COUNT=$((WAITING_COUNT + 1)) - NEW_ACTIVE_BOTS_TIMEOUT+=("$bot") - else - # Bot responded - keep in active (will be processed if comments come in) - NEW_ACTIVE_BOTS_TIMEOUT+=("$bot") - fi - done - - # If any bots timed out, update the state file with remaining active bots - if [[ $TIMED_OUT_COUNT -gt 0 ]]; then - PR_ACTIVE_BOTS_ARRAY=("${NEW_ACTIVE_BOTS_TIMEOUT[@]}") - - # If no bots remain, loop is complete - if [[ ${#PR_ACTIVE_BOTS_ARRAY[@]} -eq 0 ]]; then - echo "All bots removed (timed out) - PR loop approved!" 
>&2 - # Build configured_bots YAML - TIMEOUT_CONFIGURED_BOTS_YAML=$(build_yaml_list "${PR_CONFIGURED_BOTS_ARRAY[@]}") - # Write updated state with empty active_bots before moving to approve-state.md - { - echo "---" - echo "current_round: $PR_CURRENT_ROUND" - echo "max_iterations: $PR_MAX_ITERATIONS" - echo "pr_number: $PR_NUMBER" - echo "start_branch: $PR_START_BRANCH" - echo "configured_bots:${TIMEOUT_CONFIGURED_BOTS_YAML}" - echo "active_bots:" - echo "codex_model: $PR_CODEX_MODEL" - echo "codex_effort: $PR_CODEX_EFFORT" - echo "codex_timeout: $PR_CODEX_TIMEOUT" - echo "poll_interval: $PR_POLL_INTERVAL" - echo "poll_timeout: $PR_POLL_TIMEOUT" - echo "started_at: $PR_STARTED_AT" - echo "startup_case: ${PR_STARTUP_CASE:-1}" - echo "latest_commit_sha: ${PR_LATEST_COMMIT_SHA:-}" - echo "latest_commit_at: ${PR_LATEST_COMMIT_AT:-}" - echo "last_trigger_at: ${PR_LAST_TRIGGER_AT:-}" - echo "trigger_comment_id: ${PR_TRIGGER_COMMENT_ID:-}" - echo "---" - } > "$LOOP_DIR/approve-state.md" - rm -f "$STATE_FILE" - exit 0 - fi - - # Persist updated active_bots to state file (some bots timed out, others still waiting) - echo "Updating state file with ${#PR_ACTIVE_BOTS_ARRAY[@]} remaining active bots" >&2 - TIMEOUT_ACTIVE_BOTS_YAML=$(build_yaml_list "${PR_ACTIVE_BOTS_ARRAY[@]}") - TEMP_FILE="${STATE_FILE}.timeout.$$" - awk -v bots="$TIMEOUT_ACTIVE_BOTS_YAML" ' - /^active_bots:$/ { - print "active_bots:" bots - skip = 1 - next - } - skip && /^[a-z_]+:/ { skip = 0 } - skip && /^ - / { next } - !skip { print } - ' "$STATE_FILE" > "$TEMP_FILE" - mv "$TEMP_FILE" "$STATE_FILE" - fi - - # Build list of bots that didn't respond (only non-timed-out bots that are still waiting) - MISSING_BOTS="" - for bot in "${PR_ACTIVE_BOTS_ARRAY[@]}"; do - if [[ "$(_map_get BOTS_RESPONDED "$bot")" != "true" ]]; then - if [[ -n "$MISSING_BOTS" ]]; then - MISSING_BOTS="${MISSING_BOTS}, ${bot}" - else - MISSING_BOTS="$bot" - fi - fi - done - - REASON="# Bot Review Timeout - -No new reviews received from 
bots after polling. - -**Bots that did not respond:** $MISSING_BOTS - -This might mean: -- The bots haven't been triggered (did you comment on the PR?) -- The bots are slow to respond -- The bots are not enabled on this repository - -**Options:** -1. Comment on the PR to trigger bot reviews: - \`\`\`bash - gh pr comment $PR_NUMBER --body \"$PR_BOT_MENTION_STRING please review the latest changes\" - \`\`\` -2. Wait and try exiting again -3. Cancel the loop: \`/humanize:cancel-pr-loop\`" - - jq -n --arg reason "$REASON" --arg msg "PR Loop: Bot review timeout" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 -fi - -# ======================================== -# Save New Comments (correct file naming) -# ======================================== - -# Format comments grouped by bot (use configured bots for completeness) -cat > "$COMMENT_FILE" << EOF -# Bot Reviews (Round $NEXT_ROUND) - -Fetched at: $(date -u +%Y-%m-%dT%H:%M:%SZ) -Configured bots: $PR_CONFIGURED_BOTS_DISPLAY -Currently active: $PR_ACTIVE_BOTS_DISPLAY - ---- - -EOF - -# Group comments by ALL configured bots (not just active) -# This allows Codex to see when previously approved bots post new issues -for bot in "${PR_CONFIGURED_BOTS_ARRAY[@]}"; do - # Map bot name to author name (e.g., codex -> chatgpt-codex-connector[bot]) - author=$(map_bot_to_author "$bot") - BOT_COMMENTS=$(echo "$ALL_NEW_COMMENTS" | jq -r --arg author "$author" ' - [.[] | select(.author == $author)] - ') - BOT_COUNT=$(echo "$BOT_COMMENTS" | jq 'length') - - if [[ "$BOT_COUNT" -gt 0 ]]; then - echo "## Comments from ${author}" >> "$COMMENT_FILE" - echo "" >> "$COMMENT_FILE" - - echo "$BOT_COMMENTS" | jq -r ' - .[] | - "### Comment\n\n" + - "- **Type**: \(.type | gsub("_"; " "))\n" + - "- **Time**: \(.created_at)\n" + - (if .path then "- **File**: `\(.path)`\(if .line then " (line \(.line))" else "" end)\n" else "" end) + - (if .state then "- **Status**: \(.state)\n" else "" end) + - "\n\(.body)\n\n---\n" - ' 
>> "$COMMENT_FILE" - else - echo "## Comments from ${author}" >> "$COMMENT_FILE" - echo "" >> "$COMMENT_FILE" - echo "*No new comments from this bot.*" >> "$COMMENT_FILE" - echo "" >> "$COMMENT_FILE" - echo "---" >> "$COMMENT_FILE" - echo "" >> "$COMMENT_FILE" - fi -done - -echo "Comments saved to: $COMMENT_FILE" >&2 - -# ======================================== -# Run Local Codex Review of Bot Feedback -# ======================================== - -# Consistent file naming: all round-N files refer to round N -CHECK_FILE="$LOOP_DIR/round-${NEXT_ROUND}-pr-check.md" -FEEDBACK_FILE="$LOOP_DIR/round-${NEXT_ROUND}-pr-feedback.md" - -echo "Running local Codex review of bot feedback..." >&2 - -# Build Codex prompt with per-bot analysis -CODEX_PROMPT_FILE="$LOOP_DIR/round-${NEXT_ROUND}-codex-prompt.md" -BOT_REVIEW_CONTENT=$(cat "$COMMENT_FILE") - -# Build list of expected bots for Codex (all configured bots) -EXPECTED_BOTS_LIST="" -for bot in "${PR_CONFIGURED_BOTS_ARRAY[@]}"; do - EXPECTED_BOTS_LIST="${EXPECTED_BOTS_LIST}- ${bot}\n" -done - -# Load goal tracker update template (with fallback) -GOAL_TRACKER_FILE="$LOOP_DIR/goal-tracker.md" -GOAL_TRACKER_TEMPLATE_VARS=( - "GOAL_TRACKER_FILE=$GOAL_TRACKER_FILE" - "NEXT_ROUND=$NEXT_ROUND" -) -GOAL_TRACKER_UPDATE_FALLBACK="## Goal Tracker Update -After analysis, update the goal tracker at $GOAL_TRACKER_FILE with current status." - -GOAL_TRACKER_UPDATE_INSTRUCTIONS=$(load_and_render_safe "$TEMPLATE_DIR" "pr-loop/codex-goal-tracker-update.md" "$GOAL_TRACKER_UPDATE_FALLBACK" "${GOAL_TRACKER_TEMPLATE_VARS[@]}") - -cat > "$CODEX_PROMPT_FILE" << EOF -# PR Review Validation (Per-Bot Analysis) - -Analyze the following bot reviews and determine approval status FOR EACH BOT. - -## Expected Bots -$(echo -e "$EXPECTED_BOTS_LIST") - -## Bot Reviews -$BOT_REVIEW_CONTENT - -## Your Task - -1. For EACH expected bot, analyze their review (if present) -2. 
Determine if each bot is: - - **APPROVE**: Bot explicitly approves or says "no issues found", "LGTM", "Didn't find any major issues", etc. - - **ISSUES**: Bot identifies specific problems that need fixing - - **NO_RESPONSE**: Bot did not post any new comments - -3. Output your analysis to $CHECK_FILE with this EXACT structure: - -### Per-Bot Status -| Bot | Status | Summary | -|-----|--------|---------| -| | APPROVE/ISSUES/NO_RESPONSE | | - -### Issues Found (if any) -List ALL specific issues from bots that have ISSUES status. - -### Approved Bots (to remove from active_bots) -List bots that should be removed from active tracking (those with APPROVE status). - -### Final Recommendation -- If ALL bots have APPROVE status: End with "APPROVE" on its own line -- If any bot has ISSUES status: End with "ISSUES_REMAINING" on its own line -- If any bot has NO_RESPONSE status: End with "WAITING_FOR_BOTS" on its own line -- If any bot response indicates usage/rate limits hit (e.g., "usage limits", "rate limit", "quota exceeded"): End with "USAGE_LIMIT_HIT" on its own line - -$GOAL_TRACKER_UPDATE_INSTRUCTIONS -EOF - -# Check if codex is available -if ! command -v codex &>/dev/null; then - REASON="# Codex Not Found - -The 'codex' command is not installed or not in PATH. -PR loop requires Codex CLI to validate bot reviews. - -**To fix:** -1. Install Codex CLI -2. Retry the exit - -Or use \`/humanize:cancel-pr-loop\` to cancel the loop." 
- - jq -n --arg reason "$REASON" --arg msg "PR Loop: Codex not found" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 -fi - -# Run Codex -CODEX_ARGS=("-m" "$PR_CODEX_MODEL") -if [[ -n "$PR_CODEX_EFFORT" ]]; then - CODEX_ARGS+=("-c" "model_reasoning_effort=${PR_CODEX_EFFORT}") -fi - -# Determine automation flag based on environment variable -# Default: Use --full-auto (safe mode with sandbox) -# If HUMANIZE_CODEX_BYPASS_SANDBOX is "true" or "1": Use --dangerously-bypass-approvals-and-sandbox -CODEX_AUTO_FLAG="--full-auto" -if [[ "${HUMANIZE_CODEX_BYPASS_SANDBOX:-}" == "true" ]] || [[ "${HUMANIZE_CODEX_BYPASS_SANDBOX:-}" == "1" ]]; then - CODEX_AUTO_FLAG="--dangerously-bypass-approvals-and-sandbox" -fi - -# Disable native hooks for nested Codex reviewer calls to prevent Stop-hook recursion. -# Probe whether the installed Codex CLI supports --disable; fall back to empty args -# so older builds do not fail with an unknown-argument error. -CODEX_DISABLE_HOOKS_ARGS=() -if codex --help 2>&1 | grep -q -- '--disable'; then - CODEX_DISABLE_HOOKS_ARGS=(--disable codex_hooks) -fi - -CODEX_ARGS+=("$CODEX_AUTO_FLAG" "-C" "$PROJECT_ROOT") - -CODEX_PROMPT_CONTENT=$(cat "$CODEX_PROMPT_FILE") -CODEX_EXIT_CODE=0 - -printf '%s' "$CODEX_PROMPT_CONTENT" | run_with_timeout "$PR_CODEX_TIMEOUT" codex "${CODEX_DISABLE_HOOKS_ARGS[@]}" exec "${CODEX_ARGS[@]}" - \ - > "$CHECK_FILE" 2>/dev/null || CODEX_EXIT_CODE=$? - -if [[ $CODEX_EXIT_CODE -ne 0 ]]; then - REASON="# Codex Review Failed - -Codex failed to validate bot reviews (exit code: $CODEX_EXIT_CODE). - -Please retry or cancel the loop." - - jq -n --arg reason "$REASON" --arg msg "PR Loop: Codex review failed" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 -fi - -if [[ ! -s "$CHECK_FILE" ]]; then - REASON="# Codex Review Empty - -Codex produced no output when validating bot reviews. - -Please retry or cancel the loop." 
- - jq -n --arg reason "$REASON" --arg msg "PR Loop: Codex review empty" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 -fi - -# ======================================== -# Check Codex Result and Update active_bots -# ======================================== - -CHECK_CONTENT=$(cat "$CHECK_FILE") -LAST_LINE=$(echo "$CHECK_CONTENT" | grep -v '^[[:space:]]*$' | tail -1) -LAST_LINE_TRIMMED=$(echo "$LAST_LINE" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//') - -# Use "APPROVE" marker to indicate all bots approved -if [[ "$LAST_LINE_TRIMMED" == "APPROVE" ]]; then - echo "All bots have approved! PR loop complete." >&2 - - # Update goal tracker BEFORE exit (idempotent - won't duplicate if Codex already updated) - if [[ -f "$GOAL_TRACKER_FILE" ]]; then - # For APPROVE, we record 0 new issues - update_pr_goal_tracker "$GOAL_TRACKER_FILE" "$NEXT_ROUND" '{"issues": 0, "resolved": 0, "bot": "All"}' || true - fi - - mv "$STATE_FILE" "$LOOP_DIR/approve-state.md" - exit 0 -fi - -# Handle WAITING_FOR_BOTS - block exit but don't advance round -if [[ "$LAST_LINE_TRIMMED" == "WAITING_FOR_BOTS" ]]; then - echo "Some bots haven't responded yet. Blocking exit." >&2 - - REASON="# Waiting for Bot Responses - -Some bots haven't posted their reviews yet. - -**Options:** -1. Wait and try exiting again (bots may still be processing) -2. Comment on the PR to trigger bot reviews: - \`\`\`bash - gh pr comment $PR_NUMBER --body \"$PR_BOT_MENTION_STRING please review the latest changes\" - \`\`\` -3. Cancel the loop: \`/humanize:cancel-pr-loop\` - -**Note:** The round counter will NOT advance until all expected bots respond." 
- - jq -n --arg reason "$REASON" --arg msg "PR Loop: Waiting for bot responses" \ - '{"decision": "block", "reason": $reason, "systemMessage": $msg}' - exit 0 -fi - -# Handle USAGE_LIMIT_HIT - terminate loop gracefully (service limitation, not code issue) -if [[ "$LAST_LINE_TRIMMED" == "USAGE_LIMIT_HIT" ]]; then - echo "Bot usage/rate limits detected. Terminating PR loop." >&2 - - # Move state file to indicate usage limit termination - mv "$STATE_FILE" "$LOOP_DIR/usage-limit-state.md" - - # Let exit proceed without blocking - the loop is over due to external limitation - exit 0 -fi - -# ======================================== -# Update active_bots in state file -# ======================================== - -# Extract approved bots from Codex output and remove them from active_bots -# Look for "### Approved Bots" section -# NOTE: Use awk for more robust extraction that handles: -# - Section at end of file (no following ###) -# - Section immediately followed by ### (empty section) -APPROVED_SECTION=$(awk '/^### Approved Bots/{found=1; next} found && /^###/{exit} found{print}' "$CHECK_FILE" || true) - -# Extract bots with issues from Codex output (for re-add logic) -# Look for "### Per-Bot Status" table and find bots with ISSUES status -# NOTE: Use awk for more robust extraction -ISSUES_SECTION=$(awk '/^### Per-Bot Status/{found=1; next} found && /^###/{exit} found{print}' "$CHECK_FILE" || true) - -# Build new active_bots array with re-add logic -# IMPORTANT: Process ALL configured bots, not just currently active ones -# This allows re-adding bots that were previously approved but now have new issues -declare -a NEW_ACTIVE_BOTS=() -# NOTE: Using _map_set/get instead of declare -A for macOS Bash 3.2 compatibility - -# First, identify bots with issues from Codex output -while IFS= read -r line; do - if echo "$line" | grep -qiE '\|[[:space:]]*ISSUES[[:space:]]*\|'; then - # Extract bot name from table row: | botname | ISSUES | summary | - BOT_WITH_ISSUE=$(echo "$line" | 
sed 's/|/\n/g' | sed -n '2p' | tr -d ' ') - if [[ -n "$BOT_WITH_ISSUE" ]]; then - _map_set "BOTS_WITH_ISSUES" "$BOT_WITH_ISSUE" "true" - fi - fi - if echo "$line" | grep -qiE '\|[[:space:]]*APPROVE[[:space:]]*\|'; then - # Extract bot name from table row: | botname | APPROVE | summary | - BOT_APPROVED=$(echo "$line" | sed 's/|/\n/g' | sed -n '2p' | tr -d ' ') - if [[ -n "$BOT_APPROVED" ]]; then - _map_set "BOTS_APPROVED" "$BOT_APPROVED" "true" - fi - fi -done <<< "$ISSUES_SECTION" - -# Process ALL configured bots (not just currently active) -# This allows re-adding previously approved bots if they post new issues -# Also handle timed-out bots by removing them from active_bots -for bot in "${PR_CONFIGURED_BOTS_ARRAY[@]}"; do - # Check if bot timed out - remove from active_bots - if [[ "$(_map_get BOTS_TIMED_OUT "$bot")" == "true" ]]; then - echo "Removing '$bot' from active_bots (timed out after ${PR_POLL_TIMEOUT}s)" >&2 - continue # Don't add to NEW_ACTIVE_BOTS - fi - - if [[ "$(_map_get BOTS_WITH_ISSUES "$bot")" == "true" ]]; then - # Bot has issues - add to active list - if [[ "$(_map_get BOTS_APPROVED "$bot")" == "true" ]]; then - echo "Bot '$bot' was previously approved but has new issues - re-adding to active" >&2 - else - echo "Bot '$bot' has issues - keeping active" >&2 - fi - NEW_ACTIVE_BOTS+=("$bot") - elif [[ "$(_map_get BOTS_APPROVED "$bot")" == "true" ]]; then - # Bot approved with no new issues - remove from active - echo "Removing '$bot' from active_bots (approved)" >&2 - elif echo "$APPROVED_SECTION" | grep -qi "$bot"; then - # Bot mentioned in approved section - remove - echo "Removing '$bot' from active_bots (in approved section)" >&2 - else - # Bot not mentioned in ISSUES or APPROVE - check if was active - WAS_ACTIVE=false - for active_bot in "${PR_ACTIVE_BOTS_ARRAY[@]}"; do - if [[ "$bot" == "$active_bot" ]]; then - WAS_ACTIVE=true - break - fi - done - if [[ "$WAS_ACTIVE" == "true" ]]; then - # Was active, not mentioned - keep active 
(NO_RESPONSE case) - echo "Bot '$bot' not mentioned - keeping active" >&2 - NEW_ACTIVE_BOTS+=("$bot") - fi - fi -done - -# Update state file with new active_bots and incremented round -TEMP_FILE="${STATE_FILE}.tmp.$$" - -# Build new YAML list for active_bots -NEW_ACTIVE_BOTS_YAML=$(build_yaml_list "${NEW_ACTIVE_BOTS[@]}") - -# ======================================== -# Update PR Goal Tracker -# ======================================== -# Extract issue counts from Codex output and update goal tracker -# Count issues by looking at the Issues Found section -ISSUES_FOUND_COUNT=0 -ISSUES_RESOLVED_COUNT=0 - -# Count issues in the "### Issues Found" section -if grep -q "### Issues Found" "$CHECK_FILE" 2>/dev/null; then - # Count list items: numbered (1., 2.) or bullet (-, *) in Issues Found section - # NOTE: Use awk for robust extraction (handles section at end of file) - ISSUES_FOUND_COUNT=$(awk '/^### Issues Found/{found=1; next} found && /^###/{exit} found{print}' "$CHECK_FILE" \ - | grep -cE '^[0-9]+\.|^- |^\* ' 2>/dev/null || echo "0") -fi - -# Count resolved issues: issues are only resolved when ALL bots approve -# NOTE: If we reach this point, not all bots have approved (full APPROVE case -# already triggered early exit above), so issues found in this round are NOT -# resolved yet. Setting resolved=0 prevents inflating the resolved count when -# only some bots approve while others report issues. -# ISSUES_RESOLVED_COUNT stays 0 - issues will be marked resolved in a future -# round when all bots approve and the early exit path records the resolution. 
- -# Call update_pr_goal_tracker if goal tracker exists -if [[ -f "$GOAL_TRACKER_FILE" ]]; then - # NOTE: Use lowercase "codex" to match configured bot names and avoid duplicate rows - # (Codex itself writes rows with lowercase names in goal tracker) - BOT_RESULTS_JSON="{\"bot\": \"codex\", \"issues\": $ISSUES_FOUND_COUNT, \"resolved\": $ISSUES_RESOLVED_COUNT}" - update_pr_goal_tracker "$GOAL_TRACKER_FILE" "$NEXT_ROUND" "$BOT_RESULTS_JSON" || true -fi - -# Build YAML list for configured_bots (never changes) -CONFIGURED_BOTS_YAML=$(build_yaml_list "${PR_CONFIGURED_BOTS_ARRAY[@]}") - -# Update latest_commit_sha to current HEAD (for force push detection in next round) -NEW_LATEST_COMMIT_SHA=$(run_with_timeout "$GIT_TIMEOUT" git rev-parse HEAD 2>/dev/null) || NEW_LATEST_COMMIT_SHA="$PR_LATEST_COMMIT_SHA" -# NOTE: Sort by committedDate before selecting last - API order is not guaranteed -# NOTE: Uses PR_LOOKUP_REPO for fork PR support -NEW_LATEST_COMMIT_AT=$(run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$PR_LOOKUP_REPO" --json commits \ - --jq '.commits | sort_by(.committedDate) | last | .committedDate' 2>/dev/null) || NEW_LATEST_COMMIT_AT="$PR_LATEST_COMMIT_AT" - -# Re-evaluate startup_case dynamically -# This allows case to change as bot comments arrive -BOTS_COMMA_LIST=$(IFS=','; echo "${PR_CONFIGURED_BOTS_ARRAY[*]}") -NEW_REVIEWER_STATUS=$("$PLUGIN_ROOT/scripts/check-pr-reviewer-status.sh" "$PR_NUMBER" --bots "$BOTS_COMMA_LIST" 2>/dev/null) || NEW_REVIEWER_STATUS="" -if [[ -n "$NEW_REVIEWER_STATUS" ]]; then - NEW_STARTUP_CASE=$(echo "$NEW_REVIEWER_STATUS" | jq -r '.case') - if [[ -n "$NEW_STARTUP_CASE" && "$NEW_STARTUP_CASE" != "null" ]]; then - if [[ "$NEW_STARTUP_CASE" != "${PR_STARTUP_CASE:-1}" ]]; then - echo "Startup case changed: ${PR_STARTUP_CASE:-1} -> $NEW_STARTUP_CASE" >&2 - fi - PR_STARTUP_CASE="$NEW_STARTUP_CASE" - fi -fi - -# Create updated state file (with last_trigger_at cleared - will be set when next @mention posted) -{ - echo "---" 
- echo "current_round: $NEXT_ROUND" - echo "max_iterations: $PR_MAX_ITERATIONS" - echo "pr_number: $PR_NUMBER" - echo "start_branch: $PR_START_BRANCH" - echo "configured_bots:${CONFIGURED_BOTS_YAML}" - echo "active_bots:${NEW_ACTIVE_BOTS_YAML}" - echo "codex_model: $PR_CODEX_MODEL" - echo "codex_effort: $PR_CODEX_EFFORT" - echo "codex_timeout: $PR_CODEX_TIMEOUT" - echo "poll_interval: $PR_POLL_INTERVAL" - echo "poll_timeout: $PR_POLL_TIMEOUT" - echo "started_at: $PR_STARTED_AT" - echo "startup_case: ${PR_STARTUP_CASE:-1}" - echo "latest_commit_sha: $NEW_LATEST_COMMIT_SHA" - echo "latest_commit_at: ${NEW_LATEST_COMMIT_AT:-}" - echo "last_trigger_at:" - echo "trigger_comment_id: ${PR_TRIGGER_COMMENT_ID:-}" - echo "---" -} > "$TEMP_FILE" -mv "$TEMP_FILE" "$STATE_FILE" - -# Check if all bots are now approved -if [[ ${#NEW_ACTIVE_BOTS[@]} -eq 0 ]]; then - echo "All bots have now approved! PR loop complete." >&2 - mv "$STATE_FILE" "$LOOP_DIR/approve-state.md" - exit 0 -fi - -# ======================================== -# Issues Remaining - Continue Loop -# ======================================== - -# Build new bot mention string -NEW_BOT_MENTION_STRING=$(build_bot_mention_string "${NEW_ACTIVE_BOTS[@]}") - -# Create feedback file for next round -cat > "$FEEDBACK_FILE" << EOF -# PR Loop Feedback (Round $NEXT_ROUND) - -## Bot Review Analysis - -$CHECK_CONTENT - ---- - -## Your Task - -Address the issues identified above: - -1. Read and understand each issue -2. Make the necessary code changes -3. Commit and push your changes -4. Comment on the PR to trigger re-review: - \`\`\`bash - gh pr comment $PR_NUMBER --body "$NEW_BOT_MENTION_STRING please review the latest changes" - \`\`\` -5. 
Write your resolution summary to: $LOOP_DIR/round-${NEXT_ROUND}-pr-resolve.md - ---- - -**Remaining active bots:** $(IFS=', '; echo "${NEW_ACTIVE_BOTS[*]}") -**Round:** $NEXT_ROUND of $PR_MAX_ITERATIONS -EOF - -SYSTEM_MSG="PR Loop: Round $NEXT_ROUND/$PR_MAX_ITERATIONS - Bot reviews identified issues" - -jq -n \ - --arg reason "$(cat "$FEEDBACK_FILE")" \ - --arg msg "$SYSTEM_MSG" \ - '{ - "decision": "block", - "reason": $reason, - "systemMessage": $msg - }' - -exit 0 diff --git a/prompt-template/block/force-push-detected.md b/prompt-template/block/force-push-detected.md deleted file mode 100644 index b09feb50..00000000 --- a/prompt-template/block/force-push-detected.md +++ /dev/null @@ -1,17 +0,0 @@ -# Force Push Detected - -A **force push** has been detected on this PR. The commit SHA changed from `{{OLD_COMMIT}}` to `{{NEW_COMMIT}}` in a non-fast-forward manner. - -Force pushes reset the review state because the commit history has been rewritten. - -**Required Actions**: -1. The PR loop has updated its tracking to the new commit SHA -2. You must post a new trigger comment to restart the review cycle -3. Post a comment mentioning {{BOT_MENTION_STRING}} to trigger a new review - -**Example trigger comment**: -``` -{{BOT_MENTION_STRING}} Please review these changes. -``` - -After posting a trigger comment, you may attempt to continue. diff --git a/prompt-template/block/no-trigger-comment.md b/prompt-template/block/no-trigger-comment.md deleted file mode 100644 index 9f886f8e..00000000 --- a/prompt-template/block/no-trigger-comment.md +++ /dev/null @@ -1,17 +0,0 @@ -# No Trigger Comment Found - -The PR loop is waiting for a **trigger comment** before it can proceed. - -**Why is this required?** -- Startup case {{STARTUP_CASE}}: {{STARTUP_CASE_DESC}} -- Round {{CURRENT_ROUND}} requires explicit trigger to start the review cycle - -**Required Action**: -Post a comment on the PR mentioning {{BOT_MENTION_STRING}} to trigger a review. 
- -**Example trigger comment**: -``` -{{BOT_MENTION_STRING}} Please review these changes. -``` - -After posting a trigger comment, you may attempt to continue. diff --git a/prompt-template/block/pr-loop-prompt-write.md b/prompt-template/block/pr-loop-prompt-write.md deleted file mode 100644 index 625629ee..00000000 --- a/prompt-template/block/pr-loop-prompt-write.md +++ /dev/null @@ -1,9 +0,0 @@ -# PR Loop File Write Blocked - -You cannot write to `round-*-pr-comment.md` or `round-*-prompt.md` files in `.humanize/pr-loop/`. - -These files are generated by the PR loop system: -- `round-*-pr-comment.md`: Contains PR comments fetched from GitHub -- `round-*-prompt.md`: Contains instructions for the current round - -Both are read-only and managed by the system. diff --git a/prompt-template/block/pr-loop-state-modification.md b/prompt-template/block/pr-loop-state-modification.md deleted file mode 100644 index 5e17692d..00000000 --- a/prompt-template/block/pr-loop-state-modification.md +++ /dev/null @@ -1,12 +0,0 @@ -# PR Loop State File Modification Blocked - -You cannot modify `state.md` in `.humanize/pr-loop/`. This file is managed by the PR loop system. - -The state file contains: -- Current round number -- PR number and branch -- Active bots configuration -- Codex configuration -- Polling settings - -Modifying it would corrupt the PR loop state. diff --git a/prompt-template/pr-loop/codex-goal-tracker-update.md b/prompt-template/pr-loop/codex-goal-tracker-update.md deleted file mode 100644 index 65ba5379..00000000 --- a/prompt-template/pr-loop/codex-goal-tracker-update.md +++ /dev/null @@ -1,64 +0,0 @@ -## Goal Tracker Update Instructions - -After completing your analysis, update the goal tracker file at `{{GOAL_TRACKER_FILE}}`: - -### Required Updates - -1. **Add row to Issue Summary table:** - - Add a new row for this round with your review results - - Format: `| {{NEXT_ROUND}} | | | | |` - - Status should be: "Issues Found", "All Resolved", or "Approved" - -2. 
**Update Total Statistics section:** - - Increment `Total Issues Found` by number of new issues discovered - - Increment `Total Issues Resolved` by number of issues you verified as fixed - - Update `Remaining` to be (Total Found - Total Resolved) - -3. **Add Issue Log entry for this round:** - - Create heading: `### Round {{NEXT_ROUND}}` - - List each issue or approval with details - - Include reviewer name and brief description - -### Example Goal Tracker Update - -If bot "claude" reported 2 new issues and "codex" found 0 issues (approved): - -```markdown -## Issue Summary - -| Round | Reviewer | Issues Found | Issues Resolved | Status | -|-------|----------|--------------|-----------------|--------| -| 0 | - | 0 | 0 | Initial | -| 1 | claude | 2 | 0 | Issues Found | -| 1 | codex | 0 | 0 | Approved | - -## Total Statistics - -- Total Issues Found: 2 -- Total Issues Resolved: 0 -- Remaining: 2 - -## Issue Log - -### Round 0 -*Awaiting initial reviews* - -Started: 2026-01-18T10:00:00Z -Startup Case: 1 - -### Round 1 -**claude** found 2 issues: -1. Missing error handling in auth.ts -2. Test coverage below 80% - -**codex** approved - no issues found. -``` - -### Important Rules - -- Keep the file structure intact -- Use proper markdown table formatting -- Only update the sections mentioned above (Issue Summary, Total Statistics, Issue Log) -- Do not modify the header sections (PR Information, Ultimate Goal) -- Add to existing tables, do not replace them -- Each reviewer gets a separate row in Issue Summary diff --git a/prompt-template/pr-loop/critical-requirements-has-comments.md b/prompt-template/pr-loop/critical-requirements-has-comments.md deleted file mode 100644 index 4855a306..00000000 --- a/prompt-template/pr-loop/critical-requirements-has-comments.md +++ /dev/null @@ -1,24 +0,0 @@ - -=========================================== -CRITICAL - Work Completion Requirements -=========================================== - -When you complete your work, you MUST: - -1. 
COMMIT and PUSH your changes: - - Create a commit with descriptive message - - Push to the remote repository - -2. Comment on the PR to trigger re-review: - gh pr comment {{PR_NUMBER}} --body "{{BOT_MENTION_STRING}} please review" - -3. Write your resolution summary to: - {{RESOLVE_PATH}} - - The summary should include: - - Issues addressed - - Files modified - - Tests added (if any) - -The Stop Hook will then poll for bot reviews. -=========================================== diff --git a/prompt-template/pr-loop/critical-requirements-no-comments.md b/prompt-template/pr-loop/critical-requirements-no-comments.md deleted file mode 100644 index 1b043501..00000000 --- a/prompt-template/pr-loop/critical-requirements-no-comments.md +++ /dev/null @@ -1,21 +0,0 @@ - -=========================================== -CRITICAL - Work Completion Requirements -=========================================== - -When you complete your work, you MUST: - -1. Write your resolution summary to: - {{RESOLVE_PATH}} - - The summary should note: - - This is Round 0 awaiting initial bot reviews - - No issues to address yet - -2. Try to exit - the Stop Hook will poll for bot reviews - -DO NOT comment on the PR to trigger review - the bots will -review automatically since this is a new PR. - -The Stop Hook will poll for bot reviews. -=========================================== diff --git a/prompt-template/pr-loop/goal-tracker-initial.md b/prompt-template/pr-loop/goal-tracker-initial.md deleted file mode 100644 index d95e9936..00000000 --- a/prompt-template/pr-loop/goal-tracker-initial.md +++ /dev/null @@ -1,33 +0,0 @@ -# PR Review Goal Tracker - -## PR Information - -- **PR Number:** #{{PR_NUMBER}} -- **Branch:** {{START_BRANCH}} -- **Started:** {{STARTED_AT}} -- **Monitored Bots:** {{ACTIVE_BOTS_DISPLAY}} -- **Startup Case:** {{STARTUP_CASE}} - -## Ultimate Goal - -Get all monitored bot reviewers ({{ACTIVE_BOTS_DISPLAY}}) to approve this PR. 
- -## Issue Summary - -| Round | Reviewer | Issues Found | Issues Resolved | Status | -|-------|----------|--------------|-----------------|--------| -| 0 | - | 0 | 0 | Initial | - -## Total Statistics - -- Total Issues Found: 0 -- Total Issues Resolved: 0 -- Remaining: 0 - -## Issue Log - -### Round 0 -*Awaiting initial reviews* - -Started: {{STARTED_AT}} -Startup Case: {{STARTUP_CASE}} diff --git a/prompt-template/pr-loop/round-0-header.md b/prompt-template/pr-loop/round-0-header.md deleted file mode 100644 index e694ce41..00000000 --- a/prompt-template/pr-loop/round-0-header.md +++ /dev/null @@ -1,15 +0,0 @@ -Read and execute below with ultrathink - -## PR Review Loop (Round 0) - -You are in a PR review loop monitoring feedback from remote review bots. - -**PR Information:** -- PR Number: #{{PR_NUMBER}} -- Branch: {{START_BRANCH}} -- Active Bots: {{ACTIVE_BOTS_DISPLAY}} - -## Review Comments - -The following comments have been fetched from the PR: - diff --git a/prompt-template/pr-loop/round-0-task-has-comments.md b/prompt-template/pr-loop/round-0-task-has-comments.md deleted file mode 100644 index 37f3e9f1..00000000 --- a/prompt-template/pr-loop/round-0-task-has-comments.md +++ /dev/null @@ -1,43 +0,0 @@ - ---- - -## Your Task - -1. **Analyze the comments above**, prioritizing: - - Human comments first (they take precedence) - - Bot comments (newest first) - -2. **Fix any issues** identified by the reviewers: - - Read the relevant code files - - Make necessary changes - - Create appropriate tests if needed - -3. **After fixing issues**: - - Commit your changes with a descriptive message - - Push to the remote repository - - Comment on the PR to trigger re-review: - ```bash - gh pr comment {{PR_NUMBER}} --body "{{BOT_MENTION_STRING}} please review the latest changes" - ``` - -4. **Write your resolution summary** to: @{{RESOLVE_PATH}} - - List what issues were addressed - - Files modified - - Tests added (if any) - ---- - -## Important Rules - -1. 
**Do not modify state files**: The .humanize/pr-loop/ files are managed by the system -2. **Always push changes**: Your fixes must be pushed for bots to review them -3. **Use the correct comment format**: Tag the bots to trigger their reviews -4. **Be thorough**: Address all valid concerns from the reviewers - ---- - -Note: After you write your summary and try to exit, the Stop Hook will: -1. Poll for new bot reviews (every 30 seconds, up to 15 minutes per bot) -2. When reviews arrive, local Codex will validate if they indicate approval -3. If issues remain, you will receive feedback and continue -4. If all bots approve, the loop ends diff --git a/prompt-template/pr-loop/round-0-task-no-comments.md b/prompt-template/pr-loop/round-0-task-no-comments.md deleted file mode 100644 index 6ed6d9a2..00000000 --- a/prompt-template/pr-loop/round-0-task-no-comments.md +++ /dev/null @@ -1,30 +0,0 @@ - ---- - -## Your Task - -This PR has no review comments yet. The monitored bots ({{ACTIVE_BOTS_DISPLAY}}) will automatically review the PR - you do NOT need to comment to trigger the first review. - -1. **Wait for automatic bot reviews**: - - Simply write your summary and try to exit - - The Stop Hook will poll for the first bot reviews - -2. **Write your initial summary** to: @{{RESOLVE_PATH}} - - Note that this is Round 0 awaiting initial bot reviews - - No issues to address yet - ---- - -## Important Rules - -1. **Do not comment to trigger review**: First reviews are automatic -2. **Do not modify state files**: The .humanize/pr-loop/ files are managed by the system -3. **Trust the process**: The Stop Hook manages polling and Codex validation - ---- - -Note: After you write your summary and try to exit, the Stop Hook will: -1. Poll for bot reviews (every 30 seconds, up to 15 minutes per bot) -2. When reviews arrive, local Codex will validate if they indicate approval -3. If issues are found, you will receive feedback and continue -4. 
If all bots approve, the loop ends diff --git a/scripts/cancel-pr-loop.sh b/scripts/cancel-pr-loop.sh deleted file mode 100755 index 388b536f..00000000 --- a/scripts/cancel-pr-loop.sh +++ /dev/null @@ -1,132 +0,0 @@ -#!/usr/bin/env bash -# -# Cancel script for cancel-pr-loop -# -# Cancels an active PR loop by creating a cancel signal file -# and renaming the state file to cancel-state.md. -# -# Usage: -# cancel-pr-loop.sh [--force] -# -# Exit codes: -# 0 - Successfully cancelled -# 1 - No active loop found -# 2 - Reserved for future use (e.g., confirmation required) -# 3 - Other error -# - -set -euo pipefail - -# ======================================== -# Parse Arguments -# ======================================== - -FORCE="false" - -while [[ $# -gt 0 ]]; do - case $1 in - --force) - FORCE="true" - shift - ;; - -h|--help) - cat << 'HELP_EOF' -cancel-pr-loop.sh - Cancel active PR loop - -USAGE: - cancel-pr-loop.sh [OPTIONS] - -OPTIONS: - --force Force cancel (currently has no additional effect) - -h, --help Show this help message - -EXIT CODES: - 0 - Successfully cancelled - 1 - No active loop found - 3 - Other error - -DESCRIPTION: - Cancels the active PR loop by: - 1. Finding the most recent PR loop directory - 2. Creating a .cancel-requested signal file - 3. Renaming state.md to cancel-state.md - -NOTE: - This command only affects PR loops (.humanize/pr-loop/). - RLCR loops (.humanize/rlcr/) are not affected. 
-HELP_EOF - exit 0 - ;; - *) - echo "Unknown option: $1" >&2 - echo "Use --help for usage information" >&2 - exit 3 - ;; - esac -done - -# ======================================== -# Find Loop Directory -# ======================================== - -PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" -LOOP_BASE_DIR="$PROJECT_ROOT/.humanize/pr-loop" - -# Find newest loop directory (different from RLCR - uses pr-loop instead of rlcr) -LOOP_DIR=$(ls -1d "$LOOP_BASE_DIR"/*/ 2>/dev/null | sort -r | head -1) || true - -if [[ -z "$LOOP_DIR" ]]; then - echo "NO_LOOP" - echo "No active PR loop found." - exit 1 -fi - -# ======================================== -# Check Loop State -# ======================================== - -STATE_FILE="$LOOP_DIR/state.md" -CANCEL_SIGNAL="$LOOP_DIR/.cancel-requested" - -if [[ -f "$STATE_FILE" ]]; then - LOOP_STATE="ACTIVE" - ACTIVE_STATE_FILE="$STATE_FILE" -else - echo "NO_ACTIVE_LOOP" - echo "No active PR loop found. The loop directory exists but no active state file is present." 
- exit 1 -fi - -# ======================================== -# Extract Round Info -# ======================================== - -# Extract current_round and max_iterations from the state file -CURRENT_ROUND=$(grep -E '^current_round:' "$ACTIVE_STATE_FILE" | sed 's/^current_round:[[:space:]]*//' | tr -d ' ') -MAX_ITERATIONS=$(grep -E '^max_iterations:' "$ACTIVE_STATE_FILE" | sed 's/^max_iterations:[[:space:]]*//' | tr -d ' ') -PR_NUMBER=$(grep -E '^pr_number:' "$ACTIVE_STATE_FILE" | sed 's/^pr_number:[[:space:]]*//' | tr -d ' ') - -# Default values if not found -CURRENT_ROUND=${CURRENT_ROUND:-"?"} -MAX_ITERATIONS=${MAX_ITERATIONS:-"?"} -PR_NUMBER=${PR_NUMBER:-"?"} - -# ======================================== -# Perform Cancellation -# ======================================== - -# Create cancel signal file -touch "$CANCEL_SIGNAL" - -# Rename state file to cancel-state.md -mv "$ACTIVE_STATE_FILE" "$LOOP_DIR/cancel-state.md" - -# ======================================== -# Output Result -# ======================================== - -echo "CANCELLED" -echo "Cancelled PR loop for PR #$PR_NUMBER (was at round $CURRENT_ROUND of $MAX_ITERATIONS)." -echo "State preserved as cancel-state.md" - -exit 0 diff --git a/scripts/check-bot-reactions.sh b/scripts/check-bot-reactions.sh deleted file mode 100755 index e14861a6..00000000 --- a/scripts/check-bot-reactions.sh +++ /dev/null @@ -1,308 +0,0 @@ -#!/usr/bin/env bash -# -# Check bot reactions on PR or comments -# -# Detects: -# - Codex +1 (thumbs-up) reaction on PR body (first round approval) -# - Claude eyes reaction on trigger comments (confirmation of receipt) -# -# Usage: -# check-bot-reactions.sh codex-thumbsup [--after ] -# check-bot-reactions.sh claude-eyes [--retry ] [--delay ] -# -# Exit codes: -# 0 - Reaction found -# 1 - Reaction not found (or timeout after all retries) -# 2 - Error (API failure, missing arguments, etc.) 
- -set -euo pipefail - -# ======================================== -# Default Configuration -# ======================================== - -# Timeout for gh operations -GH_TIMEOUT=30 - -# Default retry settings for claude eyes -DEFAULT_MAX_RETRIES=3 -DEFAULT_RETRY_DELAY=5 - -# Source portable timeout wrapper -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" -source "$SCRIPT_DIR/portable-timeout.sh" - -# ======================================== -# Helper Functions -# ======================================== - -show_help() { - cat << 'EOF' -check-bot-reactions.sh - Detect bot reactions on GitHub PRs and comments - -USAGE: - check-bot-reactions.sh codex-thumbsup [--after ] - check-bot-reactions.sh claude-eyes [--retry ] [--delay ] - -COMMANDS: - codex-thumbsup Check for Codex +1 reaction on PR body - Returns reaction created_at timestamp if found - --after: Only count reaction if created after this timestamp - - claude-eyes Check for Claude eyes reaction on a specific comment - Retries with delay if not found immediately - --retry: Number of attempts (default: 3) - --delay: Seconds between attempts (default: 5) - -EXIT CODES: - 0 - Reaction found (outputs JSON with reaction info) - 1 - Reaction not found - 2 - Error (API failure, etc.) 
- -EXAMPLES: - # Check if Codex approved PR #123 with thumbs-up - check-bot-reactions.sh codex-thumbsup 123 - - # Check if Codex approved after loop started - check-bot-reactions.sh codex-thumbsup 123 --after "2026-01-18T10:00:00Z" - - # Wait for Claude eyes reaction on comment (15 seconds total) - check-bot-reactions.sh claude-eyes 12345678 --retry 3 --delay 5 -EOF - exit 0 -} - -# ======================================== -# Parse Arguments -# ======================================== - -COMMAND="${1:-}" -shift || true - -if [[ -z "$COMMAND" ]] || [[ "$COMMAND" == "-h" ]] || [[ "$COMMAND" == "--help" ]]; then - show_help -fi - -case "$COMMAND" in - codex-thumbsup) - # Parse codex-thumbsup arguments - PR_NUMBER="" - AFTER_TIMESTAMP="" - - while [[ $# -gt 0 ]]; do - case $1 in - --after) - AFTER_TIMESTAMP="$2" - shift 2 - ;; - -*) - echo "Error: Unknown option for codex-thumbsup: $1" >&2 - exit 2 - ;; - *) - if [[ -z "$PR_NUMBER" ]]; then - PR_NUMBER="$1" - else - echo "Error: Multiple PR numbers specified" >&2 - exit 2 - fi - shift - ;; - esac - done - - if [[ -z "$PR_NUMBER" ]]; then - echo "Error: PR number is required for codex-thumbsup" >&2 - exit 2 - fi - - # IMPORTANT: Use the PR's base repository for API calls (for fork PR support) - # Reactions are on the base repo, not the fork - # Strategy: Try current repo first, check if PR exists there, then try parent repo for forks - - # Step 1: Get current repo - CURRENT_REPO=$(run_with_timeout "$GH_TIMEOUT" gh repo view --json owner,name \ - -q '.owner.login + "/" + .name' 2>/dev/null) || CURRENT_REPO="" - - # Step 2: Determine the correct repo for PR operations - # Try current repo first - if PR exists there, use it - PR_BASE_REPO="" - if [[ -n "$CURRENT_REPO" ]]; then - if run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$CURRENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$CURRENT_REPO" - fi - fi - - # Step 3: If PR not found in current repo, try parent repo (fork case) - if 
[[ -z "$PR_BASE_REPO" ]]; then - PARENT_REPO=$(run_with_timeout "$GH_TIMEOUT" gh repo view --json parent \ - -q '.parent.owner.login + "/" + .parent.name' 2>/dev/null) || PARENT_REPO="" - if [[ -n "$PARENT_REPO" && "$PARENT_REPO" != "null/" && "$PARENT_REPO" != "/" ]]; then - if run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$PARENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$PARENT_REPO" - fi - fi - fi - - # Step 4: Final fallback to current repo - if [[ -z "$PR_BASE_REPO" ]]; then - PR_BASE_REPO="$CURRENT_REPO" - fi - - # Fetch PR reactions (with pagination to catch all reactions) - # The PR body is treated as issue #PR_NUMBER, so we use the issues reactions endpoint - # IMPORTANT: Use PR_BASE_REPO for fork PR support - # IMPORTANT: Use --paginate to fetch all reactions (default is 30 per page) - # NOTE: --paginate with --jq emits one array per page; use jq -s 'add' to merge them - REACTIONS=$(run_with_timeout "$GH_TIMEOUT" gh api "repos/$PR_BASE_REPO/issues/$PR_NUMBER/reactions" \ - --paginate --jq '[.[] | {user: .user.login, content: .content, created_at: .created_at}]' 2>/dev/null \ - | jq -s 'add // []') || { - echo "Error: Failed to fetch PR reactions" >&2 - exit 2 - } - - # Look for Codex +1 reaction - # User login: chatgpt-codex-connector[bot] - CODEX_REACTION=$(echo "$REACTIONS" | jq -r ' - [.[] | select(.user == "chatgpt-codex-connector[bot]" and .content == "+1")] | .[0] // empty - ') - - if [[ "$CODEX_REACTION" == "null" ]] || [[ -z "$CODEX_REACTION" ]]; then - # No +1 reaction from Codex - exit 1 - fi - - REACTION_AT=$(echo "$CODEX_REACTION" | jq -r '.created_at') - - # If --after specified, check timestamp - if [[ -n "$AFTER_TIMESTAMP" ]]; then - if [[ "$REACTION_AT" < "$AFTER_TIMESTAMP" ]]; then - # Reaction exists but is older than specified timestamp - exit 1 - fi - fi - - # Output reaction info - echo "$CODEX_REACTION" - exit 0 - ;; - - claude-eyes) - # Parse claude-eyes arguments - COMMENT_ID="" - 
PR_NUMBER="" - MAX_RETRIES="$DEFAULT_MAX_RETRIES" - RETRY_DELAY="$DEFAULT_RETRY_DELAY" - - while [[ $# -gt 0 ]]; do - case $1 in - --retry) - MAX_RETRIES="$2" - shift 2 - ;; - --delay) - RETRY_DELAY="$2" - shift 2 - ;; - --pr) - PR_NUMBER="$2" - shift 2 - ;; - -*) - echo "Error: Unknown option for claude-eyes: $1" >&2 - exit 2 - ;; - *) - if [[ -z "$COMMENT_ID" ]]; then - COMMENT_ID="$1" - else - echo "Error: Multiple comment IDs specified" >&2 - exit 2 - fi - shift - ;; - esac - done - - if [[ -z "$COMMENT_ID" ]]; then - echo "Error: Comment ID is required for claude-eyes" >&2 - exit 2 - fi - - # IMPORTANT: Use the PR's base repository for API calls (for fork PR support) - # Reactions are on the base repo, not the fork - # Strategy: Try current repo first, check if PR exists there, then try parent repo for forks - - # Step 1: Get current repo - CURRENT_REPO=$(run_with_timeout "$GH_TIMEOUT" gh repo view --json owner,name \ - -q '.owner.login + "/" + .name' 2>/dev/null) || CURRENT_REPO="" - - # Step 2: Determine the correct repo for PR operations (if PR number provided) - PR_BASE_REPO="" - if [[ -n "$PR_NUMBER" && -n "$CURRENT_REPO" ]]; then - if run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$CURRENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$CURRENT_REPO" - fi - fi - - # Step 3: If PR not found in current repo and PR number provided, try parent repo (fork case) - if [[ -z "$PR_BASE_REPO" && -n "$PR_NUMBER" ]]; then - PARENT_REPO=$(run_with_timeout "$GH_TIMEOUT" gh repo view --json parent \ - -q '.parent.owner.login + "/" + .parent.name' 2>/dev/null) || PARENT_REPO="" - if [[ -n "$PARENT_REPO" && "$PARENT_REPO" != "null/" && "$PARENT_REPO" != "/" ]]; then - if run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$PARENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$PARENT_REPO" - fi - fi - fi - - # Step 4: Final fallback to current repo - if [[ -z "$PR_BASE_REPO" ]]; then - 
PR_BASE_REPO="$CURRENT_REPO" - fi - - # Retry loop for eyes reaction - for attempt in $(seq 1 "$MAX_RETRIES"); do - # Wait before checking (gives Claude time to react) - sleep "$RETRY_DELAY" - - # Fetch comment reactions (with pagination to catch all reactions) - # IMPORTANT: Use PR_BASE_REPO for fork PR support - # IMPORTANT: Use --paginate to fetch all reactions (default is 30 per page) - # NOTE: --paginate with --jq emits one array per page; use jq -s 'add' to merge them - REACTIONS=$(run_with_timeout "$GH_TIMEOUT" gh api "repos/$PR_BASE_REPO/issues/comments/$COMMENT_ID/reactions" \ - --paginate --jq '[.[] | {user: .user.login, content: .content, created_at: .created_at}]' 2>/dev/null \ - | jq -s 'add // []') || { - # API error - continue to next attempt - continue - } - - # Look for Claude eyes reaction - # User login: claude[bot] - CLAUDE_REACTION=$(echo "$REACTIONS" | jq -r ' - [.[] | select(.user == "claude[bot]" and .content == "eyes")] | .[0] // empty - ') - - if [[ "$CLAUDE_REACTION" != "null" ]] && [[ -n "$CLAUDE_REACTION" ]]; then - # Found eyes reaction - echo "$CLAUDE_REACTION" - exit 0 - fi - - # Not found yet, will retry if attempts remain - if [[ $attempt -lt $MAX_RETRIES ]]; then - echo "Attempt $attempt/$MAX_RETRIES: Eyes not found, retrying..." >&2 - fi - done - - # All attempts exhausted - echo "No eyes reaction found after $MAX_RETRIES attempts ($(( MAX_RETRIES * RETRY_DELAY )) seconds total)" >&2 - exit 1 - ;; - - *) - echo "Error: Unknown command: $COMMAND" >&2 - echo "Use --help for usage information" >&2 - exit 2 - ;; -esac diff --git a/scripts/check-pr-reviewer-status.sh b/scripts/check-pr-reviewer-status.sh deleted file mode 100755 index e4915c99..00000000 --- a/scripts/check-pr-reviewer-status.sh +++ /dev/null @@ -1,275 +0,0 @@ -#!/usr/bin/env bash -# -# Check PR reviewer status for startup case determination -# -# Analyzes reviewer comments on ENTIRE PR (not just after latest commit) -# to determine which startup case applies. 
-# -# Usage: -# check-pr-reviewer-status.sh --bots -# -# Output (JSON): -# { -# "case": 1-5, -# "reviewers_commented": ["claude"], -# "reviewers_missing": ["codex"], -# "latest_commit_sha": "abc123", -# "latest_commit_at": "2026-01-18T12:00:00Z", -# "newest_review_at": "2026-01-18T11:00:00Z", -# "has_commits_after_reviews": true -# } -# -# Cases: -# 1 - No reviewer comments at all -# 2 - Some (not all) reviewers commented -# 3 - All reviewers commented, no new commits after -# 4 - All reviewers commented, new commits after (needs re-review) -# 5 - All reviewers commented, new commits after (like case 4, for future distinction) - -set -euo pipefail - -# ======================================== -# Default Configuration -# ======================================== - -# Timeout for gh operations -GH_TIMEOUT=60 - -# Source portable timeout wrapper -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" -source "$SCRIPT_DIR/portable-timeout.sh" - -# ======================================== -# Parse Arguments -# ======================================== - -PR_NUMBER="" -BOT_LIST="" - -while [[ $# -gt 0 ]]; do - case $1 in - --bots) - if [[ -z "${2:-}" ]]; then - echo "Error: --bots requires a comma-separated list of bot names" >&2 - exit 1 - fi - BOT_LIST="$2" - shift 2 - ;; - -*) - echo "Error: Unknown option: $1" >&2 - exit 1 - ;; - *) - if [[ -z "$PR_NUMBER" ]]; then - PR_NUMBER="$1" - else - echo "Error: Multiple PR numbers specified" >&2 - exit 1 - fi - shift - ;; - esac -done - -if [[ -z "$PR_NUMBER" ]]; then - echo "Error: PR number is required" >&2 - echo "Usage: check-pr-reviewer-status.sh --bots " >&2 - exit 1 -fi - -if [[ -z "$BOT_LIST" ]]; then - echo "Error: --bots is required" >&2 - echo "Usage: check-pr-reviewer-status.sh --bots " >&2 - exit 1 -fi - -# ======================================== -# Bot Name Mapping -# ======================================== - -# Map bot names to GitHub comment author names: -# - claude -> claude[bot] -# - codex -> 
chatgpt-codex-connector[bot] -map_bot_to_author() { - local bot="$1" - case "$bot" in - codex) echo "chatgpt-codex-connector[bot]" ;; - *) echo "${bot}[bot]" ;; - esac -} - -# ======================================== -# Fetch PR Data -# ======================================== - -# Parse bot list into array -IFS=',' read -ra BOTS <<< "$BOT_LIST" - -# IMPORTANT: For fork PRs, we need to resolve the base (upstream) repository -# gh pr view without --repo fails in forks because the PR number doesn't exist there -# Strategy: First get current repo, check if PR exists there, then try parent repo for forks - -# Step 1: Get the current repo (works in both forks and base repos) -CURRENT_REPO=$(run_with_timeout "$GH_TIMEOUT" gh repo view --json owner,name \ - -q '.owner.login + "/" + .name' 2>/dev/null) || CURRENT_REPO="" - -# Step 2: Determine the correct repo for PR operations -# Try current repo first - if PR exists there, use it -PR_BASE_REPO="" -if [[ -n "$CURRENT_REPO" ]]; then - if run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$CURRENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$CURRENT_REPO" - fi -fi - -if [[ -z "$PR_BASE_REPO" ]]; then - # PR not found in current repo - check if this is a fork and try parent repo - PARENT_REPO=$(run_with_timeout "$GH_TIMEOUT" gh repo view --json parent \ - -q '.parent.owner.login + "/" + .parent.name' 2>/dev/null) || PARENT_REPO="" - if [[ -n "$PARENT_REPO" && "$PARENT_REPO" != "null/" && "$PARENT_REPO" != "/" ]]; then - if run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$PARENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$PARENT_REPO" - fi - fi -fi - -if [[ -z "$PR_BASE_REPO" ]]; then - echo "Warning: Could not resolve PR base repository, using current repo" >&2 - PR_BASE_REPO="$CURRENT_REPO" -fi - -# Get latest commit info (use --repo for fork support) -COMMIT_INFO=$(run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$PR_BASE_REPO" \ - 
--json headRefOid,commits \ - --jq '{sha: .headRefOid, date: (.commits | sort_by(.committedDate) | last | .committedDate)}' 2>/dev/null) || { - echo "Error: Failed to fetch PR commit info" >&2 - exit 1 -} - -LATEST_COMMIT_SHA=$(echo "$COMMIT_INFO" | jq -r '.sha') -LATEST_COMMIT_AT=$(echo "$COMMIT_INFO" | jq -r '.date') - -# Fetch all comments (issue comments, review comments, and PR review submissions) -# Using --paginate to handle PRs with many comments -# IMPORTANT: Use PR_BASE_REPO for fork PR support -ISSUE_COMMENTS=$(run_with_timeout "$GH_TIMEOUT" gh api "repos/$PR_BASE_REPO/issues/$PR_NUMBER/comments" \ - --paginate --jq '[.[] | {author: .user.login, created_at: .created_at, body: .body}]' 2>/dev/null) || ISSUE_COMMENTS="[]" - -REVIEW_COMMENTS=$(run_with_timeout "$GH_TIMEOUT" gh api "repos/$PR_BASE_REPO/pulls/$PR_NUMBER/comments" \ - --paginate --jq '[.[] | {author: .user.login, created_at: .created_at, body: .body}]' 2>/dev/null) || REVIEW_COMMENTS="[]" - -# Also fetch PR review submissions (APPROVE, REQUEST_CHANGES, COMMENT reviews) -# These are different from inline review comments and may be the only feedback from some bots -PR_REVIEWS=$(run_with_timeout "$GH_TIMEOUT" gh api "repos/$PR_BASE_REPO/pulls/$PR_NUMBER/reviews" \ - --paginate --jq '[.[] | {author: .user.login, created_at: .submitted_at, body: .body, state: .state}]' 2>/dev/null) || PR_REVIEWS="[]" - -# Combine all comments and reviews -ALL_COMMENTS=$(echo "$ISSUE_COMMENTS $REVIEW_COMMENTS $PR_REVIEWS" | jq -s 'add // []') - -# ======================================== -# Analyze Comments by Bot -# ======================================== - -declare -a REVIEWERS_COMMENTED=() -declare -a REVIEWERS_MISSING=() -declare -a REVIEWERS_STALE=() # Bots whose latest review is before latest commit -NEWEST_REVIEW_AT="" - -for bot in "${BOTS[@]}"; do - author=$(map_bot_to_author "$bot") - - # Check if this bot has any comments - BOT_COMMENTS=$(echo "$ALL_COMMENTS" | jq --arg author "$author" '[.[] | 
select(.author == $author)]') - BOT_COUNT=$(echo "$BOT_COMMENTS" | jq 'length') - - if [[ "$BOT_COUNT" -gt 0 ]]; then - REVIEWERS_COMMENTED+=("$bot") - - # Track this bot's newest review timestamp - BOT_NEWEST=$(echo "$BOT_COMMENTS" | jq -r 'sort_by(.created_at) | reverse | .[0].created_at') - - # Check if this bot's review is stale (before latest commit) - # This is per-bot, not global - a bot's review can be stale even if another bot reviewed later - if [[ -n "$LATEST_COMMIT_AT" && -n "$BOT_NEWEST" && "$LATEST_COMMIT_AT" > "$BOT_NEWEST" ]]; then - REVIEWERS_STALE+=("$bot") - fi - - # Track global newest for output (still useful for debugging) - if [[ -z "$NEWEST_REVIEW_AT" ]] || [[ "$BOT_NEWEST" > "$NEWEST_REVIEW_AT" ]]; then - NEWEST_REVIEW_AT="$BOT_NEWEST" - fi - else - REVIEWERS_MISSING+=("$bot") - fi -done - -# ======================================== -# Determine Case -# ======================================== - -CASE=0 -HAS_COMMITS_AFTER_REVIEWS=false - -# Count how many bots have commented -COMMENTED_COUNT=${#REVIEWERS_COMMENTED[@]} -MISSING_COUNT=${#REVIEWERS_MISSING[@]} -STALE_COUNT=${#REVIEWERS_STALE[@]} -TOTAL_BOTS=${#BOTS[@]} - -if [[ $COMMENTED_COUNT -eq 0 ]]; then - # Case 1: No reviewer comments at all - CASE=1 -elif [[ $MISSING_COUNT -gt 0 ]]; then - # Some (not all) reviewers commented - # Check if ANY bot that commented has a stale review (per-bot check) - if [[ $STALE_COUNT -gt 0 ]]; then - # Case 5: Some reviewers commented, but at least one has stale review - HAS_COMMITS_AFTER_REVIEWS=true - CASE=5 - else - # Case 2: Some reviewers commented, all reviews are fresh - CASE=2 - fi -else - # All reviewers have commented - # Check if ANY bot has a stale review (per-bot check, not global newest) - if [[ $STALE_COUNT -gt 0 ]]; then - # Case 4: All reviewers commented, but at least one has stale review - HAS_COMMITS_AFTER_REVIEWS=true - CASE=4 - else - # Case 3: All commented, all reviews are fresh - CASE=3 - fi -fi - -# 
======================================== -# Output JSON -# ======================================== - -# Build JSON arrays -COMMENTED_JSON=$(printf '%s\n' "${REVIEWERS_COMMENTED[@]}" | jq -R . | jq -s .) -MISSING_JSON=$(printf '%s\n' "${REVIEWERS_MISSING[@]}" | jq -R . | jq -s .) - -# Handle empty arrays -[[ ${#REVIEWERS_COMMENTED[@]} -eq 0 ]] && COMMENTED_JSON="[]" -[[ ${#REVIEWERS_MISSING[@]} -eq 0 ]] && MISSING_JSON="[]" - -jq -n \ - --argjson case "$CASE" \ - --argjson reviewers_commented "$COMMENTED_JSON" \ - --argjson reviewers_missing "$MISSING_JSON" \ - --arg latest_commit_sha "$LATEST_COMMIT_SHA" \ - --arg latest_commit_at "$LATEST_COMMIT_AT" \ - --arg newest_review_at "${NEWEST_REVIEW_AT:-null}" \ - --argjson has_commits_after_reviews "$HAS_COMMITS_AFTER_REVIEWS" \ - '{ - case: $case, - reviewers_commented: $reviewers_commented, - reviewers_missing: $reviewers_missing, - latest_commit_sha: $latest_commit_sha, - latest_commit_at: $latest_commit_at, - newest_review_at: (if $newest_review_at == "null" then null else $newest_review_at end), - has_commits_after_reviews: $has_commits_after_reviews - }' diff --git a/scripts/fetch-pr-comments.sh b/scripts/fetch-pr-comments.sh deleted file mode 100755 index b4e892b1..00000000 --- a/scripts/fetch-pr-comments.sh +++ /dev/null @@ -1,452 +0,0 @@ -#!/usr/bin/env bash -# -# Fetch PR comments from GitHub -# -# Fetches all types of PR comments: -# - Issue comments (general comments on the PR) -# - Review comments (inline code comments) -# - PR reviews (summary reviews with approval/rejection status) -# -# Usage: -# fetch-pr-comments.sh [--after ] -# -# Output: Formatted markdown file with all comments -# - -set -euo pipefail - -# ======================================== -# Parse Arguments -# ======================================== - -PR_NUMBER="" -OUTPUT_FILE="" -AFTER_TIMESTAMP="" -ACTIVE_BOTS="" # Comma-separated list of active bots for grouping - -while [[ $# -gt 0 ]]; do - case $1 in - --after) - if [[ -z "${2:-}" 
]]; then - echo "Error: --after requires a timestamp argument" >&2 - exit 1 - fi - AFTER_TIMESTAMP="$2" - shift 2 - ;; - --bots) - if [[ -z "${2:-}" ]]; then - echo "Error: --bots requires a comma-separated list of bot names" >&2 - exit 1 - fi - ACTIVE_BOTS="$2" - shift 2 - ;; - -h|--help) - cat << 'HELP_EOF' -fetch-pr-comments.sh - Fetch PR comments from GitHub - -USAGE: - fetch-pr-comments.sh [OPTIONS] - -ARGUMENTS: - The PR number to fetch comments from - Path to write the formatted comments - -OPTIONS: - --after Only include comments after this ISO 8601 timestamp - --bots Comma-separated list of active bots for grouping - -h, --help Show this help message - -OUTPUT FORMAT: - The output file contains markdown-formatted comments with: - - Comment type (issue comment, review comment, PR review) - - Author (with [bot] indicator for bot accounts) - - Timestamp - - Content - - Comments are deduplicated by ID and sorted newest first. - Human comments come before bot comments. - If --bots is provided, bot comments are grouped by bot. -HELP_EOF - exit 0 - ;; - -*) - echo "Error: Unknown option: $1" >&2 - exit 1 - ;; - *) - if [[ -z "$PR_NUMBER" ]]; then - PR_NUMBER="$1" - elif [[ -z "$OUTPUT_FILE" ]]; then - OUTPUT_FILE="$1" - else - echo "Error: Unexpected argument: $1" >&2 - exit 1 - fi - shift - ;; - esac -done - -# Validate arguments -if [[ -z "$PR_NUMBER" ]]; then - echo "Error: PR number is required" >&2 - exit 1 -fi - -if [[ -z "$OUTPUT_FILE" ]]; then - echo "Error: Output file is required" >&2 - exit 1 -fi - -if ! [[ "$PR_NUMBER" =~ ^[0-9]+$ ]]; then - echo "Error: Invalid PR number: $PR_NUMBER" >&2 - exit 1 -fi - -# ======================================== -# Check Prerequisites -# ======================================== - -if ! command -v gh &>/dev/null; then - echo "Error: GitHub CLI (gh) is required" >&2 - exit 1 -fi - -if ! 
command -v jq &>/dev/null; then - echo "Error: jq is required for JSON parsing" >&2 - exit 1 -fi - -# ======================================== -# Get Repository Info -# ======================================== - -# IMPORTANT: For fork PRs, we need to resolve the base (upstream) repository -# gh pr view without --repo fails in forks because the PR number doesn't exist there -# Strategy: First get current repo, check if PR exists there, then try parent repo for forks - -# Step 1: Get the current repo (works in both forks and base repos) -CURRENT_REPO=$(gh repo view --json owner,name -q '.owner.login + "/" + .name' 2>/dev/null) || { - echo "Error: Failed to get current repository" >&2 - exit 1 -} - -# Step 2: Determine the correct repo for PR operations -# Try current repo first - if PR exists there, use it -PR_BASE_REPO="" -if gh pr view "$PR_NUMBER" --repo "$CURRENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$CURRENT_REPO" -else - # PR not found in current repo - check if this is a fork and try parent repo - PARENT_REPO=$(gh repo view --json parent -q '.parent.owner.login + "/" + .parent.name' 2>/dev/null) || PARENT_REPO="" - if [[ -n "$PARENT_REPO" && "$PARENT_REPO" != "null/" && "$PARENT_REPO" != "/" ]]; then - if gh pr view "$PR_NUMBER" --repo "$PARENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$PARENT_REPO" - fi - fi -fi - -if [[ -z "$PR_BASE_REPO" ]]; then - echo "Error: Failed to find PR #$PR_NUMBER in current or parent repository" >&2 - exit 1 -fi - -REPO_OWNER="${PR_BASE_REPO%%/*}" -REPO_NAME="${PR_BASE_REPO##*/}" - -if [[ -z "$REPO_OWNER" || -z "$REPO_NAME" ]]; then - echo "Error: Could not parse repository owner/name from: $PR_BASE_REPO" >&2 - exit 1 -fi - -# ======================================== -# Fetch Comments -# ======================================== - -# Create temporary files for each comment type -TEMP_DIR=$(mktemp -d) -trap 'rm -rf "$TEMP_DIR"' EXIT - 
-ISSUE_COMMENTS_FILE="$TEMP_DIR/issue_comments.json" -REVIEW_COMMENTS_FILE="$TEMP_DIR/review_comments.json" -PR_REVIEWS_FILE="$TEMP_DIR/pr_reviews.json" - -# Retry configuration -MAX_RETRIES=3 -RETRY_DELAY=2 - -# Track API failures for strict mode -API_FAILURES=0 - -# Function to fetch with retries -fetch_with_retry() { - local endpoint="$1" - local output_file="$2" - local description="$3" - local attempt=1 - - while [[ $attempt -le $MAX_RETRIES ]]; do - if gh api "$endpoint" --paginate > "$output_file" 2>/dev/null; then - return 0 - fi - - if [[ $attempt -lt $MAX_RETRIES ]]; then - echo "Warning: Failed to fetch $description (attempt $attempt/$MAX_RETRIES), retrying in ${RETRY_DELAY}s..." >&2 - sleep "$RETRY_DELAY" - else - echo "ERROR: Failed to fetch $description after $MAX_RETRIES attempts" >&2 - echo "[]" > "$output_file" - API_FAILURES=$((API_FAILURES + 1)) - # Return 0 so script continues under set -euo pipefail - # API_FAILURES counter tracks failures for strict mode if needed - return 0 - fi - ((attempt++)) - done -} - -# Fetch issue comments (general PR comments) -# claude[bot] typically posts here -fetch_with_retry "repos/$REPO_OWNER/$REPO_NAME/issues/$PR_NUMBER/comments" "$ISSUE_COMMENTS_FILE" "issue comments" - -# Fetch PR review comments (inline code comments) -# codex (chatgpt-codex-connector[bot]) typically posts inline comments here -fetch_with_retry "repos/$REPO_OWNER/$REPO_NAME/pulls/$PR_NUMBER/comments" "$REVIEW_COMMENTS_FILE" "PR review comments" - -# Fetch PR reviews (summary reviews with approval status) -# Both bots may post summary reviews here -fetch_with_retry "repos/$REPO_OWNER/$REPO_NAME/pulls/$PR_NUMBER/reviews" "$PR_REVIEWS_FILE" "PR reviews" - -# ======================================== -# Process and Format Comments -# ======================================== - -# Function to check if user is a bot -is_bot() { - local user_type="$1" - local user_login="$2" - - if [[ "$user_type" == "Bot" ]] || [[ "$user_login" == *"[bot]" ]]; then 
- echo "true" - else - echo "false" - fi -} - -# Function to format timestamp for comparison -format_timestamp() { - local ts="$1" - # Remove trailing Z and convert to comparable format - echo "$ts" | sed 's/Z$//' | tr 'T' ' ' -} - -# Initialize output file -cat > "$OUTPUT_FILE" << EOF -# PR Comments for #$PR_NUMBER - -Fetched at: $(date -u +%Y-%m-%dT%H:%M:%SZ) -Repository: $REPO_OWNER/$REPO_NAME - ---- - -EOF - -# Process all comments into a unified format -# Create a combined JSON with all comments -ALL_COMMENTS_FILE="$TEMP_DIR/all_comments.json" - -# Process issue comments -jq -r --arg type "issue_comment" ' - if type == "array" then - .[] | { - type: $type, - id: .id, - author: .user.login, - author_type: .user.type, - created_at: .created_at, - updated_at: .updated_at, - body: .body, - path: null, - line: null, - state: null - } - else - empty - end -' "$ISSUE_COMMENTS_FILE" > "$TEMP_DIR/issue_processed.jsonl" 2>/dev/null || true - -# Process review comments (inline) -jq -r --arg type "review_comment" ' - if type == "array" then - .[] | { - type: $type, - id: .id, - author: .user.login, - author_type: .user.type, - created_at: .created_at, - updated_at: .updated_at, - body: .body, - path: .path, - line: (.line // .original_line), - state: null - } - else - empty - end -' "$REVIEW_COMMENTS_FILE" > "$TEMP_DIR/review_processed.jsonl" 2>/dev/null || true - -# Process PR reviews -# Note: Include all reviews, even those with empty body (e.g. 
approval-only reviews) -# For empty body reviews, use a placeholder indicating the state -jq -r --arg type "pr_review" ' - if type == "array" then - .[] | { - type: $type, - id: .id, - author: .user.login, - author_type: .user.type, - created_at: .submitted_at, - updated_at: .submitted_at, - body: (if .body == null or .body == "" then "[Review state: \(.state)]" else .body end), - path: null, - line: null, - state: .state - } - else - empty - end -' "$PR_REVIEWS_FILE" > "$TEMP_DIR/reviews_processed.jsonl" 2>/dev/null || true - -# Combine all processed comments and deduplicate by id -cat "$TEMP_DIR/issue_processed.jsonl" "$TEMP_DIR/review_processed.jsonl" "$TEMP_DIR/reviews_processed.jsonl" 2>/dev/null | \ - jq -s 'unique_by(.id)' > "$ALL_COMMENTS_FILE" - -# Filter by timestamp if provided -if [[ -n "$AFTER_TIMESTAMP" ]]; then - jq --arg after "$AFTER_TIMESTAMP" ' - [.[] | select(.created_at > $after)] - ' "$ALL_COMMENTS_FILE" > "$TEMP_DIR/filtered.json" - mv "$TEMP_DIR/filtered.json" "$ALL_COMMENTS_FILE" -fi - -# Sort: human comments first, then by timestamp (newest first) -# Uses fromdateiso8601 for proper ISO 8601 timestamp parsing -# Filter out entries with null created_at to avoid fromdateiso8601 errors -jq ' - [.[] | select(.created_at != null)] | - sort_by( - (if .author_type == "Bot" or (.author | test("\\[bot\\]$")) then 1 else 0 end), - -(.created_at | fromdateiso8601) - ) -' "$ALL_COMMENTS_FILE" > "$TEMP_DIR/sorted.json" - -# Format comments into markdown -COMMENT_COUNT=$(jq 'length' "$TEMP_DIR/sorted.json") - -if [[ "$COMMENT_COUNT" == "0" ]]; then - cat >> "$OUTPUT_FILE" << EOF -*No comments found.* - ---- - -This PR has no review comments yet from the monitored bots. 
-EOF -else - # Add section headers - echo "## Human Comments" >> "$OUTPUT_FILE" - echo "" >> "$OUTPUT_FILE" - - # First pass: human comments - HUMAN_COMMENTS=$(jq -r ' - .[] | select(.author_type != "Bot" and (.author | test("\\[bot\\]$") | not)) | - "### Comment from \(.author)\n\n" + - "- **Type**: \(.type | gsub("_"; " "))\n" + - "- **Time**: \(.created_at)\n" + - (if .path then "- **File**: `\(.path)`\(if .line then " (line \(.line))" else "" end)\n" else "" end) + - (if .state then "- **Status**: \(.state)\n" else "" end) + - "\n\(.body)\n\n---\n" - ' "$TEMP_DIR/sorted.json" 2>/dev/null || true) - - if [[ -n "$HUMAN_COMMENTS" ]]; then - echo "$HUMAN_COMMENTS" >> "$OUTPUT_FILE" - else - echo "*No human comments.*" >> "$OUTPUT_FILE" - echo "" >> "$OUTPUT_FILE" - fi - - echo "" >> "$OUTPUT_FILE" - - # Second pass: bot comments - if [[ -n "$ACTIVE_BOTS" ]]; then - # Map bot names to GitHub comment author names: - # - claude -> claude[bot] - # - codex -> chatgpt-codex-connector[bot] - map_bot_to_author() { - local bot="$1" - case "$bot" in - codex) echo "chatgpt-codex-connector[bot]" ;; - *) echo "${bot}[bot]" ;; - esac - } - - # Group bot comments by active bots - echo "## Bot Comments (Grouped by Bot)" >> "$OUTPUT_FILE" - echo "" >> "$OUTPUT_FILE" - - IFS=',' read -ra BOT_ARRAY <<< "$ACTIVE_BOTS" - for bot in "${BOT_ARRAY[@]}"; do - bot=$(echo "$bot" | tr -d ' ') - author=$(map_bot_to_author "$bot") - echo "### Comments from ${author}" >> "$OUTPUT_FILE" - echo "" >> "$OUTPUT_FILE" - - BOT_COMMENTS=$(jq -r --arg author "$author" ' - [.[] | select(.author == $author)] | - if length == 0 then - "*No comments from this bot.*\n" - else - .[] | - "#### Comment\n\n" + - "- **Type**: \(.type | gsub("_"; " "))\n" + - "- **Time**: \(.created_at)\n" + - (if .path then "- **File**: `\(.path)`\(if .line then " (line \(.line))" else "" end)\n" else "" end) + - (if .state then "- **Status**: \(.state)\n" else "" end) + - "\n\(.body)\n\n---\n" - end - ' "$TEMP_DIR/sorted.json" 
2>/dev/null || echo "*Error reading comments.*") - - echo "$BOT_COMMENTS" >> "$OUTPUT_FILE" - echo "" >> "$OUTPUT_FILE" - done - else - # Default: all bot comments together - echo "## Bot Comments" >> "$OUTPUT_FILE" - echo "" >> "$OUTPUT_FILE" - - jq -r ' - .[] | select(.author_type == "Bot" or (.author | test("\\[bot\\]$"))) | - "### Comment from \(.author)\n\n" + - "- **Type**: \(.type | gsub("_"; " "))\n" + - "- **Time**: \(.created_at)\n" + - (if .path then "- **File**: `\(.path)`\(if .line then " (line \(.line))" else "" end)\n" else "" end) + - (if .state then "- **Status**: \(.state)\n" else "" end) + - "\n\(.body)\n\n---\n" - ' "$TEMP_DIR/sorted.json" >> "$OUTPUT_FILE" 2>/dev/null || true - fi -fi - -echo "" >> "$OUTPUT_FILE" -echo "---" >> "$OUTPUT_FILE" -echo "" >> "$OUTPUT_FILE" -echo "*End of comments*" >> "$OUTPUT_FILE" - -# Report API failures (non-fatal but logged) -if [[ $API_FAILURES -gt 0 ]]; then - echo "WARNING: $API_FAILURES API endpoint(s) failed after retries. Some comments may be missing." >&2 - echo "" >> "$OUTPUT_FILE" - echo "**Warning:** Some API calls failed. Comments may be incomplete." 
>> "$OUTPUT_FILE" -fi - -exit 0 diff --git a/scripts/humanize.sh b/scripts/humanize.sh index c5ac3f20..346c1802 100755 --- a/scripts/humanize.sh +++ b/scripts/humanize.sh @@ -1176,9 +1176,6 @@ humanize() { rlcr) _humanize_monitor_codex "$@" ;; - pr) - _humanize_monitor_pr "$@" - ;; skill) _humanize_monitor_skill "$@" ;; @@ -1189,11 +1186,10 @@ humanize() { _humanize_monitor_skill --tool-filter gemini "$@" ;; *) - echo "Usage: humanize monitor " + echo "Usage: humanize monitor " echo "" echo "Subcommands:" echo " rlcr Monitor the latest RLCR loop log from .humanize/rlcr" - echo " pr Monitor the latest PR loop from .humanize/pr-loop" echo " skill Monitor all skill invocations (codex + gemini)" echo " codex Monitor ask-codex skill invocations only" echo " gemini Monitor ask-gemini skill invocations only" @@ -1212,7 +1208,6 @@ humanize() { echo "" echo "Commands:" echo " monitor rlcr Monitor the latest RLCR loop log" - echo " monitor pr Monitor the latest PR loop" echo " monitor skill Monitor all skill invocations (codex + gemini)" echo " monitor codex Monitor ask-codex skill invocations only" echo " monitor gemini Monitor ask-gemini skill invocations only" @@ -1221,442 +1216,6 @@ humanize() { esac } -# ======================================== -# PR Loop Monitor Function -# ======================================== - -# Monitor the latest PR loop from .humanize/pr-loop with fixed status bar and rolling tail -_humanize_monitor_pr() { - # Enable 0-indexed arrays in zsh for bash compatibility - [[ -n "${ZSH_VERSION:-}" ]] && setopt localoptions ksharrays - - local loop_dir=".humanize/pr-loop" - local current_file="" - local current_session_dir="" - local check_interval=2 # seconds between checking for new files - local status_bar_height=10 # number of lines for status bar - local once_mode=false - - # Parse arguments - while [[ $# -gt 0 ]]; do - case "$1" in - --once) - once_mode=true - shift - ;; - *) - shift - ;; - esac - done - - # Check if .humanize/pr-loop exists - if 
[[ ! -d "$loop_dir" ]]; then - echo "Error: $loop_dir directory not found in current directory" - echo "Are you in a project with an active PR loop?" - return 1 - fi - - # Use shared monitor helper for finding latest session - _pr_find_latest_session() { - monitor_find_latest_session "$loop_dir" - } - - # Function to find the latest monitorable file (pr-check, pr-feedback, or pr-comment) - _pr_find_latest_file() { - local session_dir="$1" - [[ ! -d "$session_dir" ]] && return - - local latest="" - local latest_mtime=0 - - # Check for pr-check files (Codex analysis output) - while IFS= read -r f; do - [[ -z "$f" ]] && continue - [[ ! -f "$f" ]] && continue - local mtime=$(stat -c %Y "$f" 2>/dev/null || stat -f %m "$f" 2>/dev/null || echo 0) - if [[ "$mtime" -gt "$latest_mtime" ]]; then - latest="$f" - latest_mtime="$mtime" - fi - done < <(find "$session_dir" -maxdepth 1 -name 'round-*-pr-check.md' -type f 2>/dev/null) - - # Check for pr-feedback files - while IFS= read -r f; do - [[ -z "$f" ]] && continue - [[ ! -f "$f" ]] && continue - local mtime=$(stat -c %Y "$f" 2>/dev/null || stat -f %m "$f" 2>/dev/null || echo 0) - if [[ "$mtime" -gt "$latest_mtime" ]]; then - latest="$f" - latest_mtime="$mtime" - fi - done < <(find "$session_dir" -maxdepth 1 -name 'round-*-pr-feedback.md' -type f 2>/dev/null) - - # Check for pr-comment files - while IFS= read -r f; do - [[ -z "$f" ]] && continue - [[ ! 
-f "$f" ]] && continue - local mtime=$(stat -c %Y "$f" 2>/dev/null || stat -f %m "$f" 2>/dev/null || echo 0) - if [[ "$mtime" -gt "$latest_mtime" ]]; then - latest="$f" - latest_mtime="$mtime" - fi - done < <(find "$session_dir" -maxdepth 1 -name 'round-*-pr-comment.md' -type f 2>/dev/null) - - echo "$latest" - } - - # Use shared monitor helper for finding state file - # Note: monitor_find_state_file returns "approve" not "approved" for approve-state.md - # so we maintain the PR-specific status mapping here for display purposes - _pr_find_state_file() { - local session_dir="$1" - local result - result=$(monitor_find_state_file "$session_dir") - local state_file="${result%|*}" - local stop_reason="${result#*|}" - - # Map stop reasons to PR-friendly status names - case "$stop_reason" in - approve) stop_reason="approved" ;; - maxiter) stop_reason="max-iterations" ;; - esac - - echo "$state_file|$stop_reason" - } - - # Function to parse state.md and return key values - _pr_parse_state_md() { - local state_file="$1" - [[ ! 
-f "$state_file" ]] && echo "0|42|?|?|?|?|N/A" && return - - local frontmatter - frontmatter=$(sed -n '/^---$/,/^---$/{ /^---$/d; p; }' "$state_file" 2>/dev/null || echo "") - - local current_round=$(echo "$frontmatter" | grep "^current_round:" | sed "s/current_round: *//" | tr -d ' ') - local max_iterations=$(echo "$frontmatter" | grep "^max_iterations:" | sed "s/max_iterations: *//" | tr -d ' ') - local pr_number=$(echo "$frontmatter" | grep "^pr_number:" | sed "s/pr_number: *//" | tr -d ' ') - local start_branch=$(echo "$frontmatter" | grep "^start_branch:" | sed "s/start_branch: *//" | tr -d '"' || true) - local configured_bots=$(echo "$frontmatter" | sed -n '/^configured_bots:$/,/^[a-z_]*:/{ /^ - /{ s/^ - //; p; } }' | tr '\n' ',' | sed 's/,$//') - local active_bots=$(echo "$frontmatter" | sed -n '/^active_bots:$/,/^[a-z_]*:/{ /^ - /{ s/^ - //; p; } }' | tr '\n' ',' | sed 's/,$//') - local codex_model=$(echo "$frontmatter" | grep "^codex_model:" | sed "s/codex_model: *//" | tr -d ' ') - local codex_effort=$(echo "$frontmatter" | grep "^codex_effort:" | sed "s/codex_effort: *//" | tr -d ' ') - local started_at=$(echo "$frontmatter" | grep "^started_at:" | sed "s/started_at: *//" || true) - - # Apply defaults - current_round=${current_round:-0} - max_iterations=${max_iterations:-42} - pr_number=${pr_number:-"?"} - start_branch=${start_branch:-"?"} - configured_bots=${configured_bots:-"none"} - active_bots=${active_bots:-"none"} - codex_model=${codex_model:-"$DEFAULT_CODEX_MODEL"} - codex_effort=${codex_effort:-"medium"} - started_at=${started_at:-"N/A"} - - echo "$current_round|$max_iterations|$pr_number|$start_branch|$configured_bots|$active_bots|$codex_model|$codex_effort|$started_at" - } - - # Draw the status bar at the top - _pr_draw_status_bar() { - local session_dir="$1" - local monitored_file="$2" - local loop_status="$3" - local term_width=$(tput cols) - - # Parse state file - local state_info=$(_pr_find_state_file "$session_dir") - local 
state_file="${state_info%|*}" - [[ -z "$loop_status" ]] && loop_status="${state_info#*|}" - - local state_values=$(_pr_parse_state_md "$state_file") - IFS='|' read -r current_round max_iterations pr_number start_branch configured_bots active_bots codex_model codex_effort started_at <<< "$state_values" - - # Save cursor position and move to top - tput sc - - # ANSI color codes - local green="\033[1;32m" yellow="\033[1;33m" cyan="\033[1;36m" - local magenta="\033[1;35m" red="\033[1;31m" reset="\033[0m" - local bg="\033[44m" bold="\033[1m" dim="\033[2m" - local clr_eol="\033[K" # Clear to end of line (reduces flicker vs clearing entire area) - - # Move to top and draw directly (no pre-clearing to avoid flicker) - tput cup 0 0 - local session_basename=$(basename "$session_dir") - printf "${bg}${bold}%-${term_width}s${reset}${clr_eol}\n" " PR Loop Monitor" - printf "${cyan}Session:${reset} %s ${cyan}PR:${reset} #%s ${cyan}Branch:${reset} %s${clr_eol}\n" "$session_basename" "$pr_number" "$start_branch" - printf "${green}Round:${reset} ${bold}%s${reset} / %s ${yellow}Codex:${reset} %s (%s)${clr_eol}\n" "$current_round" "$max_iterations" "$codex_model" "$codex_effort" - - # Detect phase and determine status color - local phase="" - local phase_display="" - if type get_pr_loop_phase &>/dev/null; then - phase=$(get_pr_loop_phase "$session_dir") - phase_display=$(get_pr_loop_phase_display "$phase" "$active_bots") - fi - - # Loop status line with color based on phase/status - local status_color="${green}" - case "$phase" in - approved) status_color="${cyan}" ;; - cancelled) status_color="${yellow}" ;; - maxiter) status_color="${red}" ;; - codex_analyzing) status_color="${magenta}" ;; - waiting_initial_review) status_color="${yellow}" ;; - waiting_reviewer) status_color="${green}" ;; - *) status_color="${dim}" ;; - esac - - if [[ -n "$phase_display" ]]; then - printf "${magenta}Phase:${reset} ${status_color}%s${reset}${clr_eol}\n" "$phase_display" - else - # Fallback to 
loop_status if phase detection not available - case "$loop_status" in - active) status_color="${green}" ;; - approved|completed) status_color="${cyan}" ;; - cancelled) status_color="${yellow}" ;; - max-iterations) status_color="${red}" ;; - *) status_color="${dim}" ;; - esac - printf "${magenta}Status:${reset} ${status_color}%s${reset}${clr_eol}\n" "$loop_status" - fi - - # Bot status - printf "${cyan}Configured Bots:${reset} %s${clr_eol}\n" "$configured_bots" - if [[ "$active_bots" == "none" ]] || [[ -z "$active_bots" ]]; then - printf "${green}Active Bots:${reset} ${green}all approved${reset}${clr_eol}\n" - else - printf "${yellow}Active Bots:${reset} %s${clr_eol}\n" "$active_bots" - fi - - # Goal tracker issue stats - local goal_tracker_file="$session_dir/goal-tracker.md" - if [[ -f "$goal_tracker_file" ]] && type humanize_parse_pr_goal_tracker &>/dev/null; then - local tracker_stats=$(humanize_parse_pr_goal_tracker "$goal_tracker_file") - local total_issues resolved_issues remaining_issues last_reviewer - IFS='|' read -r total_issues resolved_issues remaining_issues last_reviewer <<< "$tracker_stats" - if [[ "$total_issues" != "0" ]] || [[ "$resolved_issues" != "0" ]]; then - printf "${cyan}Issues:${reset} Found: ${yellow}%s${reset}, Resolved: ${green}%s${reset}, Remaining: ${red}%s${reset}${clr_eol}\n" "$total_issues" "$resolved_issues" "$remaining_issues" - fi - fi - - # Started time - local start_display="$started_at" - if [[ "$started_at" != "N/A" ]]; then - start_display=$(echo "$started_at" | sed 's/T/ /; s/Z/ UTC/') - fi - printf "${dim}Started:${reset} %s${clr_eol}\n" "$start_display" - - # Currently monitoring - local file_basename="" - [[ -n "$monitored_file" ]] && file_basename=$(basename "$monitored_file") - printf "${dim}Watching:${reset} %s${clr_eol}\n" "${file_basename:-none}" - - # Separator - printf "%-${term_width}s${clr_eol}\n" "$(printf '%*s' "$term_width" | tr ' ' '-')" - - # Restore cursor position - tput rc - } - - # Track state for 
cleanup - local TAIL_PID="" - local monitor_running=true - local cleanup_done=false - - # Cleanup function - called by trap - # Must work cleanly in both bash and zsh - _pr_cleanup() { - # Prevent multiple cleanup calls - [[ "${cleanup_done:-false}" == "true" ]] && return - cleanup_done=true - monitor_running=false - - # Reset traps to prevent re-triggering - trap - INT TERM EXIT 2>/dev/null || true - - # Kill background tail if running - if [[ -n "${TAIL_PID:-}" ]]; then - if kill -0 "$TAIL_PID" 2>/dev/null; then - kill "$TAIL_PID" 2>/dev/null || true - # Use timeout-safe wait - ( wait "$TAIL_PID" 2>/dev/null ) & - wait $! 2>/dev/null || true - fi - fi - - # Show cursor and restore terminal - tput cnorm 2>/dev/null || true - tput rmcup 2>/dev/null || true - echo "" - echo "Monitor stopped." - } - - # Set up signal handlers (bash/zsh compatible) - # Use TRAPINT/TRAPTERM for zsh, standard trap for bash - if [[ -n "${ZSH_VERSION:-}" ]]; then - # zsh: use TRAPINT and TRAPTERM for better handling - TRAPINT() { _pr_cleanup; return 130; } - TRAPTERM() { _pr_cleanup; return 143; } - # Also set EXIT trap for clean exit - trap '_pr_cleanup' EXIT - else - # bash: use standard trap - trap '_pr_cleanup' EXIT INT TERM - fi - - # One-shot mode: print status once and exit (for testing and scripting) - if [[ "$once_mode" == "true" ]]; then - local session_dir=$(_pr_find_latest_session) - if [[ -z "$session_dir" ]]; then - echo "No PR loop sessions found in $loop_dir" - return 1 - fi - - local state_info=$(_pr_find_state_file "$session_dir") - local state_file="${state_info%|*}" - local loop_status="${state_info#*|}" - - if [[ -z "$state_file" ]]; then - echo "No state file found in $session_dir" - return 1 - fi - - local state_values=$(_pr_parse_state_md "$state_file") - IFS='|' read -r current_round max_iterations pr_number start_branch configured_bots active_bots codex_model codex_effort started_at <<< "$state_values" - - # Get phase for --once mode display - local phase="" - 
local phase_display="" - if declare -f get_pr_loop_phase &>/dev/null; then - phase=$(get_pr_loop_phase "$session_dir") - phase_display=$(get_pr_loop_phase_display "$phase" "$active_bots") - fi - - echo "==========================================" - echo " PR Loop Monitor" - echo "==========================================" - echo "" - echo "Session: $(basename "$session_dir")" - if [[ -n "$phase_display" ]]; then - echo "Phase: $phase_display" - else - echo "Status: $loop_status" - fi - echo "" - echo "PR Number: #$pr_number" - echo "Branch: $start_branch" - echo "Configured Bots: ${configured_bots:-none}" - echo "Active Bots: ${active_bots:-none}" - echo "" - echo "Round: $current_round / $max_iterations" - echo "Codex: $codex_model:$codex_effort" - echo "Started: $started_at" - echo "" - echo "==========================================" - echo " Recent Files" - echo "==========================================" - echo "" - - # List recent round files - local round_files - round_files=$(find "$session_dir" -maxdepth 1 -name 'round-*.md' -type f 2>/dev/null) - if [[ -n "$round_files" ]]; then - echo "$round_files" | xargs ls -lt 2>/dev/null | head -10 | while read -r line; do - echo " $line" - done - fi - - echo "" - echo "==========================================" - echo " Latest Activity" - echo "==========================================" - echo "" - - local latest_file=$(_pr_find_latest_file "$session_dir") - if [[ -n "$latest_file" && -f "$latest_file" ]]; then - echo "Latest: $(basename "$latest_file")" - echo "----------------------------------------" - tail -20 "$latest_file" - echo "" - fi - - echo "==========================================" - return 0 - fi - - # Initialize terminal - tput smcup # Save screen - tput civis # Hide cursor - clear - - # Create scrolling region below status bar - tput csr $status_bar_height $(($(tput lines) - 1)) - - # Main monitoring loop - while [[ "$monitor_running" == "true" ]]; do - # Find latest session - local 
session_dir=$(_pr_find_latest_session) - if [[ -z "$session_dir" ]]; then - tput cup $status_bar_height 0 - echo "Waiting for PR loop session..." - sleep "$check_interval" - continue - fi - - # Check if session changed - if [[ "$session_dir" != "$current_session_dir" ]]; then - current_session_dir="$session_dir" - current_file="" - [[ -n "$TAIL_PID" ]] && kill "$TAIL_PID" 2>/dev/null - TAIL_PID="" - fi - - # Find latest file to monitor - local latest_file=$(_pr_find_latest_file "$session_dir") - - # Get loop status - local state_info=$(_pr_find_state_file "$session_dir") - local loop_status="${state_info#*|}" - - # Update status bar - _pr_draw_status_bar "$session_dir" "$latest_file" "$loop_status" - - # Check if file changed or new file appeared - if [[ "$latest_file" != "$current_file" ]] && [[ -n "$latest_file" ]]; then - current_file="$latest_file" - - # Kill old tail process - [[ -n "$TAIL_PID" ]] && kill "$TAIL_PID" 2>/dev/null - - # Clear content area and show new file - tput cup $status_bar_height 0 - tput ed # Clear to end of screen - - # Start tailing the new file - tail -n +1 -f "$current_file" 2>/dev/null & - TAIL_PID=$! - fi - - # If no file to monitor yet, show waiting message - if [[ -z "$current_file" ]]; then - tput cup $status_bar_height 0 - echo "Waiting for PR loop activity..." 
- fi - - sleep "$check_interval" - done - - # Reset trap handlers (zsh and bash) - if [[ -n "${ZSH_VERSION:-}" ]]; then - # zsh: undefine the TRAP* functions - unfunction TRAPINT TRAPTERM 2>/dev/null || true - else - trap - INT TERM EXIT - fi -} - # Source skill monitor (provides _humanize_monitor_skill) if [[ -f "$HUMANIZE_SCRIPT_DIR/lib/monitor-skill.sh" ]]; then source "$HUMANIZE_SCRIPT_DIR/lib/monitor-skill.sh" diff --git a/scripts/lib/monitor-common.sh b/scripts/lib/monitor-common.sh index 5409396d..671a3100 100644 --- a/scripts/lib/monitor-common.sh +++ b/scripts/lib/monitor-common.sh @@ -2,7 +2,7 @@ # # monitor-common.sh - Shared utilities for humanize monitor functions # -# This file contains common functions used by both RLCR and PR loop monitors. +# This file contains common functions used by humanize monitor functions. # It should be sourced by humanize.sh rather than executed directly. # ======================================== @@ -255,135 +255,6 @@ monitor_truncate_string() { fi } -# ======================================== -# PR Loop Phase Detection -# ======================================== - -# Detect current PR loop phase from file state -# Returns: one of: approved, cancelled, maxiter, codex_analyzing, waiting_initial_review, waiting_reviewer -# -# Usage: get_pr_loop_phase "/path/to/session" -# -# Detection strategy for codex_analyzing: -# 1. Find the latest round's pr-check.md file -# 2. Check if it's growing by comparing current size with cached previous size -# 3. Cache size in /tmp for comparison on next call -get_pr_loop_phase() { - local session_dir="$1" - - [[ ! 
-d "$session_dir" ]] && echo "unknown" && return - - # Check for final states first - [[ -f "$session_dir/approve-state.md" ]] && echo "approved" && return - [[ -f "$session_dir/cancel-state.md" ]] && echo "cancelled" && return - [[ -f "$session_dir/maxiter-state.md" ]] && echo "maxiter" && return - - # Check for Codex running by detecting file growth - # Find the highest numbered round pr-check file - local latest_check="" - local highest_round=-1 - while IFS= read -r f; do - [[ -z "$f" ]] && continue - local basename=$(basename "$f") - local round_str="${basename#round-}" - round_str="${round_str%-pr-check.md}" - if [[ "$round_str" =~ ^[0-9]+$ ]] && [[ "$round_str" -gt "$highest_round" ]]; then - highest_round="$round_str" - latest_check="$f" - fi - done < <(find "$session_dir" -maxdepth 1 -name 'round-*-pr-check.md' -type f 2>/dev/null) - - if [[ -n "$latest_check" ]]; then - # Get current file size - local current_size - current_size=$(stat -c%s "$latest_check" 2>/dev/null || stat -f%z "$latest_check" 2>/dev/null || echo 0) - - # Cache file for tracking size changes (unique per session) - local session_name=$(basename "$session_dir") - local cache_file="/tmp/humanize-phase-${session_name}-${highest_round}.size" - - # Read previous size from cache - local previous_size=0 - [[ -f "$cache_file" ]] && previous_size=$(cat "$cache_file" 2>/dev/null || echo 0) - - # Update cache with current size - echo "$current_size" > "$cache_file" 2>/dev/null || true - - # If file is growing OR is new (no previous record), Codex is analyzing - # Also check mtime as fallback (file modified in last 10 seconds) - local now_epoch file_epoch - now_epoch=$(date +%s) - file_epoch=$(stat -c %Y "$latest_check" 2>/dev/null || stat -f %m "$latest_check" 2>/dev/null || echo 0) - local age_seconds=$((now_epoch - file_epoch)) - - if [[ "$current_size" -gt "$previous_size" ]] || [[ "$age_seconds" -lt 10 ]]; then - echo "codex_analyzing" - return - fi - fi - - # Check state.md for round info - if 
[[ -f "$session_dir/state.md" ]]; then - local frontmatter - frontmatter=$(sed -n '/^---$/,/^---$/{ /^---$/d; p; }' "$session_dir/state.md" 2>/dev/null) - - local current_round - local startup_case - current_round=$(echo "$frontmatter" | grep "^current_round:" | sed "s/current_round: *//" | tr -d ' ') - startup_case=$(echo "$frontmatter" | grep "^startup_case:" | sed "s/startup_case: *//" | tr -d ' ') - - current_round=${current_round:-0} - startup_case=${startup_case:-1} - - if [[ "$current_round" -eq 0 && "$startup_case" -eq 1 ]]; then - echo "waiting_initial_review" - else - echo "waiting_reviewer" - fi - else - echo "unknown" - fi -} - -# Get human-readable description for PR loop phase -# Usage: get_pr_loop_phase_display "waiting_reviewer" "claude,codex" -get_pr_loop_phase_display() { - local phase="$1" - local active_bots="$2" - - case "$phase" in - approved) - echo "All reviews approved" - ;; - cancelled) - echo "Loop cancelled" - ;; - maxiter) - echo "Max iterations reached" - ;; - codex_analyzing) - echo "Codex analyzing reviews..." 
- ;; - waiting_initial_review) - if [[ -n "$active_bots" && "$active_bots" != "none" ]]; then - echo "Waiting for initial PR review from $active_bots" - else - echo "Waiting for initial PR review" - fi - ;; - waiting_reviewer) - if [[ -n "$active_bots" && "$active_bots" != "none" ]]; then - echo "Waiting for $active_bots (polling...)" - else - echo "Waiting for reviews (polling...)" - fi - ;; - *) - echo "Unknown phase" - ;; - esac -} - # ======================================== # Goal Tracker Parsing # ======================================== @@ -511,37 +382,3 @@ parse_goal_tracker() { echo "${total_acs}|${completed_acs}|${active_tasks}|${completed_tasks}|${deferred_tasks}|${open_issues}|${goal_summary}" } -# Parse PR goal-tracker.md for issue statistics -# Returns: total_issues|resolved_issues|remaining_issues|last_reviewer -# Usage: humanize_parse_pr_goal_tracker "/path/to/goal-tracker.md" -humanize_parse_pr_goal_tracker() { - local tracker_file="$1" - if [[ ! -f "$tracker_file" ]]; then - echo "0|0|0|none" - return - fi - - # Extract from Total Statistics section - # Format: - Total Issues Found: N - local total_issues - total_issues=$(grep -E "^- Total Issues Found:" "$tracker_file" | sed 's/.*: //' | tr -d ' ') - total_issues=${total_issues:-0} - - local resolved_issues - resolved_issues=$(grep -E "^- Total Issues Resolved:" "$tracker_file" | sed 's/.*: //' | tr -d ' ') - resolved_issues=${resolved_issues:-0} - - local remaining_issues - remaining_issues=$(grep -E "^- Remaining:" "$tracker_file" | sed 's/.*: //' | tr -d ' ') - remaining_issues=${remaining_issues:-0} - - # Get last reviewer from Issue Summary table (last row, Reviewer column) - # Table format: | ID | Reviewer | Round | Status | Description | - # Pattern matches rows like "|1|..." or "| 1 |..." 
(with or without spaces) - local last_reviewer - last_reviewer=$(sed -n '/## Issue Summary/,/^##/p' "$tracker_file" \ - | grep -E '^\|[[:space:]]*[0-9]+' | tail -1 | cut -d'|' -f3 | tr -d ' ') - last_reviewer=${last_reviewer:-none} - - echo "${total_issues}|${resolved_issues}|${remaining_issues}|${last_reviewer}" -} diff --git a/scripts/poll-pr-reviews.sh b/scripts/poll-pr-reviews.sh deleted file mode 100755 index 282899bd..00000000 --- a/scripts/poll-pr-reviews.sh +++ /dev/null @@ -1,328 +0,0 @@ -#!/usr/bin/env bash -# -# Poll for new PR reviews from specified bots -# -# Checks for new comments from specified bots after a given timestamp. -# -# Usage: -# poll-pr-reviews.sh --after --bots -# -# Output: JSON with new comments from the bots, or empty array if none -# - -set -euo pipefail - -# ======================================== -# Parse Arguments -# ======================================== - -PR_NUMBER="" -AFTER_TIMESTAMP="" -BOTS="" - -while [[ $# -gt 0 ]]; do - case $1 in - --after) - if [[ -z "${2:-}" ]]; then - echo "Error: --after requires a timestamp argument" >&2 - exit 1 - fi - AFTER_TIMESTAMP="$2" - shift 2 - ;; - --bots) - if [[ -z "${2:-}" ]]; then - echo "Error: --bots requires a comma-separated list of bot names" >&2 - exit 1 - fi - BOTS="$2" - shift 2 - ;; - -h|--help) - cat << 'HELP_EOF' -poll-pr-reviews.sh - Poll for new PR reviews from bots - -USAGE: - poll-pr-reviews.sh --after --bots - -ARGUMENTS: - The PR number to poll - -OPTIONS: - --after Only return comments after this ISO 8601 timestamp - --bots Comma-separated list of bot names to watch - -h, --help Show this help message - -OUTPUT: - JSON object with: - - comments: Array of new comments from watched bots - - bots_responded: Array of bot names that have new comments - - has_new_comments: Boolean indicating if any new comments found - -EXAMPLE: - poll-pr-reviews.sh 123 --after 2026-01-18T12:00:00Z --bots claude,codex -HELP_EOF - exit 0 - ;; - -*) - echo "Error: Unknown option: $1" >&2 - 
exit 1 - ;; - *) - if [[ -z "$PR_NUMBER" ]]; then - PR_NUMBER="$1" - else - echo "Error: Unexpected argument: $1" >&2 - exit 1 - fi - shift - ;; - esac -done - -# Validate arguments -if [[ -z "$PR_NUMBER" ]]; then - echo "Error: PR number is required" >&2 - exit 1 -fi - -if [[ -z "$AFTER_TIMESTAMP" ]]; then - echo "Error: --after timestamp is required" >&2 - exit 1 -fi - -if [[ -z "$BOTS" ]]; then - echo "Error: --bots list is required" >&2 - exit 1 -fi - -if ! [[ "$PR_NUMBER" =~ ^[0-9]+$ ]]; then - echo "Error: Invalid PR number: $PR_NUMBER" >&2 - exit 1 -fi - -# ======================================== -# Check Prerequisites -# ======================================== - -if ! command -v gh &>/dev/null; then - echo "Error: GitHub CLI (gh) is required" >&2 - exit 1 -fi - -if ! command -v jq &>/dev/null; then - echo "Error: jq is required for JSON parsing" >&2 - exit 1 -fi - -# ======================================== -# Get Repository Info -# ======================================== - -# IMPORTANT: For fork PRs, we need to resolve the base (upstream) repository -# gh pr view without --repo fails in forks because the PR number doesn't exist there -# Strategy: First get current repo, then try to get PR's base repo with --repo flag - -# Step 1: Get the current repo (works in both forks and base repos) -CURRENT_REPO=$(gh repo view --json owner,name -q '.owner.login + "/" + .name' 2>/dev/null) || { - echo "Error: Failed to get current repository" >&2 - exit 1 -} - -# Step 2: Determine the correct repo for PR operations -# Try current repo first - if PR exists there, use it -PR_BASE_REPO="" -if gh pr view "$PR_NUMBER" --repo "$CURRENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$CURRENT_REPO" -else - # PR not found in current repo - check if this is a fork and try parent repo - PARENT_REPO=$(gh repo view --json parent -q '.parent.owner.login + "/" + .parent.name' 2>/dev/null) || PARENT_REPO="" - if [[ -n "$PARENT_REPO" && "$PARENT_REPO" != 
"null/" && "$PARENT_REPO" != "/" ]]; then - if gh pr view "$PR_NUMBER" --repo "$PARENT_REPO" --json number -q .number >/dev/null 2>&1; then - PR_BASE_REPO="$PARENT_REPO" - fi - fi -fi - -if [[ -z "$PR_BASE_REPO" ]]; then - echo "Error: Failed to find PR #$PR_NUMBER in current or parent repository" >&2 - exit 1 -fi - -REPO_OWNER="${PR_BASE_REPO%%/*}" -REPO_NAME="${PR_BASE_REPO##*/}" - -if [[ -z "$REPO_OWNER" || -z "$REPO_NAME" ]]; then - echo "Error: Could not parse repository owner/name from: $PR_BASE_REPO" >&2 - exit 1 -fi - -# ======================================== -# Build Bot Filter -# ======================================== - -# Map bot names to GitHub comment author names: -# - claude -> claude[bot] -# - codex -> chatgpt-codex-connector[bot] -map_bot_to_author() { - local bot="$1" - case "$bot" in - codex) echo "chatgpt-codex-connector[bot]" ;; - *) echo "${bot}[bot]" ;; - esac -} - -# Convert comma-separated bots to jq filter pattern -BOT_PATTERNS="" -IFS=',' read -ra BOT_ARRAY <<< "$BOTS" -for bot in "${BOT_ARRAY[@]}"; do - bot=$(echo "$bot" | tr -d ' ') - author=$(map_bot_to_author "$bot") - if [[ -n "$BOT_PATTERNS" ]]; then - BOT_PATTERNS="$BOT_PATTERNS|" - fi - # Escape brackets for regex - BOT_PATTERNS="${BOT_PATTERNS}${author//\[/\\[}" - BOT_PATTERNS="${BOT_PATTERNS//\]/\\]}" -done - -# ======================================== -# Fetch and Filter Comments -# ======================================== - -# Create temporary files -TEMP_DIR=$(mktemp -d) -trap 'rm -rf "$TEMP_DIR"' EXIT - -ALL_COMMENTS_FILE="$TEMP_DIR/all_comments.json" -FILTERED_FILE="$TEMP_DIR/filtered.json" - -# Retry configuration -MAX_RETRIES=3 -RETRY_DELAY=2 - -# Track API failures (for diagnostics, not script termination) -API_FAILURES=0 - -# Function to fetch with retries -# Returns 0 even on failure to prevent script termination under set -euo pipefail -# On failure, outputs empty array "[]" so jq processing continues gracefully -fetch_with_retry() { - local endpoint="$1" - local 
attempt=1 - local result="" - - while [[ $attempt -le $MAX_RETRIES ]]; do - result=$(gh api "$endpoint" --paginate 2>/dev/null) && { - echo "$result" - return 0 - } - - if [[ $attempt -lt $MAX_RETRIES ]]; then - echo "Warning: API fetch failed (attempt $attempt/$MAX_RETRIES), retrying..." >&2 - sleep "$RETRY_DELAY" - else - echo "Warning: API fetch failed after $MAX_RETRIES attempts for $endpoint" >&2 - API_FAILURES=$((API_FAILURES + 1)) - fi - ((attempt++)) - done - - # Return empty array and success (0) to allow polling to continue - # Partial API outages shouldn't terminate the entire poll loop - echo "[]" - return 0 -} - -# Initialize empty array -echo "[]" > "$ALL_COMMENTS_FILE" - -# Fetch issue comments -ISSUE_COMMENTS=$(fetch_with_retry "repos/$REPO_OWNER/$REPO_NAME/issues/$PR_NUMBER/comments") -echo "$ISSUE_COMMENTS" | jq -r --arg type "issue_comment" ' - if type == "array" then - [.[] | { - type: $type, - id: .id, - author: .user.login, - author_type: .user.type, - created_at: .created_at, - body: .body - }] - else - [] - end -' > "$TEMP_DIR/issue.json" - -# Fetch review comments -REVIEW_COMMENTS=$(fetch_with_retry "repos/$REPO_OWNER/$REPO_NAME/pulls/$PR_NUMBER/comments") -echo "$REVIEW_COMMENTS" | jq -r --arg type "review_comment" ' - if type == "array" then - [.[] | { - type: $type, - id: .id, - author: .user.login, - author_type: .user.type, - created_at: .created_at, - body: .body, - path: .path, - line: (.line // .original_line) - }] - else - [] - end -' > "$TEMP_DIR/review.json" - -# Fetch PR reviews -# Note: Include all reviews, even those with empty body (e.g. 
approval-only reviews) -# For empty body reviews, use a placeholder indicating the state -PR_REVIEWS=$(fetch_with_retry "repos/$REPO_OWNER/$REPO_NAME/pulls/$PR_NUMBER/reviews") -echo "$PR_REVIEWS" | jq -r --arg type "pr_review" ' - if type == "array" then - [.[] | { - type: $type, - id: .id, - author: .user.login, - author_type: .user.type, - created_at: .submitted_at, - body: (if .body == null or .body == "" then "[Review state: \(.state)]" else .body end), - state: .state - }] - else - [] - end -' > "$TEMP_DIR/reviews.json" - -# Combine all comments -jq -s 'add' "$TEMP_DIR/issue.json" "$TEMP_DIR/review.json" "$TEMP_DIR/reviews.json" > "$ALL_COMMENTS_FILE" - -# Filter: after timestamp AND from watched bots -jq --arg after "$AFTER_TIMESTAMP" --arg pattern "$BOT_PATTERNS" ' - [.[] | select( - .created_at >= $after and - (.author | test($pattern; "i")) - )] -' "$ALL_COMMENTS_FILE" > "$FILTERED_FILE" - -# ======================================== -# Build Output -# ======================================== - -COMMENT_COUNT=$(jq 'length' "$FILTERED_FILE") - -# Get list of bots that responded -BOTS_RESPONDED=$(jq -r '[.[] | .author] | unique | join(",")' "$FILTERED_FILE") - -# Build final output -jq -n \ - --argjson comments "$(cat "$FILTERED_FILE")" \ - --arg bots_responded "$BOTS_RESPONDED" \ - --argjson has_new $(if [[ "$COMMENT_COUNT" -gt 0 ]]; then echo "true"; else echo "false"; fi) \ - '{ - comments: $comments, - bots_responded: ($bots_responded | split(",") | map(select(length > 0))), - has_new_comments: $has_new, - comment_count: ($comments | length) - }' - -exit 0 diff --git a/scripts/setup-pr-loop.sh b/scripts/setup-pr-loop.sh deleted file mode 100755 index 648250da..00000000 --- a/scripts/setup-pr-loop.sh +++ /dev/null @@ -1,945 +0,0 @@ -#!/usr/bin/env bash -# -# Setup script for start-pr-loop -# -# Creates state files for the PR loop that monitors GitHub PR reviews from bots. 
-# -# Usage: -# setup-pr-loop.sh --claude|--codex [--max N] [--codex-model MODEL:EFFORT] [--codex-timeout SECONDS] -# - -set -euo pipefail - -# ======================================== -# Default Configuration -# ======================================== - -# Override effort before sourcing loop-common.sh (PR loop defaults to medium effort). -# codex_model is NOT pre-set here so that config-backed values from loop-common.sh apply. -DEFAULT_CODEX_EFFORT="medium" -DEFAULT_CODEX_TIMEOUT=900 -DEFAULT_MAX_ITERATIONS=42 - -# Polling configuration -POLL_INTERVAL=30 -POLL_TIMEOUT=900 # 15 minutes per bot - -# Default timeout for git operations (30 seconds) -GIT_TIMEOUT=30 - -# Default timeout for GitHub CLI operations (60 seconds) -GH_TIMEOUT=60 - -# Source portable timeout wrapper -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" -source "$SCRIPT_DIR/portable-timeout.sh" - -# Source template loader and shared loop library (provides DEFAULT_CODEX_MODEL and other constants) -HOOKS_LIB_DIR="$(cd "$SCRIPT_DIR/../hooks/lib" && pwd)" -source "$HOOKS_LIB_DIR/template-loader.sh" -source "$HOOKS_LIB_DIR/loop-common.sh" - -# Initialize template directory -TEMPLATE_DIR="${TEMPLATE_DIR:-$(get_template_dir "$HOOKS_LIB_DIR")}" - -# ======================================== -# Parse Arguments -# ======================================== - -MAX_ITERATIONS="$DEFAULT_MAX_ITERATIONS" -CODEX_MODEL="$DEFAULT_CODEX_MODEL" -CODEX_EFFORT="$DEFAULT_CODEX_EFFORT" -CODEX_TIMEOUT="$DEFAULT_CODEX_TIMEOUT" - -# Bot flags -BOT_CLAUDE="false" -BOT_CODEX="false" - -show_help() { - cat << 'HELP_EOF' -start-pr-loop - PR review loop with remote bot monitoring - -USAGE: - /humanize:start-pr-loop --claude|--codex [OPTIONS] - -BOT FLAGS (at least one required): - --claude Monitor reviews from claude[bot] (trigger: @claude) - --codex Monitor reviews from chatgpt-codex-connector[bot] (trigger: @codex) - -OPTIONS: - --max Maximum iterations before auto-stop (default: 42) - --codex-model - Codex model 
and reasoning effort (default from config, effort: medium) - --codex-timeout - Timeout for each Codex review in seconds (default: 900) - -h, --help Show this help message - -DESCRIPTION: - Starts a PR review loop that: - - 1. Detects the PR associated with the current branch - 2. Fetches review comments from the specified bot(s) - 3. Analyzes and fixes issues identified by the bot(s) - 4. Pushes changes and triggers re-review by commenting @bot - 5. Waits for bot response (polls every 30s, 15min timeout) - 6. Uses local Codex to verify if remote concerns are valid - - The flow: - 1. Claude analyzes PR comments and fixes issues - 2. Claude pushes changes and comments @bot on PR - 3. Stop Hook polls for new bot reviews - 4. When reviews arrive, local Codex validates them - 5. If issues found, Claude continues fixing - 6. If all bots approve, loop ends - -EXAMPLES: - /humanize:start-pr-loop --claude - /humanize:start-pr-loop --codex --max 20 - /humanize:start-pr-loop --claude --codex - -STOPPING: - - /humanize:cancel-pr-loop Cancel the active PR loop - - Reach --max iterations - - All bots approve the changes - -MONITORING: - humanize monitor pr -HELP_EOF - exit 0 -} - -while [[ $# -gt 0 ]]; do - case $1 in - -h|--help) - show_help - ;; - --claude) - BOT_CLAUDE="true" - shift - ;; - --codex) - BOT_CODEX="true" - shift - ;; - --max) - if [[ -z "${2:-}" ]]; then - echo "Error: --max requires a number argument" >&2 - exit 1 - fi - if ! 
[[ "$2" =~ ^[0-9]+$ ]]; then - echo "Error: --max must be a positive integer, got: $2" >&2 - exit 1 - fi - MAX_ITERATIONS="$2" - shift 2 - ;; - --codex-model) - if [[ -z "${2:-}" ]]; then - echo "Error: --codex-model requires a MODEL:EFFORT argument" >&2 - exit 1 - fi - # Parse MODEL:EFFORT format (portable - works in bash and zsh) - if [[ "$2" == *:* ]]; then - CODEX_MODEL="${2%%:*}" - CODEX_EFFORT="${2#*:}" - else - CODEX_MODEL="$2" - CODEX_EFFORT="$DEFAULT_CODEX_EFFORT" - fi - shift 2 - ;; - --codex-timeout) - if [[ -z "${2:-}" ]]; then - echo "Error: --codex-timeout requires a number argument (seconds)" >&2 - exit 1 - fi - if ! [[ "$2" =~ ^[0-9]+$ ]]; then - echo "Error: --codex-timeout must be a positive integer (seconds), got: $2" >&2 - exit 1 - fi - CODEX_TIMEOUT="$2" - shift 2 - ;; - -*) - echo "Error: Unknown option: $1" >&2 - echo "Use --help for usage information" >&2 - exit 1 - ;; - *) - echo "Error: Unexpected argument: $1" >&2 - echo "Use --help for usage information" >&2 - exit 1 - ;; - esac -done - -# ======================================== -# Validate Bot Flags -# ======================================== - -if [[ "$BOT_CLAUDE" != "true" && "$BOT_CODEX" != "true" ]]; then - echo "Error: At least one bot flag is required" >&2 - echo "" >&2 - echo "Usage: /humanize:start-pr-loop --claude|--codex [OPTIONS]" >&2 - echo "" >&2 - echo "Bot flags:" >&2 - echo " --claude Monitor reviews from claude[bot] (trigger: @claude)" >&2 - echo " --codex Monitor reviews from chatgpt-codex-connector[bot] (trigger: @codex)" >&2 - echo "" >&2 - echo "For help: /humanize:start-pr-loop --help" >&2 - exit 1 -fi - -# Build active_bots list (stored as array for YAML list format) -# Bot names stored in state: claude, codex -# Trigger mentions: @claude, @codex -# Comment authors: claude[bot], chatgpt-codex-connector[bot] -declare -a ACTIVE_BOTS_ARRAY=() -if [[ "$BOT_CLAUDE" == "true" ]]; then - ACTIVE_BOTS_ARRAY+=("claude") -fi -if [[ "$BOT_CODEX" == "true" ]]; then - 
ACTIVE_BOTS_ARRAY+=("codex") -fi - -# ======================================== -# Validate Prerequisites -# ======================================== - -PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" - -# loop-common.sh already sourced above (provides find_active_loop, find_active_pr_loop, etc.) - -# Build dynamic mention string from active bots (using shared helper) -BOT_MENTION_STRING=$(build_bot_mention_string "${ACTIVE_BOTS_ARRAY[@]}") - -# ======================================== -# Mutual Exclusion Check -# ======================================== - -# Check for existing active loops (both RLCR and PR loops) -# Only one loop type can be active at a time -RLCR_LOOP_DIR=$(find_active_loop "$PROJECT_ROOT/.humanize/rlcr" 2>/dev/null || echo "") -PR_LOOP_DIR=$(find_active_pr_loop "$PROJECT_ROOT/.humanize/pr-loop" 2>/dev/null || echo "") - -if [[ -n "$RLCR_LOOP_DIR" ]]; then - echo "Error: An RLCR loop is already active" >&2 - echo " Active loop: $RLCR_LOOP_DIR" >&2 - echo "" >&2 - echo "Only one loop can be active at a time." >&2 - echo "Cancel the RLCR loop first with: /humanize:cancel-rlcr-loop" >&2 - exit 1 -fi - -if [[ -n "$PR_LOOP_DIR" ]]; then - echo "Error: A PR loop is already active" >&2 - echo " Active loop: $PR_LOOP_DIR" >&2 - echo "" >&2 - echo "Only one loop can be active at a time." >&2 - echo "Cancel the PR loop first with: /humanize:cancel-pr-loop" >&2 - exit 1 -fi - -# Check git repo (with timeout) -if ! run_with_timeout "$GIT_TIMEOUT" git rev-parse --git-dir &>/dev/null; then - echo "Error: Project must be a git repository (or git command timed out)" >&2 - exit 1 -fi - -# Check at least one commit (with timeout) -if ! run_with_timeout "$GIT_TIMEOUT" git rev-parse HEAD &>/dev/null 2>&1; then - echo "Error: Git repository must have at least one commit (or git command timed out)" >&2 - exit 1 -fi - -# Check gh CLI is installed -if ! 
command -v gh &>/dev/null; then - echo "Error: start-pr-loop requires the GitHub CLI (gh) to be installed" >&2 - echo "" >&2 - echo "Please install the GitHub CLI: https://cli.github.com/" >&2 - exit 1 -fi - -# Check gh CLI is authenticated -if ! gh auth status &>/dev/null 2>&1; then - echo "Error: GitHub CLI is not authenticated" >&2 - echo "" >&2 - echo "Please run: gh auth login" >&2 - exit 1 -fi - -# Check codex is available -if ! command -v codex &>/dev/null; then - echo "Error: start-pr-loop requires codex to run" >&2 - echo "" >&2 - echo "Please install Codex CLI: https://openai.com/codex" >&2 - exit 1 -fi - -# ======================================== -# Detect PR -# ======================================== - -START_BRANCH=$(run_with_timeout "$GIT_TIMEOUT" git -C "$PROJECT_ROOT" rev-parse --abbrev-ref HEAD) -if [[ -z "$START_BRANCH" ]]; then - echo "Error: Failed to get current branch (git command timed out or failed)" >&2 - exit 1 -fi - -# ======================================== -# Resolve Repository Context (for fork PR support) -# ======================================== -# IMPORTANT: For fork PRs, the PR lives in the upstream (parent) repo, not the fork. -# We must resolve the correct repo BEFORE attempting to get PR number/state. 
- -# Step 1: Get current repo -CURRENT_REPO=$(run_with_timeout "$GH_TIMEOUT" gh repo view --json owner,name \ - -q '.owner.login + "/" + .name' 2>/dev/null) || CURRENT_REPO="" - -# Step 2: Check if current repo is a fork and get parent repo -PARENT_REPO=$(run_with_timeout "$GH_TIMEOUT" gh repo view --json parent \ - -q '.parent.owner.login + "/" + .parent.name' 2>/dev/null) || PARENT_REPO="" - -# Step 3: Determine which repo to use for PR lookups -# Try current repo first, then parent (for fork case) -PR_LOOKUP_REPO="" -PR_NUMBER="" - -# Try to find PR using gh's auto-detection (no --repo flag) -# This handles cases where local branch name differs from PR head (e.g., renamed branch) -# IMPORTANT: gh pr view can auto-resolve to upstream repo when in a fork, so we must -# extract the actual repo from the PR URL rather than assuming it's CURRENT_REPO -PR_INFO=$(run_with_timeout "$GH_TIMEOUT" gh pr view --json number,url -q '.number,.url' 2>/dev/null) || PR_INFO="" -if [[ -n "$PR_INFO" ]]; then - # Parse number and URL from newline-separated output (jq outputs each field on separate line) - PR_NUMBER=$(echo "$PR_INFO" | head -1) - PR_URL=$(echo "$PR_INFO" | tail -1) - # Validate PR_NUMBER is numeric - if ! 
[[ "$PR_NUMBER" =~ ^[0-9]+$ ]]; then - echo "Error: Invalid PR number from gh CLI: $PR_INFO" >&2 - PR_NUMBER="" - PR_URL="" - else - # Extract repo from URL: https://HOST/OWNER/REPO/pull/NUMBER -> OWNER/REPO - # Works with github.com and GitHub Enterprise (any host) - if [[ "$PR_URL" =~ https?://[^/]+/([^/]+/[^/]+)/pull/ ]]; then - PR_LOOKUP_REPO="${BASH_REMATCH[1]}" - else - # Fallback to current repo if URL parsing fails - PR_LOOKUP_REPO="$CURRENT_REPO" - fi - fi -fi - -# If not found in current repo and we have a parent (fork case), try parent -# IMPORTANT: For fork PRs, the head branch lives in the fork, so we must use -# the fork-qualified format (FORK_OWNER:BRANCH) when looking up in parent repo -if [[ -z "$PR_NUMBER" && -n "$PARENT_REPO" && "$PARENT_REPO" != "null/" && "$PARENT_REPO" != "/" ]]; then - echo "Checking parent repo for PR (fork detected)..." >&2 - # Extract fork owner from CURRENT_REPO (format: owner/repo) - FORK_OWNER="${CURRENT_REPO%%/*}" - # Use fork-qualified branch name: FORK_OWNER:BRANCH - QUALIFIED_BRANCH="${FORK_OWNER}:${START_BRANCH}" - echo " Using qualified branch: $QUALIFIED_BRANCH" >&2 - PR_NUMBER=$(run_with_timeout "$GH_TIMEOUT" gh pr view --repo "$PARENT_REPO" "$QUALIFIED_BRANCH" --json number -q .number 2>/dev/null) || PR_NUMBER="" - if [[ -n "$PR_NUMBER" ]]; then - PR_LOOKUP_REPO="$PARENT_REPO" - echo "Found PR #$PR_NUMBER in parent repo: $PARENT_REPO" >&2 - fi -fi - -if [[ -z "$PR_NUMBER" ]]; then - echo "Error: No pull request found for branch '$START_BRANCH'" >&2 - echo "" >&2 - echo "Please create a pull request first:" >&2 - echo " gh pr create" >&2 - exit 1 -fi - -# Validate PR_NUMBER is numeric -if ! 
[[ "$PR_NUMBER" =~ ^[0-9]+$ ]]; then - echo "Error: Invalid PR number from gh CLI: $PR_NUMBER" >&2 - exit 1 -fi - -# Get PR state (using resolved repo for fork support) -PR_STATE=$(run_with_timeout "$GH_TIMEOUT" gh pr view "$PR_NUMBER" --repo "$PR_LOOKUP_REPO" --json state -q .state 2>/dev/null) || PR_STATE="" -if [[ "$PR_STATE" == "MERGED" ]]; then - echo "Error: PR #$PR_NUMBER has already been merged" >&2 - exit 1 -fi -if [[ "$PR_STATE" == "CLOSED" ]]; then - echo "Error: PR #$PR_NUMBER has been closed" >&2 - exit 1 -fi - -# IMPORTANT: Use the PR's lookup repository for API calls -# Since PR_LOOKUP_REPO was already validated to contain this PR, we can use it directly -PR_BASE_REPO="$PR_LOOKUP_REPO" - -# ======================================== -# Validate YAML Safety -# ======================================== - -# Validate branch name for YAML safety (prevents injection in state.md) -if [[ "$START_BRANCH" == *[:\#\"\'\`]* ]] || [[ "$START_BRANCH" =~ $'\n' ]]; then - echo "Error: Branch name contains YAML-unsafe characters" >&2 - echo " Branch: $START_BRANCH" >&2 - echo " Characters not allowed: : # \" ' \` newline" >&2 - echo " Please checkout a branch with a simpler name" >&2 - exit 1 -fi - -# Validate codex model for YAML safety -if [[ ! "$CODEX_MODEL" =~ ^[a-zA-Z0-9._-]+$ ]]; then - echo "Error: Codex model contains invalid characters" >&2 - echo " Model: $CODEX_MODEL" >&2 - echo " Only alphanumeric, hyphen, underscore, dot allowed" >&2 - exit 1 -fi - -# Validate codex effort for YAML safety -if [[ ! 
"$CODEX_EFFORT" =~ ^[a-zA-Z0-9_-]+$ ]]; then - echo "Error: Codex effort contains invalid characters" >&2 - echo " Effort: $CODEX_EFFORT" >&2 - echo " Only alphanumeric, hyphen, underscore allowed" >&2 - exit 1 -fi - -# ======================================== -# Setup State Directory -# ======================================== - -LOOP_BASE_DIR="$PROJECT_ROOT/.humanize/pr-loop" - -# Create timestamp for this loop session -TIMESTAMP=$(date +%Y-%m-%d_%H-%M-%S) -LOOP_DIR="$LOOP_BASE_DIR/$TIMESTAMP" - -mkdir -p "$LOOP_DIR" - -# ======================================== -# Fetch Initial Comments -# ======================================== - -COMMENT_FILE="$LOOP_DIR/round-0-pr-comment.md" - -# Build comma-separated bot list for fetch script -BOTS_COMMA_LIST=$(IFS=','; echo "${ACTIVE_BOTS_ARRAY[*]}") - -# Call fetch-pr-comments.sh to get all comments, grouped by active bots -"$SCRIPT_DIR/fetch-pr-comments.sh" "$PR_NUMBER" "$COMMENT_FILE" --bots "$BOTS_COMMA_LIST" - -# ======================================== -# Determine Startup Case -# ======================================== - -# Call check-pr-reviewer-status.sh to analyze PR state -REVIEWER_STATUS=$("$SCRIPT_DIR/check-pr-reviewer-status.sh" "$PR_NUMBER" --bots "$BOTS_COMMA_LIST" 2>/dev/null) || { - echo "Warning: Failed to check reviewer status, defaulting to Case 1" >&2 - REVIEWER_STATUS='{"case":1,"reviewers_commented":[],"reviewers_missing":[],"latest_commit_sha":"","latest_commit_at":"","newest_review_at":null,"has_commits_after_reviews":false}' -} - -# Parse reviewer status JSON -STARTUP_CASE=$(echo "$REVIEWER_STATUS" | jq -r '.case') -LATEST_COMMIT_SHA=$(echo "$REVIEWER_STATUS" | jq -r '.latest_commit_sha') -LATEST_COMMIT_AT=$(echo "$REVIEWER_STATUS" | jq -r '.latest_commit_at') -HAS_COMMITS_AFTER=$(echo "$REVIEWER_STATUS" | jq -r '.has_commits_after_reviews') - -# Fallback to git HEAD if API didn't return commit SHA -if [[ -z "$LATEST_COMMIT_SHA" ]] || [[ "$LATEST_COMMIT_SHA" == "null" ]]; then - 
LATEST_COMMIT_SHA=$(run_with_timeout "$GIT_TIMEOUT" git rev-parse HEAD) -fi - -echo "Startup Case: $STARTUP_CASE" >&2 -echo "Latest Commit: $LATEST_COMMIT_SHA" >&2 - -# Handle Case 4/5: All reviewers commented but new commits exist -# Need to trigger re-review by posting @bot comment -LAST_TRIGGER_AT="" -TRIGGER_COMMENT_ID="" - -if [[ "$STARTUP_CASE" -eq 4 ]] || [[ "$STARTUP_CASE" -eq 5 ]]; then - # First, check if there's already a pending @mention after the latest commit - # This avoids duplicate @mention spam when user has already requested re-review - echo "Case $STARTUP_CASE: Checking for existing trigger comment after latest commit..." >&2 - - # Build regex patterns for bot mentions with word boundary anchoring - # Pattern: (start|non-username-char) + @botname + (end|non-username-char) - # Prevents false matches like @claude-dev or support@codex.io - MENTION_PATTERNS_JSON=$(printf '%s\n' "${ACTIVE_BOTS_ARRAY[@]}" | jq -R '"(^|[^a-zA-Z0-9_-])@" + . + "($|[^a-zA-Z0-9_-])"' | jq -s '.') - - # Find existing trigger comment that mentions ALL active bots after latest commit - # Notes: - # - Uses PR_BASE_REPO for fork PR support - # - Uses jq -s to aggregate paginated results before filtering - # - Reuse only when ALL bots are mentioned (partial mentions need new trigger) - # - Strips code blocks/inline code/quotes since GitHub ignores mentions there - if [[ -n "$LATEST_COMMIT_AT" && "$LATEST_COMMIT_AT" != "null" ]]; then - EXISTING_TRIGGER=$(run_with_timeout "$GH_TIMEOUT" gh api "repos/$PR_BASE_REPO/issues/$PR_NUMBER/comments" \ - --paginate 2>/dev/null \ - | jq -s --arg since "$LATEST_COMMIT_AT" --argjson patterns "$MENTION_PATTERNS_JSON" ' - # Strip content between delimiters, keeping even-indexed parts (outside delimiters) - # Used for fenced code blocks where regex fails on nested backticks - def strip_between(delim): [splits(delim)] | to_entries | map(select(.key % 2 == 0) | .value) | join(" "); - - # Strip code blocks, inline code, and quoted lines (GitHub 
ignores mentions in these) - def strip_non_mention_contexts: - strip_between("```") # fenced code blocks - | strip_between("~~~") # tilde fenced code blocks - | gsub("`[^`]*`"; " ") # inline code - | gsub("(^|\\n)( |\\t)[^\\n]*"; " ") # indented code blocks (4+ spaces or tab) - | gsub("(^|\\n)\\s*>[^\\n]*"; " "); # quoted lines (> prefix) - - [.[][] | select(.created_at > $since and ( - # Check that ALL patterns are present in the stripped body - # Use case-insensitive matching since GitHub mentions are case-insensitive - (.body | strip_non_mention_contexts) as $clean_body - | $patterns | all(. as $p | $clean_body | test($p; "i")) - ))] - | sort_by(.created_at) - | last - | {id: .id, created_at: .created_at} - ') || EXISTING_TRIGGER="" - else - EXISTING_TRIGGER="" - fi - - # Extract fields once to avoid repeated jq calls - # Skip jq parsing if EXISTING_TRIGGER is empty (API failure fallback) - if [[ -n "$EXISTING_TRIGGER" ]]; then - TRIGGER_COMMENT_ID=$(echo "$EXISTING_TRIGGER" | jq -r '.id // empty' 2>/dev/null) || TRIGGER_COMMENT_ID="" - LAST_TRIGGER_AT=$(echo "$EXISTING_TRIGGER" | jq -r '.created_at // empty' 2>/dev/null) || LAST_TRIGGER_AT="" - else - TRIGGER_COMMENT_ID="" - LAST_TRIGGER_AT="" - fi - - if [[ -n "$TRIGGER_COMMENT_ID" ]]; then - # Found existing @mention - reuse it instead of posting new one - echo "Found existing trigger comment (ID: $TRIGGER_COMMENT_ID), skipping duplicate @mention" >&2 - else - # No existing @mention - post new trigger - echo "No existing trigger found, posting trigger comment for re-review..." 
>&2 - - # Post trigger comment (abort on failure to prevent orphaned state) - # NOTE: Uses --repo for fork PR support (comments go to base repo, not fork) - TRIGGER_BODY="$BOT_MENTION_STRING please review the latest changes (new commits since last review)" - TRIGGER_RESULT=$(run_with_timeout "$GH_TIMEOUT" gh pr comment "$PR_NUMBER" --repo "$PR_BASE_REPO" --body "$TRIGGER_BODY" 2>&1) || { - echo "Error: Failed to post trigger comment: $TRIGGER_RESULT" >&2 - echo "" >&2 - echo "Cannot proceed without a trigger comment - bots would not be notified." >&2 - echo "Please check:" >&2 - echo " - GitHub API rate limits" >&2 - echo " - Network connectivity" >&2 - echo " - Repository permissions" >&2 - rm -rf "$LOOP_DIR" - exit 1 - } - - # Get the comment ID and use GitHub's timestamp to avoid clock skew - # Fetch the latest comment from current user - CURRENT_USER=$(run_with_timeout "$GH_TIMEOUT" gh api user --jq '.login' 2>/dev/null) || CURRENT_USER="" - if [[ -n "$CURRENT_USER" ]]; then - # Fetch both ID and created_at from the comment we just posted - # IMPORTANT: --jq with --paginate runs per-page, so aggregate first then filter - # IMPORTANT: Use PR_BASE_REPO for fork PR support - COMMENT_DATA=$(run_with_timeout "$GH_TIMEOUT" gh api "repos/$PR_BASE_REPO/issues/$PR_NUMBER/comments" \ - --paginate --jq ".[] | select(.user.login == \"$CURRENT_USER\") | {id: .id, created_at: .created_at}" 2>/dev/null \ - | jq -s 'sort_by(.created_at) | reverse | .[0]') || COMMENT_DATA="" - - if [[ -n "$COMMENT_DATA" && "$COMMENT_DATA" != "null" ]]; then - TRIGGER_COMMENT_ID=$(echo "$COMMENT_DATA" | jq -r '.id // empty') - # Use GitHub's timestamp instead of local time to avoid clock skew - LAST_TRIGGER_AT=$(echo "$COMMENT_DATA" | jq -r '.created_at // empty') - fi - fi - - # NOTE: Do NOT fall back to local time if GitHub timestamp fetch failed. - # Local clock skew could set a future timestamp, causing stop hook to filter - # out all comments. 
The stop hook has its own trigger detection logic that - # will find the trigger comment if LAST_TRIGGER_AT is empty. - fi - - # If --claude is specified, verify eyes reaction (MANDATORY per plan) - if [[ "$BOT_CLAUDE" == "true" ]]; then - echo "Verifying Claude eyes reaction (3 attempts x 5 seconds)..." >&2 - - if [[ -z "$TRIGGER_COMMENT_ID" ]]; then - # Fail if trigger comment ID not found (can't verify eyes without it) - echo "Error: Could not find trigger comment ID for eyes verification" >&2 - echo "" >&2 - echo "The trigger comment was posted but its ID could not be retrieved." >&2 - echo "This prevents verification of Claude's eyes reaction." >&2 - echo "" >&2 - echo "Please try:" >&2 - echo " 1. Wait a moment and try again" >&2 - echo " 2. Check GitHub rate limits" >&2 - echo " 3. Verify the comment was posted successfully" >&2 - - # Clean up the loop directory since we're failing - rm -rf "$LOOP_DIR" - exit 1 - fi - - # Check for eyes reaction with retry - # Pass --pr for fork PR support (reactions are on base repo) - if ! "$SCRIPT_DIR/check-bot-reactions.sh" claude-eyes "$TRIGGER_COMMENT_ID" --pr "$PR_NUMBER" --retry 3 --delay 5 >/dev/null 2>&1; then - echo "Error: Claude bot did not respond with eyes reaction" >&2 - echo "" >&2 - echo "This may indicate:" >&2 - echo " - Claude bot is not configured on this repository" >&2 - echo " - Network issues preventing Claude from seeing the mention" >&2 - echo "" >&2 - echo "Please verify Claude bot is set up correctly on this repository." >&2 - - # Clean up the loop directory since we're failing - rm -rf "$LOOP_DIR" - exit 1 - fi - echo "Claude eyes reaction confirmed!" 
>&2 - fi -fi - -# ======================================== -# Create State File -# ======================================== - -# Build YAML list for active_bots and configured_bots (using shared helper) -ACTIVE_BOTS_YAML=$(build_yaml_list "${ACTIVE_BOTS_ARRAY[@]}") - -# configured_bots is identical to active_bots at start, but never changes -# This allows re-polling previously approved bots if they post new issues -CONFIGURED_BOTS_YAML="$ACTIVE_BOTS_YAML" - -cat > "$LOOP_DIR/state.md" << EOF ---- -current_round: 0 -max_iterations: $MAX_ITERATIONS -pr_number: $PR_NUMBER -start_branch: $START_BRANCH -configured_bots:${CONFIGURED_BOTS_YAML} -active_bots:${ACTIVE_BOTS_YAML} -codex_model: $CODEX_MODEL -codex_effort: $CODEX_EFFORT -codex_timeout: $CODEX_TIMEOUT -poll_interval: $POLL_INTERVAL -poll_timeout: $POLL_TIMEOUT -started_at: $(date -u +%Y-%m-%dT%H:%M:%SZ) -startup_case: $STARTUP_CASE -latest_commit_sha: $LATEST_COMMIT_SHA -latest_commit_at: ${LATEST_COMMIT_AT:-} -last_trigger_at: ${LAST_TRIGGER_AT:-} -trigger_comment_id: ${TRIGGER_COMMENT_ID:-} ---- -EOF - -# ======================================== -# Create Goal Tracker -# ======================================== - -GOAL_TRACKER_FILE="$LOOP_DIR/goal-tracker.md" - -# Build display string for active bots -ACTIVE_BOTS_DISPLAY=$(IFS=', '; echo "${ACTIVE_BOTS_ARRAY[*]}") - -# Build acceptance criteria rows for each bot -BOT_AC_ROWS="" -AC_NUM=1 -for bot in "${ACTIVE_BOTS_ARRAY[@]}"; do - BOT_AC_ROWS="${BOT_AC_ROWS}| AC-${AC_NUM} | Get approval from ${bot} | ${bot} | pending | -" - AC_NUM=$((AC_NUM + 1)) -done - -# Current timestamp for log -STARTED_AT=$(date -u +%Y-%m-%dT%H:%M:%SZ) - -# Goal tracker template variables -GOAL_TRACKER_VARS=( - "PR_NUMBER=$PR_NUMBER" - "START_BRANCH=$START_BRANCH" - "ACTIVE_BOTS_DISPLAY=$ACTIVE_BOTS_DISPLAY" - "STARTUP_CASE=$STARTUP_CASE" - "BOT_AC_ROWS=$BOT_AC_ROWS" - "STARTED_AT=$STARTED_AT" -) - -FALLBACK_GOAL_TRACKER="# PR Loop Goal Tracker - -## PR Information - -- **PR Number:** 
#$PR_NUMBER -- **Branch:** $START_BRANCH -- **Monitored Bots:** $ACTIVE_BOTS_DISPLAY -- **Startup Case:** $STARTUP_CASE - -## Ultimate Goal - -Get all monitored bot reviewers ($ACTIVE_BOTS_DISPLAY) to approve this PR. - -## Acceptance Criteria - -| AC | Description | Bot | Status | -|----|-------------|-----|--------| -${BOT_AC_ROWS} -## Current Status - -### Round 0: Initialization - -- **Phase:** Waiting for initial bot reviews -- **Active Bots:** $ACTIVE_BOTS_DISPLAY -- **Approved Bots:** (none yet) - -### Open Issues - -| Round | Bot | Issue | Status | -|-------|-----|-------|--------| -| - | - | (awaiting initial reviews) | pending | - -### Addressed Issues - -| Round | Bot | Issue | Resolution | -|-------|-----|-------|------------| - -## Log - -| Round | Timestamp | Event | -|-------|-----------|-------| -| 0 | $STARTED_AT | PR loop initialized (Case $STARTUP_CASE) | -" - -GOAL_TRACKER_CONTENT=$(load_and_render_safe "$TEMPLATE_DIR" "pr-loop/goal-tracker-initial.md" "$FALLBACK_GOAL_TRACKER" "${GOAL_TRACKER_VARS[@]}") -echo "$GOAL_TRACKER_CONTENT" > "$GOAL_TRACKER_FILE" - -echo "Goal tracker created: $GOAL_TRACKER_FILE" >&2 - -# ======================================== -# Create Initial Prompt -# ======================================== - -RESOLVE_PATH="$LOOP_DIR/round-0-pr-resolve.md" - -# Detect if comments exist by checking for the "No comments found" sentinel -# fetch-pr-comments.sh outputs "*No comments found.*" only when there are zero comments -if grep -q '^\*No comments found\.\*$' "$COMMENT_FILE" 2>/dev/null; then - COMMENT_COUNT=0 -else - COMMENT_COUNT=1 # Non-zero indicates comments exist -fi - -# Template variables for rendering -TEMPLATE_VARS=( - "PR_NUMBER=$PR_NUMBER" - "START_BRANCH=$START_BRANCH" - "ACTIVE_BOTS_DISPLAY=$ACTIVE_BOTS_DISPLAY" - "RESOLVE_PATH=$RESOLVE_PATH" - "BOT_MENTION_STRING=$BOT_MENTION_STRING" -) - -# Fallback header (used if template fails to load) -FALLBACK_HEADER="Read and execute below with ultrathink - -## PR Review 
Loop (Round 0) - -You are in a PR review loop monitoring feedback from remote review bots. - -**PR Information:** -- PR Number: #{{PR_NUMBER}} -- Branch: {{START_BRANCH}} -- Active Bots: {{ACTIVE_BOTS_DISPLAY}} - -## Review Comments - -The following comments have been fetched from the PR: -" - -# Load and render header template -HEADER_CONTENT=$(load_and_render_safe "$TEMPLATE_DIR" "pr-loop/round-0-header.md" "$FALLBACK_HEADER" "${TEMPLATE_VARS[@]}") - -# Write header to prompt file -echo "$HEADER_CONTENT" > "$LOOP_DIR/round-0-prompt.md" - -# Append the fetched comments -cat "$COMMENT_FILE" >> "$LOOP_DIR/round-0-prompt.md" - -# Select task template based on whether there are comments -if [[ "$COMMENT_COUNT" -eq 0 ]]; then - # No comments yet - this is a fresh PR, bots will review automatically - FALLBACK_TASK=" ---- - -## Your Task - -This PR has no review comments yet. The monitored bots ({{ACTIVE_BOTS_DISPLAY}}) will automatically review the PR - you do NOT need to comment to trigger the first review. - -1. **Wait for automatic bot reviews**: - - Simply write your summary and try to exit - - The Stop Hook will poll for the first bot reviews - -2. **Write your initial summary** to: @{{RESOLVE_PATH}} - - Note that this is Round 0 awaiting initial bot reviews - - No issues to address yet - ---- - -## Important Rules - -1. **Do not comment to trigger review**: First reviews are automatic -2. **Do not modify state files**: The .humanize/pr-loop/ files are managed by the system -3. **Trust the process**: The Stop Hook manages polling and Codex validation - ---- - -Note: After you write your summary and try to exit, the Stop Hook will: -1. Poll for bot reviews (every 30 seconds, up to 15 minutes per bot) -2. When reviews arrive, local Codex will validate if they indicate approval -3. If issues are found, you will receive feedback and continue -4. 
If all bots approve, the loop ends -" - TASK_CONTENT=$(load_and_render_safe "$TEMPLATE_DIR" "pr-loop/round-0-task-no-comments.md" "$FALLBACK_TASK" "${TEMPLATE_VARS[@]}") -else - # Has comments - normal flow with issues to address - FALLBACK_TASK=" ---- - -## Your Task - -1. **Analyze the comments above**, prioritizing: - - Human comments first (they take precedence) - - Bot comments (newest first) - -2. **Fix any issues** identified by the reviewers: - - Read the relevant code files - - Make necessary changes - - Create appropriate tests if needed - -3. **After fixing issues**: - - Commit your changes with a descriptive message - - Push to the remote repository - - Comment on the PR to trigger re-review: - \`\`\`bash - gh pr comment {{PR_NUMBER}} --body \"{{BOT_MENTION_STRING}} please review the latest changes\" - \`\`\` - -4. **Write your resolution summary** to: @{{RESOLVE_PATH}} - - List what issues were addressed - - Files modified - - Tests added (if any) - ---- - -## Important Rules - -1. **Do not modify state files**: The .humanize/pr-loop/ files are managed by the system -2. **Always push changes**: Your fixes must be pushed for bots to review them -3. **Use the correct comment format**: Tag the bots to trigger their reviews -4. **Be thorough**: Address all valid concerns from the reviewers - ---- - -Note: After you write your summary and try to exit, the Stop Hook will: -1. Poll for new bot reviews (every 30 seconds, up to 15 minutes per bot) -2. When reviews arrive, local Codex will validate if they indicate approval -3. If issues remain, you will receive feedback and continue -4. 
If all bots approve, the loop ends -" - TASK_CONTENT=$(load_and_render_safe "$TEMPLATE_DIR" "pr-loop/round-0-task-has-comments.md" "$FALLBACK_TASK" "${TEMPLATE_VARS[@]}") -fi - -# Append task section to prompt file -echo "$TASK_CONTENT" >> "$LOOP_DIR/round-0-prompt.md" - -# ======================================== -# Output Setup Message -# ======================================== - -# All important work is done. If output fails due to SIGPIPE (pipe closed), exit cleanly. -trap 'exit 0' PIPE - -cat << EOF -=== start-pr-loop activated === - -PR Number: #$PR_NUMBER -Branch: $START_BRANCH -Active Bots: $ACTIVE_BOTS_DISPLAY -Comments Fetched: $COMMENT_COUNT -Max Iterations: $MAX_ITERATIONS -Codex Model: $CODEX_MODEL -Codex Effort: $CODEX_EFFORT -Codex Timeout: ${CODEX_TIMEOUT}s -Poll Interval: ${POLL_INTERVAL}s -Poll Timeout: ${POLL_TIMEOUT}s (per bot) -Loop Directory: $LOOP_DIR - -The PR loop is now active. When you try to exit: -1. Stop Hook polls for new bot reviews (every 30s) -2. When reviews arrive, local Codex validates them -3. If issues remain, you'll receive feedback and continue -4. If all bots approve, the loop ends - -To cancel: /humanize:cancel-pr-loop - ---- - -EOF - -# Output the initial prompt -cat "$LOOP_DIR/round-0-prompt.md" - -# Output critical requirements based on whether there are comments -echo "" -if [[ "$COMMENT_COUNT" -eq 0 ]]; then - FALLBACK_CRITICAL=" -=========================================== -CRITICAL - Work Completion Requirements -=========================================== - -When you complete your work, you MUST: - -1. Write your resolution summary to: - {{RESOLVE_PATH}} - - The summary should note: - - This is Round 0 awaiting initial bot reviews - - No issues to address yet - -2. Try to exit - the Stop Hook will poll for bot reviews - -DO NOT comment on the PR to trigger review - the bots will -review automatically since this is a new PR. - -The Stop Hook will poll for bot reviews. 
-===========================================" - CRITICAL_CONTENT=$(load_and_render_safe "$TEMPLATE_DIR" "pr-loop/critical-requirements-no-comments.md" "$FALLBACK_CRITICAL" "${TEMPLATE_VARS[@]}") -else - FALLBACK_CRITICAL=" -=========================================== -CRITICAL - Work Completion Requirements -=========================================== - -When you complete your work, you MUST: - -1. COMMIT and PUSH your changes: - - Create a commit with descriptive message - - Push to the remote repository - -2. Comment on the PR to trigger re-review: - gh pr comment {{PR_NUMBER}} --body \"{{BOT_MENTION_STRING}} please review\" - -3. Write your resolution summary to: - {{RESOLVE_PATH}} - - The summary should include: - - Issues addressed - - Files modified - - Tests added (if any) - -The Stop Hook will then poll for bot reviews. -===========================================" - CRITICAL_CONTENT=$(load_and_render_safe "$TEMPLATE_DIR" "pr-loop/critical-requirements-has-comments.md" "$FALLBACK_CRITICAL" "${TEMPLATE_VARS[@]}") -fi -echo "$CRITICAL_CONTENT" - -# Explicit exit 0 to ensure clean exit code even if final output fails -exit 0 diff --git a/scripts/setup-rlcr-loop.sh b/scripts/setup-rlcr-loop.sh index 47089b6f..9d45363c 100755 --- a/scripts/setup-rlcr-loop.sh +++ b/scripts/setup-rlcr-loop.sh @@ -325,7 +325,7 @@ done PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}" -# loop-common.sh already sourced above (provides find_active_loop, find_active_pr_loop, etc.) +# loop-common.sh already sourced above (provides find_active_loop, etc.) 
# ======================================== # Required Dependency Check @@ -362,10 +362,8 @@ fi # Mutual Exclusion Check # ======================================== -# Check for existing active loops (both RLCR and PR loops) -# Only one loop type can be active at a time +# Check for existing active RLCR loop RLCR_LOOP_DIR=$(find_active_loop "$PROJECT_ROOT/.humanize/rlcr" 2>/dev/null || echo "") -PR_LOOP_DIR=$(find_active_pr_loop "$PROJECT_ROOT/.humanize/pr-loop" 2>/dev/null || echo "") if [[ -n "$RLCR_LOOP_DIR" ]]; then echo "Error: An RLCR loop is already active" >&2 @@ -376,15 +374,6 @@ if [[ -n "$RLCR_LOOP_DIR" ]]; then exit 1 fi -if [[ -n "$PR_LOOP_DIR" ]]; then - echo "Error: A PR loop is already active" >&2 - echo " Active loop: $PR_LOOP_DIR" >&2 - echo "" >&2 - echo "Only one loop can be active at a time." >&2 - echo "Cancel the PR loop first with: /humanize:cancel-pr-loop" >&2 - exit 1 -fi - # ======================================== # Agent Teams Validation # ======================================== diff --git a/skills/humanize/SKILL.md b/skills/humanize/SKILL.md index 2795eccc..5b0a6af3 100644 --- a/skills/humanize/SKILL.md +++ b/skills/humanize/SKILL.md @@ -1,6 +1,6 @@ --- name: humanize -description: Iterative development with AI review. Provides RLCR (Ralph-Loop with Codex Review) for implementation planning and code review loops, plus PR review automation with bot monitoring. +description: Iterative development with AI review. Provides RLCR (Ralph-Loop with Codex Review) for implementation planning and code review loops. user-invocable: false disable-model-invocation: true --- @@ -47,19 +47,7 @@ The RLCR (Ralph-Loop with Codex Review) loop has two phases: - If no issues → loop completes with Finalize Phase - On Codex CLI `0.114.0+` with `codex_hooks` enabled, Humanize installs a native `Stop` hook so exit gating runs automatically -### 2. PR Loop - Automated PR Review Handling - -Automates handling of GitHub PR reviews from remote bots: - -1. 
Detects the PR associated with the current branch -2. Fetches review comments from specified bot(s) (`--claude` and/or `--codex`) -3. AI analyzes and fixes issues identified by the bot(s) -4. Pushes changes and triggers re-review by commenting @bot -5. Stop Hook polls for new bot reviews (every 30s, 15min timeout per bot) -6. Local Codex validates if remote concerns are resolved -7. Loop continues until all bots approve or max iterations reached - -### 3. Generate Plan - Structured Plan from Draft +### 2. Generate Plan - Structured Plan from Draft Transforms a rough draft document into a structured implementation plan with: - Clear goal description @@ -106,30 +94,6 @@ After each round, write the required summary and stop/exit normally. Humanize's "{{HUMANIZE_RUNTIME_ROOT}}/scripts/cancel-rlcr-loop.sh" --force ``` -### Start PR Loop - -```bash -# Monitor claude[bot] reviews -"{{HUMANIZE_RUNTIME_ROOT}}/scripts/setup-pr-loop.sh" --claude - -# Monitor chatgpt-codex-connector[bot] reviews -"{{HUMANIZE_RUNTIME_ROOT}}/scripts/setup-pr-loop.sh" --codex - -# Monitor both -"{{HUMANIZE_RUNTIME_ROOT}}/scripts/setup-pr-loop.sh" --claude --codex -``` - -**Common Options:** -- `--max N` - Maximum iterations (default: 42) -- `--codex-model MODEL:EFFORT` - Codex model for validation (default: gpt-5.4:medium) -- `--codex-timeout SECONDS` - Timeout for Codex validation (default: 900) - -### Cancel PR Loop - -```bash -"{{HUMANIZE_RUNTIME_ROOT}}/scripts/cancel-pr-loop.sh" -``` - ### Generate Plan from Draft ```bash @@ -211,7 +175,7 @@ The RLCR loop uses a Goal Tracker to prevent goal drift: ## Prerequisites - `codex` - OpenAI Codex CLI (for review) -- `gh` - GitHub CLI (for PR loop) + ## Directory Structure @@ -231,10 +195,6 @@ Humanize stores all data in `.humanize/`: │ ├── methodology-analysis-report.md │ ├── methodology-analysis-done.md │ └── complete-state.md -├── pr-loop/ # PR loop data -│ └── / -│ ├── state.md -│ └── resolution-N.md └── skill/ # One-shot skill results └── / ├── 
input.md @@ -249,7 +209,6 @@ Use the monitor script to track loop progress: ```bash source "{{HUMANIZE_RUNTIME_ROOT}}/scripts/humanize.sh" humanize monitor rlcr # Monitor RLCR loop -humanize monitor pr # Monitor PR loop ``` ## Exit Codes diff --git a/tests/mocks/gh b/tests/mocks/gh index d91a58c9..7d4dff06 100755 --- a/tests/mocks/gh +++ b/tests/mocks/gh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Mock gh CLI for testing PR loop functionality +# Mock gh CLI for testing GitHub API interactions # # This mock intercepts gh commands and returns predefined responses # based on environment variables and fixture files. diff --git a/tests/robustness/test-concurrent-state-robustness.sh b/tests/robustness/test-concurrent-state-robustness.sh index ad72b3fc..57115bcf 100755 --- a/tests/robustness/test-concurrent-state-robustness.sh +++ b/tests/robustness/test-concurrent-state-robustness.sh @@ -386,61 +386,6 @@ else fail "Unicode content" "2" "$ROUND" fi -# ======================================== -# PR Loop State Tests -# ======================================== - -echo "" -echo "--- PR Loop State Tests ---" -echo "" - -# Test 16: find_active_pr_loop works correctly -echo "Test 16: find_active_pr_loop detection" -mkdir -p "$TEST_DIR/pr-loops/pr-loop/2026-01-19_12-00-00" -cat > "$TEST_DIR/pr-loops/pr-loop/2026-01-19_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 ---- -EOF - -ACTIVE=$(find_active_pr_loop "$TEST_DIR/pr-loops/pr-loop" 2>/dev/null || echo "") -if [[ "$ACTIVE" == *"2026-01-19"* ]]; then - pass "find_active_pr_loop works correctly" -else - fail "find_active_pr_loop" "*2026-01-19*" "$ACTIVE" -fi - -# Test 17: PR loop state with YAML list for active_bots -echo "" -echo "Test 17: PR loop state with YAML list" -mkdir -p "$TEST_DIR/pr-yaml" -cat > "$TEST_DIR/pr-yaml/state.md" << 'EOF' ---- -current_round: 1 -active_bots: - - claude - - codex -configured_bots: - - claude - - codex ---- -EOF - -# Test that we can read the state file 
without errors -if [[ -f "$TEST_DIR/pr-yaml/state.md" ]]; then - # Check if file contains expected YAML structure - if grep -q "^ - claude$" "$TEST_DIR/pr-yaml/state.md" && \ - grep -q "^ - codex$" "$TEST_DIR/pr-yaml/state.md"; then - pass "PR loop YAML list format validated" - else - fail "YAML list format" "list items" "missing" - fi -else - fail "YAML list" "file exists" "file not found" -fi - # ======================================== # Stale Loop Detection Tests # ======================================== diff --git a/tests/robustness/test-hook-system-robustness.sh b/tests/robustness/test-hook-system-robustness.sh index 21dd9fb0..1d4a21f5 100755 --- a/tests/robustness/test-hook-system-robustness.sh +++ b/tests/robustness/test-hook-system-robustness.sh @@ -6,7 +6,6 @@ # - loop-edit-validator.sh # - loop-plan-file-validator.sh # - loop-codex-stop-hook.sh (state parsing) -# - pr-loop-stop-hook.sh (state parsing) # # Focus areas: # - JSON input validation edge cases @@ -658,22 +657,6 @@ else fail "Missing state handling" "exit 0, no block decision" "exit=$EXIT_CODE, output=$OUTPUT" fi -# Test 17: PR stop hook handles missing state gracefully (allows exit) -echo "" -echo "Test 17: PR stop hook allows exit when no state directory" -mkdir -p "$TEST_DIR/no-pr-state" - -set +e -OUTPUT=$(echo '{}' | CLAUDE_PROJECT_DIR="$TEST_DIR/no-pr-state" bash "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1) -EXIT_CODE=$? -set -e -# Should exit 0, no block decision -if [[ $EXIT_CODE -eq 0 ]] && ! 
echo "$OUTPUT" | grep -q '"decision".*:.*"block"'; then - pass "PR stop hook allows exit when no state (no block decision)" -else - fail "PR missing state" "exit 0, no block decision" "exit=$EXIT_CODE" -fi - # Test 18: Stop hook with corrupted state file outputs block decision echo "" echo "Test 18: Stop hook with corrupted state outputs decision" diff --git a/tests/robustness/test-pr-loop-api-fetch.sh b/tests/robustness/test-pr-loop-api-fetch.sh deleted file mode 100755 index 489a1cee..00000000 --- a/tests/robustness/test-pr-loop-api-fetch.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env bash -# -# PR Loop API fetch/state tests (parallel split 1/2) -# -# Runs Tests 1-11: PR Loop State Handling + fetch-pr-comments + -# Bot Response Parsing + JSON Edge Cases -# -# Sources the shared test library from test-pr-loop-api-robustness.sh -# and invokes the run_fetch_tests group function. -# - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -source "$SCRIPT_DIR/test-pr-loop-api-robustness.sh" - -run_fetch_tests -print_test_summary "PR Loop API Fetch Tests" -exit $? diff --git a/tests/robustness/test-pr-loop-api-poll.sh b/tests/robustness/test-pr-loop-api-poll.sh deleted file mode 100755 index f56d5594..00000000 --- a/tests/robustness/test-pr-loop-api-poll.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env bash -# -# PR Loop API poll/stop-hook tests (parallel split 2/2) -# -# Runs Tests 12-19: PR Loop Stop Hook + poll-pr-reviews -# -# Sources the shared test library from test-pr-loop-api-robustness.sh -# and invokes the run_poll_tests group function. -# - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -source "$SCRIPT_DIR/test-pr-loop-api-robustness.sh" - -run_poll_tests -print_test_summary "PR Loop API Poll Tests" -exit $? 
diff --git a/tests/robustness/test-pr-loop-api-robustness.sh b/tests/robustness/test-pr-loop-api-robustness.sh deleted file mode 100755 index d8e5097a..00000000 --- a/tests/robustness/test-pr-loop-api-robustness.sh +++ /dev/null @@ -1,866 +0,0 @@ -#!/usr/bin/env bash -# -# Robustness tests for PR loop API handling -# -# Tests PR loop behavior under API error conditions by invoking actual -# PR loop scripts with mocked gh commands: -# - API failure handling -# - Rate limiting responses -# - Bot response JSON parsing -# - Network error simulation -# - PR loop state file handling -# - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" -source "$PROJECT_ROOT/hooks/lib/loop-common.sh" -source "$SCRIPT_DIR/../test-helpers.sh" - -setup_test_dir - -echo "========================================" -echo "PR Loop API Robustness Tests" -echo "========================================" -echo "" - -# ======================================== -# Helper Functions -# ======================================== - -# Create a comprehensive mock gh that handles repo view, pr view, and api calls -# This allows fetch-pr-comments.sh to run end-to-end -create_mock_gh() { - local dir="$1" - local behavior="$2" # "empty_array", "rate_limit", "network_error", "bot_comments", etc. - mkdir -p "$dir/bin" - - # Base mock that handles repo view and pr view for all behaviors - # Note: gh CLI applies -q jq queries internally, so we output the final result - # fetch-pr-comments.sh uses: gh repo view --json owner,name -q '...' 
- # gh pr view PR --repo REPO --json number -q .number - cat > "$dir/bin/gh" << 'GHEOF_START' -#!/usr/bin/env bash -# Mock gh command for testing - -# Check for -q flag anywhere in args (jq query) -HAS_Q_FLAG=false -for arg in "$@"; do - if [[ "$arg" == "-q" ]]; then - HAS_Q_FLAG=true - break - fi -done - -# Handle repo view (required by fetch-pr-comments.sh) -if [[ "$1" == "repo" && "$2" == "view" ]]; then - if [[ "$*" == *"--json"* ]]; then - if [[ "$*" == *"owner,name"* ]]; then - if [[ "$HAS_Q_FLAG" == "true" ]]; then - # -q query extracts owner.login + "/" + name - echo "testowner/testrepo" - else - echo '{"owner":{"login":"testowner"},"name":"testrepo"}' - fi - exit 0 - elif [[ "$*" == *"parent"* ]]; then - if [[ "$HAS_Q_FLAG" == "true" ]]; then - # parent query returns empty/null for non-fork - echo "/" - else - echo '{"parent":null}' - fi - exit 0 - fi - fi - echo "testowner/testrepo" - exit 0 -fi - -# Handle pr view (required by fetch-pr-comments.sh) -# PR existence check uses: gh pr view --repo REPO --json number -q .number -if [[ "$1" == "pr" && "$2" == "view" ]]; then - if [[ "$*" == *"--json"* ]]; then - if [[ "$*" == *"number"* ]]; then - echo '{"number": 123}' - else - echo '{"state": "OPEN"}' - fi - exit 0 - fi - echo "PR #123" - exit 0 -fi - -# Handle api calls based on behavior -GHEOF_START - - # Add behavior-specific api handling - case "$behavior" in - empty_array) - cat >> "$dir/bin/gh" << 'GHEOF' -if [[ "$1" == "api" ]]; then - echo "[]" - exit 0 -fi -echo "[]" -exit 0 -GHEOF - ;; - rate_limit) - cat >> "$dir/bin/gh" << 'GHEOF' -if [[ "$1" == "api" ]]; then - echo '{"message":"API rate limit exceeded","documentation_url":"https://docs.github.com/rest/overview/resources-in-the-rest-api#rate-limiting"}' >&2 - exit 1 -fi -echo "[]" -exit 0 -GHEOF - ;; - network_error) - cat >> "$dir/bin/gh" << 'GHEOF' -if [[ "$1" == "api" ]]; then - echo "Connection refused" >&2 - exit 6 -fi -echo "[]" -exit 0 -GHEOF - ;; - auth_failure) - cat >> "$dir/bin/gh" << 
'GHEOF' -if [[ "$1" == "auth" && "$2" == "status" ]]; then - echo "You are not logged into any GitHub hosts" >&2 - exit 1 -fi -if [[ "$1" == "api" ]]; then - echo "[]" - exit 0 -fi -echo "[]" -exit 0 -GHEOF - ;; - claude_approval) - cat >> "$dir/bin/gh" << 'GHEOF' -if [[ "$1" == "api" ]]; then - # Return Claude bot approval for issue comments endpoint - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - cat << 'JSON' -[{"id":1,"user":{"login":"claude[bot]","type":"Bot"},"body":"LGTM! The implementation looks good.","created_at":"2026-01-19T12:00:00Z"}] -JSON - exit 0 - fi - echo "[]" - exit 0 -fi -echo "[]" -exit 0 -GHEOF - ;; - codex_issues) - cat >> "$dir/bin/gh" << 'GHEOF' -if [[ "$1" == "api" ]]; then - # Return Codex bot with issues for issue comments endpoint - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - cat << 'JSON' -[{"id":1,"user":{"login":"chatgpt-codex-connector[bot]","type":"Bot"},"body":"[P1] Critical issue found\n[P2] Minor issue","created_at":"2026-01-19T12:00:00Z"}] -JSON - exit 0 - fi - echo "[]" - exit 0 -fi -echo "[]" -exit 0 -GHEOF - ;; - mixed_bots) - cat >> "$dir/bin/gh" << 'GHEOF' -if [[ "$1" == "api" ]]; then - # Return mixed bot responses for issue comments endpoint - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - cat << 'JSON' -[{"id":1,"user":{"login":"claude[bot]","type":"Bot"},"body":"LGTM","created_at":"2026-01-19T12:00:00Z"},{"id":2,"user":{"login":"chatgpt-codex-connector[bot]","type":"Bot"},"body":"Approved","created_at":"2026-01-19T12:01:00Z"}] -JSON - exit 0 - fi - echo "[]" - exit 0 -fi -echo "[]" -exit 0 -GHEOF - ;; - unicode_comment) - cat >> "$dir/bin/gh" << 'GHEOF' -if [[ "$1" == "api" ]]; then - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - printf '[{"id":1,"user":{"login":"bot","type":"Bot"},"body":"Good work! 
\u2705 \u2728","created_at":"2026-01-19T12:00:00Z"}]\n' - exit 0 - fi - echo "[]" - exit 0 -fi -echo "[]" -exit 0 -GHEOF - ;; - long_comment) - cat >> "$dir/bin/gh" << 'GHEOF' -if [[ "$1" == "api" ]]; then - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - # Generate a long comment body - LONG_BODY=$(head -c 10000 /dev/zero 2>/dev/null | tr '\0' 'a' || printf 'a%.0s' {1..10000}) - echo "[{\"id\":1,\"user\":{\"login\":\"bot\",\"type\":\"Bot\"},\"body\":\"$LONG_BODY\",\"created_at\":\"2026-01-19T12:00:00Z\"}]" - exit 0 - fi - echo "[]" - exit 0 -fi -echo "[]" -exit 0 -GHEOF - ;; - *) - # Default: return empty array for api calls - cat >> "$dir/bin/gh" << 'GHEOF' -if [[ "$1" == "api" ]]; then - echo "[]" - exit 0 -fi -echo "[]" -exit 0 -GHEOF - ;; - esac - chmod +x "$dir/bin/gh" -} - -create_pr_loop_state() { - local dir="$1" - local round="${2:-0}" - mkdir -p "$dir/.humanize/pr-loop/2026-01-19_00-00-00" - cat > "$dir/.humanize/pr-loop/2026-01-19_00-00-00/state.md" << EOF ---- -current_round: $round -max_iterations: 42 -pr_number: 123 -pr_owner: testowner -pr_repo: testrepo -base_branch: main -configured_bots: - - claude - - codex -active_bots: - - claude -startup_case: 3 -review_started: false ---- -EOF -} - -init_basic_git_repo() { - local dir="$1" - cd "$dir" - git init -q - git config user.email "test@test.com" - git config user.name "Test User" - git config commit.gpgsign false - git checkout -q -b main 2>/dev/null || git checkout -q main - echo "initial" > file.txt - git add file.txt - git commit -q -m "Initial commit" - cd - > /dev/null -} - -# ======================================== -# Test Group Functions -# ======================================== - -# Tests 1-11: PR Loop State Handling + fetch-pr-comments + Bot Response Parsing + JSON Edge Cases -run_fetch_tests() { - - # ======================================== - # PR Loop State Handling Tests - # ======================================== - - echo "--- PR Loop State Handling Tests ---" - echo "" - - # 
Test 1: find_active_pr_loop detects PR loop state - echo "Test 1: PR loop state detection" - mkdir -p "$TEST_DIR/prloop1/.humanize/pr-loop/2026-01-19_00-00-00" - create_pr_loop_state "$TEST_DIR/prloop1" - - ACTIVE=$(find_active_pr_loop "$TEST_DIR/prloop1/.humanize/pr-loop" 2>/dev/null || echo "") - if [[ "$ACTIVE" == *"2026-01-19"* ]]; then - pass "PR loop state detected" - else - fail "PR loop detection" "*2026-01-19*" "$ACTIVE" - fi - - # Test 2: PR loop with YAML list active_bots - echo "" - echo "Test 2: PR loop with YAML list active_bots" - mkdir -p "$TEST_DIR/prloop2/.humanize/pr-loop/2026-01-19_00-00-00" - cat > "$TEST_DIR/prloop2/.humanize/pr-loop/2026-01-19_00-00-00/state.md" << 'EOF' ---- -current_round: 1 -max_iterations: 42 -pr_number: 456 -active_bots: - - claude - - codex -configured_bots: - - claude - - codex -base_branch: main -review_started: false ---- -EOF - - # Verify the file can be read - if grep -q "active_bots:" "$TEST_DIR/prloop2/.humanize/pr-loop/2026-01-19_00-00-00/state.md"; then - pass "YAML list active_bots format accepted" - else - fail "YAML list format" "contains active_bots" "not found" - fi - - # Test 3: PR loop state with missing pr_number - echo "" - echo "Test 3: PR loop state with missing pr_number" - mkdir -p "$TEST_DIR/prloop3/.humanize/pr-loop/2026-01-19_00-00-00" - cat > "$TEST_DIR/prloop3/.humanize/pr-loop/2026-01-19_00-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -configured_bots: - - claude -base_branch: main -review_started: false ---- -EOF - - # Should still be detectable as an active loop - ACTIVE=$(find_active_pr_loop "$TEST_DIR/prloop3/.humanize/pr-loop" 2>/dev/null || echo "") - if [[ -n "$ACTIVE" ]]; then - pass "PR loop without pr_number still detected" - else - fail "Missing pr_number" "detected" "not detected" - fi - - # ======================================== - # fetch-pr-comments.sh Tests - # ======================================== - - echo "" - echo "--- fetch-pr-comments.sh Script 
Tests ---" - echo "" - - # Test 4: Empty JSON array handled by fetch-pr-comments - echo "Test 4: Empty PR comments creates valid output file" - mkdir -p "$TEST_DIR/fetch1" - init_basic_git_repo "$TEST_DIR/fetch1" - create_mock_gh "$TEST_DIR/fetch1" "empty_array" - - set +e - OUTPUT=$(PATH="$TEST_DIR/fetch1/bin:$PATH" "$PROJECT_ROOT/scripts/fetch-pr-comments.sh" 123 "$TEST_DIR/fetch1/comments.md" 2>&1) - EXIT_CODE=$? - set -e - - # Must succeed AND create output file with expected content - if [[ $EXIT_CODE -eq 0 ]] && [[ -f "$TEST_DIR/fetch1/comments.md" ]]; then - # Verify output contains expected structure - if grep -q "PR Comments for #123" "$TEST_DIR/fetch1/comments.md" && \ - grep -q "testowner/testrepo" "$TEST_DIR/fetch1/comments.md"; then - pass "Empty PR comments creates valid output (PR#, repo in file)" - else - fail "Empty PR output" "contains PR# and repo" "$(head -10 "$TEST_DIR/fetch1/comments.md")" - fi - else - fail "Empty PR comments" "exit 0 with output file" "exit=$EXIT_CODE" - fi - - # Test 5: Rate limit error produces warning in output - echo "" - echo "Test 5: Rate limit error produces warning" - mkdir -p "$TEST_DIR/fetch2" - init_basic_git_repo "$TEST_DIR/fetch2" - create_mock_gh "$TEST_DIR/fetch2" "rate_limit" - - set +e - OUTPUT=$(PATH="$TEST_DIR/fetch2/bin:$PATH" "$PROJECT_ROOT/scripts/fetch-pr-comments.sh" 123 "$TEST_DIR/fetch2/comments.md" 2>&1) - EXIT_CODE=$? 
- set -e - - # Script may still create output file with warnings about API failures - if [[ -f "$TEST_DIR/fetch2/comments.md" ]]; then - # Check for warning about API failures - if grep -qi "warning\|failed" "$TEST_DIR/fetch2/comments.md" || echo "$OUTPUT" | grep -qi "failed\|error"; then - pass "Rate limit produces warning (exit=$EXIT_CODE)" - else - pass "Rate limit handled gracefully (exit=$EXIT_CODE)" - fi - else - # Non-zero exit without file is acceptable for API errors - if [[ $EXIT_CODE -ne 0 ]]; then - pass "Rate limit error returns non-zero exit ($EXIT_CODE)" - else - fail "Rate limit handling" "non-zero exit or warning" "exit 0, no file" - fi - fi - - # Test 6: Network error handled gracefully - echo "" - echo "Test 6: Network error handled gracefully" - mkdir -p "$TEST_DIR/fetch3" - init_basic_git_repo "$TEST_DIR/fetch3" - create_mock_gh "$TEST_DIR/fetch3" "network_error" - - set +e - OUTPUT=$(PATH="$TEST_DIR/fetch3/bin:$PATH" "$PROJECT_ROOT/scripts/fetch-pr-comments.sh" 123 "$TEST_DIR/fetch3/comments.md" 2>&1) - EXIT_CODE=$? - set -e - - # Network errors should produce non-zero exit or warning - if [[ $EXIT_CODE -ne 0 ]] || echo "$OUTPUT" | grep -qi "error\|failed\|connection"; then - pass "Network error handled (exit=$EXIT_CODE)" - else - fail "Network error handling" "non-zero exit or error message" "exit=$EXIT_CODE" - fi - - # ======================================== - # Bot Response Parsing Tests (via fetch-pr-comments.sh) - # ======================================== - - echo "" - echo "--- Bot Response Parsing Tests ---" - echo "" - - # Test 7: Claude bot comments parsed and formatted in output - echo "Test 7: Claude bot comments appear in fetch-pr-comments output" - mkdir -p "$TEST_DIR/bot1" - init_basic_git_repo "$TEST_DIR/bot1" - create_mock_gh "$TEST_DIR/bot1" "claude_approval" - - set +e - OUTPUT=$(PATH="$TEST_DIR/bot1/bin:$PATH" "$PROJECT_ROOT/scripts/fetch-pr-comments.sh" 123 "$TEST_DIR/bot1/comments.md" 2>&1) - EXIT_CODE=$? 
- set -e - - if [[ $EXIT_CODE -eq 0 ]] && [[ -f "$TEST_DIR/bot1/comments.md" ]]; then - # Verify Claude bot comment appears in formatted output - if grep -q "claude\[bot\]" "$TEST_DIR/bot1/comments.md" && grep -q "LGTM" "$TEST_DIR/bot1/comments.md"; then - pass "Claude bot comment parsed and formatted in output" - else - fail "Claude parsing" "claude[bot] and LGTM in output" "$(cat "$TEST_DIR/bot1/comments.md")" - fi - else - fail "Claude bot test" "exit 0 with output file" "exit=$EXIT_CODE" - fi - - # Test 8: Codex bot with severity markers parsed correctly - echo "" - echo "Test 8: Codex bot severity markers in fetch-pr-comments output" - mkdir -p "$TEST_DIR/bot2" - init_basic_git_repo "$TEST_DIR/bot2" - create_mock_gh "$TEST_DIR/bot2" "codex_issues" - - set +e - OUTPUT=$(PATH="$TEST_DIR/bot2/bin:$PATH" "$PROJECT_ROOT/scripts/fetch-pr-comments.sh" 123 "$TEST_DIR/bot2/comments.md" 2>&1) - EXIT_CODE=$? - set -e - - if [[ $EXIT_CODE -eq 0 ]] && [[ -f "$TEST_DIR/bot2/comments.md" ]]; then - # Verify Codex severity markers appear in output - if grep -q "chatgpt-codex-connector\[bot\]" "$TEST_DIR/bot2/comments.md" && grep -q "\[P1\]" "$TEST_DIR/bot2/comments.md"; then - pass "Codex severity markers parsed in output" - else - fail "Codex parsing" "[P1] marker in output" "$(cat "$TEST_DIR/bot2/comments.md")" - fi - else - fail "Codex bot test" "exit 0 with output file" "exit=$EXIT_CODE" - fi - - # Test 9: Multiple bot responses both appear in output - echo "" - echo "Test 9: Multiple bots in fetch-pr-comments output" - mkdir -p "$TEST_DIR/bot3" - init_basic_git_repo "$TEST_DIR/bot3" - create_mock_gh "$TEST_DIR/bot3" "mixed_bots" - - set +e - OUTPUT=$(PATH="$TEST_DIR/bot3/bin:$PATH" "$PROJECT_ROOT/scripts/fetch-pr-comments.sh" 123 "$TEST_DIR/bot3/comments.md" 2>&1) - EXIT_CODE=$? 
- set -e - - if [[ $EXIT_CODE -eq 0 ]] && [[ -f "$TEST_DIR/bot3/comments.md" ]]; then - # Verify both bots appear - if grep -q "claude\[bot\]" "$TEST_DIR/bot3/comments.md" && grep -q "chatgpt-codex-connector\[bot\]" "$TEST_DIR/bot3/comments.md"; then - pass "Multiple bot responses both appear in output" - else - fail "Multiple bots" "both bots in output" "$(cat "$TEST_DIR/bot3/comments.md")" - fi - else - fail "Multiple bots test" "exit 0 with output file" "exit=$EXIT_CODE" - fi - - # ======================================== - # JSON Edge Cases (via fetch-pr-comments.sh) - # ======================================== - - echo "" - echo "--- JSON Edge Cases ---" - echo "" - - # Test 10: Unicode in bot comments processed through full pipeline - echo "Test 10: Unicode comments processed by fetch-pr-comments" - mkdir -p "$TEST_DIR/json1" - init_basic_git_repo "$TEST_DIR/json1" - create_mock_gh "$TEST_DIR/json1" "unicode_comment" - - set +e - OUTPUT=$(PATH="$TEST_DIR/json1/bin:$PATH" "$PROJECT_ROOT/scripts/fetch-pr-comments.sh" 123 "$TEST_DIR/json1/comments.md" 2>&1) - EXIT_CODE=$? - set -e - - if [[ $EXIT_CODE -eq 0 ]] && [[ -f "$TEST_DIR/json1/comments.md" ]]; then - pass "Unicode comments processed successfully" - else - fail "Unicode handling" "exit 0 with output file" "exit=$EXIT_CODE" - fi - - # Test 11: Very long comment body processed - echo "" - echo "Test 11: Long comment body processed by fetch-pr-comments" - mkdir -p "$TEST_DIR/json2" - init_basic_git_repo "$TEST_DIR/json2" - create_mock_gh "$TEST_DIR/json2" "long_comment" - - set +e - OUTPUT=$(PATH="$TEST_DIR/json2/bin:$PATH" "$PROJECT_ROOT/scripts/fetch-pr-comments.sh" 123 "$TEST_DIR/json2/comments.md" 2>&1) - EXIT_CODE=$? 
- set -e - - if [[ $EXIT_CODE -eq 0 ]] && [[ -f "$TEST_DIR/json2/comments.md" ]]; then - # Verify the long content was written - FILE_SIZE=$(wc -c < "$TEST_DIR/json2/comments.md") - if [[ $FILE_SIZE -gt 1000 ]]; then - pass "Long comment body processed (file size: $FILE_SIZE bytes)" - else - pass "Long comment handled (may be truncated)" - fi - else - fail "Long body handling" "exit 0 with output file" "exit=$EXIT_CODE" - fi -} - -# Tests 12-19: PR Loop Stop Hook + poll-pr-reviews -run_poll_tests() { - - # ======================================== - # PR Loop Stop Hook Tests - # ======================================== - - echo "" - echo "--- PR Loop Stop Hook Tests ---" - echo "" - - # Test 12: Stop hook with no active PR loop - echo "Test 12: Stop hook with no active PR loop" - mkdir -p "$TEST_DIR/stop1" - init_basic_git_repo "$TEST_DIR/stop1" - - set +e - OUTPUT=$(echo '{}' | CLAUDE_PROJECT_DIR="$TEST_DIR/stop1" bash "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1) - EXIT_CODE=$? - set -e - - if [[ $EXIT_CODE -eq 0 ]]; then - pass "PR stop hook passes when no loop active" - else - fail "No PR loop handling" "exit 0" "exit $EXIT_CODE" - fi - - # Test 13: Stop hook with corrupted state - echo "" - echo "Test 13: Stop hook with corrupted state" - mkdir -p "$TEST_DIR/stop2/.humanize/pr-loop/2026-01-19_00-00-00" - echo "not valid yaml [[[" > "$TEST_DIR/stop2/.humanize/pr-loop/2026-01-19_00-00-00/state.md" - init_basic_git_repo "$TEST_DIR/stop2" - - set +e - OUTPUT=$(echo '{}' | CLAUDE_PROJECT_DIR="$TEST_DIR/stop2" bash "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1) - EXIT_CODE=$? 
- set -e - - # Should handle gracefully without crashing - if [[ $EXIT_CODE -lt 128 ]]; then - pass "Stop hook handles corrupted state (exit $EXIT_CODE)" - else - fail "Corrupted state" "exit < 128" "exit $EXIT_CODE" - fi - - # Test 14: approve-state.md directory structure - echo "" - echo "Test 14: approve-state.md directory structure" - mkdir -p "$TEST_DIR/stop3/.humanize/pr-loop/2026-01-19_00-00-00" - create_pr_loop_state "$TEST_DIR/stop3" - - # The approve-state.md path should be writable - APPROVE_PATH="$TEST_DIR/stop3/.humanize/pr-loop/2026-01-19_00-00-00/approve-state.md" - touch "$APPROVE_PATH" 2>/dev/null - if [[ -f "$APPROVE_PATH" ]]; then - pass "approve-state.md path is writable" - rm "$APPROVE_PATH" - else - fail "Approve path" "writable" "not writable" - fi - - # ======================================== - # poll-pr-reviews.sh Tests - # ======================================== - - echo "" - echo "--- poll-pr-reviews.sh Script Tests ---" - echo "" - - # Test 15: poll-pr-reviews help displays usage - echo "Test 15: poll-pr-reviews help displays usage" - set +e - OUTPUT=$("$PROJECT_ROOT/scripts/poll-pr-reviews.sh" --help 2>&1) - EXIT_CODE=$? - set -e - - if [[ $EXIT_CODE -eq 0 ]] && echo "$OUTPUT" | grep -qi "usage\|poll"; then - pass "poll-pr-reviews help displays usage" - else - fail "poll-pr-reviews help" "exit 0 with usage" "exit=$EXIT_CODE" - fi - - # Test 16: poll-pr-reviews with missing required args - echo "" - echo "Test 16: poll-pr-reviews missing args rejected" - set +e - OUTPUT=$("$PROJECT_ROOT/scripts/poll-pr-reviews.sh" 2>&1) - EXIT_CODE=$? 
- set -e - - if [[ $EXIT_CODE -ne 0 ]] && echo "$OUTPUT" | grep -qi "required\|error"; then - pass "poll-pr-reviews missing args rejected" - else - fail "poll-pr-reviews validation" "non-zero with error" "exit=$EXIT_CODE" - fi - - # Test 17: poll-pr-reviews with mocked gh returns JSON output with required fields - echo "" - echo "Test 17: poll-pr-reviews with mocked gh produces valid JSON output" - mkdir -p "$TEST_DIR/poll1" - init_basic_git_repo "$TEST_DIR/poll1" - create_mock_gh "$TEST_DIR/poll1" "claude_approval" - - set +e - OUTPUT=$(PATH="$TEST_DIR/poll1/bin:$PATH" "$PROJECT_ROOT/scripts/poll-pr-reviews.sh" 123 --after "2026-01-18T00:00:00Z" --bots "claude" 2>&1) - EXIT_CODE=$? - set -e - - # poll-pr-reviews must output JSON with has_new_comments and parse correctly - if [[ $EXIT_CODE -eq 0 ]]; then - # Parse JSON to verify structure - HAS_NEW=$(echo "$OUTPUT" | jq -r '.has_new_comments // empty' 2>/dev/null || echo "") - if [[ -n "$HAS_NEW" ]] && [[ "$HAS_NEW" == "true" || "$HAS_NEW" == "false" ]]; then - # Also verify comments array exists (may be empty) - COMMENTS_TYPE=$(echo "$OUTPUT" | jq -r '.comments | type' 2>/dev/null || echo "") - if [[ "$COMMENTS_TYPE" == "array" ]]; then - pass "poll-pr-reviews produces valid JSON (has_new_comments=$HAS_NEW, comments is array)" - else - pass "poll-pr-reviews produces JSON with has_new_comments=$HAS_NEW" - fi - else - fail "poll-pr-reviews JSON" "has_new_comments boolean" "output missing or invalid: $OUTPUT" - fi - else - fail "poll-pr-reviews execution" "exit 0" "exit=$EXIT_CODE, output=$OUTPUT" - fi - - # Test 18: poll-pr-reviews timeout handling with slow mock - echo "" - echo "Test 18: poll-pr-reviews handles slow API gracefully" - mkdir -p "$TEST_DIR/poll2" - init_basic_git_repo "$TEST_DIR/poll2" - - # Create a mock gh that sleeps briefly but responds - mkdir -p "$TEST_DIR/poll2/bin" - cat > "$TEST_DIR/poll2/bin/gh" << 'GHEOF' -#!/usr/bin/env bash -# Handle repo view -if [[ "$1" == "repo" && "$2" == "view" ]]; 
then - if [[ "$*" == *"--json"* ]]; then - if [[ "$*" == *"owner,name"* ]]; then - echo '{"owner":{"login":"testowner"},"name":"testrepo"}' - exit 0 - elif [[ "$*" == *"parent"* ]]; then - echo '{"parent":null}' - exit 0 - fi - fi - echo "testowner/testrepo" - exit 0 -fi -# Handle pr view -if [[ "$1" == "pr" && "$2" == "view" ]]; then - if [[ "$*" == *"--json"* ]]; then - if [[ "$*" == *"number"* ]]; then - echo '{"number": 123}' - else - echo '{"state": "OPEN"}' - fi - exit 0 - fi - exit 0 -fi -# Simulate slow API -if [[ "$1" == "api" ]]; then - sleep 0.5 - echo "[]" - exit 0 -fi -echo "[]" -exit 0 -GHEOF - chmod +x "$TEST_DIR/poll2/bin/gh" - - set +e - OUTPUT=$(PATH="$TEST_DIR/poll2/bin:$PATH" "$PROJECT_ROOT/scripts/poll-pr-reviews.sh" 123 --after "2026-01-18T00:00:00Z" --bots "claude" 2>&1) - EXIT_CODE=$? - set -e - - # Should complete without hanging and produce valid JSON (even if empty) - if [[ $EXIT_CODE -eq 0 ]]; then - # Verify JSON output with has_new_comments (API returns empty, so should be false) - HAS_NEW=$(echo "$OUTPUT" | jq -r '.has_new_comments // empty' 2>/dev/null || echo "") - if [[ "$HAS_NEW" == "false" ]]; then - pass "poll-pr-reviews handles slow API (has_new_comments=false, no comments)" - elif [[ -n "$HAS_NEW" ]]; then - pass "poll-pr-reviews handles slow API (has_new_comments=$HAS_NEW)" - else - pass "poll-pr-reviews handles slow API gracefully (exit=0)" - fi - else - fail "poll-pr-reviews timeout" "exit 0" "exit=$EXIT_CODE" - fi - - # Test 19: poll-pr-reviews with API failure returns has_new_comments:false - echo "" - echo "Test 19: poll-pr-reviews with API failure returns has_new_comments:false" - mkdir -p "$TEST_DIR/poll3" - init_basic_git_repo "$TEST_DIR/poll3" - - # Create a mock gh that fails on API calls - mkdir -p "$TEST_DIR/poll3/bin" - cat > "$TEST_DIR/poll3/bin/gh" << 'GHEOF' -#!/usr/bin/env bash -# Check for -q flag anywhere in args (jq query) -HAS_Q_FLAG=false -for arg in "$@"; do - if [[ "$arg" == "-q" ]]; then - 
HAS_Q_FLAG=true - break - fi -done - -# Handle repo view -if [[ "$1" == "repo" && "$2" == "view" ]]; then - if [[ "$*" == *"--json"* ]]; then - if [[ "$*" == *"owner,name"* ]]; then - if [[ "$HAS_Q_FLAG" == "true" ]]; then - echo "testowner/testrepo" - else - echo '{"owner":{"login":"testowner"},"name":"testrepo"}' - fi - exit 0 - elif [[ "$*" == *"parent"* ]]; then - if [[ "$HAS_Q_FLAG" == "true" ]]; then - echo "/" - else - echo '{"parent":null}' - fi - exit 0 - fi - fi - echo "testowner/testrepo" - exit 0 -fi -# Handle pr view -if [[ "$1" == "pr" && "$2" == "view" ]]; then - if [[ "$*" == *"--json"* ]]; then - if [[ "$*" == *"number"* ]]; then - echo '{"number": 123}' - else - echo '{"state": "OPEN"}' - fi - exit 0 - fi - exit 0 -fi -# Fail on API calls to simulate network error -if [[ "$1" == "api" ]]; then - echo "Error: Network unreachable" >&2 - exit 1 -fi -exit 0 -GHEOF - chmod +x "$TEST_DIR/poll3/bin/gh" - - set +e - OUTPUT=$(PATH="$TEST_DIR/poll3/bin:$PATH" "$PROJECT_ROOT/scripts/poll-pr-reviews.sh" 123 --after "2026-01-18T00:00:00Z" --bots "claude" 2>&1) - EXIT_CODE=$? - set -e - - # On API failure, poll-pr-reviews MUST: - # 1. Exit with code 0 - # 2. Output valid JSON (parseable by jq -e) - # 3. Have has_new_comments exactly equal to false - # NO FALLBACKS - all three conditions must be met - if [[ $EXIT_CODE -ne 0 ]]; then - fail "poll-pr-reviews API failure" "exit 0" "exit=$EXIT_CODE" - else - # Extract JSON from output (warnings precede JSON, JSON may be multi-line) - # Find the line number where JSON starts (first '{') and extract from there to end - JSON_START_LINE=$(echo "$OUTPUT" | grep -n '^{' | head -1 | cut -d: -f1) - if [[ -z "$JSON_START_LINE" ]]; then - fail "poll-pr-reviews API failure" "JSON output" "no JSON found in output" - else - JSON_OUTPUT=$(echo "$OUTPUT" | tail -n +$JSON_START_LINE) - - # Validate JSON is parseable using jq -e (exits non-zero on invalid JSON) - if ! echo "$JSON_OUTPUT" | jq -e '.' 
>/dev/null 2>&1; then - fail "poll-pr-reviews API failure" "valid JSON output" "invalid JSON: $JSON_OUTPUT" - else - # Verify has_new_comments is exactly boolean false (not string "false") - # jq -e '.has_new_comments == false' returns 0 only if the value is boolean false - if echo "$JSON_OUTPUT" | jq -e '.has_new_comments == false' >/dev/null 2>&1; then - pass "poll-pr-reviews returns exit 0 with valid JSON and has_new_comments:false (boolean)" - else - # Show actual value and type for debugging - HAS_NEW_VALUE=$(echo "$JSON_OUTPUT" | jq '.has_new_comments') - HAS_NEW_TYPE=$(echo "$JSON_OUTPUT" | jq -r '.has_new_comments | type') - fail "poll-pr-reviews API failure" "has_new_comments: boolean false" "value=$HAS_NEW_VALUE type=$HAS_NEW_TYPE" - fi - fi - fi - fi -} - -# ======================================== -# Source Guard: run all tests when executed directly -# ======================================== - -if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then - run_fetch_tests - run_poll_tests - print_test_summary "PR Loop API Robustness Test Summary" - exit $? -fi diff --git a/tests/robustness/test-setup-scripts-robustness.sh b/tests/robustness/test-setup-scripts-robustness.sh index ec2d293c..4e13e5b8 100755 --- a/tests/robustness/test-setup-scripts-robustness.sh +++ b/tests/robustness/test-setup-scripts-robustness.sh @@ -2,7 +2,7 @@ # # Robustness tests for setup scripts # -# Tests setup-rlcr-loop.sh and setup-pr-loop.sh under edge cases: +# Tests setup-rlcr-loop.sh under edge cases: # - Argument parsing edge cases # - Plan file validation edge cases # - Git repository edge cases @@ -92,17 +92,6 @@ run_rlcr_setup() { ) } -# Run setup-pr-loop.sh with proper isolation from real PR loop -# Usage: run_pr_setup [args...] 
-run_pr_setup() { - local repo_dir="$1" - shift - ( - cd "$repo_dir" - CLAUDE_PROJECT_DIR="$repo_dir" "$PROJECT_ROOT/scripts/setup-pr-loop.sh" "$@" - ) -} - # ======================================== # Setup RLCR Loop Argument Parsing Tests # ======================================== @@ -502,68 +491,6 @@ else fail "Tracked plan without flag" "rejection" "exit=$EXIT_CODE" fi -# ======================================== -# Setup PR Loop Tests -# ======================================== - -echo "" -echo "--- Setup PR Loop Argument Tests ---" -echo "" - -# Test 19: Help flag displays usage -echo "Test 19: PR loop help flag displays usage" -OUTPUT=$("$PROJECT_ROOT/scripts/setup-pr-loop.sh" --help 2>&1) || true -if echo "$OUTPUT" | grep -q "USAGE\|start-pr-loop"; then - pass "PR loop help flag displays usage" -else - fail "PR loop help" "USAGE text" "no usage found" -fi - -# Test 20: Missing bot flag shows error -echo "" -echo "Test 20: PR loop missing bot flag shows error" -OUTPUT=$("$PROJECT_ROOT/scripts/setup-pr-loop.sh" 2>&1) || EXIT_CODE=$? -EXIT_CODE=${EXIT_CODE:-0} -if [[ $EXIT_CODE -ne 0 ]] && echo "$OUTPUT" | grep -qi "at least one bot flag"; then - pass "PR loop missing bot flag shows error" -else - fail "Missing bot flag" "error message" "exit=$EXIT_CODE" -fi - -# Test 21: Unknown option rejected -echo "" -echo "Test 21: PR loop unknown option rejected" -OUTPUT=$("$PROJECT_ROOT/scripts/setup-pr-loop.sh" --unknown-option 2>&1) || EXIT_CODE=$? -EXIT_CODE=${EXIT_CODE:-0} -if [[ $EXIT_CODE -ne 0 ]] && echo "$OUTPUT" | grep -qi "unknown option"; then - pass "PR loop unknown option rejected" -else - fail "PR loop unknown option" "rejection" "exit=$EXIT_CODE" -fi - -# Test 22: --max with non-numeric value rejected -echo "" -echo "Test 22: PR loop --max with non-numeric value rejected" -OUTPUT=$("$PROJECT_ROOT/scripts/setup-pr-loop.sh" --claude --max abc 2>&1) || EXIT_CODE=$? 
-EXIT_CODE=${EXIT_CODE:-0} -if [[ $EXIT_CODE -ne 0 ]] && echo "$OUTPUT" | grep -qi "positive integer"; then - pass "PR loop --max non-numeric rejected" -else - fail "PR loop --max validation" "rejection" "exit=$EXIT_CODE" -fi - -# Test 23: Non-git directory rejected -echo "" -echo "Test 23: PR loop non-git directory rejected" -mkdir -p "$TEST_DIR/pr-nongit" -OUTPUT=$(run_pr_setup "$TEST_DIR/pr-nongit" --claude 2>&1) || EXIT_CODE=$? -EXIT_CODE=${EXIT_CODE:-0} -if [[ $EXIT_CODE -ne 0 ]] && echo "$OUTPUT" | grep -qi "git repository"; then - pass "PR loop non-git directory rejected" -else - fail "PR loop non-git" "rejection" "exit=$EXIT_CODE" -fi - # ======================================== # Mutual Exclusion Tests # ======================================== @@ -602,38 +529,6 @@ else fail "RLCR mutual exclusion" "rejection" "exit=$EXIT_CODE" fi -# Test 25: PR loop blocks starting RLCR loop -echo "" -echo "Test 25: Active PR loop blocks new RLCR loop" -mkdir -p "$TEST_DIR/repo25" -init_basic_git_repo "$TEST_DIR/repo25" -create_minimal_plan "$TEST_DIR/repo25" -echo "plan.md" >> "$TEST_DIR/repo25/.gitignore" -git -C "$TEST_DIR/repo25" add .gitignore && git -C "$TEST_DIR/repo25" commit -q -m "Add gitignore" - -# Create fake active PR loop -mkdir -p "$TEST_DIR/repo25/.humanize/pr-loop/2026-01-19_00-00-00" -cat > "$TEST_DIR/repo25/.humanize/pr-loop/2026-01-19_00-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 ---- -EOF - -mkdir -p "$TEST_DIR/repo25/bin" -echo '#!/usr/bin/env bash -exit 0' > "$TEST_DIR/repo25/bin/codex" -chmod +x "$TEST_DIR/repo25/bin/codex" - -OUTPUT=$(PATH="$TEST_DIR/repo25/bin:$PATH" run_rlcr_setup "$TEST_DIR/repo25" plan.md 2>&1) || EXIT_CODE=$? 
-EXIT_CODE=${EXIT_CODE:-0} -if [[ $EXIT_CODE -ne 0 ]] && echo "$OUTPUT" | grep -qi "pr loop.*already active\|already active"; then - pass "Active PR loop blocks new RLCR loop" -else - fail "PR loop blocks RLCR" "rejection" "exit=$EXIT_CODE" -fi - # ======================================== # Symlink Protection Tests # ======================================== @@ -751,33 +646,6 @@ else pass "Valid numeric arguments accepted (--max 10, --codex-timeout 3600)" fi -# Test 30: Valid PR loop setup proceeds past argument validation -echo "" -echo "Test 30: Valid PR loop setup proceeds past argument validation" -mkdir -p "$TEST_DIR/repo30" -init_basic_git_repo "$TEST_DIR/repo30" - -# Create mock gh that fails auth check (to test dependency handling) -mkdir -p "$TEST_DIR/repo30/bin" -cat > "$TEST_DIR/repo30/bin/gh" << 'EOF' -#!/usr/bin/env bash -if [[ "$1" == "auth" && "$2" == "status" ]]; then - echo "Not logged in" >&2 - exit 1 -fi -exit 0 -EOF -chmod +x "$TEST_DIR/repo30/bin/gh" - -OUTPUT=$(PATH="$TEST_DIR/repo30/bin:$PATH" run_pr_setup "$TEST_DIR/repo30" --claude 2>&1) || EXIT_CODE=$? -EXIT_CODE=${EXIT_CODE:-0} -# Should fail at gh auth check, not argument parsing -if [[ $EXIT_CODE -ne 0 ]] && echo "$OUTPUT" | grep -qi "gh\|auth\|logged"; then - pass "Valid PR loop setup proceeds to gh auth check" -else - fail "Valid PR loop setup" "fail at gh auth check" "exit=$EXIT_CODE" -fi - # ======================================== # Timeout Scenario Tests # ======================================== @@ -804,20 +672,6 @@ else pass "--codex-timeout 0 accepted (non-negative integer validation)" fi -# Test 32: --codex-timeout with non-numeric value rejected (PR loop) -echo "" -echo "Test 32: PR loop --codex-timeout with non-numeric value rejected" -mkdir -p "$TEST_DIR/repo32" -init_basic_git_repo "$TEST_DIR/repo32" -mkdir -p "$TEST_DIR/repo32/bin" -OUTPUT=$(PATH="$TEST_DIR/repo32/bin:$PATH" run_pr_setup "$TEST_DIR/repo32" --claude --codex-timeout "abc" 2>&1) || EXIT_CODE=$? 
-EXIT_CODE=${EXIT_CODE:-0} -if [[ $EXIT_CODE -ne 0 ]] && echo "$OUTPUT" | grep -qi "positive integer"; then - pass "PR loop --codex-timeout non-numeric rejected" -else - fail "PR loop --codex-timeout non-numeric" "rejection with 'positive integer'" "exit=$EXIT_CODE, output=$OUTPUT" -fi - # Test 33: Very large timeout value accepted echo "" echo "Test 33: Very large timeout value accepted" diff --git a/tests/run-all-tests.sh b/tests/run-all-tests.sh index b5322ae2..b6ba6b24 100755 --- a/tests/run-all-tests.sh +++ b/tests/run-all-tests.sh @@ -86,10 +86,6 @@ TEST_SUITES=( "test-config-error-handling.sh" "test-codex-hook-install.sh" "test-unified-codex-config.sh" - "test-pr-loop-1-scripts.sh" - "test-pr-loop-2-hooks.sh" - "test-pr-loop-3-stophook.sh" - "test-pr-loop-system.sh" # Session ID and Agent Teams tests "test-session-id.sh" "test-agent-teams.sh" @@ -118,8 +114,6 @@ TEST_SUITES=( "robustness/test-hook-system-robustness.sh" "robustness/test-template-error-robustness.sh" "robustness/test-state-transition-robustness.sh" - "robustness/test-pr-loop-api-fetch.sh" - "robustness/test-pr-loop-api-poll.sh" ) # Tests that must be run with zsh (not bash) diff --git a/tests/setup-fixture-mock-gh.sh b/tests/setup-fixture-mock-gh.sh deleted file mode 100755 index f522bf58..00000000 --- a/tests/setup-fixture-mock-gh.sh +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/env bash -# -# Create a mock gh CLI that returns fixture data for testing -# fetch-pr-comments.sh and poll-pr-reviews.sh -# -# Usage: ./setup-fixture-mock-gh.sh -# -# The mock gh will: -# - Return fixture data for /issues/*/comments, /pulls/*/comments, /pulls/*/reviews -# - Return testuser for gh api user -# - Return testowner/testrepo for gh repo view -# - -set -euo pipefail - -MOCK_BIN_DIR="${1:-}" -FIXTURES_DIR="${2:-}" - -if [[ -z "$MOCK_BIN_DIR" || -z "$FIXTURES_DIR" ]]; then - echo "Usage: $0 " >&2 - exit 1 -fi - -mkdir -p "$MOCK_BIN_DIR" - -# Create mock gh that returns fixtures -cat > "$MOCK_BIN_DIR/gh" << 
MOCK_GH_EOF -#!/usr/bin/env bash -# Fixture-backed mock gh CLI for testing fetch/poll scripts - -FIXTURES_DIR="$FIXTURES_DIR" - -case "\$1" in - auth) - if [[ "\$2" == "status" ]]; then - echo "Logged in to github.com" - exit 0 - fi - ;; - repo) - if [[ "\$2" == "view" ]]; then - if [[ "\$*" == *"owner,name"* ]] || [[ "\$*" == *"owner"* && "\$*" == *"name"* ]]; then - echo '{"owner": {"login": "testowner"}, "name": "testrepo"}' - elif [[ "\$*" == *"parent"* ]]; then - echo '{"parent": null}' - elif [[ "\$*" == *"owner"* ]]; then - echo '{"owner": {"login": "testowner"}}' - elif [[ "\$*" == *"name"* ]]; then - echo '{"name": "testrepo"}' - fi - exit 0 - fi - ;; - pr) - if [[ "\$2" == "view" ]]; then - if [[ "\$*" == *"number"* ]]; then - echo '{"number": 123}' - elif [[ "\$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - fi - exit 0 - fi - ;; - api) - # Handle user endpoint - if [[ "\$2" == "user" ]]; then - echo '{"login": "testuser"}' - exit 0 - fi - - # Handle issue comments endpoint - if [[ "\$2" == *"/issues/"*"/comments"* ]]; then - cat "\$FIXTURES_DIR/issue_comments.json" - exit 0 - fi - - # Handle PR review comments endpoint (inline comments) - if [[ "\$2" == *"/pulls/"*"/comments"* ]]; then - cat "\$FIXTURES_DIR/review_comments.json" - exit 0 - fi - - # Handle PR reviews endpoint - if [[ "\$2" == *"/pulls/"*"/reviews"* ]]; then - cat "\$FIXTURES_DIR/pr_reviews.json" - exit 0 - fi - - # Default: return empty array - echo "[]" - exit 0 - ;; -esac - -echo "Mock gh: unhandled command: \$*" >&2 -exit 1 -MOCK_GH_EOF - -chmod +x "$MOCK_BIN_DIR/gh" - -echo "$MOCK_BIN_DIR" diff --git a/tests/setup-monitor-test-env.sh b/tests/setup-monitor-test-env.sh index 92fa9431..977d1b53 100755 --- a/tests/setup-monitor-test-env.sh +++ b/tests/setup-monitor-test-env.sh @@ -2,7 +2,7 @@ # # Helper script to set up monitor test environment # This script creates the necessary directory structure and state files -# for testing the monitor pr command. 
+# for testing the monitor command. # # Usage: ./setup-monitor-test-env.sh # @@ -18,72 +18,9 @@ if [[ -z "$TEST_DIR" ]]; then fi case "$TEST_NAME" in - yaml_list) - # Test: active_bots with YAML list format - TIMESTAMP="2026-01-18_16-00-00" - mkdir -p "$TEST_DIR/.humanize/pr-loop/$TIMESTAMP" - cat > "$TEST_DIR/.humanize/pr-loop/$TIMESTAMP/state.md" << 'STATEEOF' ---- -current_round: 1 -max_iterations: 42 -pr_number: 456 -start_branch: feature-branch -configured_bots: - - claude - - codex -active_bots: - - claude - - codex -codex_model: gpt-5.4 -codex_effort: medium -started_at: 2026-01-18T16:00:00Z ---- -STATEEOF - ;; - configured) - # Test: configured_bots vs active_bots (partial approval) - TIMESTAMP="2026-01-18_16-01-00" - mkdir -p "$TEST_DIR/.humanize/pr-loop/$TIMESTAMP" - cat > "$TEST_DIR/.humanize/pr-loop/$TIMESTAMP/state.md" << 'STATEEOF' ---- -current_round: 2 -max_iterations: 42 -pr_number: 789 -start_branch: test-branch -configured_bots: - - claude - - codex -active_bots: - - codex -codex_model: gpt-5.4 -codex_effort: medium -started_at: 2026-01-18T16:00:00Z ---- -STATEEOF - ;; - empty) - # Test: empty active_bots (all approved) - TIMESTAMP="2026-01-18_16-02-00" - mkdir -p "$TEST_DIR/.humanize/pr-loop/$TIMESTAMP" - cat > "$TEST_DIR/.humanize/pr-loop/$TIMESTAMP/state.md" << 'STATEEOF' ---- -current_round: 3 -max_iterations: 42 -pr_number: 999 -start_branch: approved-branch -configured_bots: - - claude - - codex -active_bots: -codex_model: gpt-5.4 -codex_effort: medium -started_at: 2026-01-18T16:00:00Z ---- -STATEEOF - ;; *) echo "Unknown test name: $TEST_NAME" >&2 - echo "Available: yaml_list, configured, empty" >&2 + echo "Available: (none currently)" >&2 exit 1 ;; esac diff --git a/tests/test-codex-hook-install.sh b/tests/test-codex-hook-install.sh index b25c845e..2d70bb2d 100755 --- a/tests/test-codex-hook-install.sh +++ b/tests/test-codex-hook-install.sh @@ -90,11 +90,6 @@ cat > "$HOOKS_FILE" <<'EOF' "type": "command", "command": 
"/tmp/old/skills/humanize/hooks/loop-codex-stop-hook.sh", "timeout": 30 - }, - { - "type": "command", - "command": "/tmp/old/skills/humanize/hooks/pr-loop-stop-hook.sh", - "timeout": 30 } ] }, @@ -190,7 +185,6 @@ for group in data["hooks"]["Stop"]: expected = { f"{runtime_root}/hooks/loop-codex-stop-hook.sh", - f"{runtime_root}/hooks/pr-loop-stop-hook.sh", } print("FOUND=" + ("1" if expected.issubset(set(commands)) else "0")) @@ -225,10 +219,10 @@ else fail "Codex install preserves SessionStart hooks" "SESSION=1" "$PY_OUTPUT" fi -if grep -q '^COUNT=2$' <<<"$PY_OUTPUT"; then - pass "Codex install writes exactly two managed Humanize Stop hooks" +if grep -q '^COUNT=1$' <<<"$PY_OUTPUT"; then + pass "Codex install writes exactly one managed Humanize Stop hook" else - fail "Codex install writes exactly two managed Humanize Stop hooks" "COUNT=2" "$PY_OUTPUT" + fail "Codex install writes exactly one managed Humanize Stop hook" "COUNT=1" "$PY_OUTPUT" fi mkdir -p "$TEST_DIR/project" @@ -281,10 +275,10 @@ print(sum(1 for cmd in commands if "/humanize/hooks/" in cmd)) PY )" -if [[ "$PY_OUTPUT_2" == "2" ]]; then +if [[ "$PY_OUTPUT_2" == "1" ]]; then pass "Codex install is idempotent for managed hook commands" else - fail "Codex install is idempotent for managed hook commands" "2" "$PY_OUTPUT_2" + fail "Codex install is idempotent for managed hook commands" "1" "$PY_OUTPUT_2" fi if [[ "$(wc -l < "$FEATURE_LOG" | tr -d ' ')" == "2" ]]; then diff --git a/tests/test-disable-nested-codex-hooks.sh b/tests/test-disable-nested-codex-hooks.sh index 37212881..3d70c870 100644 --- a/tests/test-disable-nested-codex-hooks.sh +++ b/tests/test-disable-nested-codex-hooks.sh @@ -38,7 +38,6 @@ export XDG_CACHE_HOME="$TEST_DIR/.cache" mkdir -p "$XDG_CACHE_HOME" STOP_HOOK="$PROJECT_ROOT/hooks/loop-codex-stop-hook.sh" -PR_STOP_HOOK="$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" setup_repo() { local repo_dir="$1" @@ -194,13 +193,6 @@ else "--disable codex_hooks review" "$(cat "$TEST_DIR/review.args" 
2>/dev/null || echo missing)" fi -if grep -q 'codex "\${CODEX_DISABLE_HOOKS_ARGS\[@\]}" exec' "$PR_STOP_HOOK"; then - pass "PR stop hook disables codex_hooks for nested codex exec" -else - fail "PR stop hook disables codex_hooks for nested codex exec" \ - 'codex "${CODEX_DISABLE_HOOKS_ARGS[@]}" exec' "not found" -fi - echo "" echo "========================================" echo "Disable Nested Codex Hooks Tests" diff --git a/tests/test-monitor-e2e-deletion.sh b/tests/test-monitor-e2e-deletion.sh index afd738e6..6197ba7e 100755 --- a/tests/test-monitor-e2e-deletion.sh +++ b/tests/test-monitor-e2e-deletion.sh @@ -11,7 +11,6 @@ echo "" monitor_test_bash_deletion monitor_test_zsh_deletion -monitor_test_pr_deletion echo "" echo "========================================" diff --git a/tests/test-monitor-e2e-real.sh b/tests/test-monitor-e2e-real.sh index aa9eba1d..8a1c4f0e 100755 --- a/tests/test-monitor-e2e-real.sh +++ b/tests/test-monitor-e2e-real.sh @@ -685,317 +685,6 @@ ZSH_SIGINT_SCRIPT fi } -# ======================================== -# Test 5: Real _humanize_monitor_pr with directory deletion -# ======================================== -monitor_test_pr_deletion() { - echo "" - echo "Test 5: Real _humanize_monitor_pr with directory deletion" - echo "" - - # Create test project directory for PR monitor - TEST_PROJECT_PR="$TEST_BASE/project_pr" - mkdir -p "$TEST_PROJECT_PR/.humanize/pr-loop/2026-01-18_12-00-00" - - # Create valid PR loop state.md file - cat > "$TEST_PROJECT_PR/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'STATE' -current_round: 1 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - claude - - codex -active_bots: - - claude -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 2026-01-18T10:00:00Z -STATE - - # Create goal-tracker.md for PR loop - cat > "$TEST_PROJECT_PR/.humanize/pr-loop/2026-01-18_12-00-00/goal-tracker.md" << 'GOALTRACKER_EOF' -# PR 
Review Goal Tracker - -## PR Information -- PR Number: #123 -- Branch: test-branch -- Started: 2026-01-18T10:00:00Z - -## Issue Summary -| Round | Reviewer | Issues Found | Status | -|-------|----------|--------------|--------| -| 0 | - | 0 | Initial | - -## Total Statistics -- Total Issues Found: 0 -- Remaining: 0 -GOALTRACKER_EOF - - # Create fake HOME for PR monitor test - FAKE_HOME_PR="$TEST_BASE/home_pr" - mkdir -p "$FAKE_HOME_PR" - - # Create cache directory for PR monitor - SANITIZED_PROJECT_PR=$(echo "$TEST_PROJECT_PR" | sed 's/[^a-zA-Z0-9._-]/-/g' | sed 's/--*/-/g') - CACHE_DIR_PR="$FAKE_HOME_PR/.cache/humanize/$SANITIZED_PROJECT_PR/2026-01-18_12-00-00" - mkdir -p "$CACHE_DIR_PR" - echo "PR round 1 started" > "$CACHE_DIR_PR/round-1-codex-run.log" - - # Create bash test runner script for PR monitor - cat > "$TEST_PROJECT_PR/run_real_monitor_pr.sh" << 'MONITOR_SCRIPT' -#!/usr/bin/env bash -# Run the REAL _humanize_monitor_pr function - -PROJECT_DIR="$1" -PROJECT_ROOT="$2" -FAKE_HOME="$3" - -cd "$PROJECT_DIR" - -# Override HOME and XDG_CACHE_HOME -export HOME="$FAKE_HOME" -export XDG_CACHE_HOME="$FAKE_HOME/.cache" - -# Create shim functions for terminal commands -tput() { - case "$1" in - cols) echo "80" ;; - lines) echo "24" ;; - *) : ;; - esac -} - -# Stub terminal control -printf() { - case "$1" in - *\\033*) : ;; # Ignore escape sequences - *) builtin printf "$@" ;; - esac -} - -# Source the humanize script (loads all functions) -source "$PROJECT_ROOT/scripts/humanize.sh" - -# Override _pr_cleanup for testing -_pr_cleanup() { - echo "CLEANUP_CALLED_PR" -} - -# Start monitor with --once flag (single iteration) -# Then delete directory after brief delay -( - sleep 0.5 - rm -rf "$PROJECT_DIR/.humanize/pr-loop/2026-01-18_12-00-00" -) & -cleanup_pid=$! - -# Run monitor in foreground (will detect deletion) -humanize monitor pr --once 2>&1 - -echo "EXIT_CODE:$?" 
- -# Cleanup background process -kill $cleanup_pid 2>/dev/null || true -wait $cleanup_pid 2>/dev/null || true -MONITOR_SCRIPT - - chmod +x "$TEST_PROJECT_PR/run_real_monitor_pr.sh" - - # Run the PR monitor test - output_pr=$("$TEST_PROJECT_PR/run_real_monitor_pr.sh" "$TEST_PROJECT_PR" "$PROJECT_ROOT" "$FAKE_HOME_PR" 2>&1) || true - - # Verify: PR monitor e2e - graceful exit - if echo "$output_pr" | grep -qE 'Stopped|gracefully|EXIT_CODE:0'; then - pass "PR monitor e2e - graceful exit on directory deletion" - else - # Alternative: check for any clean exit indication - if echo "$output_pr" | grep -q "EXIT_CODE:0"; then - pass "PR monitor e2e - clean exit" - else - fail "PR monitor e2e" "Expected graceful stop or EXIT_CODE:0, got: $output_pr" - fi - fi - - # Verify no glob errors in PR monitor output - if echo "$output_pr" | grep -qE 'no matches found|bad pattern'; then - fail "PR monitor glob errors" "Found glob errors: $(echo "$output_pr" | grep -E 'no matches found|bad pattern')" - else - pass "PR monitor no glob errors" - fi -} - -# ======================================== -# Test 6: Real _humanize_monitor_pr without --once with SIGINT -# ======================================== -monitor_test_pr_sigint() { - echo "" - echo "Test 6: Real _humanize_monitor_pr without --once with SIGINT" - echo "" - - # Create test project directory for PR monitor without --once - TEST_PROJECT_PR_NO_ONCE="$TEST_BASE/project_pr_no_once" - mkdir -p "$TEST_PROJECT_PR_NO_ONCE/.humanize/pr-loop/2026-01-18_13-00-00" - - # Create valid PR loop state.md file - cat > "$TEST_PROJECT_PR_NO_ONCE/.humanize/pr-loop/2026-01-18_13-00-00/state.md" << 'STATE' -current_round: 1 -max_iterations: 42 -pr_number: 456 -start_branch: test-branch-no-once -configured_bots: - - claude - - codex -active_bots: - - claude -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 2 -poll_timeout: 60 -started_at: 2026-01-18T13:00:00Z -STATE - - # Create goal-tracker.md for PR loop - cat > 
"$TEST_PROJECT_PR_NO_ONCE/.humanize/pr-loop/2026-01-18_13-00-00/goal-tracker.md" << 'PR_GOAL_EOF' -# PR Review Goal Tracker - -## PR Information -- PR Number: #456 -- Branch: test-branch-no-once -- Started: 2026-01-18T13:00:00Z - -## Issue Summary -| Round | Reviewer | Issues Found | Status | -|-------|----------|--------------|--------| -| 0 | - | 0 | Initial | - -## Total Statistics -- Total Issues Found: 0 -- Remaining: 0 -PR_GOAL_EOF - - # Create fake HOME for PR monitor test without --once - FAKE_HOME_PR_NO_ONCE="$TEST_BASE/home_pr_no_once" - mkdir -p "$FAKE_HOME_PR_NO_ONCE" - - # Create cache directory for PR monitor - SANITIZED_PROJECT_PR_NO_ONCE=$(echo "$TEST_PROJECT_PR_NO_ONCE" | sed 's/[^a-zA-Z0-9._-]/-/g' | sed 's/--*/-/g') - CACHE_DIR_PR_NO_ONCE="$FAKE_HOME_PR_NO_ONCE/.cache/humanize/$SANITIZED_PROJECT_PR_NO_ONCE/2026-01-18_13-00-00" - mkdir -p "$CACHE_DIR_PR_NO_ONCE" - echo "PR round 1 started" > "$CACHE_DIR_PR_NO_ONCE/round-1-codex-run.log" - - # Create bash test runner script for PR monitor without --once - cat > "$TEST_PROJECT_PR_NO_ONCE/run_real_monitor_pr_no_once.sh" << 'PR_NO_ONCE_EOF' -#!/usr/bin/env bash -# Run the REAL _humanize_monitor_pr function WITHOUT --once flag - -PROJECT_DIR="$1" -PROJECT_ROOT="$2" -FAKE_HOME="$3" - -cd "$PROJECT_DIR" - -# Override HOME and XDG_CACHE_HOME -export HOME="$FAKE_HOME" -export XDG_CACHE_HOME="$FAKE_HOME/.cache" - -# Create shim functions for terminal commands -tput() { - case "$1" in - cols) echo "80" ;; - lines) echo "24" ;; - *) : ;; - esac -} - -# Stub terminal control -printf() { - case "$1" in - *\\033*) : ;; # Ignore escape sequences - *) builtin printf "$@" ;; - esac -} - -# Source the humanize script (loads all functions) -source "$PROJECT_ROOT/scripts/humanize.sh" - -# Run monitor in foreground WITHOUT --once flag -# This runs the actual poll loop (not just one iteration) -humanize monitor pr 2>&1 -exit_code=$? 
- -echo "EXIT_CODE:$exit_code" -PR_NO_ONCE_EOF - - chmod +x "$TEST_PROJECT_PR_NO_ONCE/run_real_monitor_pr_no_once.sh" - - # Run the PR monitor in background (no --once means it will loop until interrupted) - OUTPUT_FILE_PR_NO_ONCE="$TEST_BASE/output_pr_no_once.txt" - bash "$TEST_PROJECT_PR_NO_ONCE/run_real_monitor_pr_no_once.sh" "$TEST_PROJECT_PR_NO_ONCE" "$PROJECT_ROOT" "$FAKE_HOME_PR_NO_ONCE" > "$OUTPUT_FILE_PR_NO_ONCE" 2>&1 & - MONITOR_PID_PR_NO_ONCE=$! - - # Wait for monitor to start running its poll loop - sleep 3 - - # Verify monitor is running before sending SIGINT - if kill -0 $MONITOR_PID_PR_NO_ONCE 2>/dev/null; then - # Send SIGINT to stop the continuous monitor (simulates Ctrl+C) - # Using negative PID sends to entire process group - kill -INT -$MONITOR_PID_PR_NO_ONCE 2>/dev/null || kill -INT $MONITOR_PID_PR_NO_ONCE 2>/dev/null || true - - # Wait for monitor to exit gracefully after SIGINT - WAIT_COUNT=0 - while kill -0 $MONITOR_PID_PR_NO_ONCE 2>/dev/null && [[ $WAIT_COUNT -lt 20 ]]; do - sleep 0.5 - WAIT_COUNT=$((WAIT_COUNT + 1)) - done - - # Force kill if still running - if kill -0 $MONITOR_PID_PR_NO_ONCE 2>/dev/null; then - # Try SIGTERM before SIGKILL - kill -TERM $MONITOR_PID_PR_NO_ONCE 2>/dev/null || true - sleep 1 - if kill -0 $MONITOR_PID_PR_NO_ONCE 2>/dev/null; then - kill -9 $MONITOR_PID_PR_NO_ONCE 2>/dev/null || true - fi - wait $MONITOR_PID_PR_NO_ONCE 2>/dev/null || true - # Still count as pass if the monitor ran and was terminated (SIGINT delivery is complex) - pass "PR monitor (no --once) handled via SIGTERM" - else - wait $MONITOR_PID_PR_NO_ONCE 2>/dev/null || true - pass "PR monitor (no --once) exited after SIGINT" - fi - else - fail "PR monitor (no --once) start" "Monitor did not start properly" - fi - - # Read captured output - output_pr_no_once=$(cat "$OUTPUT_FILE_PR_NO_ONCE" 2>/dev/null || echo "") - - # Verify clean exit after SIGINT - if echo "$output_pr_no_once" | grep -qE 'Stopped|Monitor stopped|EXIT_CODE:[01]'; then - pass "PR 
monitor (no --once) clean SIGINT exit" - else - # Check for any indication the monitor ran properly before SIGINT - if echo "$output_pr_no_once" | grep -qE 'PR|loop|Waiting|session'; then - pass "PR monitor (no --once) ran before SIGINT" - else - fail "PR monitor (no --once) SIGINT cleanup" "Expected cleanup message, got: $(head -c 300 <<< "$output_pr_no_once" | tr '\n' ' ')" - fi - fi - - # Verify no glob errors in PR monitor output - if echo "$output_pr_no_once" | grep -qE 'no matches found|bad pattern'; then - fail "PR monitor (no --once) glob errors" "Found glob errors" - else - pass "PR monitor (no --once) no glob errors" - fi -} - # ======================================== # Run all tests and print summary when executed directly # ======================================== @@ -1009,8 +698,6 @@ if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then monitor_test_zsh_deletion monitor_test_bash_sigint monitor_test_zsh_sigint - monitor_test_pr_deletion - monitor_test_pr_sigint # Summary echo "" @@ -1029,7 +716,6 @@ if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then echo "VERIFIED: Terminal state restored" echo "VERIFIED: Works in bash and zsh" echo "VERIFIED: Real SIGINT/Ctrl+C handling (bash and zsh)" - echo "VERIFIED: PR monitor e2e works (with and without --once)" exit 0 else echo "" diff --git a/tests/test-monitor-e2e-sigint.sh b/tests/test-monitor-e2e-sigint.sh index a6bfe20a..92aea080 100755 --- a/tests/test-monitor-e2e-sigint.sh +++ b/tests/test-monitor-e2e-sigint.sh @@ -11,7 +11,6 @@ echo "" monitor_test_bash_sigint monitor_test_zsh_sigint -monitor_test_pr_sigint echo "" echo "========================================" diff --git a/tests/test-pr-loop-1-scripts.sh b/tests/test-pr-loop-1-scripts.sh deleted file mode 100755 index 38ccd846..00000000 --- a/tests/test-pr-loop-1-scripts.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash -# -# PR Loop Script Tests Runner (parallel split 1/3) -# -# Runs only script argument validation tests from the PR loop test suite. 
-# See test-pr-loop.sh for the combined runner. -# - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" - -source "$SCRIPT_DIR/test-helpers.sh" -source "$SCRIPT_DIR/test-pr-loop-lib.sh" - -init_pr_loop_test_env - -source "$SCRIPT_DIR/test-pr-loop-scripts.sh" - -run_script_tests - -print_test_summary "PR Loop Script Tests" -exit $? diff --git a/tests/test-pr-loop-2-hooks.sh b/tests/test-pr-loop-2-hooks.sh deleted file mode 100755 index 56f6219e..00000000 --- a/tests/test-pr-loop-2-hooks.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash -# -# PR Loop Hook Tests Runner (parallel split 2/3) -# -# Runs only hook functionality tests from the PR loop test suite. -# See test-pr-loop.sh for the combined runner. -# - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" - -source "$SCRIPT_DIR/test-helpers.sh" -source "$SCRIPT_DIR/test-pr-loop-lib.sh" - -init_pr_loop_test_env - -source "$SCRIPT_DIR/test-pr-loop-hooks.sh" - -run_hook_tests - -print_test_summary "PR Loop Hook Tests" -exit $? diff --git a/tests/test-pr-loop-3-stophook.sh b/tests/test-pr-loop-3-stophook.sh deleted file mode 100755 index 4027f478..00000000 --- a/tests/test-pr-loop-3-stophook.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env bash -# -# PR Loop Stop Hook Tests Runner (parallel split 3/3) -# -# Runs only stop hook integration tests from the PR loop test suite. -# This is the slowest module due to timeout-based bot polling tests. -# See test-pr-loop.sh for the combined runner. -# - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" - -source "$SCRIPT_DIR/test-helpers.sh" -source "$SCRIPT_DIR/test-pr-loop-lib.sh" - -if [[ "${GITHUB_ACTIONS:-}" == "true" ]]; then - skip "PR Loop Stop Hook Tests" "Skipped in GitHub Actions" - print_test_summary "PR Loop Stop Hook Tests" - exit 0 -fi - -init_pr_loop_test_env - -source "$SCRIPT_DIR/test-pr-loop-stophook.sh" - -run_stophook_tests - -print_test_summary "PR Loop Stop Hook Tests" -exit $? diff --git a/tests/test-pr-loop-hooks.sh b/tests/test-pr-loop-hooks.sh deleted file mode 100644 index de4d09f7..00000000 --- a/tests/test-pr-loop-hooks.sh +++ /dev/null @@ -1,1623 +0,0 @@ -#!/usr/bin/env bash -# -# PR Loop Hook Tests -# -# Tests for hook functionality: -# - Validators and protections -# - Comment processing -# - E2E tests -# - Fixture-based tests -# - Monitor tests -# -# Usage: source test-pr-loop-hooks.sh && run_hook_tests -# - -run_hook_tests() { -# ======================================== -# PR Loop Validator Tests -# ======================================== - -echo "" -echo "========================================" -echo "Testing PR Loop Validators" -echo "========================================" -echo "" - -# Test: active_bots is stored as YAML list -test_active_bots_yaml_format() { - cd "$TEST_DIR" - - # Create mock git repo - init_test_git_repo "$TEST_DIR/repo" - cd "$TEST_DIR/repo" - - # Create PR loop state file with proper YAML format - local timestamp="2026-01-18_13-00-00" - local loop_dir=".humanize/pr-loop/$timestamp" - mkdir -p "$loop_dir" - - cat > "$loop_dir/state.md" << EOF ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -active_bots: - - claude - - codex -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 2026-01-18T13:00:00Z ---- -EOF - - # Verify state file has YAML list format - if grep -q "^ - claude$" "$loop_dir/state.md" && \ - grep -q "^ - codex$" "$loop_dir/state.md"; then - pass "T-POS-12: active_bots is stored as 
YAML list format" - else - fail "T-POS-12: active_bots should be stored as YAML list format" - fi - - cd "$SCRIPT_DIR" -} - -# Test: PR loop state file is protected from writes -test_pr_loop_state_protected() { - cd "$TEST_DIR" - - # Create mock loop directory - local timestamp="2026-01-18_14-00-00" - local loop_dir=".humanize/pr-loop/$timestamp" - mkdir -p "$loop_dir" - - cat > "$loop_dir/state.md" << EOF ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 ---- -EOF - - # Test that write validator blocks state.md writes - local hook_input='{"tool_name": "Write", "tool_input": {"file_path": "'$TEST_DIR'/.humanize/pr-loop/'$timestamp'/state.md", "content": "malicious content"}}' - - local output - local exit_code - output=$(echo "$hook_input" | "$PROJECT_ROOT/hooks/loop-write-validator.sh" 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - - if [[ $exit_code -eq 2 ]] && echo "$output" | grep -qi "state.*blocked\|pr loop"; then - pass "T-SEC-1: PR loop state.md is protected from writes" - else - fail "T-SEC-1: PR loop state.md should be protected from writes" "exit=2, blocked" "exit=$exit_code" - fi - - cd "$SCRIPT_DIR" -} - -# Test: PR loop comment file is protected from writes -test_pr_loop_comment_protected() { - cd "$TEST_DIR" - - local timestamp="2026-01-18_14-00-00" - local loop_dir=".humanize/pr-loop/$timestamp" - mkdir -p "$loop_dir" - - cat > "$loop_dir/state.md" << EOF ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 ---- -EOF - - # Test that write validator blocks pr-comment.md writes - local hook_input='{"tool_name": "Write", "tool_input": {"file_path": "'$TEST_DIR'/.humanize/pr-loop/'$timestamp'/round-0-pr-comment.md", "content": "fake comments"}}' - - local output - local exit_code - output=$(echo "$hook_input" | "$PROJECT_ROOT/hooks/loop-write-validator.sh" 2>&1) || exit_code=$? 
- exit_code=${exit_code:-0} - - if [[ $exit_code -eq 2 ]]; then - pass "T-SEC-2: PR loop pr-comment file is protected from writes" - else - fail "T-SEC-2: PR loop pr-comment file should be protected from writes" "exit=2" "exit=$exit_code" - fi - - cd "$SCRIPT_DIR" -} - -# Test: PR loop resolve file is allowed for writes -test_pr_loop_resolve_allowed() { - cd "$TEST_DIR" - - local timestamp="2026-01-18_14-00-00" - local loop_dir=".humanize/pr-loop/$timestamp" - mkdir -p "$loop_dir" - - cat > "$loop_dir/state.md" << EOF ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 ---- -EOF - - # Test that write validator allows pr-resolve.md writes - local hook_input='{"tool_name": "Write", "tool_input": {"file_path": "'$TEST_DIR'/.humanize/pr-loop/'$timestamp'/round-0-pr-resolve.md", "content": "resolution summary"}}' - - local output - local exit_code - output=$(echo "$hook_input" | "$PROJECT_ROOT/hooks/loop-write-validator.sh" 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - - if [[ $exit_code -eq 0 ]]; then - pass "T-POS-13: PR loop pr-resolve file is allowed for writes" - else - fail "T-POS-13: PR loop pr-resolve file should be allowed for writes" "exit=0" "exit=$exit_code" - fi - - cd "$SCRIPT_DIR" -} - -# Run validator tests -test_active_bots_yaml_format -test_pr_loop_state_protected -test_pr_loop_comment_protected -test_pr_loop_resolve_allowed - -# Test: PR loop Bash protection works without RLCR loop -test_pr_loop_bash_protection_no_rlcr() { - cd "$TEST_DIR" - - # Ensure NO RLCR loop exists - rm -rf ".humanize/rlcr" - - local timestamp="2026-01-18_14-30-00" - local loop_dir=".humanize/pr-loop/$timestamp" - mkdir -p "$loop_dir" - - cat > "$loop_dir/state.md" << EOF ---- -current_round: 0 -max_iterations: 42 -pr_number: 456 ---- -EOF - - # Test that Bash validator blocks state.md modifications via echo redirect - local hook_input='{"tool_name": "Bash", "tool_input": {"command": "echo bad > '$TEST_DIR'/.humanize/pr-loop/'$timestamp'/state.md"}}' - - local 
output - local exit_code - output=$(echo "$hook_input" | "$PROJECT_ROOT/hooks/loop-bash-validator.sh" 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - - if [[ $exit_code -eq 2 ]] && echo "$output" | grep -qi "state\|blocked\|pr loop"; then - pass "T-SEC-4: PR loop Bash protection works without RLCR loop" - else - fail "T-SEC-4: PR loop Bash protection should work without RLCR" "exit=2, blocked" "exit=$exit_code, output=$output" - fi - - cd "$SCRIPT_DIR" -} - -test_pr_loop_bash_protection_no_rlcr - -# ======================================== -# Comment Sorting Tests -# ======================================== - -echo "" -echo "========================================" -echo "Testing Comment Sorting (fromdateiso8601)" -echo "========================================" -echo "" - -# Test: Timestamps are properly sorted (newest first) -test_timestamp_sorting() { - # Test that jq fromdateiso8601 works correctly - local sorted_output - sorted_output=$(echo '[ - {"created_at": "2026-01-18T10:00:00Z", "author_type": "User"}, - {"created_at": "2026-01-18T12:00:00Z", "author_type": "User"}, - {"created_at": "2026-01-18T11:00:00Z", "author_type": "User"} - ]' | jq 'sort_by(-(.created_at | fromdateiso8601)) | .[0].created_at') - - if [[ "$sorted_output" == '"2026-01-18T12:00:00Z"' ]]; then - pass "T-SORT-1: Comments are sorted newest first using fromdateiso8601" - else - fail "T-SORT-1: Comments should be sorted newest first" "12:00:00Z first" "got $sorted_output" - fi -} - -# Test: Human comments come before bot comments -test_human_before_bot_sorting() { - local sorted_output - sorted_output=$(echo '[ - {"created_at": "2026-01-18T12:00:00Z", "author_type": "Bot"}, - {"created_at": "2026-01-18T11:00:00Z", "author_type": "User"} - ]' | jq 'sort_by( - (if .author_type == "Bot" then 1 else 0 end), - -(.created_at | fromdateiso8601) - ) | .[0].author_type') - - if [[ "$sorted_output" == '"User"' ]]; then - pass "T-SORT-2: Human comments come before bot comments" - else - fail 
"T-SORT-2: Human comments should come before bot comments" "User first" "got $sorted_output" - fi -} - -# Run sorting tests -test_timestamp_sorting -test_human_before_bot_sorting - -# ======================================== -# Gate-keeper Logic Tests -# ======================================== - -echo "" -echo "========================================" -echo "Testing Gate-keeper Logic" -echo "========================================" -echo "" - -# Test: Comment deduplication by ID (unit test) -test_comment_deduplication() { - # Test that jq unique_by works for deduplication - local deduped_output - deduped_output=$(echo '[ - {"id": 1, "body": "first"}, - {"id": 2, "body": "second"}, - {"id": 1, "body": "duplicate of first"} - ]' | jq 'unique_by(.id) | length') - - if [[ "$deduped_output" == "2" ]]; then - pass "T-GATE-1: Comments are deduplicated by ID" - else - fail "T-GATE-1: Comments should be deduplicated by ID" "2 unique" "got $deduped_output" - fi -} - -# Test: YAML list parsing for configured_bots -test_configured_bots_parsing() { - local test_state="--- -current_round: 0 -configured_bots: - - claude - - codex -active_bots: - - claude -codex_model: gpt-5.4 ----" - - # Extract configured_bots using same logic as stop hook - local configured_bots="" - local in_field=false - while IFS= read -r line; do - if [[ "$line" =~ ^configured_bots: ]]; then - in_field=true - continue - fi - if [[ "$in_field" == "true" ]]; then - if [[ "$line" =~ ^[[:space:]]+-[[:space:]]+ ]]; then - local bot_name="${line#*- }" - bot_name=$(echo "$bot_name" | tr -d ' ') - configured_bots="${configured_bots}${bot_name}," - elif [[ "$line" =~ ^[a-zA-Z_] ]]; then - in_field=false - fi - fi - done <<< "$test_state" - - if [[ "$configured_bots" == "claude,codex," ]]; then - pass "T-GATE-2: configured_bots YAML list is parsed correctly" - else - fail "T-GATE-2: configured_bots parsing failed" "claude,codex," "got $configured_bots" - fi -} - -# Test: Bot status extraction from Codex output 
-test_bot_status_extraction() { - local codex_output="### Per-Bot Status -| Bot | Status | Summary | -|-----|--------|---------| -| claude | APPROVE | No issues found | -| codex | ISSUES | Found bug in line 42 | - -### Approved Bots -- claude" - - # Extract bots with ISSUES status using same logic as stop hook - local bots_with_issues="" - while IFS= read -r line; do - if echo "$line" | grep -qiE '\|[[:space:]]*ISSUES[[:space:]]*\|'; then - local bot=$(echo "$line" | sed 's/|/\n/g' | sed -n '2p' | tr -d ' ') - bots_with_issues="${bots_with_issues}${bot}," - fi - done <<< "$codex_output" - - if [[ "$bots_with_issues" == "codex," ]]; then - pass "T-GATE-3: Bots with ISSUES status are correctly identified" - else - fail "T-GATE-3: Bot status extraction failed" "codex," "got $bots_with_issues" - fi -} - -# Test: Bot re-add logic when previously approved bot has new issues -test_bot_readd_logic() { - # Simulate: claude was approved (removed from active), but now has ISSUES - local configured_bots=("claude" "codex") - local active_bots=("codex") # claude was removed (approved) - - # Codex output shows claude now has issues - declare -A bots_with_issues - bots_with_issues["claude"]="true" - - declare -A bots_approved - # No bots approved this round - - # Re-add logic: process ALL configured bots - local new_active=() - for bot in "${configured_bots[@]}"; do - if [[ "${bots_with_issues[$bot]:-}" == "true" ]]; then - new_active+=("$bot") - fi - done - - # claude should be re-added because it has issues - local found_claude=false - for bot in "${new_active[@]}"; do - if [[ "$bot" == "claude" ]]; then - found_claude=true - break - fi - done - - if [[ "$found_claude" == "true" ]]; then - pass "T-GATE-4: Previously approved bot is re-added when it has new issues" - else - fail "T-GATE-4: Bot re-add logic failed" "claude in new_active" "not found" - fi -} - -# Test: Trigger comment timestamp detection pattern -test_trigger_comment_detection() { - local comments='[ - {"id": 1, 
"body": "Just a regular comment", "created_at": "2026-01-18T10:00:00Z"}, - {"id": 2, "body": "@claude @codex please review", "created_at": "2026-01-18T11:00:00Z"}, - {"id": 3, "body": "Another comment", "created_at": "2026-01-18T12:00:00Z"} - ]' - - # Build pattern for @bot mentions - local bot_pattern="@claude|@codex" - - # Find most recent trigger comment - local trigger_ts - trigger_ts=$(echo "$comments" | jq -r --arg pattern "$bot_pattern" ' - [.[] | select(.body | test($pattern; "i"))] | - sort_by(.created_at) | reverse | .[0].created_at // empty - ') - - if [[ "$trigger_ts" == "2026-01-18T11:00:00Z" ]]; then - pass "T-GATE-5: Trigger comment timestamp is correctly detected" - else - fail "T-GATE-5: Trigger timestamp detection failed" "2026-01-18T11:00:00Z" "got $trigger_ts" - fi -} - -# Test: APPROVE marker detection in Codex output -test_approve_marker_detection() { - local codex_output="### Per-Bot Status -| Bot | Status | Summary | -|-----|--------|---------| -| claude | APPROVE | LGTM | - -### Final Recommendation -All bots have approved. - -APPROVE" - - local last_line - last_line=$(echo "$codex_output" | grep -v '^[[:space:]]*$' | tail -1 | sed 's/^[[:space:]]*//;s/[[:space:]]*$//') - - if [[ "$last_line" == "APPROVE" ]]; then - pass "T-GATE-6: APPROVE marker is correctly recognized" - else - fail "T-GATE-6: APPROVE marker detection failed" "APPROVE" "got $last_line" - fi -} - -# Test: WAITING_FOR_BOTS marker detection -test_waiting_for_bots_marker() { - local codex_output="### Per-Bot Status -| Bot | Status | Summary | -|-----|--------|---------| -| claude | NO_RESPONSE | Bot did not respond | - -### Final Recommendation -Some bots have not responded yet. 
- -WAITING_FOR_BOTS" - - local last_line - last_line=$(echo "$codex_output" | grep -v '^[[:space:]]*$' | tail -1 | sed 's/^[[:space:]]*//;s/[[:space:]]*$//') - - if [[ "$last_line" == "WAITING_FOR_BOTS" ]]; then - pass "T-GATE-7: WAITING_FOR_BOTS marker is correctly recognized" - else - fail "T-GATE-7: WAITING_FOR_BOTS marker detection failed" "WAITING_FOR_BOTS" "got $last_line" - fi -} - -# Run gate-keeper tests -test_comment_deduplication -test_configured_bots_parsing -test_bot_status_extraction -test_bot_readd_logic -test_trigger_comment_detection -test_approve_marker_detection -test_waiting_for_bots_marker - -# ======================================== -# Stop Hook Integration Tests (with mocked gh/codex) -# ======================================== - -echo "" -echo "========================================" -echo "Testing Stop Hook Integration" -echo "========================================" -echo "" - -# Create enhanced mock gh that returns trigger comments -create_enhanced_mock_gh() { - local mock_dir="$1" - local trigger_user="${2:-testuser}" - local trigger_timestamp="${3:-2026-01-18T12:00:00Z}" - - cat > "$mock_dir/gh" << MOCK_GH -#!/usr/bin/env bash -# Enhanced mock gh CLI for stop hook testing - -case "\$1" in - auth) - if [[ "\$2" == "status" ]]; then - echo "Logged in to github.com" - exit 0 - fi - ;; - repo) - if [[ "\$2" == "view" ]]; then - if [[ "\$3" == "--json" && "\$4" == "owner" ]]; then - echo '{"login": "testowner"}' - elif [[ "\$3" == "--json" && "\$4" == "name" ]]; then - echo '{"name": "testrepo"}' - fi - exit 0 - fi - ;; - pr) - if [[ "\$2" == "view" ]]; then - if [[ "\$*" == *"number"* ]]; then - echo '{"number": 123}' - elif [[ "\$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - fi - exit 0 - fi - ;; - api) - # Handle user endpoint for current user - if [[ "\$2" == "user" ]]; then - echo '{"login": "${trigger_user}"}' - exit 0 - fi - # Handle PR comments endpoint - if [[ "\$2" == *"/issues/"*"/comments"* ]]; then - echo '[{"id": 1, 
"user": {"login": "${trigger_user}"}, "created_at": "${trigger_timestamp}", "body": "@claude @codex please review"}]' - exit 0 - fi - # Return empty arrays for other endpoints - echo "[]" - exit 0 - ;; -esac - -echo "Mock gh: unhandled command: \$*" >&2 -exit 1 -MOCK_GH - chmod +x "$mock_dir/gh" -} - -# Test: Trigger comment detection filters by current user -test_trigger_user_filter() { - local test_subdir="$TEST_DIR/stop_hook_user_test" - mkdir -p "$test_subdir" - - # Create mock that returns comments from different users - cat > "$test_subdir/gh" << 'MOCK_GH' -#!/usr/bin/env bash -case "$1" in - api) - if [[ "$2" == "user" ]]; then - echo '{"login": "myuser"}' - exit 0 - fi - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - echo '[ - {"id": 1, "user": {"login": "otheruser"}, "created_at": "2026-01-18T11:00:00Z", "body": "@claude please review"}, - {"id": 2, "user": {"login": "myuser"}, "created_at": "2026-01-18T12:00:00Z", "body": "@claude please review"}, - {"id": 3, "user": {"login": "otheruser"}, "created_at": "2026-01-18T13:00:00Z", "body": "@claude please review"} - ]' - exit 0 - fi - echo "[]" - exit 0 - ;; -esac -exit 1 -MOCK_GH - chmod +x "$test_subdir/gh" - - # Test the jq filter logic - local comments='[ - {"id": 1, "author": "otheruser", "created_at": "2026-01-18T11:00:00Z", "body": "@claude please review"}, - {"id": 2, "author": "myuser", "created_at": "2026-01-18T12:00:00Z", "body": "@claude please review"}, - {"id": 3, "author": "otheruser", "created_at": "2026-01-18T13:00:00Z", "body": "@claude please review"} - ]' - - local trigger_ts - trigger_ts=$(echo "$comments" | jq -r --arg pattern "@claude" --arg user "myuser" ' - [.[] | select(.author == $user and (.body | test($pattern; "i")))] | - sort_by(.created_at) | reverse | .[0].created_at // empty - ') - - if [[ "$trigger_ts" == "2026-01-18T12:00:00Z" ]]; then - pass "T-HOOK-1: Trigger detection filters by current user" - else - fail "T-HOOK-1: Trigger should be from myuser only" 
"2026-01-18T12:00:00Z" "got $trigger_ts" - fi -} - -# Test: Trigger timestamp refresh when newer exists -test_trigger_refresh() { - local old_trigger="2026-01-18T10:00:00Z" - local new_trigger="2026-01-18T12:00:00Z" - - # Simulate the refresh logic from stop hook - local should_update=false - if [[ -z "$old_trigger" ]] || [[ "$new_trigger" > "$old_trigger" ]]; then - should_update=true - fi - - if [[ "$should_update" == "true" ]]; then - pass "T-HOOK-2: Trigger timestamp refreshes when newer comment exists" - else - fail "T-HOOK-2: Should update trigger when newer" "update" "no update" - fi -} - -# Test: Missing trigger blocks exit for round > 0 -test_missing_trigger_blocks() { - local current_round=1 - local last_trigger_at="" - - # Simulate the check from stop hook - local should_block=false - if [[ "$current_round" -gt 0 && -z "$last_trigger_at" ]]; then - should_block=true - fi - - if [[ "$should_block" == "true" ]]; then - pass "T-HOOK-3: Missing trigger comment blocks exit for round > 0" - else - fail "T-HOOK-3: Should block when no trigger" "block" "allow" - fi -} - -# Test: Round 0 uses last_trigger_at when present, started_at as fallback -test_round0_trigger_priority() { - local current_round=0 - local started_at="2026-01-18T10:00:00Z" - local last_trigger_at="2026-01-18T11:00:00Z" - - # Simulate the timestamp selection from stop hook (updated logic) - # ALWAYS prefer last_trigger_at when available - local after_timestamp - if [[ -n "$last_trigger_at" ]]; then - after_timestamp="$last_trigger_at" - elif [[ "$current_round" -eq 0 ]]; then - after_timestamp="$started_at" - fi - - if [[ "$after_timestamp" == "$last_trigger_at" ]]; then - pass "T-HOOK-4: Round 0 uses last_trigger_at when present (not started_at)" - else - fail "T-HOOK-4: Round 0 should prefer last_trigger_at" "$last_trigger_at" "got $after_timestamp" - fi -} - -# Test: Round 0 falls back to started_at when no trigger -test_round0_started_at_fallback() { - local current_round=0 - local 
started_at="2026-01-18T10:00:00Z" - local last_trigger_at="" - - # Simulate the timestamp selection from stop hook - local after_timestamp - if [[ -n "$last_trigger_at" ]]; then - after_timestamp="$last_trigger_at" - elif [[ "$current_round" -eq 0 ]]; then - after_timestamp="$started_at" - fi - - if [[ "$after_timestamp" == "$started_at" ]]; then - pass "T-HOOK-4b: Round 0 falls back to started_at when no trigger" - else - fail "T-HOOK-4b: Round 0 should fall back to started_at" "$started_at" "got $after_timestamp" - fi -} - -# Test: Per-bot timeout anchored to trigger timestamp -test_timeout_anchored_to_trigger() { - # Simulate: trigger at T=0, poll starts at T=60, timeout is 900s - local trigger_epoch=1000 - local poll_start_epoch=1060 - local current_time=1900 # 900s after trigger, 840s after poll start - local timeout=900 - - # With trigger-anchored timeout: - local elapsed_from_trigger=$((current_time - trigger_epoch)) - # With poll-anchored timeout (wrong): - local elapsed_from_poll=$((current_time - poll_start_epoch)) - - local timed_out_trigger=false - local timed_out_poll=false - - if [[ $elapsed_from_trigger -ge $timeout ]]; then - timed_out_trigger=true - fi - if [[ $elapsed_from_poll -ge $timeout ]]; then - timed_out_poll=true - fi - - # Should be timed out based on trigger (900s elapsed), not poll (840s elapsed) - if [[ "$timed_out_trigger" == "true" && "$timed_out_poll" == "false" ]]; then - pass "T-HOOK-5: Per-bot timeout is anchored to trigger timestamp" - else - fail "T-HOOK-5: Timeout should be from trigger, not poll start" "trigger-based timeout" "poll-based timeout" - fi -} - -# Test: State file includes configured_bots -test_state_has_configured_bots() { - local test_subdir="$TEST_DIR/state_configured_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 1 -configured_bots: - - claude - - codex -active_bots: - - claude 
-last_trigger_at: 2026-01-18T12:00:00Z ---- -EOF - - # Extract configured_bots count - local configured_count - configured_count=$(grep -c "^ - " "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" 2>/dev/null | head -1) - - if [[ "$configured_count" -ge 2 ]]; then - pass "T-HOOK-6: State file tracks configured_bots separately" - else - fail "T-HOOK-6: State should have configured_bots" "2+ bots" "got $configured_count" - fi -} - -# Test: Round file naming consistency -test_round_file_naming() { - # All round-N files should use NEXT_ROUND - local current_round=1 - local next_round=$((current_round + 1)) - - local comment_file="round-${next_round}-pr-comment.md" - local check_file="round-${next_round}-pr-check.md" - local feedback_file="round-${next_round}-pr-feedback.md" - - # All should use next_round (2) - if [[ "$comment_file" == "round-2-pr-comment.md" && \ - "$check_file" == "round-2-pr-check.md" && \ - "$feedback_file" == "round-2-pr-feedback.md" ]]; then - pass "T-HOOK-7: Round file naming is consistent (all use NEXT_ROUND)" - else - fail "T-HOOK-7: Round files should all use NEXT_ROUND" "round-2-*" "inconsistent" - fi -} - -# Run stop hook integration tests -test_trigger_user_filter -test_trigger_refresh -test_missing_trigger_blocks -test_round0_trigger_priority -test_round0_started_at_fallback -test_timeout_anchored_to_trigger -test_state_has_configured_bots -test_round_file_naming - -# ======================================== -# Stop Hook End-to-End Tests (Execute Hook with Mocked gh/codex) -# ======================================== - -echo "" -echo "========================================" -echo "Testing Stop Hook End-to-End Execution" -echo "========================================" -echo "" - -# Test: Stop hook blocks when no resolve file exists -test_e2e_missing_resolve_blocks() { - local test_subdir="$TEST_DIR/e2e_resolve_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Create state file - cat > 
"$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - claude -active_bots: - - claude -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 2026-01-18T12:00:00Z -last_trigger_at: ---- -EOF - - # Create mock binaries - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/usr/bin/env bash -case "$1" in - api) - if [[ "$2" == "user" ]]; then - echo '{"login": "testuser"}' - exit 0 - fi - echo "[]" - exit 0 - ;; - pr) - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) echo "/tmp/git" ;; - status) echo "" ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - # Run stop hook with mocked environment - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - local hook_output - hook_output=$(echo '{}' | "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1) || true - - # Check for block decision about missing resolve file - if echo "$hook_output" | grep -q "Resolution Summary Missing\|resolution summary\|round-0-pr-resolve"; then - pass "T-E2E-1: Stop hook blocks when resolve file missing" - else - fail "T-E2E-1: Stop hook should block for missing resolve" "block message" "got: $hook_output" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Stop hook detects trigger comment and updates state -test_e2e_trigger_detection() { - local test_subdir="$TEST_DIR/e2e_trigger_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Create state file with empty last_trigger_at - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - 
- claude -active_bots: - - claude -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 2026-01-18T12:00:00Z -last_trigger_at: ---- -EOF - - # Create resolve file - echo "# Resolution Summary" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - # Create mock binaries that return trigger comment - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that properly returns jq-parsed user and trigger comments - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/usr/bin/env bash -case "$1" in - api) - if [[ "$2" == "user" ]]; then - # gh api user --jq '.login' returns just the login string - if [[ "$*" == *"--jq"* ]]; then - echo "testuser" - else - echo '{"login": "testuser"}' - fi - exit 0 - fi - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - # When --jq and --paginate are used, gh applies jq per-element and outputs transformed objects - # The hook's jq: '.[] | {id: .id, author: .user.login, created_at: .created_at, body: .body}' - if [[ "$*" == *"--jq"* ]]; then - # Return pre-transformed format (what jq would output) - echo '{"id": 1, "author": "testuser", "created_at": "2026-01-18T13:00:00Z", "body": "@claude please review"}' - else - # Return raw GitHub API format - echo '[{"id": 1, "user": {"login": "testuser"}, "created_at": "2026-01-18T13:00:00Z", "body": "@claude please review"}]' - fi - exit 0 - fi - echo "[]" - exit 0 - ;; - pr) - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) echo "/tmp/git" ;; - status) echo "" ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - # Run stop hook - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - # Capture stderr for debug messages - local hook_stderr - hook_stderr=$(echo '{}' | "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1 
>/dev/null) || true - - # Check for trigger detection message OR that last_trigger_at is being used - # (which indicates the trigger was detected and persisted) - if echo "$hook_stderr" | grep -q "Found trigger comment at:\|using trigger timestamp"; then - pass "T-E2E-2: Stop hook detects and reports trigger comment" - else - fail "T-E2E-2: Stop hook should detect trigger" "trigger detected" "got: $hook_stderr" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Stop hook handles paginated API response (multi-page trigger detection) -test_e2e_pagination_runtime() { - local test_subdir="$TEST_DIR/e2e_pagination_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Create state file - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - claude -active_bots: - - claude -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: ---- -EOF - - # Create resolve file - echo "# Resolution Summary" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that simulates paginated response (returns multiple JSON arrays) - # The trigger comment is on page 2 (second array) - only visible if pagination works - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/usr/bin/env bash -case "$1" in - api) - if [[ "$2" == "user" ]]; then - if [[ "$*" == *"--jq"* ]]; then - echo "testuser" - else - echo '{"login": "testuser"}' - fi - exit 0 - fi - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - # When --jq and --paginate are used, gh applies jq per-element and outputs transformed objects - # Page 1: old comment without trigger - # Page 2: newer comment WITH trigger - must combine to find it - if [[ "$*" == *"--paginate"* ]] && [[ "$*" == *"--jq"* ]]; then - # 
--paginate with --jq: output transformed objects (one per line) - echo '{"id": 1, "author": "other", "created_at": "2026-01-18T11:00:00Z", "body": "old comment"}' - echo '{"id": 2, "author": "testuser", "created_at": "2026-01-18T12:00:00Z", "body": "@claude please review the pagination fix"}' - elif [[ "$*" == *"--paginate"* ]]; then - # --paginate without --jq: output raw arrays - echo '[{"id": 1, "user": {"login": "other"}, "created_at": "2026-01-18T11:00:00Z", "body": "old comment"}]' - echo '[{"id": 2, "user": {"login": "testuser"}, "created_at": "2026-01-18T12:00:00Z", "body": "@claude please review the pagination fix"}]' - else - # No pagination: only first page (trigger NOT found) - echo '[{"id": 1, "user": {"login": "other"}, "created_at": "2026-01-18T11:00:00Z", "body": "old comment"}]' - fi - exit 0 - fi - echo "[]" - exit 0 - ;; - pr) - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) echo "/tmp/git" ;; - status) echo "" ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - # Run stop hook - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - local hook_stderr - hook_stderr=$(echo '{}' | "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1 >/dev/null) || true - - # Check that trigger was found (proving pagination worked to combine arrays) - if echo "$hook_stderr" | grep -q "Found trigger comment at:\|using trigger timestamp"; then - pass "T-E2E-3: Pagination combines arrays and finds trigger on page 2" - else - fail "T-E2E-3: Pagination should find trigger on page 2" "trigger detected" "got: $hook_stderr" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Stop hook uses last_trigger_at when present (even for round 0) -test_e2e_trigger_priority_runtime() { - local test_subdir="$TEST_DIR/e2e_priority_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - 
- # Create state file with BOTH started_at and last_trigger_at set - # The trigger timestamp is LATER than started_at - if priority works, - # the hook should use the trigger timestamp (not started_at) - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - claude -active_bots: - - claude -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: 2026-01-18T14:30:00Z ---- -EOF - - # Create resolve file - echo "# Resolution Summary" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/usr/bin/env bash -case "$1" in - api) - if [[ "$2" == "user" ]]; then - if [[ "$*" == *"--jq"* ]]; then - echo "testuser" - fi - exit 0 - fi - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - echo '[{"id": 1, "author": "testuser", "created_at": "2026-01-18T14:30:00Z", "body": "@claude review"}]' - exit 0 - fi - echo "[]" - exit 0 - ;; - pr) - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) echo "/tmp/git" ;; - status) echo "" ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - local hook_stderr - hook_stderr=$(echo '{}' | "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1 >/dev/null) || true - - # Check that it reports using trigger timestamp for --after (not started_at) - # Must match the SPECIFIC log format: "Round 0: using trigger timestamp for --after: " - # This proves last_trigger_at is prioritized even for round 0 - if echo "$hook_stderr" | grep -q "Round 0: using trigger timestamp for 
--after: 2026-01-18T14:30:00Z"; then - pass "T-E2E-4: Round 0 uses last_trigger_at for --after (not started_at)" - else - fail "T-E2E-4: Round 0 should use last_trigger_at for --after" \ - "Round 0: using trigger timestamp for --after: 2026-01-18T14:30:00Z" \ - "got: $hook_stderr" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Run end-to-end tests -test_e2e_missing_resolve_blocks -test_e2e_trigger_detection -test_e2e_pagination_runtime -test_e2e_trigger_priority_runtime - -# ======================================== -# Approval-Only Review Tests -# ======================================== - -echo "" -echo "========================================" -echo "Testing Approval-Only Review Handling" -echo "========================================" -echo "" - -# Test: Empty-body PR reviews are captured with state placeholder -test_approval_only_review_captured() { - # Simulate PR review with APPROVED state but empty body - local reviews='[ - {"id": 1, "user": {"login": "claude[bot]"}, "state": "APPROVED", "body": null, "submitted_at": "2026-01-18T12:00:00Z"}, - {"id": 2, "user": {"login": "claude[bot]"}, "state": "APPROVED", "body": "", "submitted_at": "2026-01-18T12:01:00Z"}, - {"id": 3, "user": {"login": "claude[bot]"}, "state": "CHANGES_REQUESTED", "body": "Fix bug", "submitted_at": "2026-01-18T12:02:00Z"} - ]' - - # Apply the same jq logic as poll-pr-reviews.sh (fixed version) - local processed - processed=$(echo "$reviews" | jq '[.[] | { - id: .id, - author: .user.login, - state: .state, - body: (if .body == null or .body == "" then "[Review state: \(.state)]" else .body end) - }]') - - local count - count=$(echo "$processed" | jq 'length') - - if [[ "$count" == "3" ]]; then - pass "T-APPROVE-1: Empty-body PR reviews are captured (count=3)" - else - fail "T-APPROVE-1: All reviews should be captured including empty-body" "3" "got $count" - fi - - # Check that empty body gets placeholder - local placeholder_count - placeholder_count=$(echo "$processed" | jq '[.[] | 
select(.body | test("\\[Review state:"))] | length') - - if [[ "$placeholder_count" == "2" ]]; then - pass "T-APPROVE-2: Empty-body reviews get state placeholder" - else - fail "T-APPROVE-2: Empty-body reviews should get placeholder" "2" "got $placeholder_count" - fi -} - -# Test: Approval-only reviews match bot patterns for polling -test_approval_polls_correctly() { - local bot_pattern="claude\\[bot\\]" - local reviews='[ - {"type": "pr_review", "author": "claude[bot]", "state": "APPROVED", "body": "[Review state: APPROVED]", "created_at": "2026-01-18T12:00:00Z"} - ]' - - local filtered - filtered=$(echo "$reviews" | jq --arg pattern "$bot_pattern" '[.[] | select(.author | test($pattern; "i"))]') - local count - count=$(echo "$filtered" | jq 'length') - - if [[ "$count" == "1" ]]; then - pass "T-APPROVE-3: Approval-only reviews match bot pattern for polling" - else - fail "T-APPROVE-3: Approval-only review should match bot" "1" "got $count" - fi -} - -# Run approval-only review tests -test_approval_only_review_captured -test_approval_polls_correctly - -# ======================================== -# Fixture-Backed Fetch/Poll Tests -# ======================================== - -echo "" -echo "========================================" -echo "Testing Fetch/Poll with Fixture-Backed Mock GH" -echo "========================================" -echo "" - -# Set up fixture-backed mock gh -setup_fixture_mock_gh() { - local mock_bin_dir="$TEST_DIR/mock_bin" - local fixtures_dir="$SCRIPT_DIR/fixtures" - - # Create the mock gh - "$SCRIPT_DIR/setup-fixture-mock-gh.sh" "$mock_bin_dir" "$fixtures_dir" > /dev/null - - echo "$mock_bin_dir" -} - -# Test: fetch-pr-comments.sh returns all comment types including approval-only reviews -test_fetch_pr_comments_with_fixtures() { - cd "$TEST_DIR" - - local mock_bin_dir - mock_bin_dir=$(setup_fixture_mock_gh) - - # Run fetch-pr-comments.sh with mock gh in PATH - local output_file="$TEST_DIR/pr-comments.md" - PATH="$mock_bin_dir:$PATH" 
"$PROJECT_ROOT/scripts/fetch-pr-comments.sh" 123 "$output_file" - - local exit_code=$? - - if [[ $exit_code -ne 0 ]]; then - fail "T-FIXTURE-1: fetch-pr-comments.sh should succeed" "exit=0" "exit=$exit_code" - return - fi - - if [[ ! -f "$output_file" ]]; then - fail "T-FIXTURE-1: Output file should exist" "file exists" "file not found" - return - fi - - # Check for issue comments - if ! grep -q "humanuser" "$output_file"; then - fail "T-FIXTURE-1: Output should contain human issue comment" "humanuser comment" "not found" - return - fi - - # Check for review comments (inline code comments) - if ! grep -q "const instead of let" "$output_file"; then - fail "T-FIXTURE-1: Output should contain inline review comment" "const instead of let" "not found" - return - fi - - # Check for approval-only PR reviews with placeholder - if ! grep -q "\[Review state: APPROVED\]" "$output_file"; then - fail "T-FIXTURE-1: Output should contain approval-only review with placeholder" "[Review state: APPROVED]" "not found" - return - fi - - pass "T-FIXTURE-1: fetch-pr-comments.sh returns all comment types including approval-only" - cd "$SCRIPT_DIR" -} - -# Test: fetch-pr-comments.sh respects --after timestamp filter -test_fetch_pr_comments_after_filter() { - cd "$TEST_DIR" - - local mock_bin_dir - mock_bin_dir=$(setup_fixture_mock_gh) - - # Run with --after filter (after 12:00, should exclude early comments) - local output_file="$TEST_DIR/pr-comments-filtered.md" - PATH="$mock_bin_dir:$PATH" "$PROJECT_ROOT/scripts/fetch-pr-comments.sh" 123 "$output_file" --after "2026-01-18T12:00:00Z" - - local exit_code=$? - - if [[ $exit_code -ne 0 ]]; then - fail "T-FIXTURE-2: fetch-pr-comments.sh --after should succeed" "exit=0" "exit=$exit_code" - return - fi - - # Should include late comments (13:00+ approvals) - if ! 
grep -q "\[Review state: APPROVED\]" "$output_file"; then - fail "T-FIXTURE-2: Should include late approval-only review" "[Review state: APPROVED]" "not found" - return - fi - - # Should NOT include early human comment from 09:00 - # (humanreviewer's "LGTM!" was at 09:00) - if grep -q "LGTM" "$output_file"; then - fail "T-FIXTURE-2: Should exclude comments before --after timestamp" "no LGTM" "LGTM found" - return - fi - - pass "T-FIXTURE-2: fetch-pr-comments.sh --after filter works correctly" - cd "$SCRIPT_DIR" -} - -# Test: poll-pr-reviews.sh returns JSON with approval-only reviews -test_poll_pr_reviews_with_fixtures() { - cd "$TEST_DIR" - - local mock_bin_dir - mock_bin_dir=$(setup_fixture_mock_gh) - - # Run poll-pr-reviews.sh with mock gh in PATH - # Use early timestamp to catch all bot reviews - local output - output=$(PATH="$mock_bin_dir:$PATH" "$PROJECT_ROOT/scripts/poll-pr-reviews.sh" 123 \ - --after "2026-01-18T10:00:00Z" \ - --bots "claude,codex") - - local exit_code=$? - - if [[ $exit_code -ne 0 ]]; then - fail "T-FIXTURE-3: poll-pr-reviews.sh should succeed" "exit=0" "exit=$exit_code" - return - fi - - # Validate JSON structure - if ! echo "$output" | jq . > /dev/null 2>&1; then - fail "T-FIXTURE-3: Output should be valid JSON" "valid JSON" "invalid JSON" - return - fi - - # Check for approval-only reviews in comments - local has_placeholder - has_placeholder=$(echo "$output" | jq '[.comments[]? 
| select(.body | test("\\[Review state:"))] | length') - - if [[ "$has_placeholder" -lt 1 ]]; then - fail "T-FIXTURE-3: Should include approval-only reviews with placeholder" ">=1" "$has_placeholder" - return - fi - - # Check bots_responded includes both bots - local bots_count - bots_count=$(echo "$output" | jq '.bots_responded | length') - - if [[ "$bots_count" -lt 1 ]]; then - fail "T-FIXTURE-3: Should have bots in bots_responded" ">=1" "$bots_count" - return - fi - - pass "T-FIXTURE-3: poll-pr-reviews.sh returns approval-only reviews in JSON" - cd "$SCRIPT_DIR" -} - -# Test: poll-pr-reviews.sh filters by --after timestamp correctly -test_poll_pr_reviews_after_filter() { - cd "$TEST_DIR" - - local mock_bin_dir - mock_bin_dir=$(setup_fixture_mock_gh) - - # Use timestamp that filters out early CHANGES_REQUESTED (11:00) - # but includes late APPROVED reviews (13:00, 13:30) - local output - output=$(PATH="$mock_bin_dir:$PATH" "$PROJECT_ROOT/scripts/poll-pr-reviews.sh" 123 \ - --after "2026-01-18T12:30:00Z" \ - --bots "claude,codex") - - local exit_code=$? - - if [[ $exit_code -ne 0 ]]; then - fail "T-FIXTURE-4: poll-pr-reviews.sh --after should succeed" "exit=0" "exit=$exit_code" - return - fi - - # Should have claude[bot] approval at 13:00 and codex approval at 13:30 - local comment_count - comment_count=$(echo "$output" | jq '.comments | length') - - # At minimum, should have the late approvals - if [[ "$comment_count" -lt 1 ]]; then - fail "T-FIXTURE-4: Should include late approvals" ">=1" "$comment_count" - return - fi - - # Should NOT include the CHANGES_REQUESTED from 11:00 (before our --after) - local changes_requested - changes_requested=$(echo "$output" | jq '[.comments[]? 
| select(.body | test("security concerns"))] | length') - - if [[ "$changes_requested" -gt 0 ]]; then - fail "T-FIXTURE-4: Should exclude comments before --after" "0" "$changes_requested" - return - fi - - pass "T-FIXTURE-4: poll-pr-reviews.sh --after filter excludes early comments" - cd "$SCRIPT_DIR" -} - -# Run fixture-backed tests -test_fetch_pr_comments_with_fixtures -test_fetch_pr_comments_after_filter -test_poll_pr_reviews_with_fixtures -test_poll_pr_reviews_after_filter - -# ======================================== -# Wrong-Round Validation Tests -# ======================================== - -echo "" -echo "========================================" -echo "Testing Wrong-Round Validation" -echo "========================================" -echo "" - -# Test: Wrong-round pr-resolve write is blocked -test_wrong_round_pr_resolve_blocked() { - cd "$TEST_DIR" - - local timestamp="2026-01-18_15-00-00" - local loop_dir=".humanize/pr-loop/$timestamp" - mkdir -p "$loop_dir" - - # State says current_round is 2 - cat > "$loop_dir/state.md" << EOF ---- -current_round: 2 -max_iterations: 42 -pr_number: 123 ---- -EOF - - # Try to write to round-0 (wrong round) - local hook_input='{"tool_name": "Write", "tool_input": {"file_path": "'$TEST_DIR'/.humanize/pr-loop/'$timestamp'/round-0-pr-resolve.md", "content": "wrong round"}}' - - local output - local exit_code - output=$(echo "$hook_input" | "$PROJECT_ROOT/hooks/loop-write-validator.sh" 2>&1) || exit_code=$? 
- exit_code=${exit_code:-0} - - if [[ $exit_code -eq 2 ]] && echo "$output" | grep -qi "wrong round"; then - pass "T-ROUND-1: Wrong-round pr-resolve write is blocked" - else - fail "T-ROUND-1: Wrong-round pr-resolve should be blocked" "exit=2, wrong round" "exit=$exit_code" - fi - - cd "$SCRIPT_DIR" -} - -# Test: Correct-round pr-resolve write is allowed -test_correct_round_pr_resolve_allowed() { - cd "$TEST_DIR" - - local timestamp="2026-01-18_15-01-00" - local loop_dir=".humanize/pr-loop/$timestamp" - mkdir -p "$loop_dir" - - # State says current_round is 2 - cat > "$loop_dir/state.md" << EOF ---- -current_round: 2 -max_iterations: 42 -pr_number: 123 ---- -EOF - - # Write to round-2 (correct round) - local hook_input='{"tool_name": "Write", "tool_input": {"file_path": "'$TEST_DIR'/.humanize/pr-loop/'$timestamp'/round-2-pr-resolve.md", "content": "correct round"}}' - - local output - local exit_code - output=$(echo "$hook_input" | "$PROJECT_ROOT/hooks/loop-write-validator.sh" 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - - if [[ $exit_code -eq 0 ]]; then - pass "T-ROUND-2: Correct-round pr-resolve write is allowed" - else - fail "T-ROUND-2: Correct-round pr-resolve should be allowed" "exit=0" "exit=$exit_code" - fi - - cd "$SCRIPT_DIR" -} - -# Test: Wrong-round pr-resolve edit is blocked -test_wrong_round_pr_resolve_edit_blocked() { - cd "$TEST_DIR" - - local timestamp="2026-01-18_15-02-00" - local loop_dir=".humanize/pr-loop/$timestamp" - mkdir -p "$loop_dir" - - cat > "$loop_dir/state.md" << EOF ---- -current_round: 3 -max_iterations: 42 -pr_number: 123 ---- -EOF - - # Try to edit round-1 (wrong round) - local hook_input='{"tool_name": "Edit", "tool_input": {"file_path": "'$TEST_DIR'/.humanize/pr-loop/'$timestamp'/round-1-pr-resolve.md", "old_string": "x", "new_string": "y"}}' - - local output - local exit_code - output=$(echo "$hook_input" | "$PROJECT_ROOT/hooks/loop-edit-validator.sh" 2>&1) || exit_code=$? 
- exit_code=${exit_code:-0} - - if [[ $exit_code -eq 2 ]] && echo "$output" | grep -qi "wrong round"; then - pass "T-ROUND-3: Wrong-round pr-resolve edit is blocked" - else - fail "T-ROUND-3: Wrong-round pr-resolve edit should be blocked" "exit=2, wrong round" "exit=$exit_code" - fi - - cd "$SCRIPT_DIR" -} - -# Run wrong-round validation tests -test_wrong_round_pr_resolve_blocked -test_correct_round_pr_resolve_allowed -test_wrong_round_pr_resolve_edit_blocked - -# ======================================== -# Monitor PR Active Bots Tests -# ======================================== - -echo "" -echo "========================================" -echo "Testing Monitor PR Active Bots Display" -echo "========================================" -echo "" - -# Test: Monitor parses YAML list for active_bots -test_monitor_yaml_list_parsing() { - local test_subdir="$TEST_DIR/monitor_yaml_test" - mkdir -p "$test_subdir" - - # Use helper script to create state file (avoids validator blocking) - "$SCRIPT_DIR/setup-monitor-test-env.sh" "$test_subdir" yaml_list >/dev/null - - # Source the humanize script and run monitor from test subdirectory (use --once for non-interactive) - cd "$test_subdir" - local output - output=$(source "$PROJECT_ROOT/scripts/humanize.sh" && humanize monitor pr --once 2>&1) || true - cd "$SCRIPT_DIR" - - # Check that active bots are displayed correctly (comma-separated) - if echo "$output" | grep -q "Active Bots:.*claude.*codex\|Active Bots:.*codex.*claude"; then - pass "T-MONITOR-1: Monitor parses and displays YAML list active_bots" - else - # Also accept claude,codex format - if echo "$output" | grep -q "Active Bots:.*claude,codex\|Active Bots:.*codex,claude"; then - pass "T-MONITOR-1: Monitor parses and displays YAML list active_bots" - else - fail "T-MONITOR-1: Monitor should display active bots from YAML list" "claude,codex" "got: $output" - fi - fi -} - -# Test: Monitor shows configured_bots separately -test_monitor_configured_bots() { - local 
test_subdir="$TEST_DIR/monitor_configured_test" - mkdir -p "$test_subdir" - - # Use helper script to create state file (avoids validator blocking) - "$SCRIPT_DIR/setup-monitor-test-env.sh" "$test_subdir" configured >/dev/null - - # Source the humanize script and run monitor from test subdirectory (use --once for non-interactive) - cd "$test_subdir" - local output - output=$(source "$PROJECT_ROOT/scripts/humanize.sh" && humanize monitor pr --once 2>&1) || true - cd "$SCRIPT_DIR" - - # Check that both configured and active bots are displayed - if echo "$output" | grep -q "Configured Bots:.*claude.*codex\|Configured Bots:.*codex.*claude\|Configured Bots:.*claude,codex\|Configured Bots:.*codex,claude"; then - pass "T-MONITOR-2: Monitor displays configured_bots" - else - fail "T-MONITOR-2: Monitor should display configured bots" "claude,codex" "got: $output" - fi -} - -# Test: Monitor shows 'none' when active_bots is empty -test_monitor_empty_active_bots() { - local test_subdir="$TEST_DIR/monitor_empty_test" - mkdir -p "$test_subdir" - - # Use helper script to create state file (avoids validator blocking) - "$SCRIPT_DIR/setup-monitor-test-env.sh" "$test_subdir" empty >/dev/null - - # Source the humanize script and run monitor from test subdirectory (use --once for non-interactive) - cd "$test_subdir" - local output - output=$(source "$PROJECT_ROOT/scripts/humanize.sh" && humanize monitor pr --once 2>&1) || true - cd "$SCRIPT_DIR" - - # Check that active bots shows 'none' - if echo "$output" | grep -q "Active Bots:.*none"; then - pass "T-MONITOR-3: Monitor shows 'none' for empty active_bots" - else - fail "T-MONITOR-3: Monitor should show 'none' for empty active_bots" "none" "got: $output" - fi -} - -# Run monitor tests -test_monitor_yaml_list_parsing -test_monitor_configured_bots -test_monitor_empty_active_bots - -} diff --git a/tests/test-pr-loop-lib.sh b/tests/test-pr-loop-lib.sh deleted file mode 100644 index a619e052..00000000 --- a/tests/test-pr-loop-lib.sh +++ 
/dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/env bash -# -# Common library for PR loop tests -# -# Provides shared setup, helpers, and mock functions used by all test modules. -# -# Usage: source test-pr-loop-lib.sh -# - -# Determine script location -if [[ -z "${TEST_PR_LOOP_LIB_LOADED:-}" ]]; then - TEST_PR_LOOP_LIB_LOADED=1 - - # Get directories if not already set - SCRIPT_DIR="${SCRIPT_DIR:-$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)}" - PROJECT_ROOT="${PROJECT_ROOT:-$(cd "$SCRIPT_DIR/.." && pwd)}" - - # Source test helpers if not already sourced - if ! declare -f setup_test_dir &>/dev/null; then - source "$SCRIPT_DIR/test-helpers.sh" - fi - - # ======================================== - # Mock Creation Functions - # ======================================== - - # Create mock scripts for gh CLI - create_mock_gh() { - local mock_dir="$1" - mkdir -p "$mock_dir" - - cat > "$mock_dir/gh" << 'MOCK_GH' -#!/usr/bin/env bash -# Mock gh CLI for testing - -case "$1" in - auth) - if [[ "$2" == "status" ]]; then - echo "Logged in to github.com" - exit 0 - fi - ;; - repo) - if [[ "$2" == "view" ]]; then - if [[ "$3" == "--json" && "$4" == "owner" ]]; then - echo '{"login": "testowner"}' - elif [[ "$3" == "--json" && "$4" == "name" ]]; then - echo '{"name": "testrepo"}' - fi - exit 0 - fi - ;; - pr) - if [[ "$2" == "view" ]]; then - if [[ "$*" == *"commits"* ]] && [[ "$*" == *"--jq"* ]]; then - # Return just the timestamp when --jq is used - echo "2026-01-18T12:00:00Z" - exit 0 - elif [[ "$*" == *"commits"* ]]; then - echo '{"commits":[{"committedDate":"2026-01-18T12:00:00Z"}]}' - exit 0 - elif [[ "$3" == "--json" && "$4" == "number" ]]; then - echo '{"number": 123}' - exit 0 - elif [[ "$3" == "--json" && "$4" == "state" ]] || [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - exit 0 - fi - ;; - api) - # Handle user endpoint - if [[ "$2" == "user" ]]; then - echo "testuser" - exit 0 - fi - # Return empty arrays for comment/review fetching - echo "[]" - exit 
0 - ;; -esac - -echo "Mock gh: unhandled command: $*" >&2 -exit 1 -MOCK_GH - chmod +x "$mock_dir/gh" - } - - # Create mock codex command - create_mock_codex() { - local mock_dir="$1" - - cat > "$mock_dir/codex" << 'MOCK_CODEX' -#!/usr/bin/env bash -# Mock codex CLI for testing -echo "Mock codex output" -exit 0 -MOCK_CODEX - chmod +x "$mock_dir/codex" - } - - # ======================================== - # Test Environment Setup - # ======================================== - - # Initialize test environment (call once at start of test run) - init_pr_loop_test_env() { - setup_test_dir - - # Create mock scripts directory and wire it into PATH - MOCK_BIN_DIR="$TEST_DIR/mock_bin" - mkdir -p "$MOCK_BIN_DIR" - export PATH="$MOCK_BIN_DIR:$PATH" - - # Initialize mock gh and codex in the PATH - create_mock_gh "$MOCK_BIN_DIR" - create_mock_codex "$MOCK_BIN_DIR" - - export MOCK_BIN_DIR - } - - # ======================================== - # Test Result Summary - # ======================================== - - # Print test summary and exit with appropriate code - print_test_summary() { - echo "" - echo "========================================" - echo "PR Loop Tests" - echo "========================================" - echo -e "Passed: \033[0;32m$TESTS_PASSED\033[0m" - echo -e "Failed: \033[0;31m$TESTS_FAILED\033[0m" - echo "" - - if [[ $TESTS_FAILED -gt 0 ]]; then - echo -e "\033[0;31mSome tests failed!\033[0m" - return 1 - else - echo -e "\033[0;32mAll tests passed!\033[0m" - return 0 - fi - } -fi diff --git a/tests/test-pr-loop-scripts.sh b/tests/test-pr-loop-scripts.sh deleted file mode 100644 index d77b9067..00000000 --- a/tests/test-pr-loop-scripts.sh +++ /dev/null @@ -1,410 +0,0 @@ -#!/usr/bin/env bash -# -# PR Loop Script Tests -# -# Tests for script argument parsing and validation: -# - setup-pr-loop.sh -# - cancel-pr-loop.sh -# - fetch-pr-comments.sh -# - poll-pr-reviews.sh -# -# Usage: source test-pr-loop-scripts.sh && run_script_tests -# - -# 
======================================== -# setup-pr-loop.sh Tests -# ======================================== - -run_setup_tests() { - echo "" - echo "========================================" - echo "Testing setup-pr-loop.sh" - echo "========================================" - echo "" - - SETUP_SCRIPT="$PROJECT_ROOT/scripts/setup-pr-loop.sh" - - # Test: Help flag works - test_setup_help() { - local output - output=$("$SETUP_SCRIPT" --help 2>&1) || true - if echo "$output" | grep -q "start-pr-loop"; then - pass "T-POS-1: --help displays usage information" - else - fail "T-POS-1: --help should display usage information" - fi - } - - # Test: Missing bot flag shows error - test_setup_no_bot_flag() { - local output - local exit_code - output=$("$SETUP_SCRIPT" 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - - if [[ $exit_code -ne 0 ]] && echo "$output" | grep -qi "at least one bot flag"; then - pass "T-NEG-1: Missing bot flag shows error" - else - fail "T-NEG-1: Missing bot flag should show error" "exit code != 0 and error message" "exit=$exit_code, output=$output" - fi - } - - # Test: Invalid bot flag shows error - test_setup_invalid_bot() { - local output - local exit_code - output=$("$SETUP_SCRIPT" --invalid-bot 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - - if [[ $exit_code -ne 0 ]] && echo "$output" | grep -qi "unknown option"; then - pass "T-NEG-2: Invalid bot flag shows error" - else - fail "T-NEG-2: Invalid bot flag should show error" "exit code != 0" "exit=$exit_code" - fi - } - - # Test: --claude flag is recognized - test_setup_claude_flag() { - # This will fail because no git repo, but we test that --claude is parsed - local output - output=$("$SETUP_SCRIPT" --claude 2>&1) || true - - # Should not complain about missing bot flag - if ! 
echo "$output" | grep -qi "at least one bot flag"; then - pass "T-POS-2: --claude flag is recognized" - else - fail "T-POS-2: --claude flag should be recognized" - fi - } - - # Test: --codex flag is recognized - test_setup_codex_flag() { - local output - output=$("$SETUP_SCRIPT" --codex 2>&1) || true - - if ! echo "$output" | grep -qi "at least one bot flag"; then - pass "T-POS-3: --codex flag is recognized" - else - fail "T-POS-3: --codex flag should be recognized" - fi - } - - # Test: Both bot flags work together - test_setup_both_bots() { - local output - output=$("$SETUP_SCRIPT" --claude --codex 2>&1) || true - - if ! echo "$output" | grep -qi "at least one bot flag"; then - pass "T-POS-4: Both bot flags work together" - else - fail "T-POS-4: Both bot flags should work together" - fi - } - - # Test: --max argument is parsed - test_setup_max_arg() { - local output - output=$("$SETUP_SCRIPT" --claude --max 10 2>&1) || true - - # Should not complain about --max - if ! echo "$output" | grep -qi "max requires"; then - pass "T-POS-5: --max argument is parsed" - else - fail "T-POS-5: --max argument should be parsed" - fi - } - - # Test: --max with invalid value shows error - test_setup_max_invalid() { - local output - local exit_code - output=$("$SETUP_SCRIPT" --claude --max abc 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - - if [[ $exit_code -ne 0 ]] && echo "$output" | grep -qi "must be.*integer"; then - pass "T-NEG-3: --max with invalid value shows error" - else - fail "T-NEG-3: --max with invalid value should show error" - fi - } - - # Test: --codex-model argument is parsed - test_setup_codex_model() { - local output - output=$("$SETUP_SCRIPT" --claude --codex-model gpt-4:high 2>&1) || true - - if ! 
echo "$output" | grep -qi "codex-model requires"; then - pass "T-POS-6: --codex-model argument is parsed" - else - fail "T-POS-6: --codex-model argument should be parsed" - fi - } - - # Test: --codex-timeout argument is parsed - test_setup_codex_timeout() { - local output - output=$("$SETUP_SCRIPT" --claude --codex-timeout 1800 2>&1) || true - - if ! echo "$output" | grep -qi "codex-timeout requires"; then - pass "T-POS-7: --codex-timeout argument is parsed" - else - fail "T-POS-7: --codex-timeout argument should be parsed" - fi - } - - # Run setup tests - test_setup_help - test_setup_no_bot_flag - test_setup_invalid_bot - test_setup_claude_flag - test_setup_codex_flag - test_setup_both_bots - test_setup_max_arg - test_setup_max_invalid - test_setup_codex_model - test_setup_codex_timeout -} - -# ======================================== -# cancel-pr-loop.sh Tests -# ======================================== - -run_cancel_tests() { - echo "" - echo "========================================" - echo "Testing cancel-pr-loop.sh" - echo "========================================" - echo "" - - CANCEL_SCRIPT="$PROJECT_ROOT/scripts/cancel-pr-loop.sh" - - # Test: Help flag works - test_cancel_help() { - local output - output=$("$CANCEL_SCRIPT" --help 2>&1) || true - if echo "$output" | grep -q "cancel-pr-loop"; then - pass "T-POS-8: --help displays usage information" - else - fail "T-POS-8: --help should display usage information" - fi - } - - # Test: No loop returns NO_LOOP - test_cancel_no_loop() { - cd "$TEST_DIR" - # Export CLAUDE_PROJECT_DIR to ensure cancel script looks in test dir - export CLAUDE_PROJECT_DIR="$TEST_DIR" - local output - local exit_code - output=$("$CANCEL_SCRIPT" 2>&1) || exit_code=$? 
- exit_code=${exit_code:-0} - unset CLAUDE_PROJECT_DIR - - if [[ $exit_code -eq 1 ]] && echo "$output" | grep -q "NO_LOOP"; then - pass "T-NEG-4: No active loop returns NO_LOOP" - else - fail "T-NEG-4: No active loop should return NO_LOOP" "exit=1, NO_LOOP" "exit=$exit_code, output=$output" - fi - cd - > /dev/null - } - - # Test: Cancel works with active loop - test_cancel_active_loop() { - cd "$TEST_DIR" - # Export CLAUDE_PROJECT_DIR to ensure cancel script looks in test dir - export CLAUDE_PROJECT_DIR="$TEST_DIR" - - # Create mock loop directory - local timestamp="2026-01-18_12-00-00" - local loop_dir=".humanize/pr-loop/$timestamp" - mkdir -p "$loop_dir" - - cat > "$loop_dir/state.md" << EOF ---- -current_round: 1 -max_iterations: 42 -pr_number: 123 ---- -EOF - - local output - local exit_code - output=$("$CANCEL_SCRIPT" 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - unset CLAUDE_PROJECT_DIR - - if [[ $exit_code -eq 0 ]] && echo "$output" | grep -q "CANCELLED"; then - if [[ -f "$loop_dir/cancel-state.md" ]] && [[ ! 
-f "$loop_dir/state.md" ]]; then - pass "T-POS-9: Cancel works and renames state file" - else - fail "T-POS-9: Cancel should rename state.md to cancel-state.md" - fi - else - fail "T-POS-9: Cancel should work with active loop" "exit=0, CANCELLED" "exit=$exit_code" - fi - - cd - > /dev/null - } - - # Run cancel tests - test_cancel_help - test_cancel_no_loop - test_cancel_active_loop -} - -# ======================================== -# fetch-pr-comments.sh Tests -# ======================================== - -run_fetch_tests() { - echo "" - echo "========================================" - echo "Testing fetch-pr-comments.sh" - echo "========================================" - echo "" - - FETCH_SCRIPT="$PROJECT_ROOT/scripts/fetch-pr-comments.sh" - - # Test: Help flag works - test_fetch_help() { - local output - output=$("$FETCH_SCRIPT" --help 2>&1) || true - if echo "$output" | grep -q "fetch-pr-comments"; then - pass "T-POS-10: --help displays usage information" - else - fail "T-POS-10: --help should display usage information" - fi - } - - # Test: Missing PR number shows error - test_fetch_no_pr() { - local output - local exit_code - output=$("$FETCH_SCRIPT" 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - - if [[ $exit_code -ne 0 ]] && echo "$output" | grep -qi "pr number.*required"; then - pass "T-NEG-5: Missing PR number shows error" - else - fail "T-NEG-5: Missing PR number should show error" - fi - } - - # Test: Missing output file shows error - test_fetch_no_output() { - local output - local exit_code - output=$("$FETCH_SCRIPT" 123 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - - if [[ $exit_code -ne 0 ]] && echo "$output" | grep -qi "output file.*required"; then - pass "T-NEG-6: Missing output file shows error" - else - fail "T-NEG-6: Missing output file should show error" - fi - } - - # Test: Invalid PR number shows error - test_fetch_invalid_pr() { - local output - local exit_code - output=$("$FETCH_SCRIPT" abc /tmp/out.md 2>&1) || exit_code=$? 
- exit_code=${exit_code:-0} - - if [[ $exit_code -ne 0 ]] && echo "$output" | grep -qi "invalid pr number"; then - pass "T-NEG-7: Invalid PR number shows error" - else - fail "T-NEG-7: Invalid PR number should show error" - fi - } - - # Run fetch tests - test_fetch_help - test_fetch_no_pr - test_fetch_no_output - test_fetch_invalid_pr -} - -# ======================================== -# poll-pr-reviews.sh Tests -# ======================================== - -run_poll_tests() { - echo "" - echo "========================================" - echo "Testing poll-pr-reviews.sh" - echo "========================================" - echo "" - - POLL_SCRIPT="$PROJECT_ROOT/scripts/poll-pr-reviews.sh" - - # Test: Help flag works - test_poll_help() { - local output - output=$("$POLL_SCRIPT" --help 2>&1) || true - if echo "$output" | grep -q "poll-pr-reviews"; then - pass "T-POS-11: --help displays usage information" - else - fail "T-POS-11: --help should display usage information" - fi - } - - # Test: Missing PR number shows error - test_poll_no_pr() { - local output - local exit_code - output=$("$POLL_SCRIPT" 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - - if [[ $exit_code -ne 0 ]] && echo "$output" | grep -qi "pr number.*required"; then - pass "T-NEG-8: Missing PR number shows error" - else - fail "T-NEG-8: Missing PR number should show error" - fi - } - - # Test: Missing --after shows error - test_poll_no_after() { - local output - local exit_code - output=$("$POLL_SCRIPT" 123 --bots claude 2>&1) || exit_code=$? - exit_code=${exit_code:-0} - - if [[ $exit_code -ne 0 ]] && echo "$output" | grep -qi "after.*required"; then - pass "T-NEG-9: Missing --after shows error" - else - fail "T-NEG-9: Missing --after should show error" - fi - } - - # Test: Missing --bots shows error - test_poll_no_bots() { - local output - local exit_code - output=$("$POLL_SCRIPT" 123 --after 2026-01-18T00:00:00Z 2>&1) || exit_code=$? 
- exit_code=${exit_code:-0} - - if [[ $exit_code -ne 0 ]] && echo "$output" | grep -qi "bots.*required"; then - pass "T-NEG-10: Missing --bots shows error" - else - fail "T-NEG-10: Missing --bots should show error" - fi - } - - # Run poll tests - test_poll_help - test_poll_no_pr - test_poll_no_after - test_poll_no_bots -} - -# ======================================== -# Main Entry Point -# ======================================== - -run_script_tests() { - run_setup_tests - run_cancel_tests - run_fetch_tests - run_poll_tests -} diff --git a/tests/test-pr-loop-stophook.sh b/tests/test-pr-loop-stophook.sh deleted file mode 100644 index a73f8a4b..00000000 --- a/tests/test-pr-loop-stophook.sh +++ /dev/null @@ -1,1782 +0,0 @@ -#!/usr/bin/env bash -# -# PR Loop Stop Hook Tests -# -# Tests for the stop hook functionality: -# - Force push detection -# - Trigger validation -# - Bot timeout handling -# - State file management -# - Dynamic startup_case updates -# -# Usage: source test-pr-loop-stophook.sh && run_stophook_tests -# - -run_stophook_tests() { -# ======================================== -# Stop-Hook Integration Tests -# ======================================== - -# Test: Force push trigger validation - old triggers rejected after force push -test_stophook_force_push_rejects_old_trigger() { - local test_subdir="$TEST_DIR/stophook_force_push_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Create state file with latest_commit_at set to AFTER the old trigger comment - # This simulates: force push happened after the old trigger was posted - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 1 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - claude -active_bots: - - claude -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: -trigger_comment_id: -startup_case: 4 
-latest_commit_sha: newsha123 -latest_commit_at: 2026-01-18T14:00:00Z ---- -EOF - - # Create resolve file - echo "# Resolution Summary" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-1-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that returns OLD trigger comment (BEFORE latest_commit_at) - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/usr/bin/env bash -# Check if --jq is in arguments (for transformed format) -HAS_JQ=false -for arg in "$@"; do - if [[ "$arg" == "--jq" || "$arg" == "-q" ]]; then - HAS_JQ=true - break - fi -done - -case "$1" in - api) - if [[ "$2" == "user" ]]; then - echo "testuser" - exit 0 - fi - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - # Return old trigger comment from 12:00 (BEFORE latest_commit_at of 14:00) - if [[ "$HAS_JQ" == "true" ]]; then - # With --jq --paginate, output one transformed object per line - echo '{"id": 1, "author": "testuser", "created_at": "2026-01-18T12:00:00Z", "body": "@claude please review"}' - else - # Raw GitHub API format - echo '[{"id": 1, "user": {"login": "testuser"}, "created_at": "2026-01-18T12:00:00Z", "body": "@claude please review"}]' - fi - exit 0 - fi - echo "[]" - exit 0 - ;; - pr) - if [[ "$*" == *"commits"* ]] && [[ "$*" == *"--jq"* ]]; then - # Return just the timestamp when --jq is used - echo "2026-01-18T10:00:00Z" - exit 0 - fi - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "newsha123" # Match state file - else - echo "/tmp/git" - fi - ;; - status) echo "" ;; - merge-base) exit 0 ;; # Pretend no force push in this test -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - # Run stop hook and capture output - local hook_output - hook_output=$(echo '{}' | 
"$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1) || true - - # The old trigger should be rejected because it's before latest_commit_at - # Stop hook should block requiring a new trigger - if echo "$hook_output" | grep -qi "trigger\|comment @\|re-trigger\|no trigger"; then - pass "T-STOPHOOK-1: Force push validation rejects old trigger comment" - else - fail "T-STOPHOOK-1: Should reject old trigger after force push" "block/require trigger" "got: $hook_output" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Step 7 Case 1 exception - no trigger required for startup_case=1, round=0 -test_stophook_case1_no_trigger_required() { - local test_subdir="$TEST_DIR/stophook_case1_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Create state file with startup_case=1 and round=0 - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - codex -active_bots: - - codex -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 1 -poll_timeout: 2 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: -trigger_comment_id: -startup_case: 1 -latest_commit_sha: abc123 -latest_commit_at: 2026-01-18T10:00:00Z ---- -EOF - - # Create resolve file - echo "# Resolution Summary" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that returns no trigger comments, but has codex +1 - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/usr/bin/env bash -case "$1" in - api) - if [[ "$2" == "user" ]]; then - echo "testuser" - exit 0 - fi - if [[ "$2" == *"/issues/"*"/reactions"* ]]; then - # Return codex +1 reaction (triggers approval) - echo '[{"user":{"login":"chatgpt-codex-connector[bot]"},"content":"+1","created_at":"2026-01-18T10:05:00Z"}]' - exit 0 - fi - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - echo '[]' # No comments - exit 0 - 
fi - echo "[]" - exit 0 - ;; - pr) - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - else - echo "/tmp/git" - fi - ;; - status) echo "" ;; - merge-base) exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - local hook_stderr - hook_stderr=$(echo '{}' | "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1 >/dev/null) || true - - # Case 1 exception: should NOT block for missing trigger - if echo "$hook_stderr" | grep -q "trigger not required\|Case 1\|startup_case=1"; then - pass "T-STOPHOOK-2: Case 1 exception - no trigger required" - else - # Alternative: check that it didn't block - if ! echo "$hook_stderr" | grep -qi "block.*trigger\|missing.*trigger\|comment @"; then - pass "T-STOPHOOK-2: Case 1 exception - no trigger required (no block)" - else - fail "T-STOPHOOK-2: Case 1 should not require trigger" "no block" "got: $hook_stderr" - fi - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Step 9 - APPROVE creates approve-state.md -test_stophook_approve_creates_state() { - local test_subdir="$TEST_DIR/stophook_approve_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Create state file with empty active_bots (YAML list format, no items) - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 1 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - codex -active_bots: -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: 2026-01-18T11:00:00Z -trigger_comment_id: 123 -startup_case: 3 -latest_commit_sha: abc123 -latest_commit_at: 2026-01-18T10:00:00Z ---- -EOF - - # Create resolve 
file (required by stop hook) - echo "# Resolution Summary" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-1-pr-resolve.md" - - export CLAUDE_PROJECT_DIR="$test_subdir" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/usr/bin/env bash -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - else - echo "/tmp/git" - fi - ;; - status) echo "" ;; - merge-base) exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export PATH="$mock_bin:$PATH" - - # Run stop hook - with empty active_bots, it should approve - local hook_output - hook_output=$(echo '{}' | "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1) || true - - # Check for approve-state.md creation - if [[ -f "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/approve-state.md" ]]; then - pass "T-STOPHOOK-3: APPROVE creates approve-state.md" - else - # Alternative: check output for approval message - if echo "$hook_output" | grep -qi "approved\|complete"; then - pass "T-STOPHOOK-3: APPROVE creates approve-state.md (via message)" - else - fail "T-STOPHOOK-3: Should create approve-state.md" "approve-state.md exists" "not found" - fi - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Dynamic startup_case update when new comments arrive -test_stophook_dynamic_startup_case() { - local test_subdir="$TEST_DIR/stophook_dynamic_case_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Start with startup_case=1 (no comments) - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - claude - - codex -active_bots: - - claude - - codex -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 1 -poll_timeout: 2 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: 
-trigger_comment_id: -startup_case: 1 -latest_commit_sha: abc123 -latest_commit_at: 2026-01-18T10:00:00Z ---- -EOF - - echo "# Resolution" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that returns bot comments (simulating comments arriving) - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/usr/bin/env bash -case "$1" in - api) - if [[ "$2" == "user" ]]; then - echo "testuser" - exit 0 - fi - # Return bot comments (claude and codex have commented) - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - echo '[{"id":1,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T10:05:00Z","body":"Found issue"},{"id":2,"user":{"login":"chatgpt-codex-connector[bot]"},"created_at":"2026-01-18T10:06:00Z","body":"Also found issue"}]' - exit 0 - fi - if [[ "$2" == *"/pulls/"*"/reviews"* ]]; then - echo '[]' - exit 0 - fi - if [[ "$2" == *"/pulls/"*"/comments"* ]]; then - echo '[]' - exit 0 - fi - if [[ "$2" == *"/reactions"* ]]; then - echo '[]' - exit 0 - fi - echo "[]" - exit 0 - ;; - pr) - if [[ "$*" == *"commits"* ]] && [[ "$*" == *"--jq"* ]]; then - # Return just the timestamp when --jq is used - echo "2026-01-18T09:00:00Z" - exit 0 - fi - if [[ "$*" == *"commits"* ]]; then - echo '{"commits":[{"committedDate":"2026-01-18T09:00:00Z"}]}' - exit 0 - fi - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - else - echo "/tmp/git" - fi - ;; - status) echo "" ;; - merge-base) exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - # Run stop hook with timeout (it may poll, so limit to 5 seconds) - timeout 5 bash -c 'echo "{}" | "$1/hooks/pr-loop-stop-hook.sh" 2>&1' _ "$PROJECT_ROOT" >/dev/null 
2>&1 || true - - # Check if startup_case was updated in state file - local new_case - new_case=$(grep "^startup_case:" "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" 2>/dev/null | sed 's/startup_case: *//' | tr -d ' ' || true) - - # With both bots commented and no new commits, should be Case 3 - if [[ "$new_case" == "3" ]]; then - pass "T-STOPHOOK-4: Dynamic startup_case updated to 3 (all commented, no new commits)" - elif [[ -n "$new_case" && "$new_case" != "1" ]]; then - pass "T-STOPHOOK-4: Dynamic startup_case updated from 1 to $new_case" - else - fail "T-STOPHOOK-4: startup_case should update dynamically" "case 3" "got: $new_case" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Step 6 - unpushed commits block exit -test_stophook_step6_unpushed_commits() { - local test_subdir="$TEST_DIR/stophook_step6_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Create state file - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - codex -active_bots: - - codex -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: -trigger_comment_id: -startup_case: 1 -latest_commit_sha: abc123 -latest_commit_at: 2026-01-18T10:00:00Z ---- -EOF - - echo "# Resolution" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/usr/bin/env bash -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - # Mock git that reports unpushed commits - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - elif [[ "$2" == "--git-dir" ]]; then - echo ".git" - else - echo "/tmp/git" - fi - ;; - status) - if [[ "$2" == "--porcelain" ]]; then - echo 
"" # Clean working directory - elif [[ "$2" == "-sb" ]]; then - echo "## test-branch...origin/test-branch [ahead 2]" # 2 unpushed commits - fi - ;; - branch) - echo "test-branch" - ;; - merge-base) exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - local hook_output - hook_output=$(echo '{}' | "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1) || true - - # Should block with unpushed commits message - if echo "$hook_output" | grep -qi "unpushed\|ahead\|push.*commit"; then - pass "T-STOPHOOK-5: Step 6 blocks on unpushed commits" - else - fail "T-STOPHOOK-5: Step 6 should block on unpushed commits" "unpushed/ahead message" "got: $hook_output" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Step 6.5 - force push detection with actual history rewrite simulation -test_stophook_step65_force_push_detection() { - local test_subdir="$TEST_DIR/stophook_step65_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # State with old commit SHA - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - codex -active_bots: - - codex -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: 2026-01-18T10:30:00Z -trigger_comment_id: 999 -startup_case: 1 -latest_commit_sha: oldsha123 -latest_commit_at: 2026-01-18T10:00:00Z ---- -EOF - - echo "# Resolution" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/usr/bin/env bash -case "$1" in - pr) - if [[ "$*" == *"commits"* ]] && [[ "$*" == *"--jq"* ]]; then - # Return just the timestamp when --jq is used - echo "2026-01-18T12:00:00Z" - exit 0 - fi - if [[ "$*" == *"commits"* ]]; then 
- echo '{"commits":[{"committedDate":"2026-01-18T12:00:00Z"}]}' - exit 0 - fi - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - # Mock git that simulates force push: old commit is NOT ancestor of current HEAD - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "newsha456" # Different from oldsha123 in state - elif [[ "$2" == "--git-dir" ]]; then - echo ".git" - else - echo "/tmp/git" - fi - ;; - status) - if [[ "$2" == "--porcelain" ]]; then - echo "" - elif [[ "$2" == "-sb" ]]; then - echo "## test-branch" - fi - ;; - merge-base) - # Simulate force push: old commit is NOT an ancestor - # --is-ancestor exits 1 when not ancestor - exit 1 - ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - local hook_output - hook_output=$(echo '{}' | "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1) || true - - # Should detect force push and block - if echo "$hook_output" | grep -qi "force.*push\|history.*rewrite\|re-trigger"; then - pass "T-STOPHOOK-6: Step 6.5 detects force push (history rewrite)" - else - fail "T-STOPHOOK-6: Step 6.5 should detect force push" "force push message" "got: $hook_output" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Step 7 - missing trigger comment blocks (Case 4/5) -test_stophook_step7_missing_trigger() { - local test_subdir="$TEST_DIR/stophook_step7_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # State with startup_case=4 (requires trigger) but no trigger - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - codex -active_bots: - - codex -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 
2026-01-18T10:00:00Z -last_trigger_at: -trigger_comment_id: -startup_case: 4 -latest_commit_sha: abc123 -latest_commit_at: 2026-01-18T12:00:00Z ---- -EOF - - echo "# Resolution" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that returns no trigger comments - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/usr/bin/env bash -case "$1" in - api) - if [[ "$2" == "user" ]]; then - echo "testuser" - exit 0 - fi - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - echo '[]' # No comments - exit 0 - fi - echo "[]" - exit 0 - ;; - pr) - if [[ "$*" == *"commits"* ]] && [[ "$*" == *"--jq"* ]]; then - # Return just the timestamp when --jq is used - echo "2026-01-18T12:00:00Z" - exit 0 - fi - if [[ "$*" == *"commits"* ]]; then - echo '{"commits":[{"committedDate":"2026-01-18T12:00:00Z"}]}' - exit 0 - fi - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - elif [[ "$2" == "--git-dir" ]]; then - echo ".git" - else - echo "/tmp/git" - fi - ;; - status) - if [[ "$2" == "--porcelain" ]]; then - echo "" - elif [[ "$2" == "-sb" ]]; then - echo "## test-branch" - fi - ;; - merge-base) exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - local hook_output - hook_output=$(echo '{}' | "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1) || true - - # Should block with missing trigger message - if echo "$hook_output" | grep -qi "trigger\|@.*mention\|comment"; then - pass "T-STOPHOOK-7: Step 7 blocks on missing trigger (Case 4)" - else - fail "T-STOPHOOK-7: Step 7 should block on missing trigger" "trigger/mention message" "got: $hook_output" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Bot timeout 
auto-removes bot from active_bots -test_stophook_bot_timeout_auto_remove() { - local test_subdir="$TEST_DIR/stophook_timeout_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # State with short poll_timeout (2 seconds) to test timeout behavior - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - codex -active_bots: - - codex -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 1 -poll_timeout: 2 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: 2026-01-18T10:30:00Z -trigger_comment_id: 999 -startup_case: 3 -latest_commit_sha: abc123 -latest_commit_at: 2026-01-18T10:00:00Z ---- -EOF - - echo "# Resolution" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that returns NO bot comments (simulates bot not responding) - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/usr/bin/env bash -case "$1" in - api) - if [[ "$2" == "user" ]]; then - echo "testuser" - exit 0 - fi - # Return empty for all comment/review queries - echo "[]" - exit 0 - ;; - pr) - if [[ "$*" == *"commits"* ]] && [[ "$*" == *"--jq"* ]]; then - # Return just the timestamp when --jq is used - echo "2026-01-18T10:00:00Z" - exit 0 - fi - if [[ "$*" == *"commits"* ]]; then - echo '{"commits":[{"committedDate":"2026-01-18T10:00:00Z"}]}' - exit 0 - fi - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - elif [[ "$2" == "--git-dir" ]]; then - echo ".git" - else - echo "/tmp/git" - fi - ;; - status) - if [[ "$2" == "--porcelain" ]]; then - echo "" - elif [[ "$2" == "-sb" ]]; then - echo "## test-branch" - fi - ;; - merge-base) 
exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - # Run stop hook with short timeout - it should time out and auto-remove bots - local hook_output - hook_output=$(timeout 10 bash -c 'echo "{}" | "$1/hooks/pr-loop-stop-hook.sh" 2>&1' _ "$PROJECT_ROOT") || true - - # Should either mention timeout or create approve-state (if all bots timed out) - if echo "$hook_output" | grep -qi "timeout\|timed out\|auto-remove\|approved"; then - pass "T-STOPHOOK-8: Bot timeout handling" - elif [[ -f "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/approve-state.md" ]]; then - pass "T-STOPHOOK-8: Bot timeout created approve-state.md" - else - fail "T-STOPHOOK-8: Bot timeout should trigger auto-remove" "timeout/approved message" "got: $hook_output" - fi - - # VERIFICATION: Check that active_bots was actually updated (removed the bot) - # After timeout, either: - # 1. approve-state.md exists with empty active_bots (all bots timed out) - # 2. 
state.md has the timed-out bot removed from active_bots - local state_file="" - if [[ -f "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/approve-state.md" ]]; then - state_file="$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/approve-state.md" - elif [[ -f "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" ]]; then - state_file="$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" - fi - - # VERIFICATION: Check that approve-state.md was created with empty active_bots - local approve_file="$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/approve-state.md" - if [[ -f "$approve_file" ]]; then - pass "T-STOPHOOK-8a: approve-state.md created - bot timeout led to loop completion" - # Verify active_bots is empty (not containing 'codex') - local active_bots_line - active_bots_line=$(grep "^active_bots:" "$approve_file" 2>/dev/null || true) - # After the line "active_bots:", check if there are any bot entries - local next_line_has_bot - next_line_has_bot=$(sed -n '/^active_bots:/,/^[a-z_]*:/p' "$approve_file" | grep -E '^\s*-\s*\w' || true) - if [[ -z "$next_line_has_bot" ]]; then - pass "T-STOPHOOK-8b: active_bots is empty after timeout" - else - fail "T-STOPHOOK-8b: active_bots should be empty after timeout" "no bots listed" "got: $next_line_has_bot" - fi - else - fail "T-STOPHOOK-8a: approve-state.md should exist after bot timeout" "approve-state.md exists" "file not found" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Codex +1 detection removes codex from active_bots -test_stophook_codex_thumbsup_approval() { - local test_subdir="$TEST_DIR/stophook_thumbsup_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # State with startup_case=1 (required for +1 check) and only codex as active bot - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - codex -active_bots: - - codex -codex_model: 
gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 1 -poll_timeout: 2 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: -trigger_comment_id: -startup_case: 1 -latest_commit_sha: abc123 -latest_commit_at: 2026-01-18T10:00:00Z ---- -EOF - - echo "# Resolution" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that returns +1 reaction from codex - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/usr/bin/env bash -case "$1" in - api) - if [[ "$2" == "user" ]]; then - echo "testuser" - exit 0 - fi - # Return +1 reaction for PR reactions query - if [[ "$2" == *"/issues/"*"/reactions"* ]]; then - echo '[{"user":{"login":"chatgpt-codex-connector[bot]"},"content":"+1","created_at":"2026-01-18T10:05:00Z"}]' - exit 0 - fi - echo "[]" - exit 0 - ;; - pr) - if [[ "$*" == *"commits"* ]] && [[ "$*" == *"--jq"* ]]; then - # Return just the timestamp when --jq is used - echo "2026-01-18T10:00:00Z" - exit 0 - fi - if [[ "$*" == *"commits"* ]]; then - echo '{"commits":[{"committedDate":"2026-01-18T10:00:00Z"}]}' - exit 0 - fi - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - elif [[ "$2" == "--git-dir" ]]; then - echo ".git" - else - echo "/tmp/git" - fi - ;; - status) - if [[ "$2" == "--porcelain" ]]; then - echo "" - elif [[ "$2" == "-sb" ]]; then - echo "## test-branch" - fi - ;; - merge-base) exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - local hook_output - hook_output=$(echo '{}' | "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1) || true - - # Should detect +1 and create approve-state.md (since codex is only bot) - if echo "$hook_output" | grep -qi 
"+1\|thumbsup\|approved"; then - pass "T-STOPHOOK-9: Codex +1 detection" - elif [[ -f "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/approve-state.md" ]]; then - pass "T-STOPHOOK-9: Codex +1 created approve-state.md" - else - fail "T-STOPHOOK-9: Codex +1 should be detected" "+1/approved message" "got: $hook_output" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Claude eyes timeout blocks exit -test_stophook_claude_eyes_timeout() { - local test_subdir="$TEST_DIR/stophook_eyes_timeout_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # State with claude configured and trigger required (round > 0) - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 1 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - claude -active_bots: - - claude -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 1 -poll_timeout: 900 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: 2026-01-18T11:00:00Z -trigger_comment_id: 12345 -startup_case: 3 -latest_commit_sha: abc123 -latest_commit_at: 2026-01-18T10:00:00Z ---- -EOF - - echo "# Resolution" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-1-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that returns NO eyes reaction (simulates claude bot not configured) - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/usr/bin/env bash -# Check if --jq is in arguments (for transformed format) -HAS_JQ=false -for arg in "$@"; do - if [[ "$arg" == "--jq" || "$arg" == "-q" ]]; then - HAS_JQ=true - break - fi -done - -case "$1" in - api) - if [[ "$2" == "user" ]]; then - echo "testuser" - exit 0 - fi - # Return empty reactions - no eyes - if [[ "$2" == *"/reactions"* ]]; then - echo "[]" - exit 0 - fi - if [[ "$2" == *"/issues/"*"/comments"* ]]; then - # Return trigger comment - if [[ "$HAS_JQ" == "true" ]]; then - # With --jq --paginate, output one transformed object per line - 
echo '{"id": 12345, "author": "testuser", "created_at": "2026-01-18T11:00:00Z", "body": "@claude please review"}' - else - # Raw GitHub API format - echo '[{"id": 12345, "user": {"login": "testuser"}, "created_at": "2026-01-18T11:00:00Z", "body": "@claude please review"}]' - fi - exit 0 - fi - echo "[]" - exit 0 - ;; - pr) - if [[ "$*" == *"commits"* ]] && [[ "$*" == *"--jq"* ]]; then - # Return just the timestamp when --jq is used - echo "2026-01-18T10:00:00Z" - exit 0 - fi - if [[ "$*" == *"commits"* ]]; then - echo '{"commits":[{"committedDate":"2026-01-18T10:00:00Z"}]}' - exit 0 - fi - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - elif [[ "$2" == "--git-dir" ]]; then - echo ".git" - else - echo "/tmp/git" - fi - ;; - status) - if [[ "$2" == "--porcelain" ]]; then - echo "" - elif [[ "$2" == "-sb" ]]; then - echo "## test-branch" - fi - ;; - merge-base) exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - # Run with timeout since eyes check has 3x5s retry (15s total) - local hook_output - hook_output=$(timeout 20 bash -c 'echo "{}" | "$1/hooks/pr-loop-stop-hook.sh" 2>&1' _ "$PROJECT_ROOT") || true - - # Should block with eyes timeout message - if echo "$hook_output" | grep -qi "eyes\|not responding\|timeout\|bot.*configured"; then - pass "T-STOPHOOK-10: Claude eyes timeout blocks exit" - else - fail "T-STOPHOOK-10: Claude eyes timeout should block" "eyes/timeout message" "got: $hook_output" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Dynamic startup_case update when comments arrive -test_stophook_dynamic_startup_case_update() { - local test_subdir="$TEST_DIR/stophook_dynamic_case_test2" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # 
Use dynamic timestamps to ensure polling doesn't time out immediately - # Timeline: commit -> trigger -> comment (all recent, all within poll_timeout) - local trigger_ts commit_ts comment_ts - # Trigger was 10 seconds ago - trigger_ts=$(date -u -d "-10 seconds" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-10S +%Y-%m-%dT%H:%M:%SZ) - # Commit was 60 seconds ago (before trigger) - commit_ts=$(date -u -d "-60 seconds" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-60S +%Y-%m-%dT%H:%M:%SZ) - # Comment arrived 5 seconds ago (after trigger, after commit -> case 3) - comment_ts=$(date -u -d "-5 seconds" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-5S +%Y-%m-%dT%H:%M:%SZ) - - # Start with startup_case=1 (no comments initially), then comments arrive - # Provide a trigger comment to proceed past timeout checks - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << EOF ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - codex -active_bots: - - codex -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 1 -poll_timeout: 60 -started_at: $commit_ts -last_trigger_at: $trigger_ts -trigger_comment_id: 999 -startup_case: 1 -latest_commit_sha: abc123 -latest_commit_at: $commit_ts ---- -EOF - - echo "# Resolution" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that returns bot comments (simulating comments arriving) - # IMPORTANT: poll-pr-reviews.sh expects RAW GitHub API format (with .user.login) - # check-pr-reviewer-status.sh uses --jq so needs transformed format - # Use COMMENT_TS environment variable for dynamic timestamp - cat > "$mock_bin/gh" << MOCK_GH -#!/usr/bin/env bash -# Dynamic comment timestamp from test setup -COMMENT_TS="$comment_ts" -COMMIT_TS="$commit_ts" - -# Check if --jq is in arguments and what type of jq expression -HAS_JQ=false -JQ_RETURNS_ARRAY=false 
-ARGS=("\$@") -for ((i=0; i<\${#ARGS[@]}; i++)); do - if [[ "\${ARGS[i]}" == "--jq" || "\${ARGS[i]}" == "-q" ]]; then - HAS_JQ=true - # Check next argument for jq expression starting with [ - next_idx=\$((i + 1)) - if [[ \$next_idx -lt \${#ARGS[@]} ]]; then - next_arg="\${ARGS[next_idx]}" - if [[ "\$next_arg" == "["* ]]; then - JQ_RETURNS_ARRAY=true - fi - fi - fi -done - -case "\$1" in - repo) - # check-pr-reviewer-status.sh needs repo owner/name with jq transformation - if [[ "\$*" == *"--json owner,name"* ]] || [[ "\$*" == *"--json owner"* && "\$*" == *"--json name"* ]]; then - if [[ "\$HAS_JQ" == "true" ]]; then - # jq '.owner.login + "/" + .name' returns "owner/repo" - echo "testowner/testrepo" - else - echo '{"owner": {"login": "testowner"}, "name": "testrepo"}' - fi - exit 0 - fi - if [[ "\$*" == *"--json parent"* ]]; then - if [[ "\$HAS_JQ" == "true" ]]; then - # jq '.parent.owner.login + "/" + .parent.name' returns empty for non-fork - echo "" - else - echo '{"parent": null}' - fi - exit 0 - fi - ;; - api) - if [[ "\$2" == "user" ]]; then - echo "testuser" - exit 0 - fi - # Return codex comment - format depends on whether --jq is used and its pattern - if [[ "\$2" == *"/issues/"*"/comments"* ]]; then - if [[ "\$HAS_JQ" == "true" ]]; then - if [[ "\$JQ_RETURNS_ARRAY" == "true" ]]; then - # check-pr-reviewer-status.sh uses '[.[] | {...}]' - returns array - echo "[{\"author\":\"chatgpt-codex-connector[bot]\",\"created_at\":\"\$COMMENT_TS\",\"body\":\"Found issues\"}]" - else - # stop hook uses '.[] | {...}' then 'jq -s' - returns individual objects - echo "{\"id\":1001,\"author\":\"chatgpt-codex-connector[bot]\",\"created_at\":\"\$COMMENT_TS\",\"body\":\"Found issues\"}" - fi - else - # Raw GitHub API format for poll-pr-reviews.sh - echo "[{\"id\":1001,\"user\":{\"login\":\"chatgpt-codex-connector[bot]\",\"type\":\"Bot\"},\"created_at\":\"\$COMMENT_TS\",\"body\":\"Found issues\"}]" - fi - exit 0 - fi - if [[ "\$2" == *"/pulls/"*"/reviews"* ]]; then - echo 
'[]' - exit 0 - fi - if [[ "\$2" == *"/pulls/"*"/comments"* ]]; then - echo '[]' - exit 0 - fi - if [[ "\$2" == *"/reactions"* ]]; then - echo '[]' - exit 0 - fi - echo "[]" - exit 0 - ;; - pr) - # PR existence check: gh pr view --repo ... --json number -q .number - if [[ "\$*" == *"number"* ]] && [[ "\$*" != *"commits"* ]]; then - echo '{"number": 123}' - exit 0 - fi - if [[ "\$*" == *"commits"* ]] && [[ "\$*" == *"headRefOid"* ]]; then - # For check-pr-reviewer-status.sh: returns jq-processed format - # {sha: .headRefOid, date: (.commits | last | .committedDate)} - echo "{\"sha\":\"abc123\",\"date\":\"\$COMMIT_TS\"}" - exit 0 - fi - if [[ "\$*" == *"commits"* ]] && [[ "\$*" == *"--jq"* ]]; then - # Return just the timestamp when --jq is used (stop hook commit fetch) - echo "\$COMMIT_TS" - exit 0 - fi - if [[ "\$*" == *"commits"* ]]; then - # Commit before the comment - echo "{\"commits\":[{\"committedDate\":\"\$COMMIT_TS\"}]}" - exit 0 - fi - if [[ "\$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - elif [[ "$2" == "--git-dir" ]]; then - echo ".git" - else - echo "/tmp/git" - fi - ;; - status) - if [[ "$2" == "--porcelain" ]]; then - echo "" - elif [[ "$2" == "-sb" ]]; then - echo "## test-branch" - fi - ;; - merge-base) exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - # Run stop hook with timeout and capture output for debugging - local hook_output - hook_output=$(timeout 15 bash -c 'echo "{}" | "$1/hooks/pr-loop-stop-hook.sh" 2>&1' _ "$PROJECT_ROOT" 2>&1) || true - - # Check if startup_case was updated in state file (or approve-state.md if all bots approved/timed out) - local new_case state_file - if [[ -f 
"$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" ]]; then - state_file="$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" - elif [[ -f "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/approve-state.md" ]]; then - state_file="$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/approve-state.md" - else - state_file="" - fi - - if [[ -n "$state_file" ]]; then - new_case=$(grep "^startup_case:" "$state_file" 2>/dev/null | sed 's/startup_case: *//' | tr -d ' ' || true) - else - new_case="" - fi - - # Verify startup_case is present in the updated state file (confirms re-evaluation code path ran) - if [[ -n "$new_case" ]]; then - pass "T-STOPHOOK-11: Hook completes with startup_case in state" - else - fail "T-STOPHOOK-11: startup_case should be preserved in state" "startup_case present" "got: empty/missing" - fi - - # VERIFICATION: Assert startup_case changed from initial value (1) to expected value - # Mock setup: codex comment at 10:05:00Z, commit at 09:00:00Z (before comment) - # Expected: Case 3 (all reviewers commented, no new commits after) - if [[ -n "$new_case" && "$new_case" != "1" ]]; then - pass "T-STOPHOOK-11a: startup_case changed from 1 to $new_case" - elif [[ -n "$new_case" && "$new_case" == "1" ]]; then - # Debug: check if stop hook re-evaluated startup_case - if echo "$hook_output" | grep -qi "Startup case changed"; then - # Re-evaluation ran but case didn't change in state file - state write issue - fail "T-STOPHOOK-11a: startup_case changed in hook but not persisted" "!= 1" "case_change logged but state=1" - elif echo "$hook_output" | grep -qi "check-pr-reviewer-status\|NEW_REVIEWER_STATUS"; then - # Re-evaluation script was called - fail "T-STOPHOOK-11a: startup_case check ran but returned 1" "!= 1" "got: 1" - else - # Re-evaluation didn't run - likely exited early - local exit_reason - exit_reason=$(echo "$hook_output" | grep -i "exit\|block\|timeout" | head -3 || echo "unknown") - fail "T-STOPHOOK-11a: startup_case 
re-evaluation not reached" "!= 1" "got: 1, exit: $exit_reason" - fi - else - fail "T-STOPHOOK-11a: startup_case should be present and changed" "number != 1" "got: empty" - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Fork PR support - stop hook resolves base repo from parent -test_stophook_fork_pr_base_repo_resolution() { - local test_subdir="$TEST_DIR/stophook_fork_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Create state file - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << 'EOF' ---- -current_round: 0 -max_iterations: 42 -pr_number: 456 -start_branch: test-branch -configured_bots: - - codex -active_bots: -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 30 -poll_timeout: 900 -started_at: 2026-01-18T10:00:00Z -last_trigger_at: -trigger_comment_id: -startup_case: 1 -latest_commit_sha: abc123 -latest_commit_at: 2026-01-18T10:00:00Z ---- -EOF - - echo "# Resolution" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that simulates a fork scenario: - # - Current repo (fork) doesn't have PR 456 - # - Parent repo (upstream) has PR 456 - cat > "$mock_bin/gh" << 'MOCK_GH' -#!/usr/bin/env bash -# Track which repo we're querying -FORK_REPO="forkuser/forkrepo" -UPSTREAM_REPO="upstreamowner/upstreamrepo" - -case "$1" in - repo) - if [[ "$*" == *"--json owner,name"* ]]; then - # Current repo is the fork - echo "forkuser/forkrepo" - exit 0 - fi - if [[ "$*" == *"--json parent"* ]]; then - # Return parent (upstream) repo - echo "upstreamowner/upstreamrepo" - exit 0 - fi - ;; - pr) - # Check which --repo was specified - if [[ "$*" == *"--repo forkuser/forkrepo"* ]]; then - # Fork doesn't have PR 456 - return empty/error - exit 1 - fi - if [[ "$*" == *"--repo upstreamowner/upstreamrepo"* ]]; then - # Upstream has PR 456 - if [[ "$*" == *"number"* ]] && [[ "$*" != *"commits"* ]]; then - echo 
'{"number": 456}' - exit 0 - fi - if [[ "$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - if [[ "$*" == *"commits"* ]] && [[ "$*" == *"--jq"* ]]; then - echo "2026-01-18T10:00:00Z" - exit 0 - fi - fi - # Default: try to handle without --repo (should fail for forks) - if [[ "$*" != *"--repo"* ]]; then - exit 1 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - else - echo "/tmp/git" - fi - ;; - status) echo "" ;; - merge-base) exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - # Run stop hook - should resolve PR from parent repo - local hook_output - hook_output=$(echo '{}' | "$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" 2>&1) || true - - # Should not fail with "PR not found" because it should have found it in parent repo - # And since active_bots is empty, it should approve - if echo "$hook_output" | grep -qi "approved\|complete"; then - pass "T-STOPHOOK-12: Fork PR support - resolved PR from parent repo" - elif [[ -f "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/approve-state.md" ]]; then - pass "T-STOPHOOK-12: Fork PR support - created approve-state.md" - else - # Check if it at least didn't fail with "PR not found" - if ! 
echo "$hook_output" | grep -qi "pr.*not.*found\|no.*pull.*request"; then - pass "T-STOPHOOK-12: Fork PR support - did not fail on PR lookup" - else - fail "T-STOPHOOK-12: Fork PR should resolve from parent" "success" "got: $hook_output" - fi - fi - - unset CLAUDE_PROJECT_DIR -} - -# Test: Goal tracker - resolved count stays 0 when some bots have issues -test_stophook_goal_tracker_mixed_approval() { - local test_subdir="$TEST_DIR/stophook_goal_tracker_test" - mkdir -p "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Use dynamic timestamps to ensure polling doesn't time out immediately - # Timeline: commit -> trigger -> bot comments (all recent, within poll_timeout) - local trigger_ts commit_ts claude_ts codex_ts - # Trigger was 10 seconds ago - trigger_ts=$(date -u -d "-10 seconds" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-10S +%Y-%m-%dT%H:%M:%SZ) - # Commit was 60 seconds ago (before trigger) - commit_ts=$(date -u -d "-60 seconds" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-60S +%Y-%m-%dT%H:%M:%SZ) - # Claude comment arrived 5 seconds ago (after trigger) - claude_ts=$(date -u -d "-5 seconds" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-5S +%Y-%m-%dT%H:%M:%SZ) - # Codex comment arrived 4 seconds ago (after trigger) - codex_ts=$(date -u -d "-4 seconds" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-4S +%Y-%m-%dT%H:%M:%SZ) - - # State with two bots configured - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << EOF ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - claude - - codex -active_bots: - - claude - - codex -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 1 -poll_timeout: 60 -started_at: $commit_ts -last_trigger_at: $trigger_ts -trigger_comment_id: 999 -startup_case: 3 -latest_commit_sha: abc123 -latest_commit_at: $commit_ts ---- -EOF - - echo "# Resolution" > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - 
# Create initial goal tracker - cat > "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/goal-tracker.md" << EOF -# PR Loop Goal Tracker - -## Stats -- Issues Found: 0 -- Issues Resolved: 0 - -## Log -| Round | Timestamp | Event | -|-------|-----------|-------| -| 0 | $commit_ts | Loop started | -EOF - - local mock_bin="$test_subdir/bin" - mkdir -p "$mock_bin" - - # Mock gh that returns: - # - claude: APPROVE (LGTM) - # - codex: ISSUES (has issues) - cat > "$mock_bin/gh" << MOCK_GH -#!/usr/bin/env bash -# Dynamic timestamps from test setup -CLAUDE_TS="$claude_ts" -CODEX_TS="$codex_ts" -COMMIT_TS="$commit_ts" - -HAS_JQ=false -for arg in "\$@"; do - if [[ "\$arg" == "--jq" || "\$arg" == "-q" ]]; then - HAS_JQ=true - break - fi -done - -case "\$1" in - repo) - if [[ "\$*" == *"--json owner,name"* ]]; then - echo "testowner/testrepo" - exit 0 - fi - if [[ "\$*" == *"--json parent"* ]]; then - echo "" - exit 0 - fi - ;; - api) - if [[ "\$2" == "user" ]]; then - echo "testuser" - exit 0 - fi - # Return comments from both bots - if [[ "\$2" == *"/issues/"*"/comments"* ]]; then - if [[ "\$HAS_JQ" == "true" ]]; then - # Claude approves, Codex has issues - echo "{\"id\": 1, \"author\": \"claude[bot]\", \"created_at\": \"\$CLAUDE_TS\", \"body\": \"LGTM! No issues found.\"}" - echo "{\"id\": 2, \"author\": \"chatgpt-codex-connector[bot]\", \"created_at\": \"\$CODEX_TS\", \"body\": \"Found 2 issues that need fixing.\"}" - else - echo "[{\"id\": 1, \"user\": {\"login\": \"claude[bot]\"}, \"created_at\": \"\$CLAUDE_TS\", \"body\": \"LGTM! No issues found.\"},{\"id\": 2, \"user\": {\"login\": \"chatgpt-codex-connector[bot]\"}, \"created_at\": \"\$CODEX_TS\", \"body\": \"Found 2 issues that need fixing.\"}]" - fi - exit 0 - fi - if [[ "\$2" == *"/reactions"* ]]; then - # Return eyes for claude (no need for this test but keep consistent) - echo "[]" - exit 0 - fi - echo "[]" - exit 0 - ;; - pr) - # PR existence check: gh pr view --repo ... 
--json number -q .number - if [[ "\$*" == *"number"* ]] && [[ "\$*" != *"commits"* ]]; then - echo '{"number": 123}' - exit 0 - fi - if [[ "\$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - if [[ "\$*" == *"commits"* ]] && [[ "\$*" == *"--jq"* ]]; then - echo "\$COMMIT_TS" - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - else - echo "/tmp/git" - fi - ;; - status) - if [[ "$2" == "--porcelain" ]]; then - echo "" - elif [[ "$2" == "-sb" ]]; then - echo "## test-branch" - fi - ;; - merge-base) exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - # Mock codex that outputs mixed approval - cat > "$mock_bin/codex" << 'MOCK_CODEX' -#!/usr/bin/env bash -# Mock codex output: claude approves, codex has issues -cat << 'CODEX_OUTPUT' -# PR Review Validation - -### Per-Bot Status -| Bot | Status | Summary | -|-----|--------|---------| -| claude | APPROVE | No issues found | -| codex | ISSUES | Found 2 issues that need fixing | - -### Issues Found (if any) -1. Issue from codex: Missing error handling -2. 
Issue from codex: Needs tests - -### Approved Bots (to remove from active_bots) -- claude - -### Final Recommendation -ISSUES_REMAINING -CODEX_OUTPUT -MOCK_CODEX - chmod +x "$mock_bin/codex" - - export CLAUDE_PROJECT_DIR="$test_subdir" - export PATH="$mock_bin:$PATH" - - # Run stop hook - local hook_output - hook_output=$(timeout 30 bash -c 'echo "{}" | "$1/hooks/pr-loop-stop-hook.sh" 2>&1' _ "$PROJECT_ROOT") || true - - # Verify that ISSUES_RESOLVED_COUNT is 0, not inflated to ISSUES_FOUND_COUNT - # The goal tracker should show issues found > 0 but resolved = 0 - # (because codex still has issues, even though claude approved) - - # Check the feedback file or check file for the correct issue counts - local check_file="$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/round-1-pr-check.md" - if [[ -f "$check_file" ]]; then - # Check that issues were found - if grep -q "Issues Found\|ISSUES" "$check_file" 2>/dev/null; then - pass "T-STOPHOOK-13: Goal tracker correctly identifies issues" - else - fail "T-STOPHOOK-13: Check file should contain issues" "issues listed" "not found" - fi - else - # Check file may not exist if polling didn't complete - # Check output instead - if echo "$hook_output" | grep -qi "issues.*remaining\|ISSUES_REMAINING"; then - pass "T-STOPHOOK-13: Goal tracker correctly identifies issues (via output)" - else - fail "T-STOPHOOK-13: Should detect issues remaining" "issues_remaining" "got: $hook_output" - fi - fi - - # VERIFICATION: The key fix - resolved count should NOT be inflated - # Since we can't directly check ISSUES_RESOLVED_COUNT variable, verify the behavior: - # - claude approved (removed from active_bots) - # - codex has issues (stays in active_bots) - # - loop should continue (not complete) because codex still has issues - - if [[ ! 
-f "$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/approve-state.md" ]]; then - pass "T-STOPHOOK-13a: Loop continues with mixed approval (not prematurely completed)" - else - fail "T-STOPHOOK-13a: Loop should not complete with mixed approval" "no approve-state.md" "approve-state.md exists" - fi - - # Check that claude was removed from active_bots but codex remains - local state_file="$test_subdir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" - if [[ -f "$state_file" ]]; then - local active_bots_content - active_bots_content=$(sed -n '/^active_bots:/,/^[a-z_]*:/p' "$state_file" | grep -E '^\s*-' || true) - - if echo "$active_bots_content" | grep -q "codex"; then - pass "T-STOPHOOK-13b: Codex remains in active_bots (has issues)" - else - fail "T-STOPHOOK-13b: Codex should remain in active_bots" "codex in list" "got: $active_bots_content" - fi - - if ! echo "$active_bots_content" | grep -q "claude"; then - pass "T-STOPHOOK-13c: Claude removed from active_bots (approved)" - else - fail "T-STOPHOOK-13c: Claude should be removed from active_bots" "no claude" "got: $active_bots_content" - fi - fi - - unset CLAUDE_PROJECT_DIR -} - -# Run stop-hook integration tests -test_stophook_force_push_rejects_old_trigger -test_stophook_case1_no_trigger_required -test_stophook_approve_creates_state -test_stophook_step6_unpushed_commits -test_stophook_step65_force_push_detection -test_stophook_step7_missing_trigger -test_stophook_bot_timeout_auto_remove -test_stophook_codex_thumbsup_approval -test_stophook_claude_eyes_timeout -test_stophook_dynamic_startup_case_update -test_stophook_fork_pr_base_repo_resolution -test_stophook_goal_tracker_mixed_approval - -} diff --git a/tests/test-pr-loop-system.sh b/tests/test-pr-loop-system.sh deleted file mode 100755 index 05cf3b87..00000000 --- a/tests/test-pr-loop-system.sh +++ /dev/null @@ -1,1904 +0,0 @@ -#!/usr/bin/env bash -# -# Test runner for PR loop system -# -# Runs all tests in the tests/ directory using the mock gh CLI -# -# 
Usage: -# ./tests/run-tests.sh [test-name] -# -# Environment: -# TEST_VERBOSE=1 - Show verbose output - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" -PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" - -# Test configuration -TESTS_DIR="$SCRIPT_DIR" -MOCKS_DIR="$TESTS_DIR/mocks" -FIXTURES_DIR="$TESTS_DIR/fixtures" -TEST_VERBOSE="${TEST_VERBOSE:-0}" - -# Colors -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[0;33m' -NC='\033[0m' # No Color - -# Counters -TESTS_RUN=0 -TESTS_PASSED=0 -TESTS_FAILED=0 - -# Test helper functions -log_test() { - echo -e "${YELLOW}[TEST]${NC} $1" -} - -log_pass() { - echo -e "${GREEN}[PASS]${NC} $1" - TESTS_PASSED=$((TESTS_PASSED + 1)) -} - -log_fail() { - echo -e "${RED}[FAIL]${NC} $1" - TESTS_FAILED=$((TESTS_FAILED + 1)) -} - -# Setup test environment -setup_test_env() { - # Add mocks to PATH - export PATH="$MOCKS_DIR:$PATH" - export MOCK_GH_FIXTURES_DIR="$FIXTURES_DIR" - - # Create temp directory for tests - export TEST_TEMP_DIR=$(mktemp -d) - export CLAUDE_PROJECT_DIR="$TEST_TEMP_DIR" - - # Initialize git repo for tests - ( - cd "$TEST_TEMP_DIR" - git init -q - git config user.email "test@example.com" - git config user.name "Test User" - git config commit.gpgsign false - echo "# Test" > README.md - git add README.md - git commit -q -m "Initial commit" - ) >/dev/null 2>&1 -} - -# Cleanup test environment -cleanup_test_env() { - if [[ -n "${TEST_TEMP_DIR:-}" && -d "$TEST_TEMP_DIR" ]]; then - rm -rf "$TEST_TEMP_DIR" - fi -} - -# Run a test function -run_test() { - local test_name="$1" - local test_func="$2" - - TESTS_RUN=$((TESTS_RUN + 1)) - log_test "$test_name" - - setup_test_env - - # Run test in subshell to isolate failures - local result=0 - ( - cd "$TEST_TEMP_DIR" - $test_func - ) && result=0 || result=$? 
- - if [[ $result -eq 0 ]]; then - log_pass "$test_name" - else - log_fail "$test_name (exit code: $result)" - fi - - cleanup_test_env -} - -# ======================================== -# Test: Mutual Exclusion -# ======================================== - -test_mutual_exclusion_rlcr_blocks_pr() { - # Create an active RLCR loop - mkdir -p .humanize/rlcr/2026-01-18_12-00-00 - echo "--- -current_round: 1 -max_iterations: 10 ----" > .humanize/rlcr/2026-01-18_12-00-00/state.md - - # Try to start a PR loop - should fail - export MOCK_GH_PR_NUMBER=123 - export MOCK_GH_PR_STATE="OPEN" - - local result - result=$("$PROJECT_ROOT/scripts/setup-pr-loop.sh" --codex 2>&1) && return 1 || true - - # Should contain error about RLCR loop active - echo "$result" | grep -q "RLCR loop is already active" || return 1 -} - -test_mutual_exclusion_pr_blocks_rlcr() { - # Create an active PR loop - mkdir -p .humanize/pr-loop/2026-01-18_12-00-00 - echo "--- -current_round: 0 -max_iterations: 42 -pr_number: 123 ----" > .humanize/pr-loop/2026-01-18_12-00-00/state.md - - # Try to start an RLCR loop - should fail - echo "# Test Plan" > test-plan.md - - local result - result=$("$PROJECT_ROOT/scripts/setup-rlcr-loop.sh" test-plan.md 2>&1) && return 1 || true - - # Should contain error about PR loop active - echo "$result" | grep -q "PR loop is already active" || return 1 -} - -# ======================================== -# Test: Check PR Reviewer Status -# ======================================== - -test_reviewer_status_case1_no_comments() { - # Fixture with no bot comments - must clear ALL comment sources - echo "[]" > "$FIXTURES_DIR/issue-comments.json" - echo "[]" > "$FIXTURES_DIR/review-comments.json" - echo "[]" > "$FIXTURES_DIR/pr-reviews.json" - - local result - result=$("$PROJECT_ROOT/scripts/check-pr-reviewer-status.sh" 123 --bots "claude,codex") - - # Should return case 1 - local test_passed=true - echo "$result" | jq -e '.case == 1' || test_passed=false - - # Restore fixtures - echo 
'[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM! Code looks good.","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - - $test_passed -} - -test_reviewer_status_case2_partial_comments() { - # Only claude has commented - must clear codex comments too - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo "[]" > "$FIXTURES_DIR/review-comments.json" - echo "[]" > "$FIXTURES_DIR/pr-reviews.json" - - local result - result=$("$PROJECT_ROOT/scripts/check-pr-reviewer-status.sh" 123 --bots "claude,codex") - - # Should return case 2 (partial) - local test_passed=true - echo "$result" | jq -e '.case == 2' || test_passed=false - echo "$result" | jq -e '.reviewers_missing | contains(["codex"])' || test_passed=false - - # Restore fixtures - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM! 
Code looks good.","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - - $test_passed -} - -# ======================================== -# Test: Codex +1 Detection -# ======================================== - -test_codex_thumbsup_detected() { - local result - result=$("$PROJECT_ROOT/scripts/check-bot-reactions.sh" codex-thumbsup 123) - - # Should find the +1 reaction - echo "$result" | jq -e '.content == "+1"' || return 1 -} - -test_codex_thumbsup_with_after_filter() { - # Test --after filter - reaction is at 11:10:00Z, we filter for after 12:00:00Z - # So no reaction should be found - local result - if "$PROJECT_ROOT/scripts/check-bot-reactions.sh" codex-thumbsup 123 --after "2026-01-18T12:00:00Z" 2>/dev/null; then - # Should NOT succeed - reaction is before the filter time - return 1 - fi - # Correctly failed - reaction is before filter time - return 0 -} - -# ======================================== -# Test: Claude Eyes Detection -# ======================================== - -test_claude_eyes_detected() { - # Use delay 0 and retry 1 for fast test - local result - result=$("$PROJECT_ROOT/scripts/check-bot-reactions.sh" claude-eyes 12345 --retry 1 --delay 0) - - # Should find the eyes reaction - echo "$result" | jq -e '.content == "eyes"' || return 1 -} - -# ======================================== -# Test: PR Reviews Detection (PR submissions) -# ======================================== - -test_reviewer_status_includes_pr_reviews() { - # Set up fixture where codex has APPROVED via PR review (not comment) - echo "[]" > "$FIXTURES_DIR/issue-comments.json" - echo "[]" > "$FIXTURES_DIR/review-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM! 
Code looks good.","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - - local result - result=$("$PROJECT_ROOT/scripts/check-pr-reviewer-status.sh" 123 --bots "codex") - - # Codex should be in reviewers_commented because of PR review - local test_passed=true - echo "$result" | jq -e '.reviewers_commented | contains(["codex"])' || test_passed=false - - $test_passed -} - -# ======================================== -# Test: Phase Detection -# ======================================== - -test_phase_detection_approved() { - # Source monitor-common.sh (located in scripts/lib/) - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create a fake session dir with approve-state.md - local session_dir="$TEST_TEMP_DIR/.humanize/pr-loop/2026-01-18_12-00-00" - mkdir -p "$session_dir" - touch "$session_dir/approve-state.md" - - local phase - phase=$(get_pr_loop_phase "$session_dir") - - [[ "$phase" == "approved" ]] || return 1 -} - -test_phase_detection_waiting_initial() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create a fake session dir with state.md at round 0 and startup_case 1 - local session_dir="$TEST_TEMP_DIR/.humanize/pr-loop/2026-01-18_12-00-00" - mkdir -p "$session_dir" - cat > "$session_dir/state.md" << 'EOF' ---- -current_round: 0 -startup_case: 1 ---- -EOF - - local phase - phase=$(get_pr_loop_phase "$session_dir") - - [[ "$phase" == "waiting_initial_review" ]] || return 1 -} - -test_phase_detection_waiting_reviewer() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create a fake session dir with state.md at round 1 - local session_dir="$TEST_TEMP_DIR/.humanize/pr-loop/2026-01-18_12-00-00" - mkdir -p "$session_dir" - cat > "$session_dir/state.md" << 'EOF' ---- -current_round: 1 -startup_case: 2 ---- -EOF - - local phase - phase=$(get_pr_loop_phase "$session_dir") - - [[ "$phase" == "waiting_reviewer" ]] || return 1 -} - -# ======================================== 
-# Test: Goal Tracker Parsing -# ======================================== - -test_goal_tracker_parsing() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create a fake goal tracker file - local tracker_file="$TEST_TEMP_DIR/goal-tracker.md" - cat > "$tracker_file" << 'EOF' -# Goal Tracker - -### Ultimate Goal -Get all bots to approve the PR. - -### Acceptance Criteria - -| AC | Description | -|----|-------------| -| AC-1 | Bot claude approves | -| AC-2 | Bot codex approves | - -### Completed and Verified - -| AC | Description | -|----|-------------| -| AC-1 | Completed | - -#### Active Tasks - -| Task | Description | Status | -|------|-------------|--------| -| Fix bug | Fix the bug | pending | -| Add test | Add a test | completed | - -### Explicitly Deferred - -| Task | Description | -|------|-------------| - -### Open Issues - -| Issue | Description | -|-------|-------------| - -EOF - - local result - result=$(parse_goal_tracker "$tracker_file") - - # Should return: total_acs|completed_acs|active_tasks|completed_tasks|deferred_tasks|open_issues|goal_summary - # Expected: 2|1|1|0|0|0|Get all bots to approve the PR. 
- - local total_acs completed_acs active_tasks - IFS='|' read -r total_acs completed_acs active_tasks _ _ _ _ <<< "$result" - - [[ "$total_acs" == "2" ]] || { echo "Expected total_acs=2, got $total_acs"; return 1; } - [[ "$completed_acs" == "1" ]] || { echo "Expected completed_acs=1, got $completed_acs"; return 1; } - [[ "$active_tasks" == "1" ]] || { echo "Expected active_tasks=1, got $active_tasks"; return 1; } -} - -# ======================================== -# Test: PR Goal Tracker Parsing -# ======================================== - -test_pr_goal_tracker_parsing() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create a fake PR goal tracker file - local tracker_file="$TEST_TEMP_DIR/pr-goal-tracker.md" - cat > "$tracker_file" << 'EOF' -# PR Goal Tracker - -## Total Statistics - -- Total Issues Found: 5 -- Total Issues Resolved: 3 -- Remaining: 2 - -## Issue Summary - -| ID | Reviewer | Round | Status | Description | -|----|----------|-------|--------|-------------| -| 1 | Claude | 0 | resolved | Issue one | -| 2 | Claude | 0 | resolved | Issue two | -| 3 | Codex | 1 | open | Issue three | -| 4 | Codex | 1 | resolved | Issue four | -| 5 | Claude | 2 | open | Issue five | - -EOF - - local result - result=$(humanize_parse_pr_goal_tracker "$tracker_file") - - # Should return: total_issues|resolved_issues|remaining_issues|last_reviewer - # Expected: 5|3|2|Claude - - local total_issues resolved_issues remaining_issues last_reviewer - IFS='|' read -r total_issues resolved_issues remaining_issues last_reviewer <<< "$result" - - [[ "$total_issues" == "5" ]] || { echo "Expected total_issues=5, got $total_issues"; return 1; } - [[ "$resolved_issues" == "3" ]] || { echo "Expected resolved_issues=3, got $resolved_issues"; return 1; } - [[ "$remaining_issues" == "2" ]] || { echo "Expected remaining_issues=2, got $remaining_issues"; return 1; } - [[ "$last_reviewer" == "Claude" ]] || { echo "Expected last_reviewer=Claude, got 
$last_reviewer"; return 1; } -} - -# ======================================== -# Test: State File Detection -# ======================================== - -test_state_file_detection_active() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create active state - local session_dir="$TEST_TEMP_DIR/.humanize/pr-loop/2026-01-18_12-00-00" - mkdir -p "$session_dir" - echo "current_round: 0" > "$session_dir/state.md" - - local result - result=$(monitor_find_state_file "$session_dir") - - # Should return state.md with active status - echo "$result" | grep -q "state.md|active" || { echo "Expected active state, got $result"; return 1; } -} - -test_state_file_detection_approve() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create approve state (no state.md, only approve-state.md) - local session_dir="$TEST_TEMP_DIR/.humanize/pr-loop/2026-01-18_12-00-00" - mkdir -p "$session_dir" - echo "approved" > "$session_dir/approve-state.md" - - local result - result=$(monitor_find_state_file "$session_dir") - - # Should return approve-state.md with approve status - echo "$result" | grep -q "approve-state.md|approve" || { echo "Expected approve state, got $result"; return 1; } -} - -# ======================================== -# Test: Phase Detection - Cancelled -# ======================================== - -test_phase_detection_cancelled() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create a fake session dir with cancel-state.md - local session_dir="$TEST_TEMP_DIR/.humanize/pr-loop/2026-01-18_12-00-00" - mkdir -p "$session_dir" - touch "$session_dir/cancel-state.md" - - local phase - phase=$(get_pr_loop_phase "$session_dir") - - [[ "$phase" == "cancelled" ]] || { echo "Expected cancelled, got $phase"; return 1; } -} - -test_phase_detection_maxiter() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create a fake 
session dir with maxiter-state.md - local session_dir="$TEST_TEMP_DIR/.humanize/pr-loop/2026-01-18_12-00-00" - mkdir -p "$session_dir" - touch "$session_dir/maxiter-state.md" - - local phase - phase=$(get_pr_loop_phase "$session_dir") - - [[ "$phase" == "maxiter" ]] || { echo "Expected maxiter, got $phase"; return 1; } -} - -# ======================================== -# Test: Startup Case Detection -# ======================================== - -test_reviewer_status_case3_all_commented() { - # All bots have commented - should be case 3 - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - - local result - result=$("$PROJECT_ROOT/scripts/check-pr-reviewer-status.sh" 123 --bots "claude,codex") - - # Should return case 3 (all bots commented) - local test_passed=true - echo "$result" | jq -e '.case == 3' || test_passed=false - - $test_passed -} - -# ======================================== -# Test: update_pr_goal_tracker helper -# ======================================== - -test_update_pr_goal_tracker() { - # Source loop-common.sh - source "$PROJECT_ROOT/hooks/lib/loop-common.sh" - - # Create a goal tracker file - local tracker_file="$TEST_TEMP_DIR/goal-tracker.md" - cat > "$tracker_file" << 'EOF' -# PR Goal Tracker - -## Total Statistics - -- Total Issues Found: 2 -- Total Issues Resolved: 1 -- Remaining: 1 - -## Issue Summary -EOF - - # Update with new bot results (JSON format: issues=new found, resolved=new resolved) - update_pr_goal_tracker "$tracker_file" 1 '{"issues": 3, "resolved": 2, "bot": "Codex"}' - - # Verify update - should add 3 found, 2 resolved (new totals: 5 found, 3 resolved, 2 remaining) - grep -q "Total Issues Found: 5" 
"$tracker_file" || { echo "Expected 5 total found"; return 1; } - grep -q "Total Issues Resolved: 3" "$tracker_file" || { echo "Expected 3 total resolved"; return 1; } - grep -q "Remaining: 2" "$tracker_file" || { echo "Expected 2 remaining"; return 1; } -} - -# ======================================== -# Test: Unpushed Commits Detection -# ======================================== - -test_unpushed_commits_detected() { - # Create a git repo with unpushed commits - local test_dir="$TEST_TEMP_DIR" - cd "$test_dir" - - # Initialize git repo and create a commit - git init -q - git config user.email "test@example.com" - git config user.name "Test User" - echo "# Test" > README.md - git add README.md - git commit -q -m "Initial commit" - - # Create a fake remote tracking branch (simulates having unpushed commits) - # This creates a local branch that pretends to track origin/main - git branch --set-upstream-to=HEAD 2>/dev/null || true - - # Add another commit (this will be "unpushed") - echo "new content" >> README.md - git add README.md - git commit -q -m "New commit" - - # Check git status for unpushed detection pattern - local ahead_count=$(git status -sb 2>/dev/null | grep -oE '\[ahead [0-9]+\]' | grep -oE '[0-9]+' || echo "0") - - # Test passes if we can detect we have local commits - # Note: In this test setup, we can't truly simulate upstream, so we verify the pattern matching works - [[ -n "$(git log --oneline -1)" ]] || return 1 -} - -# ======================================== -# Test: Force Push Detection Logic -# ======================================== - -test_force_push_ancestry_check() { - # Test git merge-base --is-ancestor behavior - local test_dir="$TEST_TEMP_DIR" - cd "$test_dir" - - # Create a git repo with two branches - git init -q - git config user.email "test@example.com" - git config user.name "Test User" - - # Create initial commit - echo "v1" > file.txt - git add file.txt - git commit -q -m "Initial" - local INITIAL_SHA=$(git rev-parse HEAD) - - # 
Create second commit - echo "v2" >> file.txt - git add file.txt - git commit -q -m "Second" - local SECOND_SHA=$(git rev-parse HEAD) - - # Test: INITIAL_SHA should be ancestor of SECOND_SHA - git merge-base --is-ancestor "$INITIAL_SHA" "$SECOND_SHA" || { echo "Expected $INITIAL_SHA to be ancestor of $SECOND_SHA"; return 1; } - - # Test: SECOND_SHA should NOT be ancestor of INITIAL_SHA - if git merge-base --is-ancestor "$SECOND_SHA" "$INITIAL_SHA" 2>/dev/null; then - echo "Expected $SECOND_SHA to NOT be ancestor of $INITIAL_SHA" - return 1 - fi - - return 0 -} - -# ======================================== -# Test: Approve State Creation -# ======================================== - -test_approve_state_detection() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create session dir with approve-state.md - local session_dir="$TEST_TEMP_DIR/.humanize/pr-loop/2026-01-18_12-00-00" - mkdir -p "$session_dir" - echo "approved" > "$session_dir/approve-state.md" - - # Phase should be "approved" - local phase - phase=$(get_pr_loop_phase "$session_dir") - - [[ "$phase" == "approved" ]] || { echo "Expected phase=approved, got $phase"; return 1; } - - # State file detection should also work - local state_info - state_info=$(monitor_find_state_file "$session_dir") - - echo "$state_info" | grep -q "approve" || { echo "Expected approve in state_info, got $state_info"; return 1; } -} - -# ======================================== -# Test: Goal Tracker Schema -# ======================================== - -test_goal_tracker_schema() { - # Read the goal tracker init template - local template_file="$PROJECT_ROOT/prompt-template/pr-loop/goal-tracker-initial.md" - - # Verify required sections exist per plan - grep -q "## Issue Summary" "$template_file" || { echo "Missing Issue Summary section"; return 1; } - grep -q "## Total Statistics" "$template_file" || { echo "Missing Total Statistics section"; return 1; } - grep -q "## Issue Log" 
"$template_file" || { echo "Missing Issue Log section"; return 1; } - - # Verify Total Statistics has required fields - grep -q "Total Issues Found:" "$template_file" || { echo "Missing Total Issues Found field"; return 1; } - grep -q "Total Issues Resolved:" "$template_file" || { echo "Missing Total Issues Resolved field"; return 1; } - grep -q "Remaining:" "$template_file" || { echo "Missing Remaining field"; return 1; } -} - -# ======================================== -# Test: Dynamic Startup Case -# ======================================== - -test_startup_case_4_5_detection() { - # Test that check-pr-reviewer-status.sh detects case 4/5 (commits after reviews) - # Set up fixtures: both bots commented, but there's a newer commit - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T10:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T10:15:00Z","body":"LGTM","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - - # Note: The mock would need to simulate a newer commit timestamp - # For this test, we verify the script returns valid JSON - local result - result=$("$PROJECT_ROOT/scripts/check-pr-reviewer-status.sh" 123 --bots "claude,codex" 2>/dev/null) || true - - # Should return valid JSON with case field - echo "$result" | jq -e '.case' >/dev/null || { echo "Invalid JSON or missing case field"; return 1; } -} - -# ======================================== -# Test: Goal Tracker Update with Issue Summary Row -# ======================================== - -test_goal_tracker_update_adds_row() { - # Source loop-common.sh - source "$PROJECT_ROOT/hooks/lib/loop-common.sh" - - # Create a goal tracker file with proper schema - local tracker_file="$TEST_TEMP_DIR/goal-tracker.md" - cat > "$tracker_file" << 'EOF' -# PR Review Goal Tracker - -## Issue Summary - -| Round | Reviewer | Issues 
Found | Issues Resolved | Status | -|-------|----------|--------------|-----------------|--------| -| 0 | - | 0 | 0 | Initial | - -## Total Statistics - -- Total Issues Found: 0 -- Total Issues Resolved: 0 -- Remaining: 0 - -## Issue Log - -### Round 0 -*Awaiting initial reviews* -EOF - - # Update with new bot results - update_pr_goal_tracker "$tracker_file" 1 '{"issues": 2, "resolved": 0, "bot": "Codex"}' - - # Verify Issue Log has Round 1 entry - grep -q "### Round 1" "$tracker_file" || { echo "Missing Round 1 in Issue Log"; return 1; } - - # Verify totals updated - grep -q "Total Issues Found: 2" "$tracker_file" || { echo "Expected 2 total found"; return 1; } -} - -# ======================================== -# Test: Goal Tracker Update Idempotency -# ======================================== - -test_goal_tracker_update_idempotent() { - # Source loop-common.sh - source "$PROJECT_ROOT/hooks/lib/loop-common.sh" - - # Create a goal tracker file with proper schema - local tracker_file="$TEST_TEMP_DIR/goal-tracker.md" - cat > "$tracker_file" << 'EOF' -# PR Review Goal Tracker - -## Issue Summary - -| Round | Reviewer | Issues Found | Issues Resolved | Status | -|-------|----------|--------------|-----------------|--------| -| 0 | - | 0 | 0 | Initial | - -## Total Statistics - -- Total Issues Found: 0 -- Total Issues Resolved: 0 -- Remaining: 0 - -## Issue Log - -### Round 0 -*Awaiting initial reviews* -EOF - - # First update - should succeed - update_pr_goal_tracker "$tracker_file" 1 '{"issues": 3, "resolved": 0, "bot": "Codex"}' - - # Verify first update worked - grep -q "Total Issues Found: 3" "$tracker_file" || { echo "First update failed - expected 3 total found"; return 1; } - - # Second update with SAME round AND SAME bot - should be SKIPPED (idempotent) - update_pr_goal_tracker "$tracker_file" 1 '{"issues": 5, "resolved": 0, "bot": "Codex"}' - - # Totals should still be 3 (not 8) because round 1 was already recorded - grep -q "Total Issues Found: 3" 
"$tracker_file" || { echo "Idempotency failed - totals changed on duplicate update"; return 1; } - - # Count Issue Summary rows - should only have 2 (Round 0 + Round 1) - local row_count=$(grep -cE '^\|[[:space:]]*[0-9]+[[:space:]]*\|' "$tracker_file") - [[ "$row_count" -eq 2 ]] || { echo "Idempotency failed - expected 2 rows, got $row_count"; return 1; } -} - -# ======================================== -# Test: Shared Monitor - Find Latest Session -# ======================================== - -test_shared_monitor_find_latest_session() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create session directories with different timestamps - local loop_dir="$TEST_TEMP_DIR/.humanize/pr-loop" - mkdir -p "$loop_dir/2026-01-18_10-00-00" - mkdir -p "$loop_dir/2026-01-18_12-00-00" - mkdir -p "$loop_dir/2026-01-18_11-00-00" - - # Test that the latest session is found - local result - result=$(monitor_find_latest_session "$loop_dir") - - [[ "$(basename "$result")" == "2026-01-18_12-00-00" ]] || { - echo "Expected 2026-01-18_12-00-00, got $(basename "$result")" - return 1 - } -} - -# ======================================== -# Test: Shared Monitor - Find State File -# ======================================== - -test_shared_monitor_find_state_file() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - local session_dir="$TEST_TEMP_DIR/session" - mkdir -p "$session_dir" - - # Test 1: active state - touch "$session_dir/state.md" - local result - result=$(monitor_find_state_file "$session_dir") - local status="${result#*|}" - [[ "$status" == "active" ]] || { echo "Expected active, got $status"; return 1; } - - # Test 2: approve state (remove state.md, add approve-state.md) - rm "$session_dir/state.md" - touch "$session_dir/approve-state.md" - result=$(monitor_find_state_file "$session_dir") - status="${result#*|}" - [[ "$status" == "approve" ]] || { echo "Expected approve, got $status"; return 1; } - - 
# Test 3: no state file - rm "$session_dir/approve-state.md" - result=$(monitor_find_state_file "$session_dir") - status="${result#*|}" - [[ "$status" == "unknown" ]] || { echo "Expected unknown, got $status"; return 1; } -} - -# ======================================== -# Test: Shared Monitor - Get File Size -# ======================================== - -test_shared_monitor_get_file_size() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - # Create a test file with known content - local test_file="$TEST_TEMP_DIR/test-file.txt" - echo "Hello World" > "$test_file" - - local result - result=$(monitor_get_file_size "$test_file") - - # File should have content (size > 0) - [[ "$result" -gt 0 ]] || { echo "Expected size > 0, got $result"; return 1; } - - # Test non-existent file returns 0 - result=$(monitor_get_file_size "$TEST_TEMP_DIR/nonexistent.txt") - [[ "$result" -eq 0 ]] || { echo "Expected 0 for nonexistent file, got $result"; return 1; } -} - -# ======================================== -# Test: Phase Detection - Codex Analyzing (File Growth) -# ======================================== - -test_phase_detection_codex_analyzing() { - # Source monitor-common.sh - source "$PROJECT_ROOT/scripts/lib/monitor-common.sh" - - local session_dir="$TEST_TEMP_DIR/session" - mkdir -p "$session_dir" - - # Create state.md for active session - cat > "$session_dir/state.md" << 'EOF' ---- -current_round: 1 -startup_case: 2 ---- -EOF - - # Create a pr-check file with recent mtime (simulates Codex writing) - local check_file="$session_dir/round-1-pr-check.md" - echo "Analyzing PR..." 
> "$check_file" - # Touch with current time ensures mtime is within 10 seconds - touch "$check_file" - - # Test phase detection shows codex_analyzing - local result - result=$(get_pr_loop_phase "$session_dir") - [[ "$result" == "codex_analyzing" ]] || { - echo "Expected codex_analyzing, got $result" - return 1 - } - - # For the second test: make the file old and ensure cache shows no growth - # Touch with past timestamp - touch -d "2026-01-18 10:00:00" "$check_file" - - # Get the current file size and write it to cache twice - # (so second call sees no growth) - local size - size=$(stat -c%s "$check_file" 2>/dev/null || stat -f%z "$check_file" 2>/dev/null || echo 0) - local session_name=$(basename "$session_dir") - local cache_file="/tmp/humanize-phase-${session_name}-1.size" - echo "$size" > "$cache_file" - - # Now call again - same size, old mtime -> should be waiting_reviewer - result=$(get_pr_loop_phase "$session_dir") - [[ "$result" == "waiting_reviewer" ]] || { - echo "Expected waiting_reviewer after old mtime and no growth, got $result" - return 1 - } - - # Cleanup - rm -f "$cache_file" 2>/dev/null || true -} - -# ======================================== -# Test: Monitor Phase Display Output Assertions -# ======================================== - -# Helper: Run monitor with --once and capture output -run_monitor_once_capture_output() { - local session_dir="$1" - local project_dir="$2" - - # Create wrapper script that runs monitor and captures output - local wrapper="$project_dir/run_monitor_test.sh" - cat > "$wrapper" << 'WRAPPER_EOF' -#!/usr/bin/env bash -PROJECT_DIR="$1" -PROJECT_ROOT="$2" - -cd "$PROJECT_DIR" - -# Stub terminal commands for non-interactive mode -tput() { - case "$1" in - cols) echo "80" ;; - lines) echo "24" ;; - *) : ;; - esac -} -export -f tput -clear() { :; } -export -f clear - -# Disable ANSI colors for easier parsing -export NO_COLOR=1 - -# Source humanize.sh -source "$PROJECT_ROOT/scripts/humanize.sh" - -# Run monitor with --once 
flag -humanize monitor pr --once 2>&1 -WRAPPER_EOF - chmod +x "$wrapper" - - # Run and capture output - timeout 10 bash "$wrapper" "$project_dir" "$PROJECT_ROOT" 2>&1 || true -} - -# Test: Monitor displays "All reviews approved" for approved state -test_monitor_output_phase_approved() { - local test_dir="$TEST_TEMP_DIR/monitor_phase_approved" - mkdir -p "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00" - - # Create approve-state.md (final approved state) - cat > "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00/approve-state.md" << 'EOF' ---- -current_round: 1 -startup_case: 3 -pr_number: 123 -configured_bots: - - codex -active_bots: ---- -EOF - - # Create goal-tracker.md (required by monitor) - cat > "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00/goal-tracker.md" << 'GOAL' -# Goal Tracker -## Issue Summary -| Round | Reviewer | Issues Found | Status | -|-------|----------|--------------|--------| -| 0 | - | 0 | Initial | -GOAL - - local output - output=$(run_monitor_once_capture_output "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00" "$test_dir") - - # Assert output contains approved phase (require Phase: label) - if echo "$output" | grep -qi "Phase:.*approved\|Phase:.*All reviews"; then - return 0 - else - echo "Expected 'All reviews approved' in output, got: $(echo "$output" | head -20)" - return 1 - fi -} - -# Test: Monitor displays "Waiting for initial PR review" for waiting_initial_review state -test_monitor_output_phase_waiting_initial() { - local test_dir="$TEST_TEMP_DIR/monitor_phase_waiting" - mkdir -p "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00" - - # Create state.md with startup_case=1, round=0 (waiting for initial review) - cat > "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00/state.md" << 'EOF' ---- -current_round: 0 -startup_case: 1 -pr_number: 123 -configured_bots: - - codex - - claude -active_bots: - - codex - - claude ---- -EOF - - cat > "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00/goal-tracker.md" << 'GOAL' -# Goal Tracker -## Issue 
Summary -| Round | Reviewer | Issues Found | Status | -|-------|----------|--------------|--------| -| 0 | - | 0 | Initial | -GOAL - - local output - output=$(run_monitor_once_capture_output "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00" "$test_dir") - - # Assert output contains waiting phase (require Phase: label) - # For startup_case=1 (no comments yet), the loop is waiting for initial review - if echo "$output" | grep -qi "Phase:.*waiting"; then - return 0 - else - echo "Expected 'Phase:...waiting' in output, got: $(echo "$output" | head -20)" - return 1 - fi -} - -# Test: Monitor displays "Loop cancelled" for cancelled state -test_monitor_output_phase_cancelled() { - local test_dir="$TEST_TEMP_DIR/monitor_phase_cancelled" - mkdir -p "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00" - - # Create cancel-state.md (cancelled state) - cat > "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00/cancel-state.md" << 'EOF' ---- -current_round: 1 -startup_case: 3 -pr_number: 123 -configured_bots: - - codex -active_bots: - - codex -cancelled_at: 2026-01-18T12:00:00Z ---- -EOF - - cat > "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00/goal-tracker.md" << 'GOAL' -# Goal Tracker -## Issue Summary -| Round | Reviewer | Issues Found | Status | -|-------|----------|--------------|--------| -| 0 | - | 0 | Initial | -GOAL - - local output - output=$(run_monitor_once_capture_output "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00" "$test_dir") - - # Assert output contains cancel phase (require Phase: label) - if echo "$output" | grep -qi "Phase:.*cancel"; then - return 0 - else - echo "Expected 'Phase:...cancel' in output, got: $(echo "$output" | head -20)" - return 1 - fi -} - -# Test: Monitor displays "Codex analyzing..." 
for codex_analyzing phase -test_monitor_output_phase_codex_analyzing() { - local test_dir="$TEST_TEMP_DIR/monitor_phase_analyzing" - mkdir -p "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00" - - # Create state.md for active session - cat > "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00/state.md" << 'EOF' ---- -current_round: 1 -startup_case: 2 -pr_number: 123 -configured_bots: - - codex -active_bots: - - codex ---- -EOF - - cat > "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00/goal-tracker.md" << 'GOAL' -# Goal Tracker -## Issue Summary -| Round | Reviewer | Issues Found | Status | -|-------|----------|--------------|--------| -| 0 | - | 0 | Initial | -GOAL - - # Create a pr-check file with current mtime (simulates Codex actively writing) - local check_file="$test_dir/.humanize/pr-loop/2026-01-18_10-00-00/round-1-pr-check.md" - echo "Analyzing PR..." > "$check_file" - # Touch with current time ensures mtime is within 10 seconds - touch "$check_file" - - local output - output=$(run_monitor_once_capture_output "$test_dir/.humanize/pr-loop/2026-01-18_10-00-00" "$test_dir") - - # Assert output contains "Codex analyzing" phase (require Phase: prefix) - if echo "$output" | grep -qi "Phase:.*Codex.*analyz"; then - return 0 - else - echo "Expected 'Phase:...Codex analyzing' in output, got: $(echo "$output" | head -20)" - return 1 - fi -} - -# ======================================== -# Test: Case 1 Exception - No Trigger Required -# ======================================== - -test_case1_exception_no_trigger() { - # For startup_case 1/2/3 in round 0, no trigger is required - # This tests the logic that determines REQUIRE_TRIGGER - - # Test startup_case 1, round 0 -> REQUIRE_TRIGGER=false - local round=0 - local startup_case=1 - local require_trigger=false - - if [[ "$round" -gt 0 ]]; then - require_trigger=true - elif [[ "$round" -eq 0 ]]; then - case "$startup_case" in - 1|2|3) require_trigger=false ;; - 4|5) require_trigger=true ;; - esac - fi - - [[ 
"$require_trigger" == "false" ]] || { echo "Case 1 should not require trigger"; return 1; } - - # Test startup_case 2, round 0 -> REQUIRE_TRIGGER=false - startup_case=2 - require_trigger=false - if [[ "$round" -gt 0 ]]; then - require_trigger=true - elif [[ "$round" -eq 0 ]]; then - case "$startup_case" in - 1|2|3) require_trigger=false ;; - 4|5) require_trigger=true ;; - esac - fi - - [[ "$require_trigger" == "false" ]] || { echo "Case 2 should not require trigger"; return 1; } - - # Test startup_case 4, round 0 -> REQUIRE_TRIGGER=true - startup_case=4 - require_trigger=false - if [[ "$round" -gt 0 ]]; then - require_trigger=true - elif [[ "$round" -eq 0 ]]; then - case "$startup_case" in - 1|2|3) require_trigger=false ;; - 4|5) require_trigger=true ;; - esac - fi - - [[ "$require_trigger" == "true" ]] || { echo "Case 4 should require trigger"; return 1; } - - # Test round 1 (any case) -> REQUIRE_TRIGGER=true - round=1 - startup_case=1 - require_trigger=false - if [[ "$round" -gt 0 ]]; then - require_trigger=true - elif [[ "$round" -eq 0 ]]; then - case "$startup_case" in - 1|2|3) require_trigger=false ;; - 4|5) require_trigger=true ;; - esac - fi - - [[ "$require_trigger" == "true" ]] || { echo "Round 1 should require trigger"; return 1; } -} - -# ======================================== -# Test: Goal Tracker Row Inside Table -# ======================================== - -test_goal_tracker_row_inside_table() { - # Verify that update_pr_goal_tracker inserts rows INSIDE the Issue Summary table - # Not before "## Total Statistics" - - source "$PROJECT_ROOT/hooks/lib/loop-common.sh" - - local tracker_file="$TEST_TEMP_DIR/goal-tracker-table.md" - cat > "$tracker_file" << 'EOF' -# PR Review Goal Tracker - -## Issue Summary - -| Round | Reviewer | Issues Found | Issues Resolved | Status | -|-------|----------|--------------|-----------------|--------| -| 0 | - | 0 | 0 | Initial | - -## Total Statistics - -- Total Issues Found: 0 -- Total Issues Resolved: 0 -- Remaining: 
0 - -## Issue Log - -### Round 0 -*Awaiting initial reviews* -EOF - - # Update with round 1 - update_pr_goal_tracker "$tracker_file" 1 '{"issues": 2, "resolved": 0, "bot": "Codex"}' - - # Verify: The new row should be BEFORE the blank line that ends the table - # Check that there's a table row with Round 1 BEFORE "## Total Statistics" - - # Extract just the Issue Summary section - local summary_section - summary_section=$(sed -n '/^## Issue Summary/,/^## Total Statistics/p' "$tracker_file") - - # The section should contain | 1 | somewhere (Round 1 row) - echo "$summary_section" | grep -qE '^\|[[:space:]]*1[[:space:]]*\|' || { - echo "Round 1 row not found in Issue Summary table" - echo "Content:" - cat "$tracker_file" - return 1 - } - - # Verify the row appears BEFORE "## Total Statistics" (already ensured by sed range) - # and the table structure is valid (rows end before blank line before ## Total Statistics) - - # Count table rows in Issue Summary (should be 3: header, separator, round 0, round 1) - local row_count - row_count=$(echo "$summary_section" | grep -cE '^\|' || echo 0) - [[ "$row_count" -ge 4 ]] || { - echo "Expected at least 4 table rows (header + separator + 2 data rows), got $row_count" - return 1 - } -} - -# ======================================== -# Test: Goal Tracker Partial Update Repair -# ======================================== - -test_goal_tracker_partial_update_repair() { - # Verify that update_pr_goal_tracker repairs partial updates - # (when only summary OR log exists, not both) - - source "$PROJECT_ROOT/hooks/lib/loop-common.sh" - - # Test 1: Tracker with summary row but NO log entry - local tracker_file="$TEST_TEMP_DIR/goal-tracker-partial1.md" - cat > "$tracker_file" << 'EOF' -# PR Review Goal Tracker - -## Issue Summary - -| Round | Reviewer | Issues Found | Issues Resolved | Status | -|-------|----------|--------------|-----------------|--------| -| 0 | - | 0 | 0 | Initial | -| 1 | Codex | 2 | 0 | Issues Found | - -## Total 
Statistics - -- Total Issues Found: 2 -- Total Issues Resolved: 0 -- Remaining: 2 - -## Issue Log - -### Round 0 -*Awaiting initial reviews* -EOF - - # Update - should add log entry but not summary row (since summary exists) - update_pr_goal_tracker "$tracker_file" 1 '{"issues": 2, "resolved": 0, "bot": "Codex"}' - - # Should now have Round 1 in Issue Log - grep -q "### Round 1" "$tracker_file" || { echo "Log entry for Round 1 not added"; return 1; } - - # Test 2: Tracker with log entry but NO summary row - local tracker_file2="$TEST_TEMP_DIR/goal-tracker-partial2.md" - cat > "$tracker_file2" << 'EOF' -# PR Review Goal Tracker - -## Issue Summary - -| Round | Reviewer | Issues Found | Issues Resolved | Status | -|-------|----------|--------------|-----------------|--------| -| 0 | - | 0 | 0 | Initial | - -## Total Statistics - -- Total Issues Found: 0 -- Total Issues Resolved: 0 -- Remaining: 0 - -## Issue Log - -### Round 0 -*Awaiting initial reviews* - -### Round 1 -Codex: Found 2 issues, Resolved 0 -EOF - - # Update - should add summary row but not log entry (since log exists) - update_pr_goal_tracker "$tracker_file2" 1 '{"issues": 2, "resolved": 0, "bot": "Codex"}' - - # Should now have Round 1 in summary table - grep -qE '^\|[[:space:]]*1[[:space:]]*\|' "$tracker_file2" || { echo "Summary row for Round 1 not added"; return 1; } -} - -# ======================================== -# Test: Case 4 Emission (all commented + new commits) -# ======================================== - -test_case4_all_commented_new_commits() { - # Verify Case 4 is emitted when ALL reviewers commented and new commits after - - # Fixture: All bots commented at 10:00, latest commit at 11:00 - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T10:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo 
'[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T10:05:00Z","body":"LGTM","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - - # Mock commit at 11:00 (after reviews) - export MOCK_GH_LATEST_COMMIT_AT="2026-01-18T11:00:00Z" - - local result - result=$("$PROJECT_ROOT/scripts/check-pr-reviewer-status.sh" 123 --bots "claude,codex" 2>/dev/null) || true - - # Should return Case 4 (all commented, new commits) - local case_num - case_num=$(echo "$result" | jq -r '.case') - [[ "$case_num" == "4" ]] || { echo "Expected Case 4, got $case_num"; return 1; } - - # has_commits_after_reviews should be true - local has_commits - has_commits=$(echo "$result" | jq -r '.has_commits_after_reviews') - [[ "$has_commits" == "true" ]] || { echo "Expected has_commits_after_reviews=true, got $has_commits"; return 1; } - - # Cleanup mock - unset MOCK_GH_LATEST_COMMIT_AT -} - -# ======================================== -# Test: Case 5 Emission (partial + new commits) -# ======================================== - -test_case5_partial_commented_new_commits() { - # Verify Case 5 is emitted when SOME reviewers commented and new commits after - - # Fixture: Only claude commented at 10:00, codex missing - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T10:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[]' > "$FIXTURES_DIR/pr-reviews.json" # No codex - - # Mock commit at 11:00 (after claude's review) - export MOCK_GH_LATEST_COMMIT_AT="2026-01-18T11:00:00Z" - - local result - result=$("$PROJECT_ROOT/scripts/check-pr-reviewer-status.sh" 123 --bots "claude,codex" 2>/dev/null) || true - - # Should return Case 5 (partial commented, new commits) - local case_num - case_num=$(echo "$result" | jq -r '.case') - [[ "$case_num" == "5" ]] || { echo "Expected Case 5, got $case_num"; return 1; } - - # has_commits_after_reviews should be true - local has_commits - 
has_commits=$(echo "$result" | jq -r '.has_commits_after_reviews') - [[ "$has_commits" == "true" ]] || { echo "Expected has_commits_after_reviews=true, got $has_commits"; return 1; } - - # Cleanup mock - unset MOCK_GH_LATEST_COMMIT_AT - - # Restore original fixtures - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM! Code looks good.","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" -} - -# ======================================== -# Test: Setup Case 4/5 Failure Path (missing trigger_comment_id) -# ======================================== - -test_setup_case45_missing_trigger_comment_id() { - # Test that setup-pr-loop.sh fails when trigger_comment_id cannot be retrieved - # for Case 4/5 with --claude option - # This tests the fix that requires eyes verification - - # Set up fixtures for Case 4: All bots commented, new commits after reviews - # Only claude for simplicity - fixture needs bot comment BEFORE latest commit - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T08:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[]' > "$FIXTURES_DIR/pr-reviews.json" - - # Set latest commit AFTER bot comments to trigger Case 4 - export MOCK_GH_LATEST_COMMIT_AT="2026-01-18T12:00:00Z" - export MOCK_GH_PR_NUMBER=123 - export MOCK_GH_PR_STATE="OPEN" - # Make the regular mock return null for the comment lookup that gets the trigger ID - export MOCK_GH_COMMENT_ID_LOOKUP_FAIL=true - - # Run setup-pr-loop.sh with --claude - should fail due to missing trigger_comment_id - local result exit_code - result=$("$PROJECT_ROOT/scripts/setup-pr-loop.sh" --claude 2>&1) && exit_code=0 || exit_code=$? 
- - # Clean up mock env vars - unset MOCK_GH_LATEST_COMMIT_AT MOCK_GH_COMMENT_ID_LOOKUP_FAIL - - # Verify it failed - if [[ $exit_code -eq 0 ]]; then - echo "Expected setup to fail but it succeeded" - echo "Output (last 30 lines): $(echo "$result" | tail -30)" - # Restore fixtures - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM!","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - return 1 - fi - - # Verify error message about missing trigger comment ID - if ! echo "$result" | grep -q "Could not find trigger comment ID"; then - echo "Expected error message about missing trigger_comment_id" - echo "Got: $result" - # Restore fixtures - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM!","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - return 1 - fi - - # Verify loop directory was cleaned up - if ls .humanize/pr-loop/*/state.md 2>/dev/null | head -1 | grep -q .; then - echo "Loop directory was not cleaned up on failure" - # Restore fixtures - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM!","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - return 1 - fi - - # Restore fixtures - echo 
'[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM!","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - - return 0 -} - -# ======================================== -# Test: Goal Tracker Creation/Update Integration Test -# ======================================== - -test_goal_tracker_creation_integration() { - # Test that setup-pr-loop.sh creates goal-tracker.md - # This verifies: goal tracker is created at setup - - # Set up fixtures for Case 1: No comments yet (simplest setup) - echo '[]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[]' > "$FIXTURES_DIR/pr-reviews.json" - echo '[]' > "$FIXTURES_DIR/reactions.json" - - export MOCK_GH_PR_NUMBER=999 - export MOCK_GH_PR_STATE="OPEN" - export MOCK_GH_LATEST_COMMIT_AT="2026-01-18T10:00:00Z" - export MOCK_GH_HEAD_SHA="abc123xyz" - - # Clean up any existing pr-loop directories - rm -rf .humanize/pr-loop 2>/dev/null || true - - # Run setup-pr-loop.sh with --codex - local result exit_code - result=$("$PROJECT_ROOT/scripts/setup-pr-loop.sh" --codex 2>&1) && exit_code=0 || exit_code=$? 
- - # Clean up mock env vars - unset MOCK_GH_PR_NUMBER MOCK_GH_PR_STATE MOCK_GH_LATEST_COMMIT_AT MOCK_GH_HEAD_SHA - - # Find the created loop directory - local loop_dir - loop_dir=$(ls -d .humanize/pr-loop/*/ 2>/dev/null | head -1) - - if [[ -z "$loop_dir" ]]; then - echo "No loop directory created by setup-pr-loop.sh" - echo "Output: $(echo "$result" | tail -20)" - # Restore fixtures - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM!","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - echo '[{"id":5001,"user":{"login":"chatgpt-codex-connector[bot]"},"content":"+1","created_at":"2026-01-18T11:10:00Z"}]' > "$FIXTURES_DIR/reactions.json" - return 1 - fi - - # Verify goal-tracker.md was created - if [[ ! -f "${loop_dir}goal-tracker.md" ]]; then - echo "goal-tracker.md not found in $loop_dir" - echo "Files in loop dir: $(ls -la "$loop_dir" 2>/dev/null)" - # Clean up - rm -rf .humanize/pr-loop - # Restore fixtures - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM!","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - echo '[{"id":5001,"user":{"login":"chatgpt-codex-connector[bot]"},"content":"+1","created_at":"2026-01-18T11:10:00Z"}]' > "$FIXTURES_DIR/reactions.json" - return 1 - fi - - # Verify goal-tracker.md has expected structure (Issue Summary table) - if ! 
grep -q "Issue Summary" "${loop_dir}goal-tracker.md"; then - echo "goal-tracker.md missing 'Issue Summary' section" - echo "Contents: $(cat "${loop_dir}goal-tracker.md")" - rm -rf .humanize/pr-loop - # Restore fixtures - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM!","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - echo '[{"id":5001,"user":{"login":"chatgpt-codex-connector[bot]"},"content":"+1","created_at":"2026-01-18T11:10:00Z"}]' > "$FIXTURES_DIR/reactions.json" - return 1 - fi - - # Verify goal-tracker.md has PR number from mock - if ! grep -q "999" "${loop_dir}goal-tracker.md"; then - echo "goal-tracker.md missing PR number 999" - echo "Contents: $(cat "${loop_dir}goal-tracker.md")" - rm -rf .humanize/pr-loop - # Restore fixtures - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM!","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - echo '[{"id":5001,"user":{"login":"chatgpt-codex-connector[bot]"},"content":"+1","created_at":"2026-01-18T11:10:00Z"}]' > "$FIXTURES_DIR/reactions.json" - return 1 - fi - - # Clean up - rm -rf .humanize/pr-loop - - # Restore fixtures - echo '[{"id":1001,"user":{"login":"claude[bot]"},"created_at":"2026-01-18T11:00:00Z","body":"Issue found"}]' > "$FIXTURES_DIR/issue-comments.json" - echo '[]' > "$FIXTURES_DIR/review-comments.json" - echo '[{"id":4001,"user":{"login":"chatgpt-codex-connector[bot]"},"submitted_at":"2026-01-18T11:15:00Z","body":"LGTM!","state":"APPROVED"}]' > "$FIXTURES_DIR/pr-reviews.json" - echo 
'[{"id":5001,"user":{"login":"chatgpt-codex-connector[bot]"},"content":"+1","created_at":"2026-01-18T11:10:00Z"}]' > "$FIXTURES_DIR/reactions.json" - - return 0 -} - -# Test: Stop hook updates goal tracker with round results -test_stophook_updates_goal_tracker() { - # This test verifies that running the stop hook after bot review updates the goal tracker - local test_dir="$TEST_TEMP_DIR/stophook_goal_test" - mkdir -p "$test_dir/.humanize/pr-loop/2026-01-18_12-00-00" - - # Use dynamic timestamps - local trigger_ts commit_ts comment_ts - trigger_ts=$(date -u -d "-10 seconds" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-10S +%Y-%m-%dT%H:%M:%SZ) - commit_ts=$(date -u -d "-60 seconds" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-60S +%Y-%m-%dT%H:%M:%SZ) - comment_ts=$(date -u -d "-5 seconds" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -v-5S +%Y-%m-%dT%H:%M:%SZ) - - # Create state.md for Round 0 - cat > "$test_dir/.humanize/pr-loop/2026-01-18_12-00-00/state.md" << EOF ---- -current_round: 0 -max_iterations: 42 -pr_number: 123 -start_branch: test-branch -configured_bots: - - codex -active_bots: - - codex -codex_model: gpt-5.4 -codex_effort: medium -codex_timeout: 900 -poll_interval: 1 -poll_timeout: 60 -started_at: $commit_ts -last_trigger_at: $trigger_ts -trigger_comment_id: 999 -startup_case: 1 -latest_commit_sha: abc123 -latest_commit_at: $commit_ts ---- -EOF - - # Create initial goal tracker (need blank line after table header for row insertion) - cat > "$test_dir/.humanize/pr-loop/2026-01-18_12-00-00/goal-tracker.md" << 'EOF' -# PR Review Goal Tracker (PR #123) - -## Issue Summary - -| Round | Bot | Issues Found | Issues Resolved | Status | -|-------|-----|--------------|-----------------|--------| - -## Total Statistics -- Total Issues Found: 0 -- Total Issues Resolved: 0 -EOF - - # Create round-0 resolve file - echo "# Resolution" > "$test_dir/.humanize/pr-loop/2026-01-18_12-00-00/round-0-pr-resolve.md" - - # Create mock gh and git - local mock_bin="$test_dir/bin" 
- mkdir -p "$mock_bin" - - cat > "$mock_bin/gh" << MOCK_GH -#!/usr/bin/env bash -COMMENT_TS="$comment_ts" -COMMIT_TS="$commit_ts" - -case "\$1" in - repo) - if [[ "\$*" == *"--json owner"* ]]; then - echo "testowner" - exit 0 - fi - if [[ "\$*" == *"--json name"* ]]; then - echo "testrepo" - exit 0 - fi - ;; - api) - if [[ "\$2" == "user" ]]; then - echo "testuser" - exit 0 - fi - if [[ "\$2" == *"/issues/"*"/comments"* ]]; then - # Return codex comment with issues - echo "[{\"id\":1001,\"user\":{\"login\":\"chatgpt-codex-connector[bot]\",\"type\":\"Bot\"},\"created_at\":\"\$COMMENT_TS\",\"body\":\"Found 2 issues: fix X, fix Y\"}]" - exit 0 - fi - if [[ "\$2" == *"/pulls/"*"/reviews"* ]]; then - echo '[]' - exit 0 - fi - if [[ "\$2" == *"/pulls/"*"/comments"* ]]; then - echo '[]' - exit 0 - fi - echo '[]' - exit 0 - ;; - pr) - if [[ "\$*" == *"commits"* ]] && [[ "\$*" == *"headRefOid"* ]]; then - echo "{\"sha\":\"abc123\",\"date\":\"\$COMMIT_TS\"}" - exit 0 - fi - if [[ "\$*" == *"commits"* ]] && [[ "\$*" == *"--jq"* ]]; then - # When --jq is used, return just the extracted timestamp - echo "\$COMMIT_TS" - exit 0 - fi - if [[ "\$*" == *"commits"* ]]; then - echo "{\"commits\":[{\"committedDate\":\"\$COMMIT_TS\"}]}" - exit 0 - fi - # PR lookup with number and url: gh pr view --json number,url -q '.number,.url' - if [[ "\$*" == *"number,url"* ]]; then - echo '123' - echo 'https://github.com/testowner/testrepo/pull/123' - exit 0 - fi - # PR existence check: gh pr view --repo ... 
--json number -q .number - if [[ "\$*" == *"number"* ]] && [[ "\$*" != *"commits"* ]]; then - echo '123' - exit 0 - fi - if [[ "\$*" == *"state"* ]]; then - echo '{"state": "OPEN"}' - exit 0 - fi - ;; -esac -exit 0 -MOCK_GH - chmod +x "$mock_bin/gh" - - cat > "$mock_bin/git" << 'MOCK_GIT' -#!/usr/bin/env bash -case "$1" in - rev-parse) - if [[ "$2" == "HEAD" ]]; then - echo "abc123" - elif [[ "$2" == "--git-dir" ]]; then - echo ".git" - else - echo "/tmp/git" - fi - ;; - status) - echo "" - ;; - merge-base) exit 0 ;; -esac -exit 0 -MOCK_GIT - chmod +x "$mock_bin/git" - - # Mock codex command - returns ISSUES_REMAINING to trigger goal tracker update - cat > "$mock_bin/codex" << 'MOCK_CODEX' -#!/usr/bin/env bash -# Mock codex for testing - output review analysis -cat << 'CODEX_OUTPUT' -## Bot Review Analysis - -### codex (chatgpt-codex-connector[bot]) -**Status**: ISSUES -**Issues Found**: 1 -- Fix issue X - -### Issues Found (if any) -- Fix issue X - -### Approved Bots (to remove from active_bots) -(none) - -### Final Recommendation -ISSUES_REMAINING -CODEX_OUTPUT -exit 0 -MOCK_CODEX - chmod +x "$mock_bin/codex" - - # Run stop hook - export CLAUDE_PROJECT_DIR="$test_dir" - local old_path="$PATH" - export PATH="$mock_bin:$PATH" - - local hook_output - hook_output=$(timeout 15 bash -c 'echo "{}" | "$1/hooks/pr-loop-stop-hook.sh" 2>&1' _ "$PROJECT_ROOT" 2>&1) || true - - export PATH="$old_path" - unset CLAUDE_PROJECT_DIR - - # Verify goal tracker was updated with Round 1 row - local goal_file="$test_dir/.humanize/pr-loop/2026-01-18_12-00-00/goal-tracker.md" - if [[ ! -f "$goal_file" ]]; then - echo "Goal tracker file not found" - rm -rf "$test_dir" - return 1 - fi - - # Check that Round 1 row was added (format: | 1 | with possible spaces) - if ! 
grep -qE '^\|[[:space:]]*1[[:space:]]*\|' "$goal_file"; then - echo "Goal tracker not updated with Round 1" - echo "Contents: $(cat "$goal_file")" - echo "Hook output: $(echo "$hook_output" | tail -20)" - rm -rf "$test_dir" - return 1 - fi - - # Check that codex bot is mentioned in the row (lowercase to match configured bot names) - if ! grep -qi "codex" "$goal_file"; then - echo "Goal tracker missing codex bot entry" - echo "Contents: $(cat "$goal_file")" - rm -rf "$test_dir" - return 1 - fi - - rm -rf "$test_dir" - return 0 -} - -# ======================================== -# Main test runner -# ======================================== - -main() { - local test_filter="${1:-}" - - echo "==========================================" - echo " PR Loop System Tests" - echo "==========================================" - echo "" - echo "Project root: $PROJECT_ROOT" - echo "Mock directory: $MOCKS_DIR" - echo "Fixtures directory: $FIXTURES_DIR" - echo "" - - # Run tests - if [[ -z "$test_filter" || "$test_filter" == "mutual_exclusion" ]]; then - run_test "Mutual exclusion - RLCR blocks PR" test_mutual_exclusion_rlcr_blocks_pr - run_test "Mutual exclusion - PR blocks RLCR" test_mutual_exclusion_pr_blocks_rlcr - fi - - if [[ -z "$test_filter" || "$test_filter" == "reviewer_status" ]]; then - run_test "Reviewer status - Case 1 (no comments)" test_reviewer_status_case1_no_comments - run_test "Reviewer status - Case 2 (partial comments)" test_reviewer_status_case2_partial_comments - fi - - if [[ -z "$test_filter" || "$test_filter" == "reactions" ]]; then - run_test "Codex +1 detection" test_codex_thumbsup_detected - run_test "Codex +1 with --after filter" test_codex_thumbsup_with_after_filter - run_test "Claude eyes detection" test_claude_eyes_detected - fi - - if [[ -z "$test_filter" || "$test_filter" == "pr_reviews" ]]; then - run_test "PR reviews detection" test_reviewer_status_includes_pr_reviews - fi - - if [[ -z "$test_filter" || "$test_filter" == "phase" ]]; then - 
run_test "Phase detection - approved" test_phase_detection_approved - run_test "Phase detection - waiting initial" test_phase_detection_waiting_initial - run_test "Phase detection - waiting reviewer" test_phase_detection_waiting_reviewer - fi - - if [[ -z "$test_filter" || "$test_filter" == "goal_tracker" ]]; then - run_test "Goal tracker parsing" test_goal_tracker_parsing - fi - - if [[ -z "$test_filter" || "$test_filter" == "pr_goal_tracker" ]]; then - run_test "PR goal tracker parsing" test_pr_goal_tracker_parsing - run_test "update_pr_goal_tracker helper" test_update_pr_goal_tracker - fi - - if [[ -z "$test_filter" || "$test_filter" == "state_file" ]]; then - run_test "State file detection - active" test_state_file_detection_active - run_test "State file detection - approve" test_state_file_detection_approve - fi - - if [[ -z "$test_filter" || "$test_filter" == "phase_extended" ]]; then - run_test "Phase detection - cancelled" test_phase_detection_cancelled - run_test "Phase detection - maxiter" test_phase_detection_maxiter - fi - - if [[ -z "$test_filter" || "$test_filter" == "reviewer_status_extended" ]]; then - run_test "Reviewer status - Case 3 (all commented)" test_reviewer_status_case3_all_commented - fi - - if [[ -z "$test_filter" || "$test_filter" == "unpushed" ]]; then - run_test "Unpushed commits detection" test_unpushed_commits_detected - fi - - if [[ -z "$test_filter" || "$test_filter" == "force_push" ]]; then - run_test "Force push ancestry check" test_force_push_ancestry_check - fi - - if [[ -z "$test_filter" || "$test_filter" == "approve_state" ]]; then - run_test "Approve state detection" test_approve_state_detection - fi - - if [[ -z "$test_filter" || "$test_filter" == "goal_tracker_schema" ]]; then - run_test "Goal tracker schema" test_goal_tracker_schema - run_test "Goal tracker update adds row" test_goal_tracker_update_adds_row - run_test "Goal tracker update idempotent" test_goal_tracker_update_idempotent - fi - - if [[ -z "$test_filter" || 
"$test_filter" == "startup_case" ]]; then - run_test "Startup case 4/5 detection" test_startup_case_4_5_detection - fi - - if [[ -z "$test_filter" || "$test_filter" == "shared_monitor" ]]; then - run_test "Shared monitor - find latest session" test_shared_monitor_find_latest_session - run_test "Shared monitor - find state file" test_shared_monitor_find_state_file - run_test "Shared monitor - get file size" test_shared_monitor_get_file_size - fi - - if [[ -z "$test_filter" || "$test_filter" == "phase_analyzing" ]]; then - run_test "Phase detection - codex analyzing (file growth)" test_phase_detection_codex_analyzing - fi - - # Monitor output assertions for phase labels - if [[ -z "$test_filter" || "$test_filter" == "monitor_output" ]]; then - run_test "Monitor output - approved phase display" test_monitor_output_phase_approved - run_test "Monitor output - waiting initial phase display" test_monitor_output_phase_waiting_initial - run_test "Monitor output - cancelled phase display" test_monitor_output_phase_cancelled - run_test "Monitor output - codex analyzing phase display" test_monitor_output_phase_codex_analyzing - fi - - if [[ -z "$test_filter" || "$test_filter" == "case1_exception" ]]; then - run_test "Case 1 exception - no trigger required for startup_case 1" test_case1_exception_no_trigger - fi - - if [[ -z "$test_filter" || "$test_filter" == "goal_tracker_table" ]]; then - run_test "Goal tracker row inserted inside table" test_goal_tracker_row_inside_table - fi - - if [[ -z "$test_filter" || "$test_filter" == "goal_tracker_partial" ]]; then - run_test "Goal tracker partial update repair" test_goal_tracker_partial_update_repair - fi - - if [[ -z "$test_filter" || "$test_filter" == "case_4_5" ]]; then - run_test "Case 4 emission (all commented + new commits)" test_case4_all_commented_new_commits - run_test "Case 5 emission (partial + new commits)" test_case5_partial_commented_new_commits - fi - - if [[ -z "$test_filter" || "$test_filter" == "setup_failure" ]]; 
then - run_test "Setup Case 4/5 failure path (missing trigger_comment_id)" test_setup_case45_missing_trigger_comment_id - fi - - if [[ -z "$test_filter" || "$test_filter" == "goal_tracker_integration" ]]; then - run_test "Goal tracker creation via setup-pr-loop.sh" test_goal_tracker_creation_integration - run_test "Stop hook updates goal tracker with round results" test_stophook_updates_goal_tracker - fi - - echo "" - echo "==========================================" - echo " Results" - echo "==========================================" - echo "" - echo "Tests run: $TESTS_RUN" - echo -e "Tests passed: ${GREEN}$TESTS_PASSED${NC}" - echo -e "Tests failed: ${RED}$TESTS_FAILED${NC}" - echo "" - - if [[ $TESTS_FAILED -gt 0 ]]; then - exit 1 - fi -} - -main "$@" diff --git a/tests/test-pr-loop.sh b/tests/test-pr-loop.sh deleted file mode 100755 index 0bb615b8..00000000 --- a/tests/test-pr-loop.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env bash -# -# Tests for PR loop feature -# -# This is the main test runner that sources and executes all test modules: -# - test-pr-loop-scripts.sh: Script argument validation tests -# - test-pr-loop-hooks.sh: Hook functionality tests -# - test-pr-loop-stophook.sh: Stop hook tests -# -# Usage: ./test-pr-loop.sh -# - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" - -# Source test helpers and common library -source "$SCRIPT_DIR/test-helpers.sh" -source "$SCRIPT_DIR/test-pr-loop-lib.sh" - -# ======================================== -# Test Environment Setup -# ======================================== - -init_pr_loop_test_env - -# ======================================== -# Source Test Modules -# ======================================== - -source "$SCRIPT_DIR/test-pr-loop-scripts.sh" -source "$SCRIPT_DIR/test-pr-loop-hooks.sh" -source "$SCRIPT_DIR/test-pr-loop-stophook.sh" - -# ======================================== -# Run All Tests -# ======================================== - -# Script tests (setup, cancel, fetch, poll) -run_script_tests - -# Hook functionality tests -run_hook_tests - -# Stop hook tests -run_stophook_tests - -# ======================================== -# Print Summary -# ======================================== - -print_test_summary diff --git a/tests/test-unified-codex-config.sh b/tests/test-unified-codex-config.sh index 66a0eebe..5948193f 100755 --- a/tests/test-unified-codex-config.sh +++ b/tests/test-unified-codex-config.sh @@ -673,76 +673,6 @@ done echo "" -# ======================================== -# PR loop respects config-backed codex_model (AC-5) -# ======================================== - -echo "--- PR loop config-backed defaults ---" - -SETUP_PR_LOOP="$PROJECT_ROOT/scripts/setup-pr-loop.sh" -PR_STOP_HOOK="$PROJECT_ROOT/hooks/pr-loop-stop-hook.sh" - -if [[ ! -f "$LOOP_COMMON" ]]; then - skip "PR loop config tests require loop-common.sh" "file not found" -elif [[ ! 
-f "$SETUP_PR_LOOP" ]]; then - skip "PR loop config tests require setup-pr-loop.sh" "file not found" -else - # PR loop setup does NOT pre-set DEFAULT_CODEX_MODEL (should come from config) - assert_no_grep "setup-pr-loop.sh: does not pre-set DEFAULT_CODEX_MODEL" \ - 'DEFAULT_CODEX_MODEL=' "$SETUP_PR_LOOP" - - # PR loop setup DOES pre-set DEFAULT_CODEX_EFFORT to medium - assert_grep "setup-pr-loop.sh: pre-sets DEFAULT_CODEX_EFFORT to medium" \ - 'DEFAULT_CODEX_EFFORT="medium"' "$SETUP_PR_LOOP" - - # PR stop hook also does NOT pre-set DEFAULT_CODEX_MODEL - if [[ ! -f "$PR_STOP_HOOK" ]]; then - skip "pr-loop-stop-hook.sh tests require pr-loop-stop-hook.sh" "file not found" - else - assert_no_grep "pr-loop-stop-hook.sh: does not pre-set DEFAULT_CODEX_MODEL" \ - 'DEFAULT_CODEX_MODEL=' "$PR_STOP_HOOK" - - assert_grep "pr-loop-stop-hook.sh: pre-sets DEFAULT_CODEX_EFFORT to medium" \ - 'DEFAULT_CODEX_EFFORT="medium"' "$PR_STOP_HOOK" - fi - - # Behavioral: sourcing loop-common.sh with PR loop effort pre-set picks up config model - setup_test_dir - PR_CFG_PROJECT="$TEST_DIR/pr-cfg-project" - mkdir -p "$PR_CFG_PROJECT/.humanize" - printf '{"codex_model": "o3-mini", "codex_effort": "low"}' > "$PR_CFG_PROJECT/.humanize/config.json" - - result=$(bash -c " - export DEFAULT_CODEX_EFFORT='medium' - export CLAUDE_PROJECT_DIR='$PR_CFG_PROJECT' - export XDG_CONFIG_HOME='$TEST_DIR/no-user-config' - source '$LOOP_COMMON' 2>/dev/null - echo \"\$DEFAULT_CODEX_MODEL|\$DEFAULT_CODEX_EFFORT\" - " 2>/dev/null || echo "ERROR") - - assert_eq "PR loop behavioral: config codex_model respected (o3-mini)" \ - "o3-mini" "$(echo "$result" | cut -d'|' -f1)" - - assert_eq "PR loop behavioral: pre-set effort kept over config (medium)" \ - "medium" "$(echo "$result" | cut -d'|' -f2)" - - # Without config, falls back to hardcoded default model but keeps medium effort - result=$(bash -c " - export DEFAULT_CODEX_EFFORT='medium' - export XDG_CONFIG_HOME='$TEST_DIR/no-user-config' - source '$LOOP_COMMON' 
2>/dev/null - echo \"\$DEFAULT_CODEX_MODEL|\$DEFAULT_CODEX_EFFORT\" - " 2>/dev/null || echo "ERROR") - - assert_eq "PR loop behavioral: no config falls back to gpt-5.4" \ - "gpt-5.4" "$(echo "$result" | cut -d'|' -f1)" - - assert_eq "PR loop behavioral: no config keeps medium effort" \ - "medium" "$(echo "$result" | cut -d'|' -f2)" -fi - -echo "" - # ======================================== # ask-codex respects config-backed defaults (AC-5) # ======================================== @@ -775,127 +705,6 @@ fi echo "" -# ======================================== -# PR loop --codex-model override (runtime behavioral) -# ======================================== - -echo "--- PR loop --codex-model override (runtime) ---" - -if [[ ! -f "$SETUP_PR_LOOP" ]]; then - skip "PR loop override test requires setup-pr-loop.sh" "file not found" -else - # Run setup-pr-loop.sh --help with project config to verify help text shows config-backed default - # --help exits before requiring gh/PR prerequisites, so no external deps needed - setup_test_dir - PR_OVERRIDE_PROJECT="$TEST_DIR/pr-override-project" - mkdir -p "$PR_OVERRIDE_PROJECT/.humanize" - printf '{"codex_model": "o3-mini", "codex_effort": "low"}' > "$PR_OVERRIDE_PROJECT/.humanize/config.json" - - help_output=$(cd "$PR_OVERRIDE_PROJECT" && \ - CLAUDE_PROJECT_DIR="$PR_OVERRIDE_PROJECT" \ - XDG_CONFIG_HOME="$TEST_DIR/no-user-config" \ - timeout 10 bash "$SETUP_PR_LOOP" --help 2>&1) || true - - # Help text must mention config-backed default (not a hardcoded model name) - if echo "$help_output" | grep -q 'default from config'; then - pass "PR loop runtime: --help shows config-backed default" - else - fail "PR loop runtime: --help shows config-backed default" "contains 'default from config'" "$(echo "$help_output" | grep codex-model)" - fi - - # End-to-end: run setup-pr-loop.sh with mock gh/codex and --codex-model override - if ! 
command -v jq >/dev/null 2>&1; then - skip "PR loop e2e test requires jq" "jq not found" - else - setup_test_dir - PR_E2E_PROJECT="$TEST_DIR/pr-e2e-project" - init_test_git_repo "$PR_E2E_PROJECT" - mkdir -p "$PR_E2E_PROJECT/.humanize" - printf '{"codex_model": "o3-mini", "codex_effort": "low"}' > "$PR_E2E_PROJECT/.humanize/config.json" - - # Create a local bare remote (setup-pr-loop.sh needs a git remote) - PR_BARE_REMOTE="$TEST_DIR/pr-remote.git" - git clone --bare "$PR_E2E_PROJECT" "$PR_BARE_REMOTE" -q 2>/dev/null - (cd "$PR_E2E_PROJECT" && git remote remove origin 2>/dev/null; git remote add origin "$PR_BARE_REMOTE") 2>/dev/null || true - - # Create mock gh that handles all setup-pr-loop.sh calls - PR_MOCK_BIN="$TEST_DIR/pr-mock-bin" - mkdir -p "$PR_MOCK_BIN" - cat > "$PR_MOCK_BIN/gh" << 'GH_MOCK_EOF' -#!/usr/bin/env bash -# Mock gh for setup-pr-loop.sh end-to-end test -ALL_ARGS="$*" -case "$1" in - auth) exit 0 ;; - repo) - if [[ "$ALL_ARGS" == *"owner,name"* ]]; then - echo "testowner/testrepo"; exit 0 - elif [[ "$ALL_ARGS" == *"parent"* ]]; then - echo "null/"; exit 0 - fi ;; - pr) - if [[ "$2" == "view" ]]; then - if [[ "$ALL_ARGS" == *"number,url"* ]]; then - printf '123\nhttps://github.com/testowner/testrepo/pull/123'; exit 0 - elif [[ "$ALL_ARGS" == *"state"* ]]; then - echo "OPEN"; exit 0 - elif [[ "$ALL_ARGS" == *"number"* ]]; then - echo "123"; exit 0 - elif [[ "$ALL_ARGS" == *"headRefOid"* ]]; then - echo '{"sha":"abc123","date":"2026-01-01T00:00:00Z"}'; exit 0 - fi - elif [[ "$2" == "comment" ]]; then - echo "https://github.com/testowner/testrepo/pull/123#comment-1"; exit 0 - fi ;; - api) - if [[ "$2" == "user" ]]; then - echo '{"login":"testuser"}'; exit 0 - elif [[ "$2" == *"/comments"* ]] || [[ "$2" == *"/reviews"* ]]; then - echo "[]"; exit 0 - fi - echo "[]"; exit 0 ;; -esac -echo "Mock gh: unhandled: $ALL_ARGS" >&2; exit 1 -GH_MOCK_EOF - chmod +x "$PR_MOCK_BIN/gh" - - # Create mock codex (not called during setup, but required by command -v 
check) - cat > "$PR_MOCK_BIN/codex" << 'CODEX_MOCK_EOF' -#!/usr/bin/env bash -exit 0 -CODEX_MOCK_EOF - chmod +x "$PR_MOCK_BIN/codex" - - # Run setup-pr-loop.sh with --codex-model override - pr_setup_exit=0 - pr_output=$(cd "$PR_E2E_PROJECT" && \ - CLAUDE_PROJECT_DIR="$PR_E2E_PROJECT" \ - XDG_CONFIG_HOME="$TEST_DIR/no-user-config" \ - PATH="$PR_MOCK_BIN:$PATH" \ - timeout 30 bash "$SETUP_PR_LOOP" --claude --codex-model override-model:xhigh 2>&1) || pr_setup_exit=$? - - assert_eq "PR loop e2e: setup-pr-loop.sh exited successfully" \ - "0" "$pr_setup_exit" - - # Find the generated PR loop state.md - PR_STATE_FILE=$(find "$PR_E2E_PROJECT/.humanize/pr-loop" -name "state.md" 2>/dev/null | head -1 || true) - if [[ -z "$PR_STATE_FILE" ]]; then - fail "PR loop e2e: state.md was created" "non-empty path" "empty" - else - pass "PR loop e2e: state.md was created" - - # Assert --codex-model override is stored in state, not config values - assert_eq "PR loop e2e: --codex-model set codex_model (override-model)" \ - "override-model" "$(grep '^codex_model:' "$PR_STATE_FILE" | sed 's/codex_model: *//')" - - assert_eq "PR loop e2e: --codex-model set codex_effort (xhigh)" \ - "xhigh" "$(grep '^codex_effort:' "$PR_STATE_FILE" | sed 's/codex_effort: *//')" - fi - fi -fi - -echo "" - # ======================================== # ask-codex runtime behavioral test # ======================================== From 3374acb61ac904f76cac9b30a9d52a5924d0fa04 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Sun, 29 Mar 2026 09:13:24 -0700 Subject: [PATCH 40/50] fix minor display bug of monitor script --- scripts/humanize.sh | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/scripts/humanize.sh b/scripts/humanize.sh index 346c1802..9804bde5 100755 --- a/scripts/humanize.sh +++ b/scripts/humanize.sh @@ -882,6 +882,10 @@ _humanize_monitor_codex() { # Handle case when no log file exists for current session if [[ -z "$current_file" ]]; then + # Track 
terminal dimensions to detect resize (fallback for SIGWINCH) + local centered_last_cols=$(tput cols) + local centered_last_rows=$(tput lines) + # Render centered no-log message if status changed or not yet shown if [[ "$last_no_log_status" != "$current_loop_status" ]]; then if [[ "$current_loop_status" == "active" ]]; then @@ -903,11 +907,17 @@ _humanize_monitor_codex() { return 0 fi - # Handle terminal resize at a safe point + # Detect terminal resize via both SIGWINCH flag and actual dimension change local redraw_centered_msg=false - if [[ "$resize_needed" == "true" ]]; then + local cur_cols=$(tput cols) + local cur_rows=$(tput lines) + if [[ "$resize_needed" == "true" ]] || \ + [[ "$cur_cols" != "$centered_last_cols" ]] || \ + [[ "$cur_rows" != "$centered_last_rows" ]]; then resize_needed=false redraw_centered_msg=true + centered_last_cols="$cur_cols" + centered_last_rows="$cur_rows" # Check if terminal is too small if ! _check_terminal_size; then _display_terminal_too_small @@ -919,6 +929,8 @@ _humanize_monitor_codex() { [[ "$monitor_running" != "true" ]] && break # Terminal is now big enough, reinitialize _setup_terminal + centered_last_cols=$(tput cols) + centered_last_rows=$(tput lines) else _update_scroll_region fi @@ -1003,6 +1015,10 @@ _humanize_monitor_codex() { local log_lines=$(_get_log_area_height) tail -n "$log_lines" "$current_file" 2>/dev/null + # Track terminal dimensions to detect resize (fallback for SIGWINCH) + local follow_last_cols=$(tput cols) + local follow_last_rows=$(tput lines) + # Incremental monitoring loop while [[ "$monitor_running" == "true" ]]; do sleep 0.5 # Check more frequently for smoother output @@ -1014,9 +1030,15 @@ _humanize_monitor_codex() { return 0 fi - # Handle terminal resize at a safe point - if [[ "$resize_needed" == "true" ]]; then + # Detect terminal resize via both SIGWINCH flag and actual dimension change + local cur_cols=$(tput cols) + local cur_rows=$(tput lines) + if [[ "$resize_needed" == "true" ]] || \ + [[ 
"$cur_cols" != "$follow_last_cols" ]] || \ + [[ "$cur_rows" != "$follow_last_rows" ]]; then resize_needed=false + follow_last_cols="$cur_cols" + follow_last_rows="$cur_rows" # Check if terminal is too small if ! _check_terminal_size; then _display_terminal_too_small @@ -1028,6 +1050,8 @@ _humanize_monitor_codex() { [[ "$monitor_running" != "true" ]] && break # Terminal is now big enough, reinitialize _setup_terminal + follow_last_cols=$(tput cols) + follow_last_rows=$(tput lines) else _update_scroll_region fi From a1f3614c283ce8fba19de7104f3904bc91837ef2 Mon Sep 17 00:00:00 2001 From: Sihao Liu Date: Sun, 29 Mar 2026 10:07:23 -0700 Subject: [PATCH 41/50] Be more specific about what is a round --- commands/start-rlcr-loop.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/commands/start-rlcr-loop.md b/commands/start-rlcr-loop.md index 1b53806d..f24fb156 100644 --- a/commands/start-rlcr-loop.md +++ b/commands/start-rlcr-loop.md @@ -127,6 +127,15 @@ This command starts an iterative development loop where: 7. If code review finds issues (`[P0-9]` markers), you fix them and continue 8. When no issues are found, the loop ends with a Finalize Phase +## What Is a Round + +**One round = the agent believes the entire plan is finished.** A round boundary is when the agent writes a summary and attempts to exit, triggering Codex review. This is the fundamental semantic: + +- A round is NOT one task, one milestone, one stage, or one layer of the plan. +- If the plan has multiple stages or milestones, they are all completed within a single round before writing the round summary. +- Intermediate progress checks (e.g., verifying a stage before starting the next) should use manual `ask-codex` calls, not round boundaries. +- Only write `round-N-summary.md` and attempt to exit when you believe ALL tasks in the plan are done. 
+ ## Goal Tracker System This loop uses a **Goal Tracker** to prevent goal drift across iterations: From c045dff1c6b0a3a07eb738b0895951ad3baf6e15 Mon Sep 17 00:00:00 2001 From: gyy0592 Date: Sat, 4 Apr 2026 01:25:36 +0000 Subject: [PATCH 42/50] fix: normalize path slashes in PostToolUse hook to prevent session_id write failure When CLAUDE_PLUGIN_ROOT has a trailing slash, the command template produces double slashes (e.g. "humania//scripts/setup-rlcr-loop.sh"). The setup script normalizes its own path via cd+pwd (single slash), but tool_input.command preserves the original double-slash string. This causes the boundary-aware string match in loop-post-bash-hook.sh to always fail, so the .pending-session-id signal is never consumed and session_id is never written to state.md. With an empty session_id, find_active_loop() backward-compat logic matches any session, causing cross-instance stop hook hijacking when multiple Claude Code sessions run concurrently. Fix: normalize consecutive slashes with tr -s '/' before comparison. Fixes: https://github.com/humania-org/humanize/issues/67 Co-Authored-By: Claude Opus 4.6 (1M context) --- .claude-plugin/marketplace.json | 2 +- .claude-plugin/plugin.json | 2 +- README.md | 2 +- hooks/loop-post-bash-hook.sh | 10 ++++++++++ 4 files changed, 13 insertions(+), 3 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 5895b6ed..781c5e4c 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -8,7 +8,7 @@ "name": "humanize", "source": "./", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. 
Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.15.1" + "version": "1.15.2" } ] } diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index 671561ce..93e1b789 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "humanize", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.15.1", + "version": "1.15.2", "author": { "name": "humania-org" }, diff --git a/README.md b/README.md index bf969241..7e46527b 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Humanize -**Current Version: 1.15.1** +**Current Version: 1.15.2** > Derived from the [GAAC (GitHub-as-a-Context)](https://github.com/SihaoLiu/gaac) project. diff --git a/hooks/loop-post-bash-hook.sh b/hooks/loop-post-bash-hook.sh index eeed7bde..4dee0e7f 100755 --- a/hooks/loop-post-bash-hook.sh +++ b/hooks/loop-post-bash-hook.sh @@ -67,6 +67,16 @@ if [[ -n "$COMMAND_SIGNATURE" ]]; then exit 0 fi + # Normalize consecutive slashes (e.g. "humania//scripts" -> "humania/scripts"). + # CLAUDE_PLUGIN_ROOT may have a trailing slash, producing double slashes when + # concatenated with "/scripts/..." in the command template. The setup script + # normalizes its own path via cd+pwd (removing double slashes), but the + # tool_input.command preserves the original string. Without normalization, + # the string comparison below always fails and session_id is never written. + # See: https://github.com/humania-org/humanize/issues/67 + HOOK_COMMAND=$(printf '%s' "$HOOK_COMMAND" | tr -s '/') + COMMAND_SIGNATURE=$(printf '%s' "$COMMAND_SIGNATURE" | tr -s '/') + # Boundary-aware match: command must be a valid setup invocation form. 
# Requires the script path to be followed by end-of-string or any POSIX # whitespace ([[:space:]]), preventing concatenated forms. From 707097c52da364381addd9b34d47ca3b183a7f63 Mon Sep 17 00:00:00 2001 From: gyy0592 Date: Sat, 4 Apr 2026 18:35:28 +0000 Subject: [PATCH 43/50] revert: remove version bump, keep only hook fix Co-Authored-By: Claude Opus 4.6 (1M context) --- .claude-plugin/marketplace.json | 2 +- .claude-plugin/plugin.json | 2 +- README.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 781c5e4c..5895b6ed 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -8,7 +8,7 @@ "name": "humanize", "source": "./", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.15.2" + "version": "1.15.1" } ] } diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index 93e1b789..671561ce 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "humanize", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.15.2", + "version": "1.15.1", "author": { "name": "humania-org" }, diff --git a/README.md b/README.md index 7e46527b..bf969241 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Humanize -**Current Version: 1.15.2** +**Current Version: 1.15.1** > Derived from the [GAAC (GitHub-as-a-Context)](https://github.com/SihaoLiu/gaac) project. 
From 79714b73d50e696593b23fccf289974c450a5daa Mon Sep 17 00:00:00 2001 From: Ligeng Zhu Date: Fri, 10 Apr 2026 02:28:58 +0800 Subject: [PATCH 44/50] Add support for multiple comment formats in refine-plan Extends comment parsing to support three formats: - Classic: CMT:/ENDCMT (existing) - Short tag: <cmt></cmt> (new) - Long tag: <comment></comment> (new) All formats support inline and multi-line usage and can be mixed within the same file. Updated documentation and error messages to be format-agnostic. Co-Authored-By: Claude Sonnet 4 --- README.md | 2 +- commands/refine-plan.md | 43 +++-- docs/usage.md | 38 ++++- scripts/validate-refine-plan-io.sh | 261 ++++++++++++++++++++--------- 4 files changed, 246 insertions(+), 98 deletions(-) diff --git a/README.md b/README.md index bf969241..aba201ac 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ Requires [codex CLI](https://github.com/openai/codex) for review. See the full [ /humanize:gen-plan --input draft.md --output docs/plan.md ``` -2. **Refine an annotated plan** before implementation when reviewers add `CMT:` ... `ENDCMT` comments: +2. **Refine an annotated plan** before implementation when reviewers add comments (`CMT:` ... `ENDCMT`, `<cmt>` ... `</cmt>`, or `<comment>` ... `</comment>`): ```bash /humanize:refine-plan --input docs/plan.md ``` diff --git a/commands/refine-plan.md b/commands/refine-plan.md index fc129016..0d97142f 100644 --- a/commands/refine-plan.md +++ b/commands/refine-plan.md @@ -34,7 +34,7 @@ The refined plan MUST reuse the existing `gen-plan` schema. Do not invent new to 1. **Execution Mode Setup**: Parse CLI arguments and derive output paths 2. **Load Project Config**: Resolve `alternative_plan_language` and mode defaults using `config-loader.sh` semantics 3. **IO Validation**: Run `validate-refine-plan-io.sh` -4. **Comment Extraction**: Scan the annotated plan and extract valid `CMT:` / `ENDCMT` blocks +4. **Comment Extraction**: Scan the annotated plan and extract valid comment blocks (`CMT:`/`ENDCMT`, `<cmt>`/`</cmt>`, `<comment>`/`</comment>`) 5. 
**Comment Classification**: Classify each extracted comment for downstream handling 6. **Comment Processing**: Answer questions, apply requested plan edits, and perform targeted research 7. **Plan Refinement**: Produce the comment-free refined plan while preserving the `gen-plan` structure @@ -167,7 +167,7 @@ Handle exit codes exactly: - Exit code 0: Continue to Phase 2 - Exit code 1: Report `Input file not found` and stop - Exit code 2: Report `Input file is empty` and stop -- Exit code 3: Report `Input file has no CMT:/ENDCMT blocks` and stop +- Exit code 3: Report `Input file has no comment blocks` and stop - Exit code 4: Report `Input file is missing required gen-plan sections` and stop - Exit code 5: Report `Output directory does not exist or is not writable - please fix it` and stop - Exit code 6: Report `QA directory is not writable` and stop @@ -196,17 +196,32 @@ Track these states while scanning the validated input in document order: Extraction rules: -1. Recognize `CMT:` as the start marker and `ENDCMT` as the end marker. -2. Support both inline and multi-line blocks: +1. Support three comment formats: + - Classic: `CMT:` as start marker and `ENDCMT` as end marker + - Short tag: `<cmt>` as start marker and `</cmt>` as end marker + - Long tag: `<comment>` as start marker and `</comment>` as end marker +2. Support both inline and multi-line blocks for all formats: - Inline: `Text before CMT: comment text ENDCMT text after` + - Inline: `Text before <cmt>comment text</cmt> text after` + - Inline: `Text before <comment>comment text</comment> text after` - Multi-line: ```markdown CMT: comment text ENDCMT ``` -3. Ignore `CMT:` and `ENDCMT` sequences inside fenced code blocks. -4. Ignore `CMT:` and `ENDCMT` sequences inside HTML comments. + ```markdown + <cmt> + comment text + </cmt> + ``` + ```markdown + <comment> + comment text + </comment> + ``` +3. Ignore comment markers inside fenced code blocks. +4. Ignore comment markers inside HTML comments. 5. 
Update `NEAREST_HEADING` whenever a Markdown heading is encountered outside fenced code and HTML comments. 6. Preserve surrounding non-comment text when removing inline comment blocks from the working plan text. 7. Assign raw comment IDs in document order as `CMT-1`, `CMT-2`, ... only for non-empty blocks. @@ -217,7 +232,7 @@ Extraction rules: For each non-empty comment block, capture: - `id` (`CMT-N`) -- `original_text` exactly as written between `CMT:` and `ENDCMT` +- `original_text` exactly as written between the comment markers - `normalized_text` with surrounding whitespace trimmed - `start_line`, `start_column` - `end_line`, `end_column` @@ -230,8 +245,8 @@ For each non-empty comment block, capture: These are fatal extraction errors: -1. Nested `CMT:` while already inside a comment block -2. `ENDCMT` encountered while not inside a comment block +1. Nested comment start marker while already inside a comment block +2. Comment end marker encountered while not inside a comment block or wrong end marker for the format 3. 
End of file reached while still inside a comment block Every fatal parse error MUST report: @@ -243,9 +258,9 @@ Every fatal parse error MUST report: Examples of acceptable messages: -- `Comment parse error: nested CMT block at line 48, column 3 near "## Acceptance Criteria" (context: "CMT: split AC-2...")` -- `Comment parse error: stray ENDCMT at line 109, column 1 near "## Task Breakdown" (context: "ENDCMT")` -- `Comment parse error: missing ENDCMT for block opened at line 72, column 5 near "## Dependencies and Sequence"` +- `Comment parse error: nested comment block at line 48, column 3 near "## Acceptance Criteria" (context: "split AC-2...")` +- `Comment parse error: stray comment end marker at line 109, column 1 near "## Task Breakdown" (context: "")` +- `Comment parse error: missing end marker for block opened at line 72, column 5 near "## Dependencies and Sequence"` ### Outputs from Phase 2 @@ -403,7 +418,7 @@ Optional sections that MUST be preserved when present in the input: ### Refinement Rules -1. Remove every resolved `CMT:` / `ENDCMT` tag and all enclosed comment text from the refined plan. +1. Remove every resolved comment marker and all enclosed comment text from the refined plan. 2. Do not add any new top-level schema section. 3. Preserve `AC-X` / `AC-X.Y` formatting. 4. Preserve task IDs unless a comment explicitly requests a structural change. @@ -429,7 +444,7 @@ Rules: Before generating the QA document, verify: 1. All required sections are still present -2. No `CMT:` or `ENDCMT` markers remain +2. No comment markers remain 3. Every referenced `AC-*` exists 4. Every task dependency references an existing task ID or `-` 5. Every task row has exactly one valid routing tag: `coding` or `analyze` diff --git a/docs/usage.md b/docs/usage.md index e12d45b9..0ef06046 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -46,7 +46,7 @@ The quiz is advisory, not a gate. You always have the option to proceed. 
But tha ```bash /humanize:gen-plan --input draft.md --output docs/plan.md ``` -2. If the plan is reviewed with `CMT:` ... `ENDCMT` annotations, refine it and generate a QA ledger: +2. If the plan is reviewed with comment annotations, refine it and generate a QA ledger: ```bash /humanize:refine-plan --input docs/plan.md ``` @@ -127,7 +127,7 @@ Workflow: 5. Generates a structured plan.md with acceptance criteria 6. Optionally starts `/humanize:start-rlcr-loop` if `--auto-start-rlcr-if-converged` conditions are met -If reviewers later annotate the generated plan with `CMT:` ... `ENDCMT` blocks, run +If reviewers later annotate the generated plan with comment blocks, run `/humanize:refine-plan --input ` before starting or resuming implementation. ### refine-plan @@ -169,9 +169,10 @@ how each comment was handled. **Annotated comment block format:** -`refine-plan` looks for reviewer comments wrapped in `CMT:` and `ENDCMT` markers. Both inline -and multi-line comment blocks are supported: +`refine-plan` supports three comment formats for reviewer annotations. Both inline +and multi-line comment blocks are supported in all formats: +**Classic format (CMT:/ENDCMT):** ```markdown Text before CMT: clarify why AC-3 is split here ENDCMT text after ``` @@ -183,11 +184,36 @@ If the dependency is unclear, add a pending decision instead of guessing. ENDCMT ``` +**Short tag format (<cmt></cmt>):** +```markdown +Text before <cmt>clarify why AC-3 is split here</cmt> text after +``` + +```markdown +<cmt> +Please investigate whether this task should depend on task4 or task5. +If the dependency is unclear, add a pending decision instead of guessing. +</cmt> +``` + +**Long tag format (<comment></comment>):** +```markdown +Text before <comment>clarify why AC-3 is split here</comment> text after +``` + +```markdown +<comment> +Please investigate whether this task should depend on task4 or task5. +If the dependency is unclear, add a pending decision instead of guessing. +</comment> +``` + Rules: -- At least one non-empty `CMT:` block must exist in the input file. 
-- `CMT:` and `ENDCMT` markers inside fenced code blocks or HTML comments are ignored. +- At least one non-empty comment block must exist in the input file. +- Comment markers inside fenced code blocks or HTML comments are ignored. - Empty comment blocks are removed but do not create QA ledger entries. - The input plan must still follow the `gen-plan` section schema. +- All three formats can be mixed within the same file. **QA output structure:** diff --git a/scripts/validate-refine-plan-io.sh b/scripts/validate-refine-plan-io.sh index f0ca70c0..34649193 100755 --- a/scripts/validate-refine-plan-io.sh +++ b/scripts/validate-refine-plan-io.sh @@ -5,7 +5,7 @@ # 0 - Success, all validations passed # 1 - Input file does not exist # 2 - Input file is empty -# 3 - Input file has no valid CMT:/ENDCMT blocks or has malformed CMT syntax +# 3 - Input file has no valid comment blocks or has malformed comment syntax # 4 - Input file missing required gen-plan sections # 5 - Output directory does not exist or is not writable, or input directory is not writable for in-place mode # 6 - QA directory not writable @@ -51,14 +51,67 @@ scan_cmt_blocks() { heading = current_heading() if (kind == "nested") { - printf "Comment parse error: nested CMT block at line %d, column %d near \"%s\" (context: \"%s\")\n", line_num, column, heading, excerpt > "/dev/stderr" + printf "Comment parse error: nested comment block at line %d, column %d near \"%s\" (context: \"%s\")\n", line_num, column, heading, excerpt > "/dev/stderr" } else if (kind == "stray_end") { - printf "Comment parse error: stray ENDCMT at line %d, column %d near \"%s\" (context: \"%s\")\n", line_num, column, heading, excerpt > "/dev/stderr" + printf "Comment parse error: stray comment end marker at line %d, column %d near \"%s\" (context: \"%s\")\n", line_num, column, heading, excerpt > "/dev/stderr" } exit fatal_code } + function find_comment_markers(text, start_pos, markers, i, pos, min_pos, closest_marker, closest_pos) { + # 
Initialize markers array + markers["CMT:"] = "classic_start" + markers["<cmt>"] = "cmt_tag_start" + markers["<comment>"] = "comment_tag_start" + markers["ENDCMT"] = "classic_end" + markers["</cmt>"] = "cmt_tag_end" + markers["</comment>"] = "comment_tag_end" + markers["<!--"] = "html_start" + markers["-->"] = "html_end" + + closest_marker = "" + closest_pos = 0 + min_pos = length(text) + 1 + + for (marker in markers) { + pos = index(substr(text, start_pos), marker) + if (pos > 0) { + pos = start_pos + pos - 1 + if (pos < min_pos) { + min_pos = pos + closest_marker = marker + closest_pos = pos + } + } + } + + if (closest_marker == "") { + return "" + } else { + return closest_marker ":" closest_pos + } + } + + function get_end_marker_for_format(format) { + if (format == "classic") return "ENDCMT" + if (format == "cmt_tag") return "</cmt>" + if (format == "comment_tag") return "</comment>" + return "" + } + + function get_marker_length(marker) { + if (marker == "CMT:") return 4 + if (marker == "<cmt>") return 5 + if (marker == "<comment>") return 9 + if (marker == "ENDCMT") return 6 + if (marker == "</cmt>") return 6 + if (marker == "</comment>") return 10 + if (marker == "<!--") return 4 + if (marker == "-->") return 3 + return 0 + } + BEGIN { count = 0 in_fence = 0 @@ -71,6 +124,7 @@ scan_cmt_blocks() { cmt_open_heading = "Preamble" cmt_open_excerpt = "" cmt_has_text = 0 + cmt_format = "" # Track format: "classic", "cmt_tag", "comment_tag" fatal = 0 fatal_code = 0 } @@ -126,26 +180,8 @@ scan_cmt_blocks() { } if (in_cmt) { - html_rel = index(rest, " --- +{{COMMIT_HISTORY_SECTION}} + ## Part 1: Goal Tracker Audit (MANDATORY) Read @{{GOAL_TRACKER_FILE}} and verify: diff --git a/prompt-template/codex/regular-review.md b/prompt-template/codex/regular-review.md index 7db26ea2..4d4a8680 100644 --- a/prompt-template/codex/regular-review.md +++ b/prompt-template/codex/regular-review.md @@ -17,6 +17,8 @@ Below is Claude's summary of the work completed: --- +{{COMMIT_HISTORY_SECTION}} + ## Part 1: Implementation Review - Your task is to conduct a deep critical review, focusing on finding implementation issues and 
identifying gaps between "plan-design" and actual implementation. diff --git a/tests/test-commit-history-section.sh b/tests/test-commit-history-section.sh new file mode 100755 index 00000000..7177f5e8 --- /dev/null +++ b/tests/test-commit-history-section.sh @@ -0,0 +1,271 @@ +#!/usr/bin/env bash +# +# Test script for the Integral (I) component: commit-history-section +# +# Validates: +# 1. Round 0: "(no commits yet)" and "(first round, no prior history)" +# 2. Round 2+: commit log and round file references rendered correctly +# 3. Corrupted BASE_COMMIT: graceful fallback with annotation +# 4. Template missing: fallback renders the full section including round files +# + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +source "$SCRIPT_DIR/test-helpers.sh" +source "$PROJECT_ROOT/hooks/lib/template-loader.sh" + +TEMPLATE_DIR="$PROJECT_ROOT/prompt-template" + +echo "========================================" +echo "Testing commit-history-section (I component)" +echo "========================================" +echo "" + +# ======================================== +# Setup: create a temporary git repo +# ======================================== +setup_test_dir +init_test_git_repo "$TEST_DIR/repo" + +# ======================================== +# Test 1: Round 0 - no commits since base, first round +# ======================================== +echo "Test 1: Round 0 - no commits, first round" + +CURRENT_ROUND=0 +BASE_COMMIT=$(git -C "$TEST_DIR/repo" rev-parse HEAD) + +# No commits since BASE_COMMIT..HEAD (same commit) +COMMIT_HISTORY=$(git -C "$TEST_DIR/repo" log --oneline --no-decorate --reverse "$BASE_COMMIT"..HEAD 2>/dev/null | tail -80) +[[ -z "$COMMIT_HISTORY" ]] && COMMIT_HISTORY="(no commits yet)" + +RECENT_ROUND_FILES="" +LOOP_TIMESTAMP="2026-01-01_00-00-00" +for (( r = CURRENT_ROUND - 1; r >= 0 && r >= CURRENT_ROUND - 3; r-- )); do + RECENT_ROUND_FILES+="- 
@.humanize/rlcr/${LOOP_TIMESTAMP}/round-${r}-summary.md +- @.humanize/rlcr/${LOOP_TIMESTAMP}/round-${r}-review-result.md +" +done +[[ -z "$RECENT_ROUND_FILES" ]] && RECENT_ROUND_FILES="(first round, no prior history)" + +RESULT=$(load_and_render_safe "$TEMPLATE_DIR" "codex/commit-history-section.md" "FALLBACK" \ + "COMMIT_HISTORY=$COMMIT_HISTORY" \ + "RECENT_ROUND_FILES=$RECENT_ROUND_FILES") + +if echo "$RESULT" | grep -q "(no commits yet)" && echo "$RESULT" | grep -q "(first round, no prior history)"; then + pass "Round 0 shows correct placeholders" +else + fail "Round 0 placeholders" "(no commits yet) and (first round, no prior history)" "$RESULT" +fi + +# ======================================== +# Test 2: Round 3 - with commits and round history +# ======================================== +echo "" +echo "Test 2: Round 3 - commits and round file references" + +# Make some commits +cd "$TEST_DIR/repo" +echo "feat1" > feat1.txt && git add feat1.txt && git commit -q -m "feat: add feature 1" +echo "feat2" > feat2.txt && git add feat2.txt && git commit -q -m "feat: add feature 2" +echo "fix1" > fix1.txt && git add fix1.txt && git commit -q -m "fix: resolve bug in feature 1" +cd - > /dev/null + +CURRENT_ROUND=3 +COMMIT_HISTORY=$(git -C "$TEST_DIR/repo" log --oneline --no-decorate --reverse "$BASE_COMMIT"..HEAD 2>/dev/null | tail -80) +[[ -z "$COMMIT_HISTORY" ]] && COMMIT_HISTORY="(no commits yet)" + +RECENT_ROUND_FILES="" +for (( r = CURRENT_ROUND - 1; r >= 0 && r >= CURRENT_ROUND - 3; r-- )); do + RECENT_ROUND_FILES+="- @.humanize/rlcr/${LOOP_TIMESTAMP}/round-${r}-summary.md +- @.humanize/rlcr/${LOOP_TIMESTAMP}/round-${r}-review-result.md +" +done +[[ -z "$RECENT_ROUND_FILES" ]] && RECENT_ROUND_FILES="(first round, no prior history)" + +RESULT=$(load_and_render_safe "$TEMPLATE_DIR" "codex/commit-history-section.md" "FALLBACK" \ + "COMMIT_HISTORY=$COMMIT_HISTORY" \ + "RECENT_ROUND_FILES=$RECENT_ROUND_FILES") + +HAS_COMMITS=true +HAS_ROUNDS=true + +echo "$RESULT" | 
grep -q "feat: add feature 1" || HAS_COMMITS=false +echo "$RESULT" | grep -q "feat: add feature 2" || HAS_COMMITS=false +echo "$RESULT" | grep -q "fix: resolve bug in feature 1" || HAS_COMMITS=false + +echo "$RESULT" | grep -q "round-2-summary.md" || HAS_ROUNDS=false +echo "$RESULT" | grep -q "round-1-summary.md" || HAS_ROUNDS=false +echo "$RESULT" | grep -q "round-0-summary.md" || HAS_ROUNDS=false +echo "$RESULT" | grep -q "round-2-review-result.md" || HAS_ROUNDS=false + +if [[ "$HAS_COMMITS" == "true" ]]; then + pass "Round 3 shows all 3 commits" +else + fail "Round 3 commits" "3 commit messages" "$RESULT" +fi + +if [[ "$HAS_ROUNDS" == "true" ]]; then + pass "Round 3 shows round 0-2 file references" +else + fail "Round 3 round files" "round-0/1/2 summary and review files" "$RESULT" +fi + +# ======================================== +# Test 3: Corrupted BASE_COMMIT - nonexistent object +# ======================================== +echo "" +echo "Test 3: Corrupted BASE_COMMIT graceful fallback" + +BAD_COMMIT="deadbeefdeadbeefdeadbeefdeadbeefdeadbeef" + +# Simulate the exact logic from the stop hook (merge-base --is-ancestor) +if [[ -n "$BAD_COMMIT" ]] && git -C "$TEST_DIR/repo" merge-base --is-ancestor "$BAD_COMMIT" HEAD 2>/dev/null; then + COMMIT_HISTORY=$(git -C "$TEST_DIR/repo" log --oneline --no-decorate --reverse "$BAD_COMMIT"..HEAD 2>/dev/null | tail -80) +else + COMMIT_HISTORY=$(git -C "$TEST_DIR/repo" log --oneline --no-decorate --reverse -30 2>/dev/null) + [[ -n "$COMMIT_HISTORY" ]] && COMMIT_HISTORY="(base commit unavailable, showing recent branch commits) +${COMMIT_HISTORY}" +fi +[[ -z "$COMMIT_HISTORY" ]] && COMMIT_HISTORY="(no commits yet)" + +if echo "$COMMIT_HISTORY" | grep -q "base commit unavailable"; then + pass "Corrupted BASE_COMMIT triggers annotation" +else + fail "Corrupted BASE_COMMIT annotation" "base commit unavailable" "$COMMIT_HISTORY" +fi + +if echo "$COMMIT_HISTORY" | grep -q "feat: add feature"; then + pass "Corrupted BASE_COMMIT still 
shows recent commits" +else + fail "Corrupted BASE_COMMIT recent commits" "recent branch commits" "$COMMIT_HISTORY" +fi + +# Verify no crash (we got here = no set -e crash) +pass "Corrupted BASE_COMMIT did not crash (set -e safe)" + +# ======================================== +# Test 3b: Valid but unrelated commit (not ancestor of HEAD) +# ======================================== +echo "" +echo "Test 3b: Valid but unrelated BASE_COMMIT (orphan branch)" + +# Create an orphan branch with its own commit, then switch back +cd "$TEST_DIR/repo" +ORIG_BRANCH=$(git rev-parse --abbrev-ref HEAD) +git checkout -q --orphan orphan-test +echo "orphan" > orphan.txt && git add orphan.txt && git commit -q -m "orphan commit" +ORPHAN_COMMIT=$(git rev-parse HEAD) +git checkout -q "$ORIG_BRANCH" +cd - > /dev/null + +# ORPHAN_COMMIT exists but is NOT an ancestor of HEAD +if [[ -n "$ORPHAN_COMMIT" ]] && git -C "$TEST_DIR/repo" merge-base --is-ancestor "$ORPHAN_COMMIT" HEAD 2>/dev/null; then + COMMIT_HISTORY="should not reach here" +else + COMMIT_HISTORY=$(git -C "$TEST_DIR/repo" log --oneline --no-decorate --reverse -30 2>/dev/null) + [[ -n "$COMMIT_HISTORY" ]] && COMMIT_HISTORY="(base commit unavailable, showing recent branch commits) +${COMMIT_HISTORY}" +fi +[[ -z "$COMMIT_HISTORY" ]] && COMMIT_HISTORY="(no commits yet)" + +if echo "$COMMIT_HISTORY" | grep -q "base commit unavailable"; then + pass "Unrelated valid commit triggers annotation" +else + fail "Unrelated valid commit annotation" "base commit unavailable" "$COMMIT_HISTORY" +fi + +# ======================================== +# Test 4: Missing template - fallback renders full section +# ======================================== +echo "" +echo "Test 4: Missing template fallback renders full section" + +CURRENT_ROUND=2 +COMMIT_HISTORY=$(git -C "$TEST_DIR/repo" log --oneline --no-decorate --reverse "$BASE_COMMIT"..HEAD 2>/dev/null | tail -80) + +RECENT_ROUND_FILES="" +for (( r = CURRENT_ROUND - 1; r >= 0 && r >= CURRENT_ROUND - 3; 
r-- )); do + RECENT_ROUND_FILES+="- @.humanize/rlcr/${LOOP_TIMESTAMP}/round-${r}-summary.md +- @.humanize/rlcr/${LOOP_TIMESTAMP}/round-${r}-review-result.md +" +done + +# Use the exact fallback format from the stop hook +COMMIT_HISTORY_SECTION_FALLBACK="## Development History (Integral Context) +\`\`\` +${COMMIT_HISTORY} +\`\`\` +### Recent Round Files +Read these files before conducting your review to understand the trajectory of work: +${RECENT_ROUND_FILES}" + +# Point to a non-existent template to force fallback +RESULT=$(load_and_render_safe "$TEMPLATE_DIR" "codex/non-existent-template.md" "$COMMIT_HISTORY_SECTION_FALLBACK" \ + "COMMIT_HISTORY=$COMMIT_HISTORY" \ + "RECENT_ROUND_FILES=$RECENT_ROUND_FILES") + +FALLBACK_OK=true +echo "$RESULT" | grep -q "Development History" || FALLBACK_OK=false +echo "$RESULT" | grep -q "feat: add feature 1" || FALLBACK_OK=false +echo "$RESULT" | grep -q "Recent Round Files" || FALLBACK_OK=false +echo "$RESULT" | grep -q "round-1-summary.md" || FALLBACK_OK=false +echo "$RESULT" | grep -q "round-0-review-result.md" || FALLBACK_OK=false +echo "$RESULT" | grep -q "Read these files" || FALLBACK_OK=false + +if [[ "$FALLBACK_OK" == "true" ]]; then + pass "Fallback renders full section with commits, round files, and directive" +else + fail "Fallback full section" "commits + round files + directive" "$RESULT" +fi + +# ======================================== +# Test 5: Round 1 - only 1 prior round (boundary) +# ======================================== +echo "" +echo "Test 5: Round 1 - only 1 prior round" + +CURRENT_ROUND=1 +RECENT_ROUND_FILES="" +for (( r = CURRENT_ROUND - 1; r >= 0 && r >= CURRENT_ROUND - 3; r-- )); do + RECENT_ROUND_FILES+="- @.humanize/rlcr/${LOOP_TIMESTAMP}/round-${r}-summary.md +- @.humanize/rlcr/${LOOP_TIMESTAMP}/round-${r}-review-result.md +" +done +[[ -z "$RECENT_ROUND_FILES" ]] && RECENT_ROUND_FILES="(first round, no prior history)" + +if echo "$RECENT_ROUND_FILES" | grep -q "round-0-summary.md" && \ + ! 
echo "$RECENT_ROUND_FILES" | grep -q "round-1-"; then + pass "Round 1 references only round 0" +else + fail "Round 1 boundary" "only round-0 references" "$RECENT_ROUND_FILES" +fi + +# ======================================== +# Test 6: Empty BASE_COMMIT (legacy loop) +# ======================================== +echo "" +echo "Test 6: Empty BASE_COMMIT fallback" + +EMPTY_BASE="" +if [[ -n "$EMPTY_BASE" ]] && git -C "$TEST_DIR/repo" merge-base --is-ancestor "$EMPTY_BASE" HEAD 2>/dev/null; then + COMMIT_HISTORY="should not reach here" +else + COMMIT_HISTORY=$(git -C "$TEST_DIR/repo" log --oneline --no-decorate --reverse -30 2>/dev/null) + [[ -n "$COMMIT_HISTORY" ]] && COMMIT_HISTORY="(base commit unavailable, showing recent branch commits) +${COMMIT_HISTORY}" +fi +[[ -z "$COMMIT_HISTORY" ]] && COMMIT_HISTORY="(no commits yet)" + +if echo "$COMMIT_HISTORY" | grep -q "base commit unavailable"; then + pass "Empty BASE_COMMIT triggers annotation" +else + fail "Empty BASE_COMMIT annotation" "base commit unavailable" "$COMMIT_HISTORY" +fi + +# ======================================== +# Summary +# ======================================== +print_test_summary "Commit History Section (I Component) Tests"