From f05e5df13c79e37ca2f60c25bf007d4f8bdc95e9 Mon Sep 17 00:00:00 2001 From: Paul Duvall Date: Sun, 30 Nov 2025 14:52:22 -0500 Subject: [PATCH] feat: add Test Promotion pattern with explicit threat model MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add new experimental pattern addressing the "self-grading student" problem where AI can weaken tests to pass buggy code. Pattern Features: - Immutable golden tests (tests/golden/) vs mutable generated tests - Defense-in-depth enforcement with explicit threat model - CI/CD git diff detection as primary enforcement - CODEOWNERS as final human approval gate Key Insight: File permissions (444) alone are INSUFFICIENT - AI with bash access can bypass using chmod. The pattern explicitly documents this limitation and relies on CI/CD + CODEOWNERS as primary enforcement mechanisms. Complete Implementation: - Pattern documentation in experiments/README.md - Working example in experiments/examples/test-promotion/ - Promotion workflow scripts with quality checklist - CI/CD enforcement blocking golden test modifications - AI hooks providing defense-in-depth - CODEOWNERS requiring human approval - THREAT-MODEL.md documenting all attack vectors Anti-patterns: - Mutable Baselines: Allowing AI to modify tests - Permission-Only Protection: Relying solely on file permissions Related Patterns: Testing Orchestration, Spec-Driven Development, Suite Health πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- experiments/README.md | 135 ++++++++++ .../.ai/hooks/protect-golden.sh | 31 +++ .../test-promotion/.github/CODEOWNERS | 17 ++ .../.github/workflows/test-protection.yml | 97 +++++++ experiments/examples/test-promotion/README.md | 249 ++++++++++++++++++ .../examples/test-promotion/THREAT-MODEL.md | 174 ++++++++++++ .../examples/test-promotion/conftest.py | 14 + .../examples/test-promotion/requirements.txt | 2 + .../scripts/enforce-permissions.sh | 20 ++ 
.../test-promotion/scripts/promote-test.sh | 129 +++++++++ .../examples/test-promotion/src/__init__.py | 1 + .../examples/test-promotion/src/payment.py | 58 ++++ .../tests/golden/test_payment.py | 41 +++ 13 files changed, 968 insertions(+) create mode 100755 experiments/examples/test-promotion/.ai/hooks/protect-golden.sh create mode 100644 experiments/examples/test-promotion/.github/CODEOWNERS create mode 100644 experiments/examples/test-promotion/.github/workflows/test-protection.yml create mode 100644 experiments/examples/test-promotion/README.md create mode 100644 experiments/examples/test-promotion/THREAT-MODEL.md create mode 100644 experiments/examples/test-promotion/conftest.py create mode 100644 experiments/examples/test-promotion/requirements.txt create mode 100755 experiments/examples/test-promotion/scripts/enforce-permissions.sh create mode 100755 experiments/examples/test-promotion/scripts/promote-test.sh create mode 100644 experiments/examples/test-promotion/src/__init__.py create mode 100644 experiments/examples/test-promotion/src/payment.py create mode 100644 experiments/examples/test-promotion/tests/golden/test_payment.py diff --git a/experiments/README.md b/experiments/README.md index 3fa1965..5d3a47c 100644 --- a/experiments/README.md +++ b/experiments/README.md @@ -25,6 +25,7 @@ These experimental patterns extend the core AI development patterns with advance | **[Release Synthesis](#release-synthesis)** | Beginner | Operations | Automatically generate structured release notes by analyzing git commit history | Pipeline Synthesis | | **[Incident Automation](#incident-automation)** | Advanced | Operations | Generate actionable incident response playbooks from historical incident data | Baseline Management | | **[Suite Health](#suite-health)** | Intermediate | Operations | Analyze build history to identify and remediate flaky tests automatically | Testing Orchestration | +| **[Test Promotion](#test-promotion)** | Intermediate | Development | Separate 
AI-generated tests from immutable golden tests to prevent AI from weakening test assertions | Testing Orchestration, Spec-Driven Development | | **[Upgrade Advisor](#upgrade-advisor)** | Intermediate | Operations | Intelligently manage dependency upgrades with compatibility analysis and risk assessment | Debt Forecasting | | **[Handoff Automation](#handoff-automation)** | Intermediate | Operations | Generate comprehensive handoff briefs that summarize system state and active issues | Incident Automation | | **[Chaos Engineering](#chaos-engineering)** | Advanced | Operations | Generate targeted chaos experiments based on system architecture and dependencies | Baseline Management | @@ -1325,6 +1326,140 @@ Accepting unreliable tests as normal instead of systematically identifying and f --- +### Test Promotion + +**Maturity**: Intermediate +**Description**: Separate AI-generated tests from immutable golden tests to prevent AI from weakening test assertions, with human-approved promotion ensuring only validated tests become behavioral contracts. + +**Related Patterns**: [Testing Orchestration](#testing-orchestration), [Spec-Driven Development](../README.md#spec-driven-development), [Suite Health](#suite-health) + +**Core Problem** + +When AI generates both code AND tests, it can make tests pass by weakening themβ€”the "self-grading student" problem. This applies to all AI code generation: new features, bug fixes, refactoring, or any implementation task. + +**Test Separation Architecture** + +``` +tests/ +β”œβ”€β”€ golden/ # Immutable (444 permissions) - AI blocked +β”‚ β”œβ”€β”€ auth/ +β”‚ β”‚ └── test_jwt_validation.py +β”‚ └── api/ +β”‚ └── test_payment.py +└── generated/ # Mutable - AI can freely generate/modify + β”œβ”€β”€ test_edge_cases.py + └── test_new_feature.py +``` + +**Defense-in-Depth Enforcement** + +The pattern uses multiple enforcement layers because **file permissions alone are insufficient** - AI with bash access could bypass them with `chmod`. 
+ +```bash +# Layer 1: File permissions (prevents accidental edits) +chmod 444 tests/golden/**/*.py +# ⚠️ NOT SUFFICIENT: AI can run "chmod 644" via Bash to bypass + +# Layer 2: AI hooks (blocks Edit/Write tools) +# .ai/hooks/protect-golden.sh +[[ "$TOOL_INPUT_FILE_PATH" =~ ^tests/golden/ ]] && exit 2 # BLOCK +# ⚠️ NOT SUFFICIENT: AI can still modify via Bash commands + +# Layer 3: CI/CD enforcement (detects ANY git diff) +git diff --name-only origin/main...HEAD | grep '^tests/golden/' && { + echo "❌ BLOCKED: Golden tests cannot be modified" + exit 1 +} +# βœ… RELIABLE: Catches all modifications regardless of method + +# Layer 4: CODEOWNERS (requires human approval) +# .github/CODEOWNERS +tests/golden/** @tech-leads @qa-leads +# βœ… RELIABLE: Human gate prevents merge even if AI commits changes +``` + +**Threat Model:** +- **Accidental Edit**: Blocked by file permissions (444) +- **AI Edit/Write Tool**: Blocked by AI hooks +- **AI Bash Bypass**: Detected by CI/CD git diff check +- **Committed Changes**: Blocked by CODEOWNERS requiring human approval + +**Primary Enforcement**: CI/CD + CODEOWNERS, not file permissions. 
+ +**Promotion Workflow** + +```bash +# AI generates test freely in tests/generated/ +ai "Write payment idempotency test in tests/generated/test_payment.py" + +# Human reviews and promotes +./scripts/promote-test.sh tests/generated/test_payment.py +# β†’ Runs pytest validation +# β†’ Interactive quality checklist +# β†’ Copies to tests/golden/ with 444 permissions +# β†’ Creates promotion PR requiring 2+ approvals +``` + +**Example: Golden Test Protection** + +```python +# AI generates test freely +# tests/generated/test_new_feature.py +def test_payment_idempotency(): + """Payment processing should prevent duplicate charges.""" + process_payment(id="123", amount=100) + with pytest.raises(DuplicateTransactionError): + process_payment(id="123", amount=100) + +# Human reviews β†’ promotes to golden +# tests/golden/test_payment.py (444 perms, AI blocked) +``` + +**Complete Implementation** + +See [examples/test-promotion/](examples/test-promotion/) for: +- Complete promotion workflow scripts +- CI/CD enforcement configuration +- AI protection hooks +- Example application demonstrating the pattern + +**Anti-pattern: Mutable Baselines** + +Allowing AI to modify existing tests to make its code pass, removing critical assertions. + +```python +# BEFORE (correct test): +def test_payment_idempotency(): + process_payment(id="123", amount=100) + with pytest.raises(DuplicateTransactionError): + process_payment(id="123", amount=100) + +# AFTER AI weakens test to pass buggy code: +def test_payment_idempotency(): + process_payment(id="123", amount=100) + process_payment(id="123", amount=100) # No error check! + # BUG: Allows double-charging customers in production +``` + +Without immutable golden tests, AI can weaken assertions to make failing tests pass, eliminating regression protection. + +**Anti-pattern: Permission-Only Protection** + +Relying solely on file permissions (444) without CI/CD enforcement. 
+ +```bash +# INSUFFICIENT: AI can bypass via Bash +chmod 444 tests/golden/** # AI runs: chmod 644 && edit && chmod 444 + +# REQUIRED: CI/CD + CODEOWNERS as primary enforcement +git diff tests/golden/ β†’ CI blocks merge +tests/golden/** β†’ CODEOWNERS requires human approval +``` + +File permissions provide defense-in-depth but are not sufficient alone. CI/CD git diff detection and CODEOWNERS are the primary enforcement mechanisms. + +--- + ### Upgrade Advisor **Maturity**: Intermediate diff --git a/experiments/examples/test-promotion/.ai/hooks/protect-golden.sh b/experiments/examples/test-promotion/.ai/hooks/protect-golden.sh new file mode 100755 index 0000000..eb21b17 --- /dev/null +++ b/experiments/examples/test-promotion/.ai/hooks/protect-golden.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# AI Protection Hook for Golden Tests +# Blocks AI tools from modifying immutable golden tests + +# This hook executes before Edit/Write tool use +# Exit code 0 = ALLOW +# Exit code 2 = BLOCK + +FILE="$TOOL_INPUT_FILE_PATH" +TOOL="$TOOL_NAME" + +# Block any Edit or Write operations on tests/golden/** +if [[ "$FILE" =~ ^tests/golden/ ]] && [[ "$TOOL" =~ (Edit|Write) ]]; then + echo "❌ BLOCKED: Golden tests are immutable" + echo "" + echo " File: $FILE" + echo " Tool: $TOOL" + echo "" + echo "Golden tests are read-only behavioral contracts." + echo "AI cannot modify these files to prevent weakening assertions." + echo "" + echo "Instead:" + echo " 1. Create test in tests/generated/$( basename "$FILE")" + echo " 2. Run and validate the test" + echo " 3. 
Ask human to promote: ./scripts/promote-test.sh tests/generated/$(basename "$FILE")" + echo "" + exit 2 # BLOCK +fi + +# Allow all other operations +exit 0 # ALLOW diff --git a/experiments/examples/test-promotion/.github/CODEOWNERS b/experiments/examples/test-promotion/.github/CODEOWNERS new file mode 100644 index 0000000..e15a75c --- /dev/null +++ b/experiments/examples/test-promotion/.github/CODEOWNERS @@ -0,0 +1,17 @@ +# Golden Test Protection via CODEOWNERS +# +# This is the PRIMARY enforcement mechanism for immutable golden tests. +# File permissions (444) and AI hooks provide defense-in-depth but can be +# bypassed by AI using bash commands like chmod. +# +# CODEOWNERS ensures that ANY modification to tests/golden/** requires +# explicit human approval, regardless of how the change was made. + +# Golden tests require approval from tech leads and QA leads +tests/golden/** @tech-leads @qa-leads + +# Promotion workflow changes also require approval +scripts/promote-test.sh @tech-leads + +# Note: Replace @tech-leads and @qa-leads with actual GitHub team names +# or individual usernames (e.g., @alice @bob) diff --git a/experiments/examples/test-promotion/.github/workflows/test-protection.yml b/experiments/examples/test-promotion/.github/workflows/test-protection.yml new file mode 100644 index 0000000..9154ab2 --- /dev/null +++ b/experiments/examples/test-promotion/.github/workflows/test-protection.yml @@ -0,0 +1,97 @@ +name: Golden Test Protection + +on: + pull_request: + types: [opened, synchronize, reopened] + push: + branches: + - main + - master + +jobs: + protect-golden-tests: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 # Full history for diff + + - name: Detect golden test modifications + run: | + # Get list of modified files in this PR/push + if [ "${{ github.event_name }}" = "pull_request" ]; then + BASE="${{ github.event.pull_request.base.sha }}" + HEAD="${{ 
github.event.pull_request.head.sha }}" + else + BASE="${{ github.event.before }}" + HEAD="${{ github.sha }}" + fi + + echo "Checking for modifications to tests/golden/..." + MODIFIED_GOLDEN=$(git diff --name-only "$BASE...$HEAD" | grep '^tests/golden/' || true) + + if [ -n "$MODIFIED_GOLDEN" ]; then + echo "❌ BLOCKED: Golden tests cannot be modified directly" + echo "" + echo "Modified golden tests:" + echo "$MODIFIED_GOLDEN" + echo "" + echo "Golden tests are immutable behavioral contracts." + echo "To update tests, use the promotion workflow:" + echo " 1. Create/modify test in tests/generated/" + echo " 2. Run: ./scripts/promote-test.sh tests/generated/" + echo " 3. Create PR with 'test-promotion' label" + echo " 4. Require 2+ approvals" + echo "" + echo "For test removal or modification, consult team lead." + exit 1 + fi + + echo "βœ… No golden test modifications detected" + + - name: Validate promotion PRs + if: github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'test-promotion') + run: | + echo "πŸ” Validating test promotion PR..." + + # Check that ONLY golden tests were added (not modified) + ADDED=$(git diff --name-status "${{ github.event.pull_request.base.sha }}...${{ github.event.pull_request.head.sha }}" | grep '^A' | grep 'tests/golden/' || true) + MODIFIED=$(git diff --name-status "${{ github.event.pull_request.base.sha }}...${{ github.event.pull_request.head.sha }}" | grep '^M' | grep 'tests/golden/' || true) + + if [ -n "$MODIFIED" ]; then + echo "❌ Promotion PRs should only ADD tests, not MODIFY" + echo "Modified files:" + echo "$MODIFIED" + exit 1 + fi + + if [ -z "$ADDED" ]; then + echo "⚠️ No tests added in promotion PR" + else + echo "βœ… Test promotion validated:" + echo "$ADDED" + fi + + - name: Check golden test permissions + run: | + echo "πŸ” Verifying golden test permissions..." 
+ + # Check that all golden tests have 444 permissions + INCORRECT_PERMS=0 + while IFS= read -r -d '' file; do + PERMS=$(stat -f "%OLp" "$file" 2>/dev/null || stat -c "%a" "$file" 2>/dev/null) + if [ "$PERMS" != "444" ]; then + echo "❌ Incorrect permissions: $file ($PERMS, should be 444)" + ((INCORRECT_PERMS++)) + fi + done < <(find tests/golden -type f -name "*.py" -print0) + + if [ "$INCORRECT_PERMS" -gt 0 ]; then + echo "" + echo "Run: ./scripts/enforce-permissions.sh" + exit 1 + fi + + echo "βœ… All golden tests have correct permissions (444)" diff --git a/experiments/examples/test-promotion/README.md b/experiments/examples/test-promotion/README.md new file mode 100644 index 0000000..bd4221c --- /dev/null +++ b/experiments/examples/test-promotion/README.md @@ -0,0 +1,249 @@ +# Test Promotion Example + +Complete implementation of the Test Promotion pattern demonstrating how to prevent AI from weakening test assertions through immutable golden tests and human-approved promotion workflow. + +## Quick Start + +```bash +# 1. Set up permissions +./scripts/enforce-permissions.sh + +# 2. AI generates test in mutable area +# tests/generated/test_example.py + +# 3. 
Human promotes validated test to golden +./scripts/promote-test.sh tests/generated/test_example.py +``` + +## Directory Structure + +``` +tests/ +├── golden/ # Immutable (444 permissions) +│ ├── auth/ # AI cannot modify these +│ └── api/ # Human approval required +└── generated/ # Mutable + ├── test_*.py # AI can freely generate/modify + └── README.md # Auto-generated test index +``` + +## The Problem: Self-Grading Student + +When AI generates both code AND tests, it can make tests pass by weakening assertions: + +```python +# AI writes buggy code that allows duplicate payments +def process_payment(txn_id, amount): + # BUG: Missing idempotency check + return {"status": "success", "amount": amount} + +# AI weakens test to make it pass +def test_payment_idempotency(): + process_payment("123", 100) + process_payment("123", 100) # Should raise error! + # Missing assertion - customers charged twice! +``` + +## The Solution: Test Promotion + +**Golden tests are immutable** - AI cannot change them without being caught: +- File permissions: `444` (read-only) +- CI/CD blocks: Rejects PRs that modify golden tests +- AI hooks: Blocks Edit/Write tools on golden paths + +**Generated tests are mutable** - AI can experiment freely: +- AI generates tests in `tests/generated/` +- Human reviews and validates +- Promotion script elevates to golden status + +## Usage + +### 1. AI Generates Test + +```bash +# AI writes test in generated/ directory +ai "Create test for payment idempotency in tests/generated/test_payment.py" +``` + +Result: `tests/generated/test_payment.py` (permissions: 644, mutable) + +### 2. Run and Validate + +```bash +# Run the generated test +pytest tests/generated/test_payment.py -v + +# Review test quality +cat tests/generated/test_payment.py +``` + +### 3. Promote to Golden + +```bash +# Promote validated test +./scripts/promote-test.sh tests/generated/test_payment.py + +# Interactive checklist: +# ✓ Does this test capture critical behavior? 
+# ✓ Is the test stable (not flaky)? +# ✓ Does it have clear assertions? +# ✓ Is it properly documented? +``` + +Result: Test copied to `tests/golden/test_payment.py` with 444 permissions + +### 4. CI Protection + +```bash +# Try to modify golden test (will fail) +echo "# comment" >> tests/golden/test_payment.py +git add tests/golden/test_payment.py +git commit -m "modify golden test" + +# CI blocks the PR after push: +# ❌ BLOCKED: Golden tests cannot be modified +# Use: ./scripts/promote-test.sh +``` + +## Enforcement Mechanisms (Defense-in-Depth) + +**IMPORTANT**: File permissions alone are **NOT SUFFICIENT**. AI with bash access can bypass them using `chmod`. The pattern uses multiple layers: + +### Layer 1: File Permissions (444) - Prevents Accidental Edits + +```bash +# Set read-only permissions on all golden tests (in bash, run `shopt -s globstar` first so ** also matches files directly under tests/golden/) +chmod 444 tests/golden/**/*.py + +# ⚠️ WARNING: Not sufficient alone! +# AI can bypass: chmod 644 tests/golden/test.py && edit && chmod 444 +``` + +**Purpose**: Prevents accidental modifications, provides visual indicator of immutability. +**Limitation**: Can be bypassed via Bash commands. + +### Layer 2: AI Hooks - Blocks Edit/Write Tools + +See `.ai/hooks/protect-golden.sh`: +- Executes before Edit/Write tool use +- Blocks operations on `tests/golden/**` paths +- Returns exit code 2 (BLOCK) with helpful message + +**Purpose**: Blocks AI coding assistants from using Edit/Write tools on golden tests. +**Limitation**: AI can still modify files via Bash (chmod + cat/echo/sed). + +### Layer 3: CI/CD Protection - **PRIMARY ENFORCEMENT** + +See `.github/workflows/test-protection.yml`: +- Detects **ANY** modifications to `tests/golden/**` via git diff +- Blocks PR merge if golden tests changed +- Catches modifications regardless of method (Edit, Write, Bash, manual) + +**Purpose**: Detect all golden test modifications before merge. +**Reliability**: ✅ Catches all changes regardless of bypass method. 
+ +### Layer 4: CODEOWNERS - **FINAL GATE** + +See `.github/CODEOWNERS`: +``` +tests/golden/** @tech-leads @qa-leads +``` + +- Requires explicit human approval for ANY changes to golden tests +- Even if AI commits changes, PR cannot merge without approval +- Provides human review before behavioral contracts change + +**Purpose**: Human gate prevents unauthorized changes from reaching main branch. +**Reliability**: βœ… Requires human decision, cannot be automated away. + +### Threat Model Coverage + +| Attack Vector | Blocked By | Reliability | +|---------------|-----------|-------------| +| Accidental edit | File permissions (444) | Medium | +| AI Edit/Write tool | AI hooks | Medium | +| AI Bash bypass (chmod) | CI/CD git diff | **High** | +| Committed changes | CODEOWNERS approval | **High** | + +**Bottom Line**: CI/CD + CODEOWNERS are the real enforcement. File permissions and AI hooks provide defense-in-depth but can be bypassed. + +## Promotion Workflow + +```mermaid +graph LR + A[AI: Generate Test] --> B[tests/generated/] + B --> C{pytest passes?} + C -->|No| D[Fix Test] + D --> B + C -->|Yes| E[Human Review] + E --> F{Quality OK?} + F -->|No| G[Iterate] + G --> B + F -->|Yes| H[promote-test.sh] + H --> I[tests/golden/] + I --> J[chmod 444] + + style B fill:#FFD700 + style I fill:#90EE90 +``` + +## Example Tests Included + +### Golden Tests (Immutable) +- `tests/golden/auth/test_jwt_validation.py` - JWT signature validation +- `tests/golden/api/test_payment.py` - Payment idempotency + +### Generated Tests (Mutable) +- `tests/generated/test_edge_cases.py` - AI-generated edge case tests +- `tests/generated/test_performance.py` - Performance boundary tests + +## Running the Example + +```bash +# Run all tests +pytest tests/ -v + +# Run only golden tests (immutable baseline) +pytest tests/golden/ -v + +# Run only generated tests (AI experiments) +pytest tests/generated/ -v + +# Try to modify golden test (will fail) +echo "# test" >> 
tests/golden/api/test_payment.py +# Permission denied (444 permissions) +``` + +## Key Benefits + +1. **Prevents Self-Grading** - AI cannot weaken tests to pass buggy code +2. **Enables AI Experimentation** - AI freely generates tests in `generated/` +3. **Human Quality Gate** - Only validated tests become golden +4. **Audit Trail** - Promotion workflow tracked in git +5. **Defense in Depth** - Permissions + CI + AI hooks + +## Integration with Other Patterns + +- **Testing Orchestration**: Golden tests anchor the test suite +- **Spec-Driven Development**: Golden tests derive from specifications +- **Suite Health**: Monitor golden test stability over time + +## Troubleshooting + +### "Permission denied" on golden test +βœ“ **Expected behavior** - Golden tests are read-only (444) +β†’ Use promotion workflow to update + +### CI blocks my PR +βœ“ **Expected behavior** - Golden tests cannot be modified directly +β†’ Create new test in `generated/`, then promote + +### AI cannot write to `tests/golden/` +βœ“ **Expected behavior** - AI hooks block golden modification +β†’ AI should use `tests/generated/` instead + +## References + +- [Test Promotion Pattern](../../README.md#test-promotion) +- [Testing Orchestration](../../README.md#testing-orchestration) +- [Spec-Driven Development](../../../README.md#spec-driven-development) diff --git a/experiments/examples/test-promotion/THREAT-MODEL.md b/experiments/examples/test-promotion/THREAT-MODEL.md new file mode 100644 index 0000000..2e9491c --- /dev/null +++ b/experiments/examples/test-promotion/THREAT-MODEL.md @@ -0,0 +1,174 @@ +# Test Promotion Threat Model + +## Attack Vectors and Mitigations + +### Attack Vector 1: AI Weakens Test Assertions + +**Threat**: AI generates buggy code, then weakens test to make it pass (self-grading student). 
+ +```python +# AI writes buggy code +def process_payment(id, amount): + return {"status": "success"} # BUG: No idempotency check + +# AI weakens test to pass +def test_payment_idempotency(): + process_payment("123", 100) + process_payment("123", 100) # Missing duplicate check! +``` + +**Mitigation**: Golden tests are immutable - AI cannot modify them. +**Enforcement**: CI/CD detects changes, CODEOWNERS requires approval. + +--- + +### Attack Vector 2: AI Uses Edit/Write Tools on Golden Tests + +**Threat**: AI attempts to modify golden test directly using Edit or Write tools. + +```python +# AI tries: +# Edit(file="tests/golden/test_payment.py", ...) +``` + +**Mitigation**: AI hooks block Edit/Write tools on `tests/golden/**` paths. +**Enforcement**: `.ai/hooks/protect-golden.sh` returns exit code 2 (BLOCK). +**Reliability**: ⚠️ Medium (blocks the tool call before execution, but not Bash; see Attack Vector 3) + +--- + +### Attack Vector 3: AI Bypasses Permissions via Bash + +**Threat**: AI uses Bash to bypass file permissions, modifies golden test, restores permissions. + +```bash +# AI could run: +chmod 644 tests/golden/test_payment.py +echo "# weakened" >> tests/golden/test_payment.py +chmod 444 tests/golden/test_payment.py +``` + +**Why File Permissions Alone Fail**: AI with bash access can change permissions. + +**Mitigation**: CI/CD detects **ANY** modification via git diff. +**Enforcement**: `.github/workflows/test-protection.yml` checks: +```yaml +git diff --name-only "$BASE...$HEAD" | grep '^tests/golden/' && exit 1 +``` +**Reliability**: ✅ High (catches all git changes regardless of method) + +--- + +### Attack Vector 4: AI Commits Modified Golden Tests + +**Threat**: AI successfully modifies and commits golden test changes. + +**Mitigation**: CODEOWNERS requires human approval before merge. 
+**Enforcement**: `.github/CODEOWNERS`: +``` +tests/golden/** @tech-leads @qa-leads +``` +**Reliability**: ✅ Very High (human gate cannot be automated away) + +--- + +### Attack Vector 5: AI Deletes Golden Tests + +**Threat**: AI deletes golden test file to remove failing assertions. + +```bash +rm tests/golden/test_payment.py +``` + +**Mitigation**: CI/CD detects deletion in git diff, CODEOWNERS requires approval. +**Enforcement**: Same as modification (git diff + CODEOWNERS). +**Reliability**: ✅ High + +--- + +## Defense Layers Summary + +| Layer | Purpose | Blocks | Reliability | Can Be Bypassed? | +|-------|---------|--------|-------------|------------------| +| 1. File Permissions (444) | Prevent accidental edits | Direct file writes | Medium | ✅ Yes (chmod) | +| 2. AI Hooks | Block Edit/Write tools | Tool-based edits | Medium | ✅ Yes (Bash) | +| 3. CI/CD Git Diff | Detect all modifications | Any git change | **High** | ❌ No | +| 4. CODEOWNERS | Human approval gate | Merge to main | **Very High** | ❌ No | + +## Primary vs. Defense-in-Depth + +**Primary Enforcement** (Cannot be bypassed): +- ✅ **CI/CD git diff detection** - Catches all changes +- ✅ **CODEOWNERS approval** - Human gate + +**Defense-in-Depth** (Reduces attack surface): +- File permissions (444) - Prevents accidents +- AI hooks - Makes intent explicit + +## What If AI Becomes More Sophisticated? + +**Scenario**: AI learns to hide changes from git diff. + +**Answer**: Not by manipulating local files. CI runs `git diff` between the pushed commits and compares their tree objects, not working-tree state, so restoring permissions or timestamps hides nothing. AI cannot modify git's internal state without access to `.git/` directory, which should be blocked by repository permissions. + +**Scenario**: AI social engineers human approvers. + +**Answer**: Out of scope for technical controls. 
This is a human process issue requiring: +- Review checklist enforcement +- Multiple approvers (2+ required) +- Separation of duties (AI cannot approve own PRs) + +## Security Principle + +The pattern follows **defense-in-depth**: +- Multiple independent layers +- Failure of one layer doesn't compromise security +- Primary enforcement (CI/CD + CODEOWNERS) is robust +- Permissions and hooks make attacks explicit and detectable + +## Recommended Configuration + +```bash +# Minimum viable protection +βœ… CI/CD git diff check (required) +βœ… CODEOWNERS approval (required) + +# Defense-in-depth +βœ… File permissions 444 (recommended) +βœ… AI hooks (recommended) +βœ… Branch protection rules (recommended) +``` + +## Monitoring and Alerts + +Track attempted bypasses: +```yaml +# Alert on promotion workflow usage +- name: Track Test Promotions + if: contains(github.event.pull_request.labels.*.name, 'test-promotion') + run: | + echo "Test promotion attempted by: ${{ github.actor }}" + # Send to monitoring system +``` + +## Testing the Threat Model + +Verify enforcement by attempting each attack vector: + +```bash +# Test 1: Try to edit golden test directly +echo "test" >> tests/golden/test_payment.py +# Expected: Permission denied (444) + +# Test 2: Try to use chmod bypass +chmod 644 tests/golden/test_payment.py && echo "test" >> tests/golden/test_payment.py +git add tests/golden/test_payment.py +git push +# Expected: CI blocks PR + +# Test 3: Try to merge without approval +# Create PR with golden test changes +# Expected: CODEOWNERS blocks merge +``` + +All attack vectors should be blocked by at least one enforcement layer. 
diff --git a/experiments/examples/test-promotion/conftest.py b/experiments/examples/test-promotion/conftest.py new file mode 100644 index 0000000..ea90769 --- /dev/null +++ b/experiments/examples/test-promotion/conftest.py @@ -0,0 +1,14 @@ +""" +pytest configuration for Test Promotion example +""" + +import pytest +from src.payment import reset_transactions + + +@pytest.fixture(autouse=True) +def reset_payment_state(): + """Reset payment state before each test.""" + reset_transactions() + yield + reset_transactions() diff --git a/experiments/examples/test-promotion/requirements.txt b/experiments/examples/test-promotion/requirements.txt new file mode 100644 index 0000000..d14e63a --- /dev/null +++ b/experiments/examples/test-promotion/requirements.txt @@ -0,0 +1,2 @@ +pytest>=7.4.0 +pytest-cov>=4.1.0 diff --git a/experiments/examples/test-promotion/scripts/enforce-permissions.sh b/experiments/examples/test-promotion/scripts/enforce-permissions.sh new file mode 100755 index 0000000..ce51772 --- /dev/null +++ b/experiments/examples/test-promotion/scripts/enforce-permissions.sh @@ -0,0 +1,20 @@ +#!/bin/bash +set -euo pipefail + +# Enforce Golden Test Permissions +# Sets all golden tests to read-only (444) + +echo "πŸ”’ Enforcing golden test permissions..." 
+ +# Find all Python test files in tests/golden/ and set to 444 +count=0 +while IFS= read -r -d '' file; do + chmod 444 "$file" + echo " βœ“ $file β†’ 444 (read-only)" + count=$((count + 1)) +done < <(find tests/golden -type f -name "*.py" -print0) + +echo "" +echo "βœ… Protected $count golden test(s)" +echo " AI cannot modify these files" +echo " Human edits require promotion workflow" diff --git a/experiments/examples/test-promotion/scripts/promote-test.sh b/experiments/examples/test-promotion/scripts/promote-test.sh new file mode 100755 index 0000000..016bff7 --- /dev/null +++ b/experiments/examples/test-promotion/scripts/promote-test.sh @@ -0,0 +1,129 @@ +#!/bin/bash +set -euo pipefail + +# Test Promotion Script +# Promotes AI-generated tests to immutable golden status + +GENERATED_TEST="${1:-}" + +if [[ -z "$GENERATED_TEST" ]]; then + echo "Usage: ./scripts/promote-test.sh " + echo "Example: ./scripts/promote-test.sh tests/generated/test_payment.py" + exit 1 +fi + +if [[ ! -f "$GENERATED_TEST" ]]; then + echo "❌ Error: Test file not found: $GENERATED_TEST" + exit 1 +fi + +if [[ ! "$GENERATED_TEST" =~ ^tests/generated/ ]]; then + echo "❌ Error: Can only promote tests from tests/generated/" + echo " Got: $GENERATED_TEST" + exit 1 +fi + +echo "πŸ” Test Promotion Workflow" +echo " Source: $GENERATED_TEST" +echo "" + +# Step 1: Validate test passes +echo "Step 1: Validating test passes..." +if ! pytest "$GENERATED_TEST" -v; then + echo "❌ Test must pass before promotion" + exit 1 +fi +echo "βœ… Test passes" +echo "" + +# Step 2: Human quality checklist +echo "Step 2: Quality Review Checklist" +echo " Please review the test and answer the following:" +echo "" + +read -p " Does this test capture critical behavior? (y/n): " critical +if [[ "$critical" != "y" ]]; then + echo "❌ Promotion cancelled - test not critical" + exit 1 +fi + +read -p " Is the test stable (not flaky)? 
(y/n): " stable +if [[ "$stable" != "y" ]]; then + echo "❌ Promotion cancelled - test may be flaky" + exit 1 +fi + +read -p " Does it have clear, specific assertions? (y/n): " assertions +if [[ "$assertions" != "y" ]]; then + echo "❌ Promotion cancelled - assertions not clear" + exit 1 +fi + +read -p " Is it properly documented? (y/n): " documented +if [[ "$documented" != "y" ]]; then + echo "❌ Promotion cancelled - needs documentation" + exit 1 +fi + +echo "" +echo "βœ… Quality checks passed" +echo "" + +# Step 3: Determine golden path +BASENAME=$(basename "$GENERATED_TEST") +GOLDEN_PATH="tests/golden/${BASENAME}" + +# Check if golden test already exists +if [[ -f "$GOLDEN_PATH" ]]; then + echo "⚠️ Warning: Golden test already exists: $GOLDEN_PATH" + read -p " Overwrite? This requires special approval (y/n): " overwrite + if [[ "$overwrite" != "y" ]]; then + echo "❌ Promotion cancelled" + exit 1 + fi + # Remove read-only protection temporarily for overwrite + chmod 644 "$GOLDEN_PATH" +fi + +# Step 4: Copy to golden with read-only permissions +echo "Step 3: Copying to golden tests..." +cp "$GENERATED_TEST" "$GOLDEN_PATH" +chmod 444 "$GOLDEN_PATH" +echo "βœ… Test promoted to: $GOLDEN_PATH (444 permissions)" +echo "" + +# Step 5: Git operations +echo "Step 4: Creating promotion commit..." +git add "$GOLDEN_PATH" + +COMMIT_MSG="test-promotion: Promote ${BASENAME} to golden status + +Source: $GENERATED_TEST +Target: $GOLDEN_PATH +Reviewer: $(git config user.name) + +Checklist: +- [x] Test captures critical behavior +- [x] Test is stable (not flaky) +- [x] Clear, specific assertions +- [x] Properly documented" + +git commit -m "$COMMIT_MSG" +echo "βœ… Commit created" +echo "" + +# Step 6: Summary +echo "πŸ“‹ Promotion Summary" +echo " βœ“ Test validated and passing" +echo " βœ“ Quality checklist completed" +echo " βœ“ Promoted to: $GOLDEN_PATH" +echo " βœ“ Permissions set to 444 (read-only)" +echo " βœ“ Committed to git" +echo "" +echo "Next steps:" +echo " 1. 
"""
Simple Payment Processing System

Demonstrates the Test Promotion pattern with immutable golden tests
protecting critical business logic.
"""

import math

# In-memory transaction registry (simulates database)
_processed_transactions = set()


class DuplicateTransactionError(Exception):
    """Raised when attempting to process a transaction that was already processed."""


def _is_valid_amount(amount) -> bool:
    """Return True when *amount* is a real, finite, strictly positive number."""
    # bool is a subclass of int, so True would otherwise be charged as 1.
    if isinstance(amount, bool) or not isinstance(amount, (int, float)):
        return False
    # NaN compares False against everything, so `amount <= 0` alone lets it
    # through; infinity is positive but not a chargeable amount. Only floats
    # are checked: ints are always finite and huge ints cannot be converted.
    if isinstance(amount, float) and not math.isfinite(amount):
        return False
    return amount > 0


def process_payment(txn_id: str, amount: float) -> dict:
    """
    Process a payment transaction with idempotency protection.

    Args:
        txn_id: Unique transaction identifier. Must be a string containing
            at least one non-whitespace character.
        amount: Payment amount. Must be an int or float that is finite and
            strictly positive; bool, NaN and infinity are rejected.

    Returns:
        dict with "status" ("success"), "txn_id", and "amount".

    Raises:
        ValueError: If txn_id is not a non-empty string or amount is not a
            positive number.
        DuplicateTransactionError: If transaction was already processed.
    """
    # Validate transaction ID
    if not isinstance(txn_id, str) or not txn_id.strip():
        raise ValueError("Transaction ID must be a non-empty string")

    # Validate amount
    if not _is_valid_amount(amount):
        raise ValueError("Amount must be a positive number")

    # Check for duplicate transaction
    if txn_id in _processed_transactions:
        raise DuplicateTransactionError(
            f"Transaction {txn_id} was already processed"
        )

    # Record the ID only after every validation has passed, so a rejected
    # call never consumes the transaction ID.
    _processed_transactions.add(txn_id)

    return {
        "status": "success",
        "txn_id": txn_id,
        "amount": amount,
    }


def reset_transactions():
    """Reset transaction registry (for testing only)."""
    _processed_transactions.clear()
def test_payment_idempotency():
    """A transaction ID MUST be chargeable only once; a replay is rejected."""
    payment = {"txn_id": "TXN-123", "amount": 100.00}

    # The initial charge goes through and echoes the amount back.
    outcome = process_payment(**payment)
    assert outcome["status"] == "success"
    assert outcome["amount"] == 100.00

    # Replaying the identical transaction MUST raise.
    with pytest.raises(DuplicateTransactionError) as raised:
        process_payment(**payment)

    message = str(raised.value).lower()
    assert "already processed" in message


def test_payment_validation():
    """A negative amount MUST be refused with a message mentioning 'positive'."""
    with pytest.raises(ValueError) as raised:
        process_payment(txn_id="TXN-456", amount=-50.00)

    message = str(raised.value).lower()
    assert "positive" in message


def test_payment_requires_transaction_id():
    """An empty transaction ID MUST be refused with a message naming it."""
    with pytest.raises(ValueError) as raised:
        process_payment(txn_id="", amount=100.00)

    message = str(raised.value).lower()
    assert "transaction id" in message