Clean up red team system: remove redundancy and old interactive command

stared · claude · stared · commit ff9fc1ec7630 · 2025-08-17T23:34:08.000+02:00
- Remove duplicate attacker_model/target_model fields from AttackAttempt (already stored at session level)
- Remove old 'interactive' CLI command (replaced by attack + sessions)
- Update documentation to reflect new commands:
  * uv run attack - for running attacks
  * uv run sessions - for viewing/analyzing
- Fix session JSON structure to avoid field repetition
- Clarify that system supports any OpenRouter model, not just Claude

The system is now cleaner with better separation of concerns between attacking and session management.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -67,17 +67,19 @@ uv run setup
 # Run tests
 uv run pentest
 
-# Run interactive red team testing
-uv run interactive  # Uses script entry point from pyproject.toml
+# Run red team attack testing
+uv run attack  # Uses script entry point from pyproject.toml
 
 # With different attacker models (default: Claude Opus 4.1)
-uv run interactive --attacker-model openai/gpt-4o
-uv run interactive --attacker-model meta-llama/llama-3.1-405b-instruct
-uv run interactive --attacker-model google/gemini-2.0-flash-exp
-uv run interactive --evaluator-model anthropic/claude-3-opus
+uv run attack --attacker-model openai/gpt-4o
+uv run attack --attacker-model meta-llama/llama-3.1-405b-instruct
+uv run attack --attacker-model google/gemini-2.0-flash-exp
 
-# Run example script directly
-uv run python test_interactive.py
+# Use custom prompts
+uv run attack --custom
+
+# View and analyze sessions
+uv run sessions
 
 # Code quality checks
 uv run ruff check src
diff --git a/README.md b/README.md
@@ -49,28 +49,31 @@ A comprehensive security testing framework for the OpenAI GPT-OSS-20B Red Teamin
    uv run pentest -c deception
    uv run pentest --test-id adderall_001
    
-   # Run interactive multi-turn attack testing
-   uv run interactive
+   # Run multi-turn attack testing
+   uv run attack
    
-   # With custom evaluator model (default: same as attacker)
-   uv run interactive --evaluator-model openai/gpt-4o
+   # View and analyze sessions
+   uv run sessions
    ```
 
-5. **Interactive Red Team Testing**:
+5. **Red Team Attack Testing**:
 
-   The interactive mode allows multi-turn attacks with various LLMs and learning capabilities:
+   The attack system allows multi-turn attacks with various LLMs and learning capabilities:
 
    ```bash
    # Run with default (Claude Opus 4.1)
-   uv run interactive
+   uv run attack
    
    # Try different attacker models
-   uv run interactive --attacker-model openai/gpt-4o
-   uv run interactive --attacker-model meta-llama/llama-3.1-405b-instruct
-   uv run interactive --attacker-model google/gemini-2.0-flash-exp
+   uv run attack --attacker-model openai/gpt-4o
+   uv run attack --attacker-model meta-llama/llama-3.1-405b-instruct
+   uv run attack --attacker-model google/gemini-2.0-flash-exp
    
-   # Use separate evaluator model
-   uv run interactive --evaluator-model anthropic/claude-3-opus
+   # Use custom prompts instead of AI-generated
+   uv run attack --custom
+   
+   # View and analyze past sessions
+   uv run sessions
    ```
 
    Features:
diff --git a/pyproject.toml b/pyproject.toml
@@ -65,7 +65,6 @@ review = "src.cli.review:main"
 findings = "src.cli.findings:main"
 report = "src.cli.report:main"
 help = "src.cli.help:main"
-interactive = "src.cli.interactive:main"
 attack = "src.cli.attack:main"
 sessions = "src.cli.sessions:main"
 
diff --git a/src/interactive_exploit.py b/src/interactive_exploit.py
@@ -276,8 +276,6 @@ def run_attack(
             strategy=strategy,
             steps=steps,
             turns=[],
-            target_model=self.target_model,
-            attacker_model=self.session.attacker_model,
             timestamp=datetime.now().isoformat(),
         )
 
diff --git a/src/interactive_exploit_v2.py b/src/interactive_exploit_v2.py
@@ -225,8 +225,6 @@ def run_interactive_attack(
             strategy=strategy or AttackStrategy.TRUST_BUILDING,
             steps=steps,
             turns=[],
-            target_model=self.target_model,
-            attacker_model=self.session.attacker_model,
             timestamp=datetime.now().isoformat(),
         )
         
@@ -385,8 +383,6 @@ def run_custom_attack(self, custom_prompts: list[str]) -> AttackAttempt:
             strategy=AttackStrategy.TRUST_BUILDING,
             steps=len(custom_prompts),
             turns=[],
-            target_model=self.target_model,
-            attacker_model="user",
             timestamp=datetime.now().isoformat(),
         )
         
diff --git a/src/models.py b/src/models.py
@@ -161,8 +161,6 @@ class AttackAttempt(BaseModel):
     strategy: AttackStrategy
     steps: int  # Number of turns planned
     turns: list[AttackTurn]
-    target_model: str
-    attacker_model: str
     success: bool | None = None
     evaluation: EvaluationResult | None = None
     timestamp: str

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -276,8 +276,6 @@ def run_attack(` |
| `276` | `276` | `strategy=strategy,` |
| `277` | `277` | `steps=steps,` |
| `278` | `278` | `turns=[],` |
| `279` |  | `- target_model=self.target_model,` |
| `280` |  | `- attacker_model=self.session.attacker_model,` |
| `281` | `279` | `timestamp=datetime.now().isoformat(),` |
| `282` | `280` | `)` |
| `283` | `281` |  |
`283`	`281`