# Source: rmax-ai/system-prompts-forensics (branch: main),
# data/analysis/codex.review.analysis.yaml
---
# Identifies the normalization schema this analysis document follows.
schema:
  name: system-prompt
  version: v0
  # Folded (>-) only to keep lines short; it parses to the same single-line
  # string with no trailing newline.
  description: >-
    Structural schema to normalize, compare, and analyze system prompts as
    governance constitutions for AI tools and agents.

# Describes the analyzed artifact: tool identity, model, and how, when and
# where the payload was captured.
metadata:
  tool:
    # NOTE(review): name/vendor are recorded as "unknown"; the
    # provenance.compliance_notes field in this file states they were not
    # present in the capture.
    name: unknown
    vendor: unknown
    channel: api
  version:
    tool_version: unknown
    model_family: gpt-5.1
  capture:
    method: mitmproxy
    # Quoted so it stays a string instead of being parsed as a timestamp.
    timestamp: "2026-01-01T22:00:47Z"
    environment:
      os: Darwin
      arch: arm64
      runtime: Python 3.12.5
    # Quoted so the hash is always loaded as a string, whatever characters it
    # happens to contain (an all-digit or exponent-looking value would
    # otherwise be retyped as a number).
    artifact_hash: "52f3e10a65f7d1f950ae985e0bebb79dc06ff9e111780b72181e0c73403c3d35"
  notes: Captured invocation payload for code review-style agent with tool access.

# Layer-by-layer breakdown of the captured prompt.
layers:
  # Role, persona and stated priorities assigned to the model.
  identity:
    role: reviewer
    persona:
      traits:
        - matter-of-fact
        - brief
        - non-accusatory
        - avoids flattery
        - helpful AI assistant suggestion style
      tone: neutral
    self_description: AI acting as a reviewer for a proposed code change made by another engineer.
    # Qualities recorded as the reviewer's focus areas.
    alignment_claims:
      - accuracy-focused
      - security-focused
      - maintainability-focused

  # What the reviewer may do, must not do, and may do only under a condition.
  authority:
    allowed_actions:
      - review proposed code changes and flag discrete actionable bugs introduced in the commit
      - run shell commands via provided tool (auto tool choice)
      - read MCP resources via provided tools
      - update a plan via provided tool
      # NOTE(review): apply_patch is listed as an available capability here
      # while "generate a PR fix" appears under forbidden_actions below; the
      # entry text itself acknowledges that tension.
      - edit files via apply_patch tool (capability present, though review instructions say not to generate PR fix)
      - output structured JSON findings and overall verdict
    # Each entry names an action the reviewer must NOT take.
    forbidden_actions:
      - flag pre-existing bugs (only bugs introduced in the commit)
      - speculate about breakage without provably affected code
      - claim severity higher than warranted
      - include code chunks longer than 3 lines in comments (outside suggestion blocks)
      - wrap final JSON output in markdown fences or extra prose
      - omit required code_location fields in findings
      - generate a PR fix
      - carry skills across turns unless re-mentioned
    # Each entry pairs a condition with the actions allowed/forbidden under it.
    conditional_actions:
      - condition: If user names a skill or task matches a listed skill description
        allowed:
          - open and follow the referenced SKILL.md workflow with progressive disclosure
        forbidden:
          - bulk-load skill reference files unnecessarily
      - condition: If a named skill is missing or unreadable
        allowed:
          - state briefly and continue with best fallback
        # Explicit empty list: nothing is recorded as forbidden for this case.
        forbidden: []
      - condition: If using shell_command
        allowed:
          - set workdir parameter explicitly
        forbidden:
          - rely on cd unless absolutely necessary
    escalation:
      allowed: true
      targets:
        - user
        - tool
    final_decision_maker: model

  # What the model can see, what it may emit, and the limits on its output.
  scope:
    inputs_visible:
      - user messages (including AGENTS.md instructions and environment_context)
      - tool declarations
      - tool call outputs (shell_command results)
      - reasoning summaries (encrypted content included but not human-readable)
    outputs_allowed:
      - raw JSON (must match specified schema exactly)
      - tool calls (functions and custom apply_patch)
    statefulness:
      memory: false
      session_persistence: false
    boundaries:
      # Limits recorded as non-negotiable.
      hard_limits:
        - final output must be JSON matching the provided schema exactly (no fences/prose)
        - do not generate a PR fix
        - one-paragraph body per finding; avoid line breaks unless needed for code fragment
        - no code chunks >3 lines in comment body
        - code_location required; line ranges short and overlap diff
      # Preferences rather than strict requirements.
      soft_limits:
        - ignore trivial style unless obscures meaning or violates documented standards
        - prefer outputting no findings if none are clearly worth fixing

  # Execution context and the side-effect, network and filesystem access
  # recorded for it.
  environment:
    execution_context: local
    side_effects_allowed: true
    network_access: limited
    filesystem_access: write

  # Tools declared to the model and the rules recorded for invoking them.
  tools:
    # One entry per tool: its name, declared type, a short description and
    # whether it is recorded as having side effects.
    declared_tools:
      - name: shell_command
        type: function
        description: Run a shell command; must set workdir; avoid cd unless necessary.
        side_effects: true
      - name: list_mcp_resources
        type: function
        description: List MCP server resources; prefer over web search.
        side_effects: false
      - name: list_mcp_resource_templates
        type: function
        description: List MCP server resource templates; prefer over web search.
        side_effects: false
      - name: read_mcp_resource
        type: function
        description: Read a specific MCP resource by server and URI.
        side_effects: false
      - name: update_plan
        type: function
        description: Update task plan; at most one step in_progress.
        side_effects: true
      # The only tool here with type "cli" rather than "function".
      - name: apply_patch
        type: cli
        description: Freeform patch application tool to edit files.
        side_effects: true
    invocation_rules:
      explicit: true
      constraints:
        - tool_choice is auto; parallel_tool_calls disabled
        - shell_command should include workdir
        - apply_patch must be freeform (not JSON)
    abstraction_level: wrapped
    failure_handling: ask-user

  # Style, safety, legal and formatting constraints recorded for the prompt.
  constraints:
    style:
      requirements:
        - matter-of-fact tone; not accusatory; not overly positive
        - brief comments; body at most one paragraph
        - one comment per distinct issue
        - tag finding titles with priority label [P0]-[P3]
        - include confidence_score per finding and overall_confidence_score
        - include overall_correctness verdict and 1-3 sentence overall_explanation
      prohibitions:
        - avoid flattery and unhelpful praise
        - do not include unnecessary location details in comment body
        - do not use suggestion blocks except for concrete replacement code
        - do not change outer indentation levels in suggestion blocks unless required
    safety:
      # Explicit empty list: no safety policies are recorded.
      policies: []
      refusal_style: unknown
    legal:
      # Explicit empty list: no legal restrictions are recorded.
      restrictions: []
      attribution_required: false
    formatting:
      enforced: true
      schemas:
        - specified JSON output schema for findings and overall verdict
        - apply_patch lark grammar (tool format)

  # How the model's reasoning is exposed and whether justification is required.
  reasoning:
    visibility: partial
    explanation_policy: on-request
    internal_deliberation: true
    justification_required: true

  # Self-review triggers, external feedback sources and iteration limits.
  correction:
    self_review:
      enabled: true
      triggers:
        - ensure findings are discrete/actionable and introduced in commit
        - ensure output JSON matches schema exactly
        - ensure line ranges are short and overlap diff
    external_feedback:
      sources:
        - user edits
        - tool outputs
      # Folded (>-) only to keep lines short; it parses to the same
      # single-line string with no trailing newline.
      incorporation_rules: >-
        Follow higher-priority, more specific guidelines when encountered;
        otherwise apply general review guidelines.
    # Both limits are recorded as the string "unknown", not as numbers.
    iteration_limits:
      max_cycles: unknown
      timeout: unknown

  # When the review is considered finished, successful, or aborted.
  termination:
    stopping_conditions:
      - all qualifying findings listed, or none if no clearly fix-worthy issues
      - output overall_correctness verdict and explanation
    # Folded (>-) only to keep lines short; it parses to the same single-line
    # string with no trailing newline.
    success_definition: >-
      Produce valid schema-matching JSON with accurate, actionable findings
      (or empty findings) and a justified overall verdict.
    abort_conditions:
      - inability to access required context (e.g., missing diff) preventing code_location overlap
    handoff_behavior: return control

# Analyst's assessment of the prompt: risks, failure modes, assumptions and
# gaps.
analysis:
  risk_model:
    primary_risks:
      - incorrect or non-conforming JSON output (schema mismatch)
      - over-reporting speculative issues or pre-existing bugs
      - leaking excessive code in comments (violating 3-line limit)
      - unintended file modifications despite "do not generate a PR fix"
      - executing shell commands with side effects in workspace-write sandbox
    mitigations:
      - strict output schema and formatting constraints
      - guidelines requiring provable impact and commit-introduced issues only
      - short line ranges and one-paragraph bodies
      - suggestion block constraints and indentation preservation
      - environment_context indicates restricted network and sandbox mode
  failure_modes:
    anticipated:
      - no diff available leading to inability to provide overlapping code_location
      - confusion between "apply_patch available" vs "do not generate PR fix"
      - tool outputs empty (e.g., empty directory) limiting review content
      - skills trigger rules causing unnecessary file reads if misapplied
    # Failure modes for which no mitigation is recorded.
    unmitigated:
      - no explicit policy for handling missing diff while still requiring code_location overlap
      - no explicit prohibition on running destructive shell commands beyond sandboxing
  # Strip chomping (>-) so this value, like every other string in the
  # document, does not end in a newline.
  implicit_assumptions: >-
    The agent is expected to have access to a code diff/patch context elsewhere in the session so it can cite
    absolute_file_path and overlapping line ranges; the environment is a local workspace with write access but
    network is restricted; "skills" are local files that can be opened when triggered; encrypted reasoning is
    not intended for user visibility.
  # Things the prompt does not specify.
  notable_absences:
    - no explicit privacy/data-handling rules beyond tool/environment constraints
    - no explicit definition of how to determine whether a bug was introduced in the commit (methodology)
    - no explicit guidance for when to use MCP resources vs filesystem reads
    - no explicit maximum number of findings beyond "all qualifying"
    - no explicit refusal policy for unsafe requests

# Where the analysis inputs came from and how they were handled.
provenance:
  source_references:
    - mitmproxy capture of codex.review.json invocation payload (model gpt-5.1-codex-max)
    - local AGENTS.md instructions listing skills and trigger rules
    - environment_context block (sandbox_mode workspace-write; network restricted)
  redactions_applied: false
  # Explains why metadata.tool.name and metadata.tool.vendor are "unknown".
  compliance_notes: Normalization derived from provided payload; tool vendor/name not present in capture.