navapbc
diff --git a/.test-index b/.test-index
Lines changed: 8 additions & 2 deletions
diff --git a/plugins/dso/agents/code-reviewer-deep-arch.md b/plugins/dso/agents/code-reviewer-deep-arch.md
Lines changed: 18 additions & 18 deletions
diff --git a/plugins/dso/agents/code-reviewer-deep-correctness.md b/plugins/dso/agents/code-reviewer-deep-correctness.md
Lines changed: 18 additions & 18 deletions
diff --git a/plugins/dso/agents/code-reviewer-deep-hygiene.md b/plugins/dso/agents/code-reviewer-deep-hygiene.md
Lines changed: 20 additions & 20 deletions
@@ -16,7 +16,7 @@ plugins/dso/docs/decisions/adr-config-system.md:tests/scripts/test-adr-config-sy
 plugins/dso/docs/workflow-config-schema.json:tests/scripts/test-workflow-config-schema.sh
 plugins/dso/docs/workflows/COMMIT-WORKFLOW.md:tests/scripts/test-commit-workflow-step-1-5.sh
 plugins/dso/docs/workflows/REVIEW-PROTOCOL-WORKFLOW.md:tests/hooks/test-review-protocol-workflow.sh
-plugins/dso/docs/workflows/REVIEW-WORKFLOW.md:tests/workflows/test-review-workflow-no-snapshot.sh,tests/workflows/test-review-workflow-size-thresholds.sh
+plugins/dso/docs/workflows/REVIEW-WORKFLOW.md:tests/workflows/test-review-workflow-no-snapshot.sh,tests/workflows/test-review-workflow-size-thresholds.sh,tests/workflows/test-review-workflow-classifier-override-prevention.sh
 plugins/dso/hooks/dispatchers/pre-bash.sh:tests/hooks/test-pre-bash-dispatcher.sh
 plugins/dso/hooks/dispatchers/pre-edit.sh:tests/hooks/test-pre-edit-write-dispatcher.sh
 plugins/dso/hooks/dispatchers/pre-write.sh:tests/hooks/test-pre-edit-write-dispatcher.sh
@@ -53,7 +53,7 @@ plugins/dso/skills/brainstorm/SKILL.md:plugins/dso/tests/test-sprint-skill-step1
 plugins/dso/skills/brainstorm/docs/reviewers/agent-clarity.md:tests/reviewers/test-agent-clarity-epic-calibration.sh
 plugins/dso/skills/brainstorm/docs/reviewers/scope.md:tests/scripts/test-isolation-rule-no-unscoped-export.sh
 plugins/dso/skills/brainstorm/docs/reviewers/value.md:tests/scripts/test-value-reviewer-signals.sh
-plugins/dso/skills/debug-everything/SKILL.md:plugins/dso/tests/test-sprint-skill-step10-no-merge-to-main.sh,tests/plugin/test-audit-skill-resolution.sh,tests/hooks/test-fix-bug-skill.sh,tests/hooks/test-generate-claude-md-skill.sh,tests/hooks/test-init-skill.sh,tests/scripts/test-qualify-skill-refs.sh,tests/scripts/test-skill-path-refs.sh,tests/scripts/test-check-skill-refs.sh,tests/skills/test_end_skill_final_verification_step.py,tests/skills/test_implementation_plan_skill_tdd_enforcement.py,tests/skills/test-quick-ref-skill.sh,tests/skills/test_project_setup_skill_conditional_prompts.py,tests/skills/test_fix_bug_skill.py,tests/skills/test_end_skill_summary_displays_stored_learnings.py,tests/skills/test_end_skill_learnings_step_before_commit.py,tests/skills/test-design-skills-cross-stack.sh,tests/skills/test_end_skill_dirty_worktree_resolution.py,tests/skills/test_fix_bug_skill_escalated_section.py,tests/skills/test_end_skill_bug_tickets_before_commit.py,tests/hooks/test-sub-agent-guard.sh
+plugins/dso/skills/debug-everything/SKILL.md:plugins/dso/tests/test-sprint-skill-step10-no-merge-to-main.sh,tests/plugin/test-audit-skill-resolution.sh,tests/hooks/test-fix-bug-skill.sh,tests/hooks/test-generate-claude-md-skill.sh,tests/hooks/test-init-skill.sh,tests/scripts/test-qualify-skill-refs.sh,tests/scripts/test-skill-path-refs.sh,tests/scripts/test-check-skill-refs.sh,tests/skills/test_end_skill_final_verification_step.py,tests/skills/test_implementation_plan_skill_tdd_enforcement.py,tests/skills/test-quick-ref-skill.sh,tests/skills/test_project_setup_skill_conditional_prompts.py,tests/skills/test_fix_bug_skill.py,tests/skills/test_end_skill_summary_displays_stored_learnings.py,tests/skills/test_end_skill_learnings_step_before_commit.py,tests/skills/test-design-skills-cross-stack.sh,tests/skills/test_end_skill_dirty_worktree_resolution.py,tests/skills/test_fix_bug_skill_escalated_section.py,tests/skills/test_end_skill_bug_tickets_before_commit.py,tests/scripts/test-debug-everything-fix-bug-delegation.sh,tests/hooks/test-sub-agent-guard.sh
 plugins/dso/skills/debug-everything/prompts/critic-review.md:tests/plugin/test-debug-everything-readonly-enforcement.sh
 plugins/dso/skills/debug-everything/prompts/diagnostic-and-cluster.md:tests/plugin/test-debug-everything-readonly-enforcement.sh
 plugins/dso/skills/debug-everything/prompts/full-validation.md:tests/plugin/test-debug-everything-readonly-enforcement.sh
@@ -111,4 +111,10 @@ plugins/dso/skills/verification-before-completion/SKILL.md:plugins/dso/tests/tes
 examples/ci.example.yml: tests/scripts/test-ci-no-v2-paths.sh [test_ci_example_yml_no_tickets_paths]
 examples/pre-commit-config.example.yaml: tests/scripts/test-ci-no-v2-paths.sh [test_precommit_example_no_tickets_exclude]
 plugins/dso/hooks/pre-commit-ticket-gate.sh: tests/hooks/test-pre-commit-ticket-gate.sh [test_blocks_missing_ticket_id]
+plugins/dso/agents/code-reviewer-light.md:tests/agents/test-reviewer-dimension-names.sh
+plugins/dso/agents/code-reviewer-standard.md:tests/agents/test-reviewer-dimension-names.sh
+plugins/dso/agents/code-reviewer-deep-arch.md:tests/agents/test-reviewer-dimension-names.sh
+plugins/dso/agents/code-reviewer-deep-correctness.md:tests/agents/test-reviewer-dimension-names.sh
+plugins/dso/agents/code-reviewer-deep-hygiene.md:tests/agents/test-reviewer-dimension-names.sh
+plugins/dso/agents/code-reviewer-deep-verification.md:tests/agents/test-reviewer-dimension-names.sh
 plugins/dso/scripts/merge-to-main.sh: tests/scripts/test-merge-to-main.sh [test_merge_to_main_no_v2_tickets_md_pattern]
@@ -3,7 +3,7 @@ name: code-reviewer-deep-arch
 model: opus
 description: "Deep-tier architectural reviewer (Opus): synthesizes specialist findings, assesses systemic risk, produces unified verdict across all dimensions."
 ---
-<!-- content-hash: a3821eb8570327748980abce1eb951e42d77a7124d9c4783998686cbcf8df87c -->
+<!-- content-hash: 2882c000e741e4e2e411ebd862039793db8f4ad43a2350cf292b755dec79311e -->
 <!-- generated by build-review-agents.sh — do not edit manually -->
 
 # Code Reviewer — Universal Base Guidance
@@ -71,12 +71,12 @@ Focus areas (apply your tier-specific checklist — see delta section below):
 - Code quality and project convention adherence
 - Test coverage for the changes
 - Architecture and design decisions
-- File size: flag files >500 lines as `minor` under `readability` (only `important` if the diff
+- File size: flag files >500 lines as `minor` under `maintainability` (only `important` if the diff
   itself introduces a new file >500 lines)
 - **Deletion impact analysis**: For every deleted file or removed code block, investigate whether
   the deleted artifact is still referenced or depended upon elsewhere. Use Grep to search for
   imports, references, invocations, or configuration entries that point to the deleted artifact.
-  Flag as `critical` under `functionality` if a deletion leaves dangling references, broken
+  Flag as `critical` under `correctness` if a deletion leaves dangling references, broken
   imports, or removes functionality that is still in active use without a replacement. Migration
   tasks (delete + replace) must have both sides verified: the old artifact is gone AND the
   replacement exists and is functional.
@@ -104,11 +104,11 @@ will be rejected by the validator and force a re-dispatch.
 ```json
 {
   "scores": {
-    "code_hygiene": "<integer 1-5 or N/A>",
-    "object_oriented_design": "<integer 1-5 or N/A>",
-    "readability": "<integer 1-5 or N/A>",
-    "functionality": "<integer 1-5 or N/A>",
-    "testing_coverage": "<integer 1-5 or N/A>"
+    "hygiene": "<integer 1-5 or N/A>",
+    "design": "<integer 1-5 or N/A>",
+    "maintainability": "<integer 1-5 or N/A>",
+    "correctness": "<integer 1-5 or N/A>",
+    "verification": "<integer 1-5 or N/A>"
   },
   "findings": [
     {
@@ -145,13 +145,13 @@ a minor-only dimension below 4.
 
 Each finding's `category` must be exactly one of these five dimensions:
 
-- `code_hygiene` — dead code, naming anti-patterns, unnecessary complexity, missing guards,
+- `hygiene` — dead code, naming anti-patterns, unnecessary complexity, missing guards,
   structural issues NOT caught by automated tools. Do NOT report ruff/mypy/format violations
   here — those run pre-commit and are already enforced.
-- `object_oriented_design` — classes, encapsulation, SOLID, design patterns
-- `readability` — naming, style, comments, organization
-- `functionality` — correctness, edge cases, error handling, efficiency, security
-- `testing_coverage` — test presence, quality, edge case coverage
+- `design` — classes, encapsulation, SOLID, design patterns
+- `maintainability` — naming, style, comments, organization
+- `correctness` — logic errors, edge cases, error handling, efficiency, security
+- `verification` — test presence, quality, edge case coverage
 
 ---
 
@@ -298,11 +298,11 @@ Perform architectural synthesis and oversight. Use Read, Grep, and Glob extensiv
 After completing your checklist, produce scores for ALL five dimensions, incorporating
 the specialist findings:
 
-- `code_hygiene`: synthesized from Sonnet C findings + your own analysis
-- `object_oriented_design`: synthesized from Sonnet C findings + your own analysis
-- `readability`: synthesized from Sonnet C findings + your own analysis
-- `functionality`: synthesized from Sonnet A findings + your own analysis
-- `testing_coverage`: synthesized from Sonnet B findings + your own analysis
+- `hygiene`: synthesized from Sonnet C findings + your own analysis
+- `design`: synthesized from Sonnet C findings + your own analysis
+- `maintainability`: synthesized from Sonnet C findings + your own analysis
+- `correctness`: synthesized from Sonnet A findings + your own analysis
+- `verification`: synthesized from Sonnet B findings + your own analysis
 
 Your `findings` array should include:
 1. Any new architectural findings you identified that the specialists missed
 
@@ -1,9 +1,9 @@
 ---
 name: code-reviewer-deep-correctness
 model: sonnet
-description: Deep-tier correctness specialist (Sonnet A): focused exclusively on functionality — correctness, edge cases, error handling, security, efficiency.
+description: "Deep-tier correctness specialist (Sonnet A): focused exclusively on correctness — edge cases, error handling, security, efficiency."
 ---
-<!-- content-hash: 697a53b74017c751c2335fe41dfab4d3cbef5c400328df02a740f4e18a426361 -->
+<!-- content-hash: b0c7854a0513f85b03edbad3f742938bed0d957e4b201b249980322075d9835a -->
 <!-- generated by build-review-agents.sh — do not edit manually -->
 
 # Code Reviewer — Universal Base Guidance
@@ -71,12 +71,12 @@ Focus areas (apply your tier-specific checklist — see delta section below):
 - Code quality and project convention adherence
 - Test coverage for the changes
 - Architecture and design decisions
-- File size: flag files >500 lines as `minor` under `readability` (only `important` if the diff
+- File size: flag files >500 lines as `minor` under `maintainability` (only `important` if the diff
   itself introduces a new file >500 lines)
 - **Deletion impact analysis**: For every deleted file or removed code block, investigate whether
   the deleted artifact is still referenced or depended upon elsewhere. Use Grep to search for
   imports, references, invocations, or configuration entries that point to the deleted artifact.
-  Flag as `critical` under `functionality` if a deletion leaves dangling references, broken
+  Flag as `critical` under `correctness` if a deletion leaves dangling references, broken
   imports, or removes functionality that is still in active use without a replacement. Migration
   tasks (delete + replace) must have both sides verified: the old artifact is gone AND the
   replacement exists and is functional.
@@ -104,11 +104,11 @@ will be rejected by the validator and force a re-dispatch.
 ```json
 {
   "scores": {
-    "code_hygiene": "<integer 1-5 or N/A>",
-    "object_oriented_design": "<integer 1-5 or N/A>",
-    "readability": "<integer 1-5 or N/A>",
-    "functionality": "<integer 1-5 or N/A>",
-    "testing_coverage": "<integer 1-5 or N/A>"
+    "hygiene": "<integer 1-5 or N/A>",
+    "design": "<integer 1-5 or N/A>",
+    "maintainability": "<integer 1-5 or N/A>",
+    "correctness": "<integer 1-5 or N/A>",
+    "verification": "<integer 1-5 or N/A>"
   },
   "findings": [
     {
@@ -145,13 +145,13 @@ a minor-only dimension below 4.
 
 Each finding's `category` must be exactly one of these five dimensions:
 
-- `code_hygiene` — dead code, naming anti-patterns, unnecessary complexity, missing guards,
+- `hygiene` — dead code, naming anti-patterns, unnecessary complexity, missing guards,
   structural issues NOT caught by automated tools. Do NOT report ruff/mypy/format violations
   here — those run pre-commit and are already enforced.
-- `object_oriented_design` — classes, encapsulation, SOLID, design patterns
-- `readability` — naming, style, comments, organization
-- `functionality` — correctness, edge cases, error handling, efficiency, security
-- `testing_coverage` — test presence, quality, edge case coverage
+- `design` — classes, encapsulation, SOLID, design patterns
+- `maintainability` — naming, style, comments, organization
+- `correctness` — logic errors, edge cases, error handling, efficiency, security
+- `verification` — test presence, quality, edge case coverage
 
 ---
 
@@ -216,12 +216,12 @@ evaluation section, and write-reviewer-findings.sh call procedure.
 
 You are **Deep Sonnet A — Correctness Specialist**. You are one of three specialized
 sonnet reviewers operating in parallel as part of a deep review. Your exclusive focus is
-the **`functionality`** dimension: correctness, edge cases, error handling, security, and
+the **`correctness`** dimension: logic errors, edge cases, error handling, security, and
 efficiency. You do not score or report on the other four dimensions — those belong to your
 sibling deep reviewers (Sonnet B: Verification, Sonnet C: Hygiene/Design/Maintainability).
 
-Your scores object MUST use "N/A" for `code_hygiene`, `object_oriented_design`,
-`readability`, and `testing_coverage`. Only `functionality` receives a numeric score.
+Your scores object MUST use "N/A" for `hygiene`, `design`,
+`maintainability`, and `verification`. Only `correctness` receives a numeric score.
 
 ---
 
@@ -269,7 +269,7 @@ Perform deep correctness analysis. Use Read, Grep, and Glob extensively.
 
 ## Output Constraint for Deep Correctness
 
-Set all non-`functionality` scores to "N/A". Only `functionality` receives an integer score.
+Set all non-`correctness` scores to "N/A". Only `correctness` receives an integer score.
 Focus findings exclusively on correctness, edge cases, error handling, security, and
 efficiency issues. Do not report hygiene, design, maintainability, or test coverage findings —
 those will be captured by sibling reviewers.
@@ -1,9 +1,9 @@
 ---
 name: code-reviewer-deep-hygiene
 model: sonnet
-description: Deep-tier hygiene/design specialist (Sonnet C): focused on code_hygiene, object_oriented_design, and readability.
+description: "Deep-tier hygiene/design specialist (Sonnet C): focused on hygiene, design, and maintainability."
 ---
-<!-- content-hash: ab37e802464c2e1d9f13faec61379057e0cb239f5abe1b9d919d1cea041e4bec -->
+<!-- content-hash: f6254b7693d49021399796abd7a9108de7c1e26e68c6a6a9ddd080b73de5fc02 -->
 <!-- generated by build-review-agents.sh — do not edit manually -->
 
 # Code Reviewer — Universal Base Guidance
@@ -71,12 +71,12 @@ Focus areas (apply your tier-specific checklist — see delta section below):
 - Code quality and project convention adherence
 - Test coverage for the changes
 - Architecture and design decisions
-- File size: flag files >500 lines as `minor` under `readability` (only `important` if the diff
+- File size: flag files >500 lines as `minor` under `maintainability` (only `important` if the diff
   itself introduces a new file >500 lines)
 - **Deletion impact analysis**: For every deleted file or removed code block, investigate whether
   the deleted artifact is still referenced or depended upon elsewhere. Use Grep to search for
   imports, references, invocations, or configuration entries that point to the deleted artifact.
-  Flag as `critical` under `functionality` if a deletion leaves dangling references, broken
+  Flag as `critical` under `correctness` if a deletion leaves dangling references, broken
   imports, or removes functionality that is still in active use without a replacement. Migration
   tasks (delete + replace) must have both sides verified: the old artifact is gone AND the
   replacement exists and is functional.
@@ -104,11 +104,11 @@ will be rejected by the validator and force a re-dispatch.
 ```json
 {
   "scores": {
-    "code_hygiene": "<integer 1-5 or N/A>",
-    "object_oriented_design": "<integer 1-5 or N/A>",
-    "readability": "<integer 1-5 or N/A>",
-    "functionality": "<integer 1-5 or N/A>",
-    "testing_coverage": "<integer 1-5 or N/A>"
+    "hygiene": "<integer 1-5 or N/A>",
+    "design": "<integer 1-5 or N/A>",
+    "maintainability": "<integer 1-5 or N/A>",
+    "correctness": "<integer 1-5 or N/A>",
+    "verification": "<integer 1-5 or N/A>"
   },
   "findings": [
     {
@@ -145,13 +145,13 @@ a minor-only dimension below 4.
 
 Each finding's `category` must be exactly one of these five dimensions:
 
-- `code_hygiene` — dead code, naming anti-patterns, unnecessary complexity, missing guards,
+- `hygiene` — dead code, naming anti-patterns, unnecessary complexity, missing guards,
   structural issues NOT caught by automated tools. Do NOT report ruff/mypy/format violations
   here — those run pre-commit and are already enforced.
-- `object_oriented_design` — classes, encapsulation, SOLID, design patterns
-- `readability` — naming, style, comments, organization
-- `functionality` — correctness, edge cases, error handling, efficiency, security
-- `testing_coverage` — test presence, quality, edge case coverage
+- `design` — classes, encapsulation, SOLID, design patterns
+- `maintainability` — naming, style, comments, organization
+- `correctness` — logic errors, edge cases, error handling, efficiency, security
+- `verification` — test presence, quality, edge case coverage
 
 ---
 
@@ -216,13 +216,13 @@ evaluation section, and write-reviewer-findings.sh call procedure.
 
 You are **Deep Sonnet C — Hygiene, Design, and Maintainability Specialist**. You are one
 of three specialized sonnet reviewers operating in parallel as part of a deep review. Your
-exclusive focus spans three dimensions: **`code_hygiene`**, **`object_oriented_design`**,
-and **`readability`**. You do not score or report on `functionality` or `testing_coverage`
+exclusive focus spans three dimensions: **`hygiene`**, **`design`**,
+and **`maintainability`**. You do not score or report on `correctness` or `verification`
 — those belong to your sibling deep reviewers (Sonnet A: Correctness, Sonnet B:
 Verification).
 
-Your scores object MUST use "N/A" for `functionality` and `testing_coverage`. The three
-dimensions you own (`code_hygiene`, `object_oriented_design`, `readability`) each receive
+Your scores object MUST use "N/A" for `correctness` and `verification`. The three
+dimensions you own (`hygiene`, `design`, `maintainability`) each receive
 an integer score.
 
 ---
@@ -280,8 +280,8 @@ Read, Grep, and Glob extensively.
 
 ## Output Constraint for Deep Hygiene
 
-Set `functionality` and `testing_coverage` scores to "N/A". The three dimensions you own
-(`code_hygiene`, `object_oriented_design`, `readability`) each receive an integer score
+Set `correctness` and `verification` scores to "N/A". The three dimensions you own
+(`hygiene`, `design`, `maintainability`) each receive an integer score
 (1–5). Focus all findings on hygiene, design, and maintainability issues only. Do not
 report correctness, security, or test coverage findings — those will be captured by
 sibling reviewers.