Merge pull request #108 from PatrickSys/feat/verify-ui-proof-gate

PatrickSys · web-flow · commit a1449d711eba · 2026-05-08T19:11:54.000+02:00
Gate phase verification on required UI proof
diff --git a/bin/lib/phase.mjs b/bin/lib/phase.mjs
@@ -4,10 +4,16 @@
 // evaluate once, so CWD must be computed inside function bodies.
 
 import { existsSync, mkdirSync, readFileSync, writeFileSync, readdirSync } from 'fs';
-import { join, basename } from 'path';
+import { dirname, join, relative } from 'path';
 import { output } from './cli-utils.mjs';
 import { writeFingerprint } from './session-fingerprint.mjs';
 import { resolveWorkspaceContext } from './workspace-root.mjs';
+import {
+  compareUiProofSlots,
+  findUiProofBundleFiles,
+  parseUiProofSlotsContent,
+  readUiProofBundleFile,
+} from './ui-proof.mjs';
 
 const PHASE_STATUS_MARKERS = {
   not_started: '[ ]',
@@ -169,6 +175,200 @@ function extractPlanFileArtifacts(planContent, workspaceRoot) {
   return artifacts;
 }
 
+function isPlanArtifactSatisfied(artifact) {
+  if (artifact.operation === 'delete') return !artifact.exists;
+  return artifact.exists;
+}
+
+function planArtifactFixHint(artifact) {
+  if (artifact.operation === 'delete') {
+    return `Complete the planned DELETE for ${artifact.file}, or revise the plan if the file should remain.`;
+  }
+  return `Create or update ${artifact.file} so the planned ${artifact.operation.toUpperCase()} artifact exists, or revise the plan if it is no longer in scope.`;
+}
+
+function evaluatePlanArtifacts(artifacts) {
+  const unsatisfied = artifacts
+    .filter((artifact) => !isPlanArtifactSatisfied(artifact))
+    .map((artifact) => ({
+      ...artifact,
+      severity: 'blocker',
+      expected: artifact.operation === 'delete' ? 'absent' : 'present',
+      fix_hint: planArtifactFixHint(artifact),
+    }));
+  return {
+    satisfied: unsatisfied.length === 0,
+    unsatisfied,
+  };
+}
+
+function normalizeUiProofIssue(issue) {
+  return {
+    ...issue,
+    severity: issue.severity || 'blocker',
+    fix_hint: issue.fix_hint || issue.fix || 'Fix the UI proof issue before claiming verification is complete.',
+  };
+}
+
+function planDeclaresUiProofSlots(planContent) {
+  const match = String(planContent || '').match(/(^|\n)ui_proof_slots:[ \t]*([^\n]*)/);
+  if (!match) return false;
+  const inlineValue = match[2].replace(/\s+#.*$/, '').trim();
+  if (inlineValue) return !['[]', 'null', '~'].includes(inlineValue);
+  const after = String(planContent || '').slice(match.index + match[0].length).split(/\r?\n/);
+  for (const line of after) {
+    const trimmed = line.trim();
+    if (!trimmed || trimmed.startsWith('#')) continue;
+    if (trimmed === '---' || trimmed === '...') break;
+    if (/^\s+-\s+/.test(line)) return true;
+    if (/^\S[^:\n]*:\s*/.test(line) || /^\S/.test(line)) break;
+  }
+  return false;
+}
+
+function extractDeclaredUiProofSlotIds(planContent) {
+  const match = String(planContent || '').match(/(^|\n)ui_proof_slots:[ \t]*([^\n]*)/);
+  if (!match) return [];
+  const after = String(planContent || '').slice(match.index + match[0].length).split(/\r?\n/);
+  const slotIds = [];
+  for (const line of after) {
+    const trimmed = line.trim();
+    if (!trimmed || trimmed.startsWith('#')) continue;
+    if (trimmed === '---' || trimmed === '...') break;
+    if (/^\S[^:\n]*:\s*/.test(line) || /^\S/.test(line)) break;
+    const slotMatch = trimmed.match(/(?:^-\s*)?slot_id:\s*([^#\s]+)/);
+    if (slotMatch) slotIds.push(slotMatch[1].replace(/^['"]|['"]$/g, ''));
+  }
+  return slotIds;
+}
+
+function findUiProofSlotPlansAndFiles(planningDir, planDisplayPaths) {
+  const candidates = new Set();
+  const declaredPlans = [];
+  const declaredSlotIds = [];
+  const names = new Set([
+    'ui-proof-slots.json',
+    'ui-proof-slots.md',
+    'UI-PROOF-SLOTS.json',
+    'UI-PROOF-SLOTS.md',
+    'planned-ui-proof.json',
+    'planned-ui-proof.md',
+  ]);
+
+  for (const planDisplayPath of planDisplayPaths) {
+    const fullPlanPath = join(planningDir, 'phases', planDisplayPath);
+    if (!existsSync(fullPlanPath)) continue;
+    const planContent = readFileSync(fullPlanPath, 'utf-8');
+    if (!planDeclaresUiProofSlots(planContent)) continue;
+    const relPlanPath = relative(planningDir, fullPlanPath).replace(/\\/g, '/');
+    declaredPlans.push(relPlanPath);
+    for (const slotId of extractDeclaredUiProofSlotIds(planContent)) {
+      declaredSlotIds.push({ plan: relPlanPath, slot_id: slotId });
+    }
+    const planDir = dirname(fullPlanPath);
+    if (!existsSync(planDir)) continue;
+    for (const entry of readdirSync(planDir, { withFileTypes: true })) {
+      if (entry.isFile() && names.has(entry.name)) {
+        candidates.add(join(planDir, entry.name));
+      }
+    }
+  }
+  return { declaredPlans, declaredSlotIds, files: [...candidates].sort() };
+}
+
+function comparePhaseUiProof({ planningDir, workspaceRoot, planDisplayPaths }) {
+  const plannedDiscovery = findUiProofSlotPlansAndFiles(planningDir, planDisplayPaths);
+  const plannedFiles = plannedDiscovery.files;
+  const phaseDirs = new Set(planDisplayPaths.map((planDisplayPath) => dirname(join(planningDir, 'phases', planDisplayPath))));
+  const observedFiles = findUiProofBundleFiles(planningDir)
+    .filter((filePath) => phaseDirs.has(dirname(filePath)));
+
+  const plannedSlots = [];
+  const errors = [];
+  const planned = [];
+  const observed = [];
+
+  for (const filePath of plannedFiles) {
+    const rel = relative(workspaceRoot, filePath).replace(/\\/g, '/');
+    const parsed = parseUiProofSlotsContent(readFileSync(filePath, 'utf-8'), rel);
+    planned.push(rel);
+    plannedSlots.push(...parsed.slots);
+    errors.push(...parsed.errors.map(normalizeUiProofIssue));
+  }
+
+  if (plannedSlots.length > 0 && plannedDiscovery.declaredSlotIds.length > 0) {
+    const plannedSlotIds = new Set(plannedSlots.map((slot) => String(slot?.slot_id || '')));
+    for (const declaredSlot of plannedDiscovery.declaredSlotIds) {
+      if (plannedSlotIds.has(String(declaredSlot.slot_id))) continue;
+      errors.push(normalizeUiProofIssue({
+        code: 'planned_ui_proof_slots_drift',
+        path: `${declaredSlot.plan}.ui_proof_slots`,
+        message: `Plan declares UI proof slot ${declaredSlot.slot_id}, but no matching slot exists in the planned UI proof artifact.`,
+        fix: 'Update ui-proof-slots.json or ui-proof-slots.md beside the plan so it matches the plan-declared slot IDs, or update the plan declaration.',
+      }));
+    }
+  }
+
+  const observedBundles = [];
+  for (const filePath of observedFiles) {
+    const rel = relative(workspaceRoot, filePath).replace(/\\/g, '/');
+    const parsed = readUiProofBundleFile(filePath);
+    observed.push(rel);
+    if (parsed.errors.length > 0) {
+      errors.push(...parsed.errors.map((error) => normalizeUiProofIssue({ ...error, path: error.path || rel })));
+      continue;
+    }
+    observedBundles.push({
+      source: rel,
+      bundle: parsed.bundle,
+      options: {
+        requireLocalArtifactExists: true,
+        workspaceRoot,
+        bundleDir: dirname(filePath),
+      },
+    });
+  }
+
+  if (plannedFiles.length === 0 && plannedDiscovery.declaredPlans.length > 0) {
+    const missingError = {
+      code: 'missing_planned_ui_proof_slots_file',
+      severity: 'blocker',
+      path: plannedDiscovery.declaredPlans[0],
+      message: 'Plan declares ui_proof_slots but no ui-proof-slots artifact was found beside the plan.',
+      fix_hint: 'Create ui-proof-slots.json or ui-proof-slots.md beside the plan, or set ui_proof_slots: [] with a no_ui_proof_rationale if the phase is not UI-sensitive.',
+    };
+    return {
+      planned,
+      observed,
+      status: 'missing',
+      comparison: { status: 'missing', slots: [], errors: [missingError] },
+      errors: [missingError],
+    };
+  }
+
+  if (plannedFiles.length === 0) {
+    return {
+      planned,
+      observed,
+      status: 'not_applicable',
+      comparison: null,
+      errors,
+    };
+  }
+
+  const comparison = errors.length > 0
+    ? { status: 'partial', slots: [], errors: errors.map(normalizeUiProofIssue) }
+    : compareUiProofSlots(plannedSlots, observedBundles);
+
+  return {
+    planned,
+    observed,
+    status: comparison.status,
+    comparison,
+    errors: comparison.errors || errors,
+  };
+}
+
 export function updateRoadmapPhaseStatus(roadmap, phaseNumber, status) {
   const marker = PHASE_STATUS_MARKERS[status];
   if (!marker) {
@@ -360,6 +560,26 @@ export function cmdVerify(...args) {
       ? extractPlanFileArtifacts(readFileSync(fullPath, 'utf-8'), workspaceRoot)
       : [];
   });
+  const artifactStatus = evaluatePlanArtifacts(artifacts);
+  const uiProof = comparePhaseUiProof({
+    planningDir,
+    workspaceRoot,
+    planDisplayPaths: matchingPlans,
+  });
+  const uiProofSatisfied = ['satisfied', 'not_applicable'].includes(uiProof.status);
+  const legacyVerified = matchingPlans.length > 0 && matchingSummaries.length > 0;
+  const uiProofGate = {
+    status: uiProof.status,
+    required: uiProof.status !== 'not_applicable',
+    satisfied: uiProofSatisfied,
+    blocks_verification: uiProof.status !== 'not_applicable' && !uiProofSatisfied,
+    required_block: uiProof.status !== 'not_applicable' && !uiProofSatisfied ? 'ui-proof-failed' : null,
+  };
+  const blockedOn = [
+    ...(artifactStatus.satisfied ? [] : ['artifacts']),
+    ...(uiProofGate.blocks_verification ? ['ui_proof'] : []),
+  ];
+  const closureVerified = legacyVerified && artifactStatus.satisfied && uiProofSatisfied;
 
   const result = {
     phase: normalizePhaseToken(phaseNum),
@@ -368,9 +588,17 @@ export function cmdVerify(...args) {
     summaries: matchingSummaries,
     artifacts,
     allExist: artifacts.every((artifact) => artifact.exists),
-    verified: matchingPlans.length > 0 && matchingSummaries.length > 0,
+    artifact_status: artifactStatus,
+    uiProof,
+    verified: closureVerified,
+    legacy_verified: legacyVerified,
+    phase_artifacts_present: legacyVerified,
+    ui_proof: uiProofGate,
+    blocked_on: blockedOn,
+    blocks_verification: blockedOn.length > 0,
   };
   output(result);
+  if (!closureVerified && legacyVerified) process.exitCode = 1;
 }
 
 export function cmdScaffold(...args) {
diff --git a/bin/lib/ui-proof.mjs b/bin/lib/ui-proof.mjs
@@ -446,6 +446,41 @@ function normalizeObservedBundle(entry) {
   };
 }
 
+function comparisonFixHint(code) {
+  const hints = {
+    invalid_observed_bundle: 'Fix the observed proof bundle metadata, then rerun ui-proof compare.',
+    unsatisfied_observed_claim_status: 'Record a passed observed claim only after the changed UI state has been exercised and evidenced.',
+    unsatisfied_observed_comparison_status: 'Set comparison_status_by_slot to satisfied only for slots backed by matching observations and artifacts.',
+    missing_required_evidence_kind: 'Add observed evidence for every evidence kind required by the planned slot, or narrow the planned slot before verification.',
+    human_evidence_cannot_bypass_required_non_human_evidence: 'Add the missing non-human evidence; human approval may narrow or waive but cannot replace it.',
+    route_state_mismatch: 'Capture proof for the exact planned route/state, or update the plan before execution.',
+    environment_mismatch: 'Capture proof in the planned environment, or record a narrowed claim limit and rerun comparison.',
+    viewport_mismatch: 'Capture proof for the planned viewport, or narrow the viewport claim explicitly.',
+    requirement_mismatch: 'Declare the planned requirement id in the observed proof bundle scope.',
+    claim_mismatch: 'Keep the planned and observed claims identical so proof maps to the exact UI assertion.',
+    observation_claim_mismatch: 'Add a passed observation that supports the exact planned claim.',
+    observation_route_state_mismatch: 'Attach observations to the exact planned route/state.',
+    missing_supporting_observation_evidence_kind: 'Add passed supporting observations for each required evidence kind.',
+    unsatisfied_proof_step: 'Rerun or replace failing proof steps before claiming the slot is satisfied.',
+    missing_manual_acceptance_evidence: 'Record human evidence when the planned slot requires manual acceptance.',
+    missing_manual_acceptance_observation: 'Add a passed human observation for manual acceptance.',
+    unsatisfied_observation_result: 'Resolve failed observations or classify the slot as partial, waived, or deferred.',
+    missing_minimum_observation: 'Add observations covering every planned minimum observation.',
+    missing_claim_limit: 'Preserve the planned claim limit in the observed proof bundle.',
+    missing_expected_artifact_type: 'Attach the planned artifact type, such as screenshot, report, trace, or DOM snapshot.',
+    missing_observed_bundle: 'Create an observed UI proof bundle for the planned slot, or explicitly waive/defer the slot with claim narrowing.',
+  };
+  return hints[code] || 'Fix the proof issue, rerun the comparison, and keep the slot partial until evidence matches the plan.';
+}
+
+function decorateComparisonIssue(issue) {
+  return {
+    severity: issue.severity || 'blocker',
+    fix_hint: issue.fix_hint || issue.fix || comparisonFixHint(issue.code),
+    ...issue,
+  };
+}
+
 function compareSlotToBundle(slot, slotIdValue, observed) {
   const issues = [];
   const bundle = observed.bundle;
@@ -648,15 +683,15 @@ function compareSlotToBundle(slot, slotIdValue, observed) {
   }
 
   const status = issues.length === 0 ? 'satisfied' : (bundleStatus === 'missing' ? 'missing' : 'partial');
-  return { status, issues, source: observed.source };
+  return { status, issues: issues.map(decorateComparisonIssue), source: observed.source };
 }
 
 export function compareUiProofSlots(plannedSlots, observedBundles) {
   const slots = normalizeArray(plannedSlots);
   const slotValidation = validateUiProofSlots(slots);
   const bundles = normalizeArray(observedBundles).map(normalizeObservedBundle);
   const results = [];
-  const errors = [...slotValidation.errors];
+  const errors = slotValidation.errors.map(decorateComparisonIssue);
 
   for (const observed of bundles) {
     if (!observed.validation.valid) {
@@ -680,7 +715,7 @@ export function compareUiProofSlots(plannedSlots, observedBundles) {
           code: 'missing_observed_bundle',
           path: 'scope.slot_ids',
           message: `No observed UI proof bundle declares planned slot ${slotIdValue}.`,
-        }],
+        }].map(decorateComparisonIssue),
       });
       continue;
     }
@@ -706,7 +741,7 @@ export function compareUiProofSlots(plannedSlots, observedBundles) {
         ? 'missing'
         : 'partial';
 
-  return { status, slots: results, errors };
+  return { status, slots: results, errors: errors.map(decorateComparisonIssue) };
 }
 
 export function validateUiProofBundle(bundle, options = {}) {
diff --git a/distilled/workflows/verify.md b/distilled/workflows/verify.md
@@ -14,6 +14,7 @@ Before starting, read these files:
 5. From the SUMMARY.md loaded in step 3, if a `<judgment>` section is present - read `<anti_regression>` rules as additional verification targets: confirm that invariants listed there were not broken by execution. Read `<active_constraints>` to calibrate verification scope.
 6. The relevant codebase files - the code that was actually built
 7. **Session-boundary fallback:** If the SUMMARY.md loaded in step 3 has no `<judgment>` section, check whether `.planning/.continue-here.bak` exists. If it does, read its `<judgment>` section. Treat `<anti_regression>` rules as additional verification targets and `<active_constraints>` to calibrate verification scope (same usage as step 5). After reading, run `node .planning/bin/gsdd.mjs file-op delete .planning/.continue-here.bak --missing ok` (auto-clean).
+8. Run `node .planning/bin/gsdd.mjs control-map --json` to reconcile workflow/lifecycle state and checkpoint presence (`.planning/.continue-here.md`) before deciding pass/fail.
 
 Establish your verification basis (must-have sources, requirement scope, previous report status) before beginning code inspection. Do not jump to loose file reading until this basis is explicit.
 
@@ -130,7 +131,7 @@ Note: this step does NOT replace levels 1–3. An artifact can satisfy the evide
 
 <ui_proof_comparison>
 If the plan defines non-empty `ui_proof_slots`, compare planned UI proof against observed bundles before closure. Prefer `gsdd ui-proof compare <planned-slots-json> [observed-bundle-json ...]` when planned slots are available as JSON or fenced JSON; otherwise perform the same field-by-field comparison and record reduced assurance if no deterministic command could run. If the plan records only `no_ui_proof_rationale`, verify the rationale instead of requiring a bundle. Each observed bundle must include top-level `proof_bundle_version`, `scope`, `route_state`, `environment`, `viewport`, `evidence_inputs`, `commands_or_manual_steps`, `observations`, `artifacts`, `privacy`, `result`, and `claim_limits`.
-Classify each slot as exactly one of: `satisfied`, `partial`, `missing`, `waived`, `deferred`, or `not_applicable`. Waiver/deferment narrows the claim; it is not proof. Screenshots, traces, videos, reports, accessibility scans, Gherkin, visual diffs, and manual notes are artifact types or activities mapped onto existing evidence kinds, not new evidence kinds. Artifact count is never proof; each artifact must tie to the slot claim, route/state, observation, artifact path/link, privacy metadata, and claim limit.
+Classify each slot as exactly one of: `satisfied`, `partial`, `missing`, `waived`, `deferred`, or `not_applicable`. Deterministic comparison issues include `severity` and `fix_hint`; use those as the normal repair feedback loop before closing verification. Waiver/deferment narrows the claim; it is not proof. Screenshots, traces, videos, reports, accessibility scans, Gherkin, visual diffs, and manual notes are artifact types or activities mapped onto existing evidence kinds, not new evidence kinds. Artifact count is never proof; each artifact must tie to the slot claim, route/state, observation, artifact path/link, privacy metadata, and claim limit.
 For live UI runtime proof, expect `agent-browser` as the default captured tool unless the observed bundle explains a project-native equivalent or an availability constraint. Do not fail solely because another browser tool was used, but downgrade vague proof that lacks exact route/state, planned viewport coverage or rationale, interactive steps/refs where relevant, screenshot/report artifacts, or relevant console/network observations. Existing Playwright tests count as canonical repeatable regression evidence, not a replacement for scoped runtime evidence when the slot requires `runtime`.
 Artifact privacy metadata must include `visibility`, `retention`, `sensitivity`, and `safe_to_publish`; raw screenshots, traces, videos, DOM snapshots, and reports default to local-only and unsafe unless sanitized. Run `gsdd ui-proof validate <path>` or treat `gsdd health` E10 as blocking; add `--claim <...>` when relying on the bundle for public, tracked, delivery, release, or publication proof. Visual taste, accessibility judgment, baseline acceptance, subjective polish/layout quality, and privacy publication require human evidence or explicit waiver; human approval does not replace required `code`, `test`, `runtime`, or `delivery` evidence. Source annotations, AST/cAST findings, semantic search, comments, and Semble-like retrieval are discovery hints only.
 </ui_proof_comparison>
diff --git a/docs/USER-GUIDE.md b/docs/USER-GUIDE.md
@@ -223,7 +223,7 @@ Other CLI commands that remain available outside the first-run path:
 | Command | Purpose |
 |---------|---------|
 | `gsdd find-phase [N]` | Show phase info as JSON (for agent consumption) |
-| `gsdd verify <N>` | Run artifact checks for phase N |
+| `gsdd verify <N>` | Run phase artifact and UI-proof closure checks for phase N; exits nonzero when verification is blocked |
 | `gsdd scaffold phase <N> [name]` | Create a new phase plan file |
 
 ### Platform flags for `--tools`
diff --git a/tests/phase.test.cjs b/tests/phase.test.cjs