Skip to content

Commit a1449d7

Browse files
authored
Merge pull request #108 from PatrickSys/feat/verify-ui-proof-gate
Gate phase verification on required UI proof
2 parents a9c46ba + 1474e24 commit a1449d7

5 files changed

Lines changed: 428 additions & 8 deletions

File tree

bin/lib/phase.mjs

Lines changed: 230 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,16 @@
44
// evaluate once, so CWD must be computed inside function bodies.
55

66
import { existsSync, mkdirSync, readFileSync, writeFileSync, readdirSync } from 'fs';
7-
import { join, basename } from 'path';
7+
import { dirname, join, relative } from 'path';
88
import { output } from './cli-utils.mjs';
99
import { writeFingerprint } from './session-fingerprint.mjs';
1010
import { resolveWorkspaceContext } from './workspace-root.mjs';
11+
import {
12+
compareUiProofSlots,
13+
findUiProofBundleFiles,
14+
parseUiProofSlotsContent,
15+
readUiProofBundleFile,
16+
} from './ui-proof.mjs';
1117

1218
const PHASE_STATUS_MARKERS = {
1319
not_started: '[ ]',
@@ -169,6 +175,200 @@ function extractPlanFileArtifacts(planContent, workspaceRoot) {
169175
return artifacts;
170176
}
171177

178+
/**
 * Report whether a planned file artifact is in its expected end state.
 *
 * A `delete` operation is satisfied once the file is gone; every other
 * operation is satisfied while the file exists.
 *
 * @param {{ operation: string, exists: boolean }} artifact - Planned artifact record.
 * @returns {boolean} Whether the artifact matches what the plan expects.
 */
function isPlanArtifactSatisfied(artifact) {
  const expectsAbsence = artifact.operation === 'delete';
  return expectsAbsence ? !artifact.exists : artifact.exists;
}
182+
183+
/**
 * Build the repair hint shown for a planned artifact that is not in its expected state.
 *
 * @param {{ operation: string, file: string }} artifact - Planned artifact record.
 * @returns {string} Human-readable instruction for resolving the mismatch.
 */
function planArtifactFixHint(artifact) {
  const { operation, file } = artifact;
  return operation === 'delete'
    ? `Complete the planned DELETE for ${file}, or revise the plan if the file should remain.`
    : `Create or update ${file} so the planned ${operation.toUpperCase()} artifact exists, or revise the plan if it is no longer in scope.`;
}
189+
190+
/**
 * Collect every planned artifact that is not in its expected state.
 *
 * Each unsatisfied artifact is copied and annotated with a `blocker`
 * severity, the expected presence (`absent` for deletes, `present`
 * otherwise) and a repair hint.
 *
 * @param {Array<object>} artifacts - Planned artifact records.
 * @returns {{ satisfied: boolean, unsatisfied: Array<object> }} Gate summary.
 */
function evaluatePlanArtifacts(artifacts) {
  const unsatisfied = [];
  for (const artifact of artifacts) {
    if (isPlanArtifactSatisfied(artifact)) continue;
    const expected = artifact.operation === 'delete' ? 'absent' : 'present';
    unsatisfied.push({
      ...artifact,
      severity: 'blocker',
      expected,
      fix_hint: planArtifactFixHint(artifact),
    });
  }
  return { satisfied: unsatisfied.length === 0, unsatisfied };
}
204+
205+
/**
 * Return a copy of a UI proof issue that always carries `severity` and `fix_hint`.
 *
 * Missing or falsy severity becomes `blocker`; the hint falls back to the
 * issue's `fix` field and then to a generic instruction.
 *
 * @param {object} issue - Raw issue from UI proof parsing or comparison.
 * @returns {object} The issue with `severity` and `fix_hint` filled in.
 */
function normalizeUiProofIssue(issue) {
  const severity = issue.severity || 'blocker';
  const fixHint = issue.fix_hint
    || issue.fix
    || 'Fix the UI proof issue before claiming verification is complete.';
  return { ...issue, severity, fix_hint: fixHint };
}
212+
213+
/**
 * Decide whether a plan declares a non-empty `ui_proof_slots` value.
 *
 * This is a lightweight line scan, not a YAML parser. It looks for a
 * `ui_proof_slots:` key at the start of a line and then checks either the
 * inline value or the indented block list that follows the key.
 *
 * @param {string|null|undefined} planContent - Raw plan file text.
 * @returns {boolean} `true` when the key has a non-empty inline value or at
 *   least one indented `- ` list item; `false` when the key is absent, empty
 *   (`[]`, `[ ]`, `null`, `~`) or followed only by comments/blank lines.
 */
function planDeclaresUiProofSlots(planContent) {
  const text = String(planContent || '');
  const match = text.match(/(^|\n)ui_proof_slots:[ \t]*([^\n]*)/);
  if (!match) return false;

  // Trim first so a trailing `\r` (CRLF files) cannot stop `.*$` from matching.
  // The key regex already consumed the whitespace before the value, so a
  // comment-only remainder starts directly with `#`; accept `#` at the start
  // of the value as well as after whitespace.
  const inlineValue = match[2].trim().replace(/(^|\s+)#.*$/, '').trim();
  if (inlineValue) {
    const isEmptyValue = /^\[\s*\]$/.test(inlineValue) || inlineValue === 'null' || inlineValue === '~';
    return !isEmptyValue;
  }

  // No inline value: look for an indented block-list item before the next
  // top-level line or document marker.
  const following = text.slice(match.index + match[0].length).split(/\r?\n/);
  for (const line of following) {
    const trimmed = line.trim();
    if (!trimmed || trimmed.startsWith('#')) continue;
    if (trimmed === '---' || trimmed === '...') break;
    if (/^\s+-\s+/.test(line)) return true;
    // Any unindented content ends the block that belongs to the key.
    if (/^\S/.test(line)) break;
  }
  return false;
}
228+
229+
/**
 * Collect the `slot_id` values listed under a plan's `ui_proof_slots:` key.
 *
 * This is a lightweight line scan, not a YAML parser. It reads the lines
 * after the key until the next unindented line or document marker and picks
 * up every `slot_id: <value>` entry, stripping one leading and one trailing
 * quote character.
 *
 * @param {string|null|undefined} planContent - Raw plan file text.
 * @returns {string[]} Slot ids in declaration order; empty when the key is absent.
 */
function extractDeclaredUiProofSlotIds(planContent) {
  const text = String(planContent || '');
  const match = text.match(/(^|\n)ui_proof_slots:[ \t]*([^\n]*)/);
  if (!match) return [];

  const slotIds = [];
  const following = text.slice(match.index + match[0].length).split(/\r?\n/);
  for (const line of following) {
    const trimmed = line.trim();
    if (!trimmed || trimmed.startsWith('#')) continue;
    if (trimmed === '---' || trimmed === '...') break;
    // Any unindented content ends the block that belongs to the key.
    if (/^\S/.test(line)) break;
    // Require `slot_id` to be a whole key: at the start of the (optionally
    // dash-prefixed) line, or right after whitespace, `{` or `,`. Without this
    // boundary, keys such as `parent_slot_id:` were reported as slot ids.
    const slotMatch = trimmed.match(/(?:^-?\s*|[\s{,])slot_id:\s*([^#\s]+)/);
    if (slotMatch) slotIds.push(slotMatch[1].replace(/^['"]|['"]$/g, ''));
  }
  return slotIds;
}
244+
245+
/**
 * Find plans that declare UI proof slots and the planned-slot artifacts beside them.
 *
 * For every plan path (relative to `<planningDir>/phases`) that exists and
 * declares `ui_proof_slots`, this records the plan path relative to
 * `planningDir`, the slot ids it declares, and any recognised planned-slot
 * file sitting in the same directory as the plan.
 *
 * @param {string} planningDir - Planning directory that contains `phases/`.
 * @param {string[]} planDisplayPaths - Plan paths relative to `<planningDir>/phases`.
 * @returns {{ declaredPlans: string[], declaredSlotIds: Array<{ plan: string, slot_id: string }>, files: string[] }}
 *   Declaring plans, their slot ids, and the sorted, de-duplicated artifact paths.
 */
function findUiProofSlotPlansAndFiles(planningDir, planDisplayPaths) {
  const artifactNames = new Set([
    'ui-proof-slots.json',
    'ui-proof-slots.md',
    'UI-PROOF-SLOTS.json',
    'UI-PROOF-SLOTS.md',
    'planned-ui-proof.json',
    'planned-ui-proof.md',
  ]);
  const declaredPlans = [];
  const declaredSlotIds = [];
  const artifactPaths = new Set();

  for (const displayPath of planDisplayPaths) {
    const planPath = join(planningDir, 'phases', displayPath);
    if (!existsSync(planPath)) continue;

    const planText = readFileSync(planPath, 'utf-8');
    if (!planDeclaresUiProofSlots(planText)) continue;

    const planRel = relative(planningDir, planPath).replace(/\\/g, '/');
    declaredPlans.push(planRel);
    declaredSlotIds.push(
      ...extractDeclaredUiProofSlotIds(planText).map((slotId) => ({ plan: planRel, slot_id: slotId })),
    );

    // Planned-slot artifacts are only looked up in the plan's own directory.
    const siblingDir = dirname(planPath);
    if (!existsSync(siblingDir)) continue;
    readdirSync(siblingDir, { withFileTypes: true })
      .filter((entry) => entry.isFile() && artifactNames.has(entry.name))
      .forEach((entry) => artifactPaths.add(join(siblingDir, entry.name)));
  }

  return { declaredPlans, declaredSlotIds, files: [...artifactPaths].sort() };
}
278+
279+
/**
 * Compare the UI proof a phase planned against the proof bundles observed for it.
 *
 * Steps, in order:
 *  1. Discover plans that declare `ui_proof_slots` and the planned-slot files beside them.
 *  2. Keep only observed bundle files that live directly in a phase directory of the given plans.
 *  3. Parse planned-slot files, collecting slots and normalized parse errors.
 *  4. Report drift when a plan-declared slot id has no match in the planned-slot files.
 *  5. Read observed bundles; bundles with errors contribute errors instead of being compared.
 *  6. Return `missing` when plans declare slots but no planned-slot file exists,
 *     `not_applicable` when nothing is planned, `partial` when any error was
 *     collected, and otherwise the result of `compareUiProofSlots`.
 *
 * @param {{ planningDir: string, workspaceRoot: string, planDisplayPaths: string[] }} params
 *   Planning directory, workspace root used for display paths, and plan paths
 *   relative to `<planningDir>/phases`.
 * @returns {{ planned: string[], observed: string[], status: string, comparison: object|null, errors: object[] }}
 *   Workspace-relative planned/observed file lists, overall status, the raw
 *   comparison (or `null`), and the issues to surface.
 */
function comparePhaseUiProof({ planningDir, workspaceRoot, planDisplayPaths }) {
  // Display paths are workspace-relative and always use forward slashes.
  const toWorkspacePath = (filePath) => relative(workspaceRoot, filePath).replace(/\\/g, '/');

  const {
    files: plannedFiles,
    declaredPlans,
    declaredSlotIds,
  } = findUiProofSlotPlansAndFiles(planningDir, planDisplayPaths);
  const phaseDirs = new Set(
    planDisplayPaths.map((planDisplayPath) => dirname(join(planningDir, 'phases', planDisplayPath))),
  );
  const observedFiles = findUiProofBundleFiles(planningDir)
    .filter((filePath) => phaseDirs.has(dirname(filePath)));

  const planned = [];
  const observed = [];
  const plannedSlots = [];
  const errors = [];

  for (const filePath of plannedFiles) {
    const displayPath = toWorkspacePath(filePath);
    const { slots, errors: parseErrors } = parseUiProofSlotsContent(readFileSync(filePath, 'utf-8'), displayPath);
    planned.push(displayPath);
    plannedSlots.push(...slots);
    errors.push(...parseErrors.map(normalizeUiProofIssue));
  }

  // Drift is only checked when both sides actually list slots.
  if (plannedSlots.length > 0 && declaredSlotIds.length > 0) {
    const knownSlotIds = new Set(plannedSlots.map((slot) => String(slot?.slot_id || '')));
    const drifted = declaredSlotIds.filter(({ slot_id: slotId }) => !knownSlotIds.has(String(slotId)));
    for (const { plan, slot_id: slotId } of drifted) {
      errors.push(normalizeUiProofIssue({
        code: 'planned_ui_proof_slots_drift',
        path: `${plan}.ui_proof_slots`,
        message: `Plan declares UI proof slot ${slotId}, but no matching slot exists in the planned UI proof artifact.`,
        fix: 'Update ui-proof-slots.json or ui-proof-slots.md beside the plan so it matches the plan-declared slot IDs, or update the plan declaration.',
      }));
    }
  }

  const observedBundles = [];
  for (const filePath of observedFiles) {
    const displayPath = toWorkspacePath(filePath);
    const parsed = readUiProofBundleFile(filePath);
    observed.push(displayPath);
    if (parsed.errors.length > 0) {
      // A bundle with errors is reported, not compared; default its path to the bundle file.
      const bundleIssues = parsed.errors.map(
        (error) => normalizeUiProofIssue({ ...error, path: error.path || displayPath }),
      );
      errors.push(...bundleIssues);
    } else {
      observedBundles.push({
        source: displayPath,
        bundle: parsed.bundle,
        options: {
          requireLocalArtifactExists: true,
          workspaceRoot,
          bundleDir: dirname(filePath),
        },
      });
    }
  }

  if (plannedFiles.length === 0) {
    if (declaredPlans.length === 0) {
      return {
        planned,
        observed,
        status: 'not_applicable',
        comparison: null,
        errors,
      };
    }
    const missingError = {
      code: 'missing_planned_ui_proof_slots_file',
      severity: 'blocker',
      path: declaredPlans[0],
      message: 'Plan declares ui_proof_slots but no ui-proof-slots artifact was found beside the plan.',
      fix_hint: 'Create ui-proof-slots.json or ui-proof-slots.md beside the plan, or set ui_proof_slots: [] with a no_ui_proof_rationale if the phase is not UI-sensitive.',
    };
    return {
      planned,
      observed,
      status: 'missing',
      comparison: { status: 'missing', slots: [], errors: [missingError] },
      errors: [missingError],
    };
  }

  // Any collected error short-circuits the slot comparison as `partial`.
  const comparison = errors.length === 0
    ? compareUiProofSlots(plannedSlots, observedBundles)
    : { status: 'partial', slots: [], errors: errors.map(normalizeUiProofIssue) };

  return {
    planned,
    observed,
    status: comparison.status,
    comparison,
    errors: comparison.errors || errors,
  };
}
371+
172372
export function updateRoadmapPhaseStatus(roadmap, phaseNumber, status) {
173373
const marker = PHASE_STATUS_MARKERS[status];
174374
if (!marker) {
@@ -360,6 +560,26 @@ export function cmdVerify(...args) {
360560
? extractPlanFileArtifacts(readFileSync(fullPath, 'utf-8'), workspaceRoot)
361561
: [];
362562
});
563+
const artifactStatus = evaluatePlanArtifacts(artifacts);
564+
const uiProof = comparePhaseUiProof({
565+
planningDir,
566+
workspaceRoot,
567+
planDisplayPaths: matchingPlans,
568+
});
569+
const uiProofSatisfied = ['satisfied', 'not_applicable'].includes(uiProof.status);
570+
const legacyVerified = matchingPlans.length > 0 && matchingSummaries.length > 0;
571+
const uiProofGate = {
572+
status: uiProof.status,
573+
required: uiProof.status !== 'not_applicable',
574+
satisfied: uiProofSatisfied,
575+
blocks_verification: uiProof.status !== 'not_applicable' && !uiProofSatisfied,
576+
required_block: uiProof.status !== 'not_applicable' && !uiProofSatisfied ? 'ui-proof-failed' : null,
577+
};
578+
const blockedOn = [
579+
...(artifactStatus.satisfied ? [] : ['artifacts']),
580+
...(uiProofGate.blocks_verification ? ['ui_proof'] : []),
581+
];
582+
const closureVerified = legacyVerified && artifactStatus.satisfied && uiProofSatisfied;
363583

364584
const result = {
365585
phase: normalizePhaseToken(phaseNum),
@@ -368,9 +588,17 @@ export function cmdVerify(...args) {
368588
summaries: matchingSummaries,
369589
artifacts,
370590
allExist: artifacts.every((artifact) => artifact.exists),
371-
verified: matchingPlans.length > 0 && matchingSummaries.length > 0,
591+
artifact_status: artifactStatus,
592+
uiProof,
593+
verified: closureVerified,
594+
legacy_verified: legacyVerified,
595+
phase_artifacts_present: legacyVerified,
596+
ui_proof: uiProofGate,
597+
blocked_on: blockedOn,
598+
blocks_verification: blockedOn.length > 0,
372599
};
373600
output(result);
601+
if (!closureVerified && legacyVerified) process.exitCode = 1;
374602
}
375603

376604
export function cmdScaffold(...args) {

bin/lib/ui-proof.mjs

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,41 @@ function normalizeObservedBundle(entry) {
446446
};
447447
}
448448

449+
/**
 * Look up the repair hint for a UI proof comparison issue code.
 *
 * @param {string} code - Issue code emitted by the comparison.
 * @returns {string} The code-specific hint, or a generic hint when the code is unknown.
 */
function comparisonFixHint(code) {
  const hints = {
    invalid_observed_bundle: 'Fix the observed proof bundle metadata, then rerun ui-proof compare.',
    unsatisfied_observed_claim_status: 'Record a passed observed claim only after the changed UI state has been exercised and evidenced.',
    unsatisfied_observed_comparison_status: 'Set comparison_status_by_slot to satisfied only for slots backed by matching observations and artifacts.',
    missing_required_evidence_kind: 'Add observed evidence for every evidence kind required by the planned slot, or narrow the planned slot before verification.',
    human_evidence_cannot_bypass_required_non_human_evidence: 'Add the missing non-human evidence; human approval may narrow or waive but cannot replace it.',
    route_state_mismatch: 'Capture proof for the exact planned route/state, or update the plan before execution.',
    environment_mismatch: 'Capture proof in the planned environment, or record a narrowed claim limit and rerun comparison.',
    viewport_mismatch: 'Capture proof for the planned viewport, or narrow the viewport claim explicitly.',
    requirement_mismatch: 'Declare the planned requirement id in the observed proof bundle scope.',
    claim_mismatch: 'Keep the planned and observed claims identical so proof maps to the exact UI assertion.',
    observation_claim_mismatch: 'Add a passed observation that supports the exact planned claim.',
    observation_route_state_mismatch: 'Attach observations to the exact planned route/state.',
    missing_supporting_observation_evidence_kind: 'Add passed supporting observations for each required evidence kind.',
    unsatisfied_proof_step: 'Rerun or replace failing proof steps before claiming the slot is satisfied.',
    missing_manual_acceptance_evidence: 'Record human evidence when the planned slot requires manual acceptance.',
    missing_manual_acceptance_observation: 'Add a passed human observation for manual acceptance.',
    unsatisfied_observation_result: 'Resolve failed observations or classify the slot as partial, waived, or deferred.',
    missing_minimum_observation: 'Add observations covering every planned minimum observation.',
    missing_claim_limit: 'Preserve the planned claim limit in the observed proof bundle.',
    missing_expected_artifact_type: 'Attach the planned artifact type, such as screenshot, report, trace, or DOM snapshot.',
    missing_observed_bundle: 'Create an observed UI proof bundle for the planned slot, or explicitly waive/defer the slot with claim narrowing.',
  };
  // Own-property check: a plain object also answers to inherited keys such as
  // `constructor` or `__proto__`, which would hand back a non-string "hint".
  if (Object.prototype.hasOwnProperty.call(hints, code)) return hints[code];
  return 'Fix the proof issue, rerun the comparison, and keep the slot partial until evidence matches the plan.';
}
475+
476+
/**
 * Return a copy of a comparison issue that always carries `severity` and `fix_hint`.
 *
 * The issue's own fields are copied first and the two derived fields are
 * written afterwards, so a property that is present on the issue but falsy
 * (`severity: undefined`, `fix_hint: ''`) cannot overwrite the default.
 * Applying the function to an already decorated issue returns an equal object.
 *
 * @param {object} issue - Raw issue from slot/bundle validation or comparison.
 * @returns {object} The issue with `severity` (default `blocker`) and
 *   `fix_hint` (issue hint, then `fix`, then the code-specific hint).
 */
function decorateComparisonIssue(issue) {
  return {
    ...issue,
    severity: issue.severity || 'blocker',
    fix_hint: issue.fix_hint || issue.fix || comparisonFixHint(issue.code),
  };
}
483+
449484
function compareSlotToBundle(slot, slotIdValue, observed) {
450485
const issues = [];
451486
const bundle = observed.bundle;
@@ -648,15 +683,15 @@ function compareSlotToBundle(slot, slotIdValue, observed) {
648683
}
649684

650685
const status = issues.length === 0 ? 'satisfied' : (bundleStatus === 'missing' ? 'missing' : 'partial');
651-
return { status, issues, source: observed.source };
686+
return { status, issues: issues.map(decorateComparisonIssue), source: observed.source };
652687
}
653688

654689
export function compareUiProofSlots(plannedSlots, observedBundles) {
655690
const slots = normalizeArray(plannedSlots);
656691
const slotValidation = validateUiProofSlots(slots);
657692
const bundles = normalizeArray(observedBundles).map(normalizeObservedBundle);
658693
const results = [];
659-
const errors = [...slotValidation.errors];
694+
const errors = slotValidation.errors.map(decorateComparisonIssue);
660695

661696
for (const observed of bundles) {
662697
if (!observed.validation.valid) {
@@ -680,7 +715,7 @@ export function compareUiProofSlots(plannedSlots, observedBundles) {
680715
code: 'missing_observed_bundle',
681716
path: 'scope.slot_ids',
682717
message: `No observed UI proof bundle declares planned slot ${slotIdValue}.`,
683-
}],
718+
}].map(decorateComparisonIssue),
684719
});
685720
continue;
686721
}
@@ -706,7 +741,7 @@ export function compareUiProofSlots(plannedSlots, observedBundles) {
706741
? 'missing'
707742
: 'partial';
708743

709-
return { status, slots: results, errors };
744+
return { status, slots: results, errors: errors.map(decorateComparisonIssue) };
710745
}
711746

712747
export function validateUiProofBundle(bundle, options = {}) {

distilled/workflows/verify.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ Before starting, read these files:
1414
5. From the SUMMARY.md loaded in step 3, if a `<judgment>` section is present - read `<anti_regression>` rules as additional verification targets: confirm that invariants listed there were not broken by execution. Read `<active_constraints>` to calibrate verification scope.
1515
6. The relevant codebase files - the code that was actually built
1616
7. **Session-boundary fallback:** If the SUMMARY.md loaded in step 3 has no `<judgment>` section, check whether `.planning/.continue-here.bak` exists. If it does, read its `<judgment>` section. Treat `<anti_regression>` rules as additional verification targets and `<active_constraints>` to calibrate verification scope (same usage as step 5). After reading, run `node .planning/bin/gsdd.mjs file-op delete .planning/.continue-here.bak --missing ok` (auto-clean).
17+
8. Run `node .planning/bin/gsdd.mjs control-map --json` and read its output to reconcile workflow/lifecycle state and checkpoint presence (`.planning/.continue-here.md`) before deciding pass/fail.
1718

1819
Establish your verification basis (must-have sources, requirement scope, previous report status) before beginning code inspection. Do not jump to loose file reading until this basis is explicit.
1920

@@ -130,7 +131,7 @@ Note: this step does NOT replace levels 1–3. An artifact can satisfy the evide
130131

131132
<ui_proof_comparison>
132133
If the plan defines non-empty `ui_proof_slots`, compare planned UI proof against observed bundles before closure. Prefer `gsdd ui-proof compare <planned-slots-json> [observed-bundle-json ...]` when planned slots are available as JSON or fenced JSON; otherwise perform the same field-by-field comparison and record reduced assurance if no deterministic command could run. If the plan records only `no_ui_proof_rationale`, verify the rationale instead of requiring a bundle. Each observed bundle must include top-level `proof_bundle_version`, `scope`, `route_state`, `environment`, `viewport`, `evidence_inputs`, `commands_or_manual_steps`, `observations`, `artifacts`, `privacy`, `result`, and `claim_limits`.
133-
Classify each slot as exactly one of: `satisfied`, `partial`, `missing`, `waived`, `deferred`, or `not_applicable`. Waiver/deferment narrows the claim; it is not proof. Screenshots, traces, videos, reports, accessibility scans, Gherkin, visual diffs, and manual notes are artifact types or activities mapped onto existing evidence kinds, not new evidence kinds. Artifact count is never proof; each artifact must tie to the slot claim, route/state, observation, artifact path/link, privacy metadata, and claim limit.
134+
Classify each slot as exactly one of: `satisfied`, `partial`, `missing`, `waived`, `deferred`, or `not_applicable`. Deterministic comparison issues include `severity` and `fix_hint`; use those as the normal repair feedback loop before closing verification. Waiver/deferment narrows the claim; it is not proof. Screenshots, traces, videos, reports, accessibility scans, Gherkin, visual diffs, and manual notes are artifact types or activities mapped onto existing evidence kinds, not new evidence kinds. Artifact count is never proof; each artifact must tie to the slot claim, route/state, observation, artifact path/link, privacy metadata, and claim limit.
134135
For live UI runtime proof, expect `agent-browser` as the default captured tool unless the observed bundle explains a project-native equivalent or an availability constraint. Do not fail solely because another browser tool was used, but downgrade vague proof that lacks exact route/state, planned viewport coverage or rationale, interactive steps/refs where relevant, screenshot/report artifacts, or relevant console/network observations. Existing Playwright tests count as canonical repeatable regression evidence, not a replacement for scoped runtime evidence when the slot requires `runtime`.
135136
Artifact privacy metadata must include `visibility`, `retention`, `sensitivity`, and `safe_to_publish`; raw screenshots, traces, videos, DOM snapshots, and reports default to local-only and unsafe unless sanitized. Run `gsdd ui-proof validate <path>` or treat `gsdd health` E10 as blocking; add `--claim <...>` when relying on the bundle for public, tracked, delivery, release, or publication proof. Visual taste, accessibility judgment, baseline acceptance, subjective polish/layout quality, and privacy publication require human evidence or explicit waiver; human approval does not replace required `code`, `test`, `runtime`, or `delivery` evidence. Source annotations, AST/cAST findings, semantic search, comments, and Semble-like retrieval are discovery hints only.
136137
</ui_proof_comparison>

docs/USER-GUIDE.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ Other CLI commands that remain available outside the first-run path:
223223
| Command | Purpose |
224224
|---------|---------|
225225
| `gsdd find-phase [N]` | Show phase info as JSON (for agent consumption) |
226-
| `gsdd verify <N>` | Run artifact checks for phase N |
226+
| `gsdd verify <N>` | Run phase artifact and UI-proof closure checks for phase N; exits nonzero when verification is blocked |
227227
| `gsdd scaffold phase <N> [name]` | Create a new phase plan file |
228228

229229
### Platform flags for `--tools`

0 commit comments

Comments
 (0)