devkade
diff --git a/README.md b/README.md
Lines changed: 3 additions & 1 deletion
diff --git a/src/domain/objective.ts b/src/domain/objective.ts
Lines changed: 21 additions & 8 deletions
@@ -251,7 +251,9 @@ Runtime storage, adapter configuration, and worker retention are described in [`
 
 Future learning-runtime boundaries are designed in [`docs/learning-runtime-boundaries.md`](docs/learning-runtime-boundaries.md). That document connects existing `WorkflowState` and RunContract projection responsibilities to a future `RunState` execution envelope while separating completion authority, runtime readiness authority, and advisory evaluation/learning signals.
 
-The domain `PolicySelector` in `src/domain/policy-selector.ts` is an advisory pre-dispatch primitive. It generates a fixed initial policy set, simulates objective-weighted candidate outcomes, records a selected policy plus rejected alternatives, and emits prediction ids that reward-ledger entries can later calibrate. It does not launch agents, mutate workflow state, or hard-block execution from simulated score alone.
+The domain `PolicySelector` in `src/domain/policy-selector.ts` is an advisory pre-dispatch primitive. It generates a fixed initial policy set across conservative, balanced, aggressive, high-assurance, and learning-exploration strategies; simulates objective-weighted candidate outcomes from task complexity, expected module touch count, dependency depth, adapter mix, isolation mode, verification depth, historical success, and recent reward calibration; records estimator outputs for conflict risk, regression risk, repair likelihood, elapsed/tool cost, review burden, learning value, confidence, and utility; and emits prediction ids that reward-ledger entries can later calibrate. Human overrides are explicit (`selector: "human"` plus reason) and remain bounded by exploration/conflict/regression safety caps. The selector does not launch agents, mutate workflow state, or hard-block execution from simulated score alone.
+
+The objective/reward domain in `src/domain/objective.ts` converts evidence-backed `EvaluationResult` records into append-only `reward-ledger.v1` events. Reward records include outcome status, prediction-vs-actual delta, penalty taxonomy, anti-Goodhart checks tied to [`docs/runcontract-harness-evaluator.md`](docs/runcontract-harness-evaluator.md), advisory `PolicyHint` values, and calibration metadata. Reward data may inform future `PolicySelection` records, but it must not silently mutate objective weights, selected policy, worker count, adapter choices, or completion authority; human-approved objective calibration must be recorded explicitly.
 
 ## Thin Harness Standard
 
 
@@ -12,13 +12,16 @@ export interface GuardrailViolation { guardrailId: string; reason: string }
 export interface EvaluationResult { schemaVersion: "evaluation-result.v1"; id: string; objectiveId: string; target: ObjectiveTarget; score: number; verdict: EvaluationVerdict; metricScores: MetricScore[]; guardrailViolations: GuardrailViolation[]; reasons: string[]; requiredRepairs: string[]; evidenceRefs: string[]; at: string }
 export interface EvaluateObjectiveInput { id: string; target: ObjectiveTarget; metricValues: Record<string, number>; guardrailChecks: GuardrailCheck[]; evidenceRefs: string[]; at: string }
 export type RewardOutcomeStatus = "success" | "repair" | "failure";
+export type RewardPenaltyKind = "repair" | "regression" | "conflict" | "stale_evidence" | "anti_goodhart" | "human_rejection" | "cost";
 export interface PolicyPrediction { id: string; policyId: string; predictedScore: number; confidence: number }
 export interface RewardLedgerContext { runId: string; taskId?: string; workerId?: string }
 export interface RewardActualOutcome { status: RewardOutcomeStatus; score: number; elapsedSeconds: number; repairCount: number }
-export interface RewardPenalty { id: string; amount: number; reason: string }
-export interface PolicyHint { id: string; policyId: string; summary: string; authority: "advisory" }
-export interface RewardLedgerEvent { schemaVersion: "reward-ledger.v1"; id: string; at: string; context: RewardLedgerContext; selectedPolicyId: string; prediction: PolicyPrediction; evaluation: EvaluationResult; actualOutcome: RewardActualOutcome; reward: number; predictionDelta: number; penalties: RewardPenalty[]; policyHints: PolicyHint[] }
-export type RewardLedgerAppendInput = Omit<RewardLedgerEvent, "schemaVersion" | "reward" | "predictionDelta" | "penalties"> & { penalties?: RewardPenalty[] };
+export interface RewardPenalty { id: string; kind: RewardPenaltyKind; amount: number; reason: string; evidenceRefs: string[] }
+export interface AntiGoodhartCheck { id: string; passed: boolean; concern: string; evidenceRefs: string[] }
+export interface RewardCalibration { predictionId: string; predictionDelta: number; confidence: number; humanApprovedWeightChange: boolean; notes: string[] }
+export interface PolicyHint { id: string; policyId: string; summary: string; authority: "advisory"; strength?: "weak" | "normal" | "strong"; source?: "reward_ledger" | "human" | "policy_simulation" }
+export interface RewardLedgerEvent { schemaVersion: "reward-ledger.v1"; id: string; at: string; context: RewardLedgerContext; selectedPolicyId: string; prediction: PolicyPrediction; evaluation: EvaluationResult; actualOutcome: RewardActualOutcome; reward: number; predictionDelta: number; penalties: RewardPenalty[]; antiGoodhartChecks: AntiGoodhartCheck[]; calibration: RewardCalibration; policyHints: PolicyHint[] }
+export type RewardLedgerAppendInput = Omit<RewardLedgerEvent, "schemaVersion" | "reward" | "predictionDelta" | "penalties" | "calibration"> & { penalties?: RewardPenalty[]; calibration?: Partial<RewardCalibration> };
 
 export function evaluateObjective(objective: ObjectiveFunction, input: EvaluateObjectiveInput): EvaluationResult {
   assertValidObjective(objective);
@@ -45,10 +48,13 @@ export function evaluateObjective(objective: ObjectiveFunction, input: EvaluateO
 export function appendRewardLedgerEvent(ledger: RewardLedgerEvent[], input: RewardLedgerAppendInput): RewardLedgerEvent[] {
   assertValidRewardAppendInput(input);
   const penalties = input.penalties ?? [];
-  const penaltyTotal = penalties.reduce((sum, penalty) => sum + penalty.amount, 0);
+  const antiGoodhartPenalty = input.antiGoodhartChecks.some((check) => !check.passed) ? 0.15 : 0;
+  const penaltyTotal = penalties.reduce((sum, penalty) => sum + penalty.amount, antiGoodhartPenalty);
   const outcomePenalty = input.actualOutcome.status === "success" ? 0 : input.actualOutcome.status === "repair" ? 0.2 : 0.5;
   const reward = roundScore(input.actualOutcome.score - outcomePenalty - input.actualOutcome.repairCount * 0.05 - Math.min(input.actualOutcome.elapsedSeconds / 10_000, 0.2) - penaltyTotal);
-  const event: RewardLedgerEvent = { schemaVersion: "reward-ledger.v1", id: input.id, at: input.at, context: { ...input.context }, selectedPolicyId: input.selectedPolicyId, prediction: { ...input.prediction }, evaluation: cloneEvaluation(input.evaluation), actualOutcome: { ...input.actualOutcome }, reward, predictionDelta: roundScore(input.actualOutcome.score - input.prediction.predictedScore), penalties: penalties.map((penalty) => ({ ...penalty })), policyHints: input.policyHints.map((hint) => ({ ...hint, authority: "advisory" })) };
+  const predictionDelta = roundScore(input.actualOutcome.score - input.prediction.predictedScore);
+  const calibration: RewardCalibration = { predictionId: input.prediction.id, predictionDelta, confidence: input.prediction.confidence, humanApprovedWeightChange: input.calibration?.humanApprovedWeightChange ?? false, notes: [...(input.calibration?.notes ?? [])] };
+  const event: RewardLedgerEvent = { schemaVersion: "reward-ledger.v1", id: input.id, at: input.at, context: { ...input.context }, selectedPolicyId: input.selectedPolicyId, prediction: { ...input.prediction }, evaluation: cloneEvaluation(input.evaluation), actualOutcome: { ...input.actualOutcome }, reward, predictionDelta, penalties: penalties.map((penalty) => ({ ...penalty, evidenceRefs: [...penalty.evidenceRefs] })), antiGoodhartChecks: input.antiGoodhartChecks.map((check) => ({ ...check, evidenceRefs: [...check.evidenceRefs] })), calibration, policyHints: input.policyHints.map((hint) => ({ ...hint, authority: "advisory" })) };
   return [...ledger.map(cloneRewardLedgerEvent), event];
 }
 
@@ -90,7 +96,7 @@ function cloneEvaluation(evaluation: EvaluationResult): EvaluationResult {
 }
 
 function cloneRewardLedgerEvent(event: RewardLedgerEvent): RewardLedgerEvent {
-  return { ...event, context: { ...event.context }, prediction: { ...event.prediction }, evaluation: cloneEvaluation(event.evaluation), actualOutcome: { ...event.actualOutcome }, penalties: event.penalties.map((penalty) => ({ ...penalty })), policyHints: event.policyHints.map((hint) => ({ ...hint })) };
+  return { ...event, context: { ...event.context }, prediction: { ...event.prediction }, evaluation: cloneEvaluation(event.evaluation), actualOutcome: { ...event.actualOutcome }, penalties: event.penalties.map((penalty) => ({ ...penalty, evidenceRefs: [...penalty.evidenceRefs] })), antiGoodhartChecks: event.antiGoodhartChecks.map((check) => ({ ...check, evidenceRefs: [...check.evidenceRefs] })), calibration: { ...event.calibration, notes: [...event.calibration.notes] }, policyHints: event.policyHints.map((hint) => ({ ...hint })) };
 }
 
 function assertValidRewardAppendInput(input: RewardLedgerAppendInput): void {
@@ -100,13 +106,20 @@ function assertValidRewardAppendInput(input: RewardLedgerAppendInput): void {
   assertValidScore("actualOutcome score", input.actualOutcome.score);
   if (!Number.isFinite(input.actualOutcome.elapsedSeconds) || input.actualOutcome.elapsedSeconds < 0) throw new Error("elapsedSeconds must be non-negative");
   if (!Number.isInteger(input.actualOutcome.repairCount) || input.actualOutcome.repairCount < 0) throw new Error("repairCount must be a non-negative integer");
-  for (const penalty of input.penalties ?? []) if (!Number.isFinite(penalty.amount) || penalty.amount < 0) throw new Error(`penalty ${penalty.id} amount must be non-negative`);
+  for (const penalty of input.penalties ?? []) {
+    if (!penalty.id.trim()) throw new Error("penalty id is required");
+    if (!Number.isFinite(penalty.amount) || penalty.amount < 0) throw new Error(`penalty ${penalty.id} amount must be non-negative`);
+    if (!penalty.evidenceRefs.length) throw new Error(`penalty ${penalty.id} requires evidence refs`);
+  }
+  for (const check of input.antiGoodhartChecks) if (!check.id.trim() || !check.concern.trim() || !check.evidenceRefs.length) throw new Error("anti-Goodhart checks require id, concern, and evidence refs");
 }
 
 function validateParsedRewardLedgerEvent(value: unknown): RewardLedgerEvent {
   if (!value || typeof value !== "object") throw new Error("reward ledger event must be an object");
   const event = value as Partial<RewardLedgerEvent>;
   if (event.schemaVersion !== "reward-ledger.v1") throw new Error("unsupported reward ledger schema version");
+  if (!event.calibration?.predictionId) throw new Error("reward ledger event requires calibration prediction id");
+  if (!Array.isArray(event.antiGoodhartChecks)) throw new Error("reward ledger event requires anti-Goodhart checks");
   return event as RewardLedgerEvent;
 }