Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 32 additions & 7 deletions .github/workflows/ci-update-canonization.yml
Original file line number Diff line number Diff line change
Expand Up @@ -228,22 +228,47 @@ jobs:
echo "G3-U2 PASS — current=$CURRENT target=$TARGET"

# ------------------------------------------------------------------
# G3-U4 — preflight enforcement: all five checks must appear in the
# plan's Preflight block. We don't require every check to pass —
# CI environments don't have nftband running — but every check
# MUST be evaluated and reported.
# G3-U4 — preflight enforcement: all seven checks must appear in the
# plan's Preflight block (PR-16 P-1..P-5 + PR-17 P-6..P-7). We don't
# require every check to pass — CI environments don't have nftband
# running — but every check MUST be evaluated and reported.
# ------------------------------------------------------------------
- name: G3-U4 — preflight reports all 5 checks
- name: G3-U4 — preflight reports all 7 checks
shell: bash
run: |
set -Eeuo pipefail
for name in authority_nftban service_nftband_active artifact_version_file dependency_nft state_no_stale_in_progress; do
for name in \
authority_nftban \
service_nftband_active \
artifact_version_file \
dependency_nft \
state_no_stale_in_progress \
rebuild_recovery_available \
install_origin_coherent; do
grep -q "$name" /tmp/dryrun.out || {
echo "::error::G3-U4 FAIL: preflight check '$name' missing from plan"
exit 1
}
done
echo "G3-U4 PASS — all 5 preflight checks reported"
echo "G3-U4 PASS — all 7 preflight checks reported"

# ------------------------------------------------------------------
# G3-U4-deepen (PR-17) — recovery planning metadata must be rendered
# so apply (PR-18) has explicit input about rollback reachability.
# ------------------------------------------------------------------
- name: G3-U4-deepen — recovery plan metadata rendered
shell: bash
run: |
set -Eeuo pipefail
grep -q "Recovery plan" /tmp/dryrun.out || {
echo "::error::G3-U4-deepen FAIL: plan has no Recovery block"
exit 1
}
grep -q "Mechanism" /tmp/dryrun.out || {
echo "::error::G3-U4-deepen FAIL: Recovery block missing Mechanism"
exit 1
}
echo "G3-U4-deepen PASS — recovery planning metadata present"

summary:
name: Update Canonization summary
Expand Down
25 changes: 15 additions & 10 deletions cmd/nftban-installer/update_dryrun.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,25 +69,30 @@ func runUpdateDryRun(ctx context.Context, exec executor.Executor, sf *state.Stat
}
log.PhaseEnd("Detect")

// 2. Update-specific preflight (P-1 through P-5).
// 2. Install origin — declared flag wins; fall back to package-manager
// probe (PR-17) so package installs that don't re-pass --rpm/--deb
// still get a correct plan.
origin := detectInstallOrigin(cfg)
if origin == "" {
origin = update.DetectInstallOrigin(exec, log)
}

// 3. Update-specific preflight (P-1..P-7 — PR-16 + PR-17).
log.Phase("Preflight")
pre := update.Preflight(exec, log)
pre := update.Preflight(exec, log, origin)
log.PhaseEnd("Preflight")

// 3. Version detection. sourceDir is empty in the package-install case;
// the update package treats that as non-fatal for PR-16 (package-install
// target detection lands in PR-17).
current, target, err := update.DetectVersions(exec, cfg.sourceDir, log)
// 4. Version detection — source tree wins; package manager is the
// fallback for package-install hosts (PR-17).
current, target, err := update.DetectVersions(exec, cfg.sourceDir, origin, log)
if err != nil {
log.Error("update dry-run: version detection failed: %v", err)
return state.ExitFailed
}

// 4. Install origin — drives which apply path PR-18 will use.
origin := detectInstallOrigin(cfg)

// 5. Assemble + render plan.
// 5. Assemble plan + attach PR-17 recovery metadata (planning-only).
plan := update.BuildPlan(pre, current, target, origin)
plan.AttachRecovery(update.BuildRecoveryPlan(exec))
plan.Render(os.Stdout)

// 6. Write a copy of the plan JSON to the state dir for audit/history
Expand Down
86 changes: 75 additions & 11 deletions internal/installer/update/plan.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,39 +77,78 @@ type Plan struct {
// this abstract ("reuse rebuild pipeline"); PR-18 fills in concrete
// steps.
Actions []string `json:"actions,omitempty"`

// Recovery carries planning-only metadata about the rollback route that
// PR-18's apply step will rely on. PR-17 populates this but does NOT
// execute any recovery — that's PR-18 scope per INV-U-002.
Recovery *RecoveryPlan `json:"recovery,omitempty"`
}

// RecoveryPlan is PR-17 metadata describing the rollback path apply will
// follow on failure. It is DESCRIPTIVE only; no state mutation in PR-17.
//
// A RecoveryPlan is attached to a Plan through Plan.AttachRecovery and is
// serialized under the plan's "recovery" JSON key. Field order and tags
// below are part of that wire format.
type RecoveryPlan struct {
// Available reports whether a rollback to the prior known-good state
// is possible without operator intervention. Plan.Render prints it as
// "available" when true and as a "NOT available" warning when false.
Available bool `json:"available"`

// Mechanism is the rollback mechanism that would be used — currently
// "rebuild" (validate-in-namespace + flush+load + v1.96 recovery), the
// only canonized recovery surface today. BuildRecoveryPlan always sets
// it to "rebuild".
Mechanism string `json:"mechanism"`

// Notes carries human-oriented context that helps an operator reason
// about recovery without digging into code (e.g. "prior state file
// present and terminal", "no prior install_state — fresh recovery").
// Plan.Render prints one note per line; the JSON key is omitted when
// the slice is empty.
Notes []string `json:"notes,omitempty"`
}

// PlanSchemaVersion is the current wire contract for Plan. Consumers should
// reject plans with unexpected schema versions. It is an untyped string
// constant, so it can be compared directly against a decoded version field.
const PlanSchemaVersion = "1.99.0"

// DetectVersions reads the current installed version from the FHS VERSION
// file and the target version from the source-tree VERSION (for source
// installs) or from the package metadata path.
// file and the target version from:
//
// For PR-16 we support source-install detection; package-install target
// detection is handled in PR-17.
func DetectVersions(exec executor.Executor, sourceDir string, log *logging.Logger) (current, target string, err error) {
// - <sourceDir>/VERSION when sourceDir is non-empty (source installs)
// - the package manager (rpm -q / dpkg -s) when origin is "rpm"/"deb"
// and sourceDir is empty (PR-17 scope)
//
// Missing target is non-fatal — the plan carries a warning downstream.
// Missing current IS fatal — we must know where we are before planning
// any transition (INV-U-002 rollbackability).
func DetectVersions(exec executor.Executor, sourceDir, origin string, log *logging.Logger) (current, target string, err error) {
	// The current version is read first, but its absence is only reported
	// after target detection so the debug log still shows what was found.
	current = readCurrentVersion(exec, log)

	// Priority 1: source tree VERSION (takes precedence over package query
	// so a caller can test with a source tree even on a package-install host).
	// An unreadable file is not an error here; it just leaves target empty
	// so the package-manager fallback below can run.
	if sourceDir != "" {
		tPath := filepath.Join(sourceDir, "VERSION")
		if data, rErr := exec.ReadFile(tPath); rErr == nil {
			target = strings.TrimSpace(string(data))
			log.Debug("update: target version from source tree %s = %s", tPath, target)
		} else {
			log.Debug("update: source VERSION not readable at %s: %v", tPath, rErr)
		}
	}

	// Priority 2 (PR-17): package manager query. Only consulted when source
	// detection did not yield a result. We pass in `origin` rather than
	// auto-detecting inside this function so the caller stays authoritative
	// about the install-origin decision. A failed query is logged as a
	// warning and leaves target empty rather than failing the call.
	if target == "" && (origin == "rpm" || origin == "deb") {
		if t, qErr := DetectPackageTarget(exec, origin, log); qErr != nil {
			log.Warn("update: package target query failed: %v", qErr)
		} else if t != "" {
			target = t
			log.Debug("update: target version from package manager (%s) = %s", origin, target)
		}
	}

	// Missing current version is the only fatal condition.
	if current == "" {
		return current, target, fmt.Errorf("update: cannot detect current version (no VERSION file)")
	}

	// Missing target is non-fatal: report it once, with the message that
	// matches what this function actually tried (source tree, then package
	// manager). The older "lands in PR-17" message no longer applies.
	if target == "" {
		log.Info("update: target version not detected (no source tree + no package ownership)")
	}
	return current, target, nil
}
Expand Down Expand Up @@ -172,6 +211,20 @@ func (p *Plan) Render(w io.Writer) {
fmt.Fprintln(w, "")
}

if p.Recovery != nil {
fmt.Fprintln(w, " Recovery plan (metadata only — apply lands in PR-18):")
availMark := "available"
if !p.Recovery.Available {
availMark = "NOT available — operator intervention may be required"
}
fmt.Fprintf(w, " Rollback : %s\n", availMark)
fmt.Fprintf(w, " Mechanism : %s\n", displayString(p.Recovery.Mechanism))
for _, n := range p.Recovery.Notes {
fmt.Fprintf(w, " · %s\n", n)
}
fmt.Fprintln(w, "")
}

if len(p.Actions) > 0 {
fmt.Fprintln(w, " Actions (dry-run — no mutation):")
for _, a := range p.Actions {
Expand Down Expand Up @@ -209,11 +262,22 @@ func BuildPlan(pre *PreflightResult, current, target, origin string) *Plan {
}
if target == "" {
p.Warnings = append(p.Warnings,
"target version not detected (package-install target detection lands in PR-17)")
"target version not detected (no source tree + no package ownership)")
}
if origin == "" {
p.Warnings = append(p.Warnings,
"install origin not detected — pass --source, --rpm, or --deb to disambiguate")
}
return p
}

// AttachRecovery decorates the plan with the PR-17 recovery metadata.
// Separate from BuildPlan so callers without an exec handle (tests) can
// still build a plan without synthesizing recovery state.
//
// The pointer is stored as-is (no copy) and replaces any recovery metadata
// attached earlier. Passing nil leaves the plan without a recovery section:
// Render skips the block and the "recovery" JSON key is omitted.
func (p *Plan) AttachRecovery(r *RecoveryPlan) {
p.Recovery = r
}

// displayVersion returns a placeholder for empty versions so the rendered
// table doesn't print blank cells that look like a bug.
func displayVersion(v string) string {
Expand Down
112 changes: 111 additions & 1 deletion internal/installer/update/preflight.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,10 @@ type PreflightResult struct {
// Called by phaseDetect when cfg.mode == "upgrade".
//
// The function is READ-ONLY. It must never mutate host state.
func Preflight(exec executor.Executor, log *logging.Logger) *PreflightResult {
//
// origin is the operator-declared install origin ("rpm", "deb", "source",
// or ""). PR-17 uses it for the new P-7 coherence check.
func Preflight(exec executor.Executor, log *logging.Logger, origin string) *PreflightResult {
res := &PreflightResult{Passed: true}

// P-1 authority — the existing authority.Detect would give us a full
Expand Down Expand Up @@ -150,9 +153,116 @@ func Preflight(exec executor.Executor, log *logging.Logger) *PreflightResult {
logCheck(log, c)
}

// P-6 (PR-17) rebuild recovery available — the planning-only check
// that tells apply (PR-18) whether a clean rollback is reachable.
// We do NOT execute recovery here; we just report whether the
// prerequisites exist: terminal prior state + ip nftban table + nft
// binary. If any is missing, apply must treat rollback as
// operator-assisted rather than automatic (INV-U-002).
{
ok := rebuildRecoveryAvailable(exec)
c := PreflightCheck{
Name: "rebuild_recovery_available",
Passed: ok,
Severity: "warning",
}
if !ok {
c.Detail = "rebuild recovery may require operator intervention on failure — review before apply"
}
res.Checks = append(res.Checks, c)
logCheck(log, c)
}

// P-7 (PR-17) install origin coherent — the declared --rpm / --deb /
// --source flag must match the host's actual install origin. A
// mismatch (e.g. --rpm on a DEB host) is a hard warning: apply would
// route through the wrong delivery model. PR-17 detects this early.
{
declared := origin
detected := DetectInstallOrigin(exec, log)
coherent := declared == "" || detected == "" || declared == detected
c := PreflightCheck{
Name: "install_origin_coherent",
Passed: coherent,
Severity: "warning",
}
if !coherent {
c.Detail = "declared origin (" + declared + ") does not match detected origin (" + detected + ")"
}
res.Checks = append(res.Checks, c)
logCheck(log, c)
}

return res
}

// rebuildRecoveryAvailable answers a single planning question: are the
// prerequisites for an automatic rollback via rebuild recovery in place?
// It returns true only when all of the following hold:
//
//   - the ip nftban table exists (authority held)
//   - the shell can resolve the nft binary (P-4 covers this too; it is
//     repeated here so the predicate is self-contained for callers
//     outside the full preflight)
//   - install_state is absent, empty, or one of the terminal states
//     enumerated in the switch below
//
// An install_state that exists but cannot be read, or that holds any
// other value (i.e. an in-progress marker), yields false.
//
// PR-17 scope: this is metadata only. PR-18 will wire it into the apply
// path via INV-U-002.
func rebuildRecoveryAvailable(exec executor.Executor) bool {
	const statePath = "/var/lib/nftban/state/install_state"

	// Table first, then binary: the shell probe only runs when the table
	// check has already passed.
	if !exec.NftTableExists("ip", "nftban") ||
		exec.Run("sh", "-c", "command -v nft >/dev/null 2>&1").ExitCode != 0 {
		return false
	}

	// No prior state file means a fresh recovery, which is fine.
	if !exec.FileExists(statePath) {
		return true
	}

	raw, err := exec.ReadFile(statePath)
	if err != nil {
		return false
	}

	// Same terminal-state list as isStaleInProgress, with the result
	// inverted; an empty file counts as "no prior state".
	switch trim(string(raw)) {
	case "":
		return true
	case "COMMITTED", "DEGRADED",
		"FAILED_SSH_UNKNOWN", "FAILED_AUTHORITY_ABORT", "FAILED_RENDER",
		"FAILED_REBUILD", "FAILED_NO_FIREWALL", "FAILED_TAKEOVER":
		return true
	default:
		// Present but non-terminal: recovery is not clean.
		return false
	}
}

// BuildRecoveryPlan derives the PR-17 recovery metadata from exec state.
// Planning-only: no mutation, no recovery execution.
//
// The returned plan always names "rebuild" as the mechanism. Available is
// taken from rebuildRecoveryAvailable, and Notes explains what was found
// at the install_state path:
//
//   - file absent: a note that there is no prior state
//   - file present but unreadable: a note carrying the read error, so the
//     operator can see why rollback is reported as unavailable
//   - file present but empty: a note saying so, instead of a blank value
//   - otherwise: the trimmed state value
//
// When Available is false a final note states that apply will need
// operator assistance on failure.
func BuildRecoveryPlan(exec executor.Executor) *RecoveryPlan {
	const statePath = "/var/lib/nftban/state/install_state"

	r := &RecoveryPlan{
		Mechanism: "rebuild",
	}
	r.Available = rebuildRecoveryAvailable(exec)

	if !exec.FileExists(statePath) {
		r.Notes = append(r.Notes,
			"no prior install_state — a fresh recovery is clean (no rollback target)")
	} else if data, err := exec.ReadFile(statePath); err != nil {
		// Previously dropped silently, which left "NOT available" with no
		// stated cause; rebuildRecoveryAvailable returns false in this case.
		r.Notes = append(r.Notes,
			"prior install_state present but unreadable: "+err.Error())
	} else if s := trim(string(data)); s == "" {
		// Avoid rendering "prior install_state = " with nothing after it.
		r.Notes = append(r.Notes,
			"prior install_state is empty — treated as no prior state")
	} else {
		r.Notes = append(r.Notes,
			"prior install_state = "+s)
	}

	if !r.Available {
		r.Notes = append(r.Notes,
			"preconditions for automatic rollback unmet — apply will require operator assist on failure")
	}
	return r
}

// isStaleInProgress returns true if install_state exists AND is not a
// terminal state (COMMITTED / DEGRADED / FAILED_*). We treat an in-progress
// marker as a caution.
Expand Down
Loading
Loading