Skip to content

Commit 365ddc1

Browse files
committed
fix(audit-cleanup): unblock Helm + move self-audit out of tx (#3268)
- .kontinuous/templates/audit-cleanup-cron.yaml: `.Values.app.imagePackage` is scoped to the app subchart and renders as nil from the project template context (build failure: "nil pointer evaluating interface {}.imagePackage"). Inline the literal `app` since values.yaml:9 pins it statically. - .kontinuous/templates/audit-cleanup-cron.yaml: switch `restartPolicy: OnFailure` to `Never` — the combination with `backoffLimit: 0` was contradictory (container would restart on failure but the Job would not re-queue). Both now agree on fail-fast. - scripts/audit-cleanup.mjs: move the success self-audit INSERT outside the transaction. A failure writing the audit row must not roll back cleanup work that already committed. The failure-path log was already out-of-tx. - scripts/audit-cleanup.mjs: use `fileURLToPath` + `realpathSync` for the `isMain` guard so the CLI fires correctly under pnpm's content-addressable store and Docker bind-mounts that hand symlinked paths to Node. - integration test: null-guard `afterAll` so teardown cannot crash on an un-initialized `sql` if `beforeAll` threw.
1 parent b4d99cd commit 365ddc1

3 files changed

Lines changed: 55 additions & 20 deletions

File tree

.kontinuous/templates/audit-cleanup-cron.yaml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,19 @@ spec:
1919
labels:
2020
app: audit-cleanup-daily
2121
spec:
22-
restartPolicy: OnFailure
22+
# `Never` is consistent with `backoffLimit: 0`: the kubelet does not
23+
# restart the failed container and the Job does not re-queue a new
24+
# pod. A failed cleanup waits for the next daily cron run.
25+
restartPolicy: Never
2326
securityContext:
2427
runAsUser: 1000
2528
runAsNonRoot: true
2629
containers:
2730
- name: audit-cleanup
28-
image: "{{ .Values.global.registry }}/{{ .Values.global.imageProject }}/{{ .Values.global.imageRepository }}/{{ .Values.app.imagePackage }}:{{ .Values.global.imageTag }}"
31+
# `app` matches `.Values.app.imagePackage` in `values.yaml` — but
32+
# that lookup lands in the `app` subchart's scope, not the
33+
# project-level templates, so the literal is inlined here.
34+
image: "{{ .Values.global.registry }}/{{ .Values.global.imageProject }}/{{ .Values.global.imageRepository }}/app:{{ .Values.global.imageTag }}"
2935
securityContext:
3036
allowPrivilegeEscalation: false
3137
env:

packages/app/scripts/audit-cleanup.mjs

Lines changed: 42 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
import path from "node:path";
1+
import { realpathSync } from "node:fs";
2+
import { fileURLToPath } from "node:url";
23
import postgres from "postgres";
34

45
/**
@@ -11,9 +12,11 @@ import postgres from "postgres";
1112
* (high-volume access logs containing IP addresses)
1213
* - long retention (365 d by default): every other category (security logs)
1314
*
14-
* Both DELETEs and the self-audit INSERT run inside a single transaction — if
15-
* anything fails, the whole operation rolls back and the next daily run
16-
* re-attempts the purge.
15+
* The two DELETEs are atomic (single transaction). The success self-audit is
16+
* appended after the transaction commits — a failure writing the audit row
17+
* must not roll back work we already accomplished. Cleanup failures go
18+
* through `logFailure`, which inserts outside any transaction so the record
19+
* survives the rollback.
1720
*
1821
* Env vars:
1922
* - DATABASE_URL (or POSTGRES_* fallback, same convention as migrate.mjs)
@@ -112,7 +115,7 @@ export async function runAuditCleanup({
112115
const shortThreshold = subtractDays(now, shortRetentionDays);
113116
const longThreshold = subtractDays(now, longRetentionDays);
114117

115-
return await sql.begin(async (txRaw) => {
118+
const summary = await sql.begin(async (txRaw) => {
116119
// postgres-js `TransactionSql` is declared as `Omit<Sql, ...>`, which
117120
// — quirk of TS's `Omit` — strips the call signatures on Sql. Cast
118121
// back to Sql so we can use the template-tag + dynamic-array helper.
@@ -130,31 +133,46 @@ export async function runAuditCleanup({
130133

131134
const deletedShort = Number(shortResult.count ?? 0);
132135
const deletedLong = Number(longResult.count ?? 0);
133-
const deletedTotal = deletedShort + deletedLong;
136+
return {
137+
deletedShort,
138+
deletedLong,
139+
deletedTotal: deletedShort + deletedLong,
140+
};
141+
});
134142

135-
// `created_at` is NOT NULL with no SQL-level default — the drizzle
136-
// schema supplies `$defaultFn(() => new Date())` at the ORM layer,
137-
// which the raw SQL path has to replicate explicitly.
138-
await tx`
143+
// Self-audit runs OUTSIDE the transaction so a failure to record the
144+
// audit row (constraint violation, disk full, etc.) cannot roll back
145+
// cleanup work that has already been committed. We still log success
146+
// via an INSERT so the cleanup cron is visible in `audit.action_log`.
147+
// `created_at` is NOT NULL with no SQL-level default — the drizzle
148+
// schema supplies `$defaultFn(() => new Date())` at the ORM layer,
149+
// which the raw SQL path has to replicate explicitly.
150+
try {
151+
await sql`
139152
INSERT INTO audit.action_log (id, created_at, action, category, status, metadata)
140153
VALUES (
141154
${crypto.randomUUID()},
142155
${new Date()},
143156
${AUDIT_CLEANUP_ACTION},
144157
${AUDIT_CLEANUP_CATEGORY},
145158
'success',
146-
${tx.json({
147-
deletedShort,
148-
deletedLong,
149-
deletedTotal,
159+
${sql.json({
160+
deletedShort: summary.deletedShort,
161+
deletedLong: summary.deletedLong,
162+
deletedTotal: summary.deletedTotal,
150163
shortRetentionDays,
151164
longRetentionDays,
152165
})}
153166
)
154167
`;
168+
} catch (auditError) {
169+
console.error(
170+
"[audit-cleanup] Cleanup succeeded but self-audit insert failed:",
171+
auditError,
172+
);
173+
}
155174

156-
return { deletedShort, deletedLong, deletedTotal };
157-
});
175+
return summary;
158176
}
159177

160178
/**
@@ -186,7 +204,14 @@ async function logFailure(sql, error) {
186204
const isMain = (() => {
187205
const entry = process.argv[1];
188206
if (!entry) return false;
189-
return import.meta.url === `file://${path.resolve(entry)}`;
207+
// `realpathSync` resolves symlinks — needed because pnpm's content-
208+
// addressable store or Docker bind-mounts may hand Node a symlink path
209+
// that doesn't match `import.meta.url`'s canonicalized form.
210+
try {
211+
return fileURLToPath(import.meta.url) === realpathSync(entry);
212+
} catch {
213+
return false;
214+
}
190215
})();
191216

192217
if (isMain) {

packages/app/src/server/audit/__tests__/auditCleanupScript.integration.test.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,10 @@ import { runAuditCleanup } from "#scripts/audit-cleanup.mjs";
1919
import { env } from "~/env.js";
2020

2121
describe("audit-cleanup.mjs (integration)", () => {
22-
let sql: ReturnType<typeof postgres>;
22+
// Definite-assignment assertion — initialized synchronously in `beforeAll`.
23+
// If `beforeAll` throws, vitest skips the tests and reports the original
24+
// error; any afterAll failure from an undefined `sql` is suppressed below.
25+
let sql!: ReturnType<typeof postgres>;
2326

2427
beforeAll(() => {
2528
// DATABASE_URL is populated by `integration-setup.ts` before this file
@@ -28,6 +31,7 @@ describe("audit-cleanup.mjs (integration)", () => {
2831
});
2932

3033
afterAll(async () => {
34+
if (!sql) return;
3135
await sql`DELETE FROM audit.action_log`;
3236
await sql.end();
3337
});

0 commit comments

Comments
 (0)