diff --git a/README.md b/README.md index 42bc6f2..7d8509b 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,7 @@ postgresql-partition-manager run all - **Automatic provisioning** — create upcoming partitions ahead of time - **Cleanup management** — delete or detach outdated partitions with configurable retention +- **Lifecycle hooks** — execute shell commands or SQL at each stage of partition cleanup - **Configuration checking** — verify partitions match expected configuration - **Multiple intervals** — daily, weekly, monthly, quarterly, and yearly partitioning - **Flexible partition keys** — `date`, `timestamp`, `timestamptz`, and `uuid` (UUIDv7) columns diff --git a/cmd/run/run.go b/cmd/run/run.go index eb13b15..bb15133 100644 --- a/cmd/run/run.go +++ b/cmd/run/run.go @@ -26,7 +26,10 @@ const ( InvalidDateExitCode = 7 ) -var ErrUnsupportedPostgreSQLVersion = errors.New("unsupported PostgreSQL version") +var ( + ErrUnsupportedPostgreSQLVersion = errors.New("unsupported PostgreSQL version") + dryRun bool +) func RunCmd() *cobra.Command { runCmd := &cobra.Command{ @@ -38,6 +41,9 @@ func RunCmd() *cobra.Command { }, } + AllCmd.Flags().BoolVar(&dryRun, "dry-run", false, "Preview which hooks would be executed without actually running them") + CleanupCmd.Flags().BoolVar(&dryRun, "dry-run", false, "Preview which hooks would be executed without actually running them") + runCmd.AddCommand(AllCmd) runCmd.AddCommand(CheckCmd) runCmd.AddCommand(ProvisioningCmd) @@ -135,7 +141,7 @@ func initCmd() *ppm.PPM { log.Info("Work date", "work-date", workDate) - client := ppm.New(context.TODO(), *log, db, config.Partitions, workDate) + client := ppm.New(context.TODO(), *log, db, config.Partitions, workDate, config.ConnectionURL, config.Hooks, dryRun) if err = client.CheckServerRequirements(); err != nil { log.Error("Server is incompatible", "error", err) diff --git a/docs/cli-reference.md b/docs/cli-reference.md index 35bf3a0..451472a 100644 --- a/docs/cli-reference.md +++ 
b/docs/cli-reference.md @@ -51,9 +51,15 @@ Perform partitions provisioning, cleanup, and check **Usage:** ``` -postgresql-partition-manager run all +postgresql-partition-manager run all [flags] ``` +**Flags:** + +| Flag | Shorthand | Default | Description | +|------|-----------|---------|-------------| +| --dry-run | | false | Preview which hooks would be executed without actually running them | + **Inherited Flags:** | Flag | Shorthand | Default | Description | @@ -93,9 +99,15 @@ Remove outdated partitions **Usage:** ``` -postgresql-partition-manager run cleanup +postgresql-partition-manager run cleanup [flags] ``` +**Flags:** + +| Flag | Shorthand | Default | Description | +|------|-----------|---------|-------------| +| --dry-run | | false | Preview which hooks would be executed without actually running them | + **Inherited Flags:** | Flag | Shorthand | Default | Description | diff --git a/docs/development/hook-types.md b/docs/development/hook-types.md new file mode 100644 index 0000000..2d441af --- /dev/null +++ b/docs/development/hook-types.md @@ -0,0 +1,235 @@ +# Developing a Hook Type + +This page is for contributors who want to add a new **hook type** (a "runner") to +PostgreSQL Partition Manager — for example an `s3` type that archives a partition +to object storage. + +If you only want to *use* the existing hook types, see [Shell](../hooks/shell.md) and +[PostgreSQL](../hooks/postgresql.md) instead. + +The hook engine lives in the `internal/infra/hook` package. + +## Architecture + +A hook type is described by three pieces of behavior, bundled in a `typeHandler` +(in `registry.go`): + +| Field | Responsibility | +| ----------- | ----------------------------------------------------------- | +| `validate` | Check the raw `config` map at configuration-load time. | +| `resolve` | Render template variables and build a typed config. | +| `newRunner` | Build the runner that executes the hook. | + +These are wired together by the `registry` map. 
Everything else is +type-agnostic and does **not** change when you add a type: + +- **Orchestrator** — resolves, logs, executes, applies the `on_failure` policy, records metrics. +- **Executor** — adds timeout and retry/backoff around any runner. +- **RegistryRunner** — dispatches to the right runner based on the hook type. + +The lifecycle of a single hook: + +```text +HookEntry (raw config map) + │ validate ← at config load + │ resolve ← templates applied → typed config + ▼ +ResolvedHook { Type, Config, ConnectionURL, PartitionContext, ... } + │ Executor (timeout + retry) + │ RegistryRunner → runner for Type + ▼ +Runner.Run(ctx, hook) ← your runner does the work +``` + +## Interfaces you implement + +```go +// RenderedConfig is your config after template variables are substituted. +// It only has to describe itself for structured logging. +type RenderedConfig interface { + LogAttrs() []any // key/value pairs, e.g. []any{"bucket", c.Bucket} +} + +// Runner executes the hook. The context carries the timeout deadline. +type Runner interface { + Run(ctx context.Context, hook *ResolvedHook) error +} +``` + +At execution time your runner receives a `*ResolvedHook`. Type-assert +`hook.Config` to your concrete config type. `hook.ConnectionURL` carries the +database connection details if you need them. + +!!! warning + `LogAttrs` output is emitted at debug level and in `--dry-run`. Only return + fields that are safe to log — never secrets. + +## Step by step: an `s3` runner + +Everything below goes in a single new file `s3_runner.go`, plus **one line** in +the `registry` map and **one** type constant. + +### 1. Add the type constant + +In `config.go`: + +```go +const ( + ShellType HookType = "shell" + PostgreSQLType HookType = "postgresql" + S3Type HookType = "s3" // new +) +``` + +The type-validity check in `HookEntry.Validate` is registry-driven, so +registering the handler (step 6) is what makes `type: s3` accepted — there is no +switch to update. + +### 2.
Define the rendered config and `LogAttrs` + +```go +type S3Config struct { + Bucket string `mapstructure:"bucket"` + Key string `mapstructure:"key"` +} + +var _ RenderedConfig = (*S3Config)(nil) + +func (c *S3Config) LogAttrs() []any { + return []any{"bucket", c.Bucket, "key", c.Key} +} +``` + +### 3. Validate the raw config + +Runs at config-load time, before any partition work. Use static, wrapped error +variables (mirror the existing `Err*` vars in `config.go`): + +```go +var ( + ErrS3ConfigRequired = errors.New("config section is required for s3 hooks") + ErrS3BucketRequired = errors.New("'bucket' is required in config for s3 hooks") +) + +func validateS3Config(config map[string]interface{}) error { + if config == nil { + return ErrS3ConfigRequired + } + if _, ok := config["bucket"]; !ok { + return ErrS3BucketRequired + } + return nil +} +``` + +### 4. Resolve templates + +Render every user-supplied string field with `Render(value, partition)` so +[template variables](../hooks/index.md#template-variables) such as `{{.Schema}}` and +`{{.Table}}` work: + +```go +func resolveS3Config(config map[string]interface{}, partition PartitionContext) (RenderedConfig, error) { + cfg := &S3Config{} + + if v, ok := config["bucket"]; ok { + rendered, err := Render(fmt.Sprintf("%v", v), partition) + if err != nil { + return nil, fmt.Errorf("rendering bucket: %w", err) + } + cfg.Bucket = rendered + } + + if v, ok := config["key"]; ok { + rendered, err := Render(fmt.Sprintf("%v", v), partition) + if err != nil { + return nil, fmt.Errorf("rendering key: %w", err) + } + cfg.Key = rendered + } + + return cfg, nil +} +``` + +### 5. 
Implement the runner + +```go +var _ Runner = (*S3Runner)(nil) + +type S3Runner struct { + logger slog.Logger +} + +func NewS3Runner(logger slog.Logger) *S3Runner { + return &S3Runner{logger: logger} +} + +func (r *S3Runner) Run(ctx context.Context, hook *ResolvedHook) error { + cfg, ok := hook.Config.(*S3Config) + if !ok { + return fmt.Errorf("s3 configuration is nil for hook %q", hook.Name) + } + + r.logger.Debug("Executing s3 hook", "hook", hook.Name, "bucket", cfg.Bucket, "key", cfg.Key) + + // ... perform the upload, honoring ctx for the timeout deadline ... + + return nil +} +``` + +!!! tip + Honor `ctx` — it carries the per-hook timeout. Retry and backoff are handled + for you by the executor based on the hook's `retry` config, so do not add + your own retry loop. + +### 6. Register the handler + +Apart from the type constant added in step 1, this is the only edit outside your new file, in `registry.go`: + +```go +var registry = map[HookType]typeHandler{ + ShellType: { /* ... */ }, + PostgreSQLType: { /* ... */ }, + S3Type: { + validate: validateS3Config, + resolve: resolveS3Config, + newRunner: func(logger slog.Logger) Runner { return NewS3Runner(logger) }, + }, +} +``` + +That is all. The orchestrator, executor, dispatcher, logging, dry-run, metrics, +and config validation now support `type: s3`.
+ +## Checklist + +- [ ] `HookType` constant added in `config.go` +- [ ] Config struct implements `RenderedConfig` (`LogAttrs` + compile-time `var _`) +- [ ] `validateXxxConfig` with static, wrapped error variables +- [ ] `resolveXxxConfig` renders every templated field via `Render` +- [ ] Runner implements `Runner`, type-asserts `hook.Config`, and honors `ctx` +- [ ] Handler registered in the `registry` map +- [ ] Unit tests for the runner, resolve, and validate functions +- [ ] User documentation added under `docs/hooks/` and linked in `mkdocs.yml` +- [ ] `LogAttrs` exposes no secrets + +## Testing conventions + +Mirror the existing tests in the `internal/infra/hook` package: + +- **Runner** — table-driven success / failure / nil-config cases (see + `shell_runner_test.go`). For runners with external dependencies, inject a seam + (like `PostgreSQLRunner`'s `ConnectorFunc`) so tests don't hit the network. +- **Validation** — assert the specific `Err*` sentinel with `errors.Is` (see + `config_test.go`). +- **Resolution** — assert template variables are substituted, and that an + unknown variable surfaces an error (templates use `missingkey=error`). + +Run the suite before opening a pull request: + +```bash +make test +make lint +``` diff --git a/docs/development/index.md b/docs/development/index.md new file mode 100644 index 0000000..639244c --- /dev/null +++ b/docs/development/index.md @@ -0,0 +1,49 @@ +# Development + +Guides for contributors working on PostgreSQL Partition Manager itself, rather +than operating it. + +If you want to *use* PPM, start with [Getting Started](../getting-started.md) +and [Configuration](../configuration.md). + +## Getting set up + +The repository [`CONTRIBUTING.md`](https://github.com/qonto/postgresql-partition-manager/blob/main/CONTRIBUTING.md) +covers the full local development workflow: building, running the test suites +(unit, Bats, Helm), linting, and the PostgreSQL/Kubernetes dev environments. 
+ +Quick reference: + +```bash +make build # Build the PPM binary +make test # Run unit tests with coverage +make lint # Run golangci-lint +``` + +## Topics + +| Guide | Description | +|-------|-------------| +| [Developing a Hook Type](hook-types.md) | Add a new hook runner (e.g. `s3`) to the hook engine | + +## Documentation + +This site is built with [MkDocs](https://www.mkdocs.org/) and the Material +theme. To preview changes locally: + +```bash +pip install -r requirements-docs.txt +mkdocs serve +``` + +Build and validate (treats warnings as errors, matching CI): + +```bash +mkdocs build --strict +``` + +!!! note + The [CLI Reference](../cli-reference.md) page is auto-generated from the + Cobra command tree with `make docs-generate`. Never edit it by hand, and + regenerate it in the same commit as any change to commands or flags in + `cmd/`. diff --git a/docs/hooks/index.md b/docs/hooks/index.md new file mode 100644 index 0000000..84e9dc3 --- /dev/null +++ b/docs/hooks/index.md @@ -0,0 +1,304 @@ +# Hooks + +Hooks allow you to execute custom actions at specific points during the partition cleanup lifecycle. Use them to archive data, run maintenance SQL, send notifications, or trigger external workflows before or after partitions are detached or dropped. + +## Overview + +Hooks are defined in the `hooks` section of your configuration and execute at four lifecycle events: + +1. **`before-detach`** — Before a partition is detached from the parent table +2. **`after-detach`** — After a partition has been successfully detached +3. **`before-drop`** — Before a detached partition is dropped (only when `cleanupPolicy: drop`) +4. 
**`after-drop`** — After a partition has been successfully dropped (only when `cleanupPolicy: drop`) + +``` +┌──────────────────────────────────────────────────────────────────────────────┐ +│ Partition Cleanup Flow │ +├──────────────┬──────────┬──────────────┬───────────┬────────────┬────────────┤ +│ before-detach│ DETACH │ after-detach │before-drop│ DROP │ after-drop │ +│ hooks │ partition│ hooks │ hooks │ partition │ hooks │ +└──────────────┴──────────┴──────────────┴───────────┴────────────┴────────────┘ +``` + +## Hook Types + +| Type | Description | Documentation | +|------|-------------|---------------| +| `shell` | Execute system commands, scripts, or external tools | [Shell Hook](shell.md) | +| `postgresql` | Execute SQL statements against the database | [PostgreSQL Hook](postgresql.md) | + +## Configuration + +### Scope + +Hooks can be defined at two levels: + +- **Global** — Applied to all partitions +- **Per-partition** — Overrides global hooks for that specific partition + +When hooks are defined at both levels, the partition-level hooks completely replace the global hooks for that partition. + +### Hook Entry Fields + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `name` | string | Yes | — | Hook identifier for logging | +| `type` | string | Yes | — | Runner type: `shell` or `postgresql` | +| `enabled` | bool | No | `true` | Set to `false` to skip without removing | +| `timeout` | duration | Yes | — | Maximum execution time (e.g., `30s`, `5m`, `1h`) | +| `on_failure` | string | No | — | Override failure behavior: `abort` or `continue` | +| `retry` | object | No | — | Retry configuration (see below) | +| `config` | object | Yes | — | Type-specific configuration | + +### Type-Specific Config + +Each hook type has its own `config` section.
See the dedicated pages for details: + +- [Shell Hook Configuration](shell.md#configuration) +- [PostgreSQL Hook Configuration](postgresql.md#configuration) + +## Template Variables + +Hook configuration fields support Go template syntax (`{{.VariableName}}`). Available variables: + +| Variable | Description | Example | +|----------|-------------|---------| +| `{{.Schema}}` | Partition schema | `public` | +| `{{.Table}}` | Partition table name (child) | `logs_2024_06_25` | +| `{{.ParentTable}}` | Parent table name | `logs` | +| `{{.PartitionName}}` | Partition identifier from config | `application_logs` | +| `{{.LowerBound}}` | Partition lower bound | `2024-06-25` | +| `{{.UpperBound}}` | Partition upper bound | `2024-06-26` | +| `{{.DatabaseName}}` | Database name from connection URL | `production` | +| `{{.Hostname}}` | Database hostname from connection URL | `db.example.com` | +| `{{.Retention}}` | Configured retention value | `30` | +| `{{.Interval}}` | Configured interval | `daily` | + +!!! warning + Referencing an undefined template variable causes a configuration error and aborts the cleanup for the affected partition. + +## Retry Configuration + +Hooks can be configured to retry on failure with configurable backoff. + +| Field | Type | Default | Description | +|-------|------|---------|-------------| +| `retry.attempts` | int | `0` | Number of retry attempts (0 = no retry) | +| `retry.backoff` | string | `exponential` | Backoff strategy: `fixed` or `exponential` | +| `retry.initial_delay` | duration | `5s` | Initial delay between retries | +| `retry.max_delay` | duration | `60s` | Maximum delay (for exponential backoff) | + +**Exponential backoff** doubles the delay after each attempt: `initial_delay × 2^(N-1)`, capped at `max_delay`. + +**Fixed backoff** waits `initial_delay` between every attempt. 
+ +## Failure Behavior + +### Default Behavior + +- **Before-hooks** (`before-detach`, `before-drop`): failure cancels the operation for the affected partition +- **After-hooks** (`after-detach`, `after-drop`): failure is logged; the operation is already complete +- **After-detach failure with `cleanupPolicy: drop`**: the drop operation is skipped + +In all cases, other partitions continue processing normally. + +### Override with `on_failure` + +| Value | Effect | +|-------|--------| +| `abort` | Stop the entire cleanup process immediately and exit with non-zero code | +| `continue` | Proceed with the operation even if the before-hook fails | + +!!! note + When any hook fails during a run (after all retries), PPM exits with a non-zero exit code — even if all partition operations succeeded. + +### Execution Order + +Hooks within a lifecycle event execute sequentially in the order defined. If a hook fails, remaining hooks in the same event are skipped. + +## Dry-Run Mode + +Use `--dry-run` to preview hook execution without side effects: + +```bash +postgresql-partition-manager run cleanup --dry-run +postgresql-partition-manager run all --dry-run +``` + +In dry-run mode: + +- Template variables are resolved and logged +- No hooks are executed (no shell commands, no SQL) +- No partitions are detached or dropped +- Configuration errors (invalid templates) are reported as they would in a normal run + +## Credential Propagation + +When `propagate-credentials: true` is set inside a shell hook's `config` section, that hook receives PostgreSQL connection details as environment variables: + +- `PGHOST` +- `PGPORT` +- `PGDATABASE` +- `PGUSER` +- `PGPASSWORD` + +This allows shell hooks to connect to the same database without duplicating credentials. + +!!! note + `propagate-credentials` is a `shell` hook config option. PostgreSQL hooks use the same connection parameters as PPM automatically. 
+ +## Examples + +### Archive partition to S3 before drop + +```yaml +connection-url: postgres://app:secret@db.example.com:5432/production + +partitions: + application_logs: + schema: public + table: logs + partitionKey: created_at + interval: daily + retention: 30 + preProvisioned: 7 + cleanupPolicy: drop + hooks: + before-drop: + - name: "archive-to-s3" + type: shell + timeout: 10m + retry: + attempts: 3 + backoff: exponential + initial_delay: 10s + max_delay: 120s + config: + command: "/usr/local/bin/archive-partition" + args: + - "--schema" + - "{{.Schema}}" + - "--table" + - "{{.Table}}" + env: + S3_BUCKET: "my-archive-bucket" + S3_PREFIX: "{{.DatabaseName}}/{{.ParentTable}}/{{.Table}}" + propagate-credentials: true +``` + +### Run VACUUM ANALYZE after detach + +```yaml +hooks: + after-detach: + - name: "vacuum-after-detach" + type: postgresql + timeout: 5m + retry: + attempts: 2 + backoff: fixed + initial_delay: 5s + config: + sql_query: "VACUUM ANALYZE {{.Schema}}.{{.Table}}" +``` + +### Log partition operations to a file + +```yaml +hooks: + before-detach: + - name: "log-detach" + type: shell + timeout: 10s + on_failure: continue + config: + command: "/bin/sh" + args: ["-c", "echo '{{.Schema}}.{{.Table}} detaching at $(date)' >> /var/log/ppm-hooks.log"] +``` + +### Global hooks with per-partition override + +```yaml +connection-url: postgres://app:secret@db.example.com:5432/production + +# Global hooks applied to all partitions +hooks: + after-detach: + - name: "notify-detach" + type: shell + timeout: 15s + on_failure: continue + config: + command: "/usr/local/bin/notify" + args: ["Partition {{.Schema}}.{{.Table}} detached"] + +partitions: + events: + schema: public + table: events + partitionKey: created_at + interval: daily + retention: 90 + preProvisioned: 7 + cleanupPolicy: drop + # This partition uses global hooks (after-detach notification) + + sensitive_data: + schema: private + table: audit_logs + partitionKey: created_at + interval: monthly + 
retention: 12 + preProvisioned: 3 + cleanupPolicy: drop + # Override global hooks — archive before dropping + hooks: + before-drop: + - name: "archive-audit" + type: shell + timeout: 30m + config: + command: "/usr/local/bin/archive-audit" + args: ["--partition", "{{.Schema}}.{{.Table}}"] +``` + +### Multiple hooks in sequence + +```yaml +hooks: + before-drop: + - name: "verify-backup" + type: shell + timeout: 2m + config: + command: "/usr/local/bin/verify-backup" + args: ["--table", "{{.Schema}}.{{.Table}}"] + + - name: "export-stats" + type: postgresql + timeout: 30s + on_failure: continue + config: + sql_query: "INSERT INTO partition_stats (partition_name, row_count, dropped_at) SELECT '{{.Schema}}.{{.Table}}', count(*), now() FROM {{.Schema}}.{{.Table}}" +``` + +### Abort on critical hook failure + +```yaml +hooks: + before-drop: + - name: "critical-backup" + type: shell + timeout: 15m + on_failure: abort + retry: + attempts: 5 + backoff: exponential + initial_delay: 10s + max_delay: 120s + config: + command: "/usr/local/bin/backup-partition" + args: ["{{.Schema}}.{{.Table}}"] +``` + +If this hook fails after all 5 retries, the entire cleanup process stops immediately. diff --git a/docs/hooks/postgresql.md b/docs/hooks/postgresql.md new file mode 100644 index 0000000..92e828b --- /dev/null +++ b/docs/hooks/postgresql.md @@ -0,0 +1,123 @@ +# PostgreSQL Hook + +The `postgresql` hook type executes SQL statements against the database during the partition cleanup lifecycle. Use it for maintenance operations like `VACUUM`, `ANALYZE`, statistics collection, or custom cleanup queries. 
+ +## Configuration + +```yaml +hooks: + after-detach: + - name: "vacuum-partition" + type: postgresql + timeout: 5m + config: + sql_query: "VACUUM ANALYZE {{.Schema}}.{{.ParentTable}}" +``` + +### Config Fields + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `sql_query` | string | Yes | SQL statement to execute (supports template variables) | + +## Execution Behavior + +- The SQL statement is executed in a **separate connection** from the main PPM operations +- The hook uses the same connection parameters (host, port, database, credentials) as PPM +- The query is executed with the configured timeout via context cancellation +- If the SQL statement returns an error, the hook is treated as failed +- Hooks execute **outside of any active transaction** — no locks are held during execution + +!!! warning + Only single SQL statements are supported per hook entry. For multiple statements, define multiple hook entries in sequence. + +## Template Variables + +The `sql_query` field supports [template variables](index.md#template-variables). Variables are resolved before execution. + +```yaml +config: + sql_query: "VACUUM ANALYZE {{.Schema}}.{{.ParentTable}}" +``` + +All standard template variables are available: `Schema`, `Table`, `ParentTable`, `PartitionName`, `LowerBound`, `UpperBound`, `DatabaseName`, `Hostname`, `Retention`, `Interval`. 
+ +## Examples + +### ANALYZE parent table after drop + +```yaml +hooks: + after-drop: + - name: "analyze-parent" + type: postgresql + timeout: 10m + on_failure: continue + config: + sql_query: "ANALYZE {{.Schema}}.{{.ParentTable}}" +``` + +### Record partition statistics before drop + +```yaml +hooks: + before-drop: + - name: "record-stats" + type: postgresql + timeout: 30s + on_failure: continue + config: + sql_query: >- + INSERT INTO partition_stats (partition_name, row_count, dropped_at) + SELECT '{{.Schema}}.{{.Table}}', count(*), now() + FROM {{.Schema}}.{{.Table}} +``` + +### Notify via pg_notify + +```yaml +hooks: + after-detach: + - name: "notify-detach" + type: postgresql + timeout: 10s + on_failure: continue + config: + sql_query: "SELECT pg_notify('partition_events', json_build_object('event', 'detached', 'partition', '{{.Schema}}.{{.Table}}', 'parent', '{{.ParentTable}}')::text)" +``` + +### Update a tracking table + +```yaml +hooks: + after-drop: + - name: "track-dropped" + type: postgresql + timeout: 15s + on_failure: continue + config: + sql_query: >- + INSERT INTO partition_lifecycle (schema_name, table_name, parent_table, event, occurred_at) + VALUES ('{{.Schema}}', '{{.Table}}', '{{.ParentTable}}', 'dropped', now()) +``` + +### Combine multiple PostgreSQL hooks + +```yaml +hooks: + after-detach: + - name: "vacuum-partition" + type: postgresql + timeout: 5m + config: + sql_query: "VACUUM ANALYZE {{.Schema}}.{{.Table}}" + + - name: "update-statistics" + type: postgresql + timeout: 30s + on_failure: continue + config: + sql_query: "ANALYZE {{.Schema}}.{{.ParentTable}}" +``` + +Hooks execute sequentially — `vacuum-partition` completes before `update-statistics` begins. diff --git a/docs/hooks/shell.md b/docs/hooks/shell.md new file mode 100644 index 0000000..9bda898 --- /dev/null +++ b/docs/hooks/shell.md @@ -0,0 +1,189 @@ +# Shell Hook + +The `shell` hook type executes system commands during the partition cleanup lifecycle. 
Use it to run scripts, send notifications, archive data, or invoke external tools. + +## Configuration + +```yaml +hooks: + before-detach: + - name: "my-shell-hook" + type: shell + timeout: 30s + config: + command: "/usr/local/bin/my-script" + args: ["--partition", "{{.Schema}}.{{.Table}}"] + env: + MY_VAR: "my-value" +``` + +### Config Fields + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `command` | string | Yes | Executable path or command name | +| `args` | list | No | Command arguments (supports template variables) | +| `env` | map | No | Additional environment variables (supports template variables) | +| `propagate-credentials` | bool | No | Inject `PGHOST`, `PGPORT`, `PGDATABASE`, `PGUSER`, `PGPASSWORD` env vars (default `false`) | + +## Execution Behavior + +- The command is executed directly via `exec` (not through a shell interpreter) +- The parent process environment variables are inherited +- Additional `env` vars are merged on top of the inherited environment +- `stdout` and `stderr` are captured and logged at debug level +- A non-zero exit code is treated as a failure + +!!! tip + Since commands are executed directly (not via a shell), you cannot use shell features like pipes (`|`), redirection (`>`), or variable expansion (`$VAR`) in the `command` field. 
To use shell features, invoke a shell explicitly: + + ```yaml + config: + command: "/bin/sh" + args: ["-c", "echo 'hello' >> /tmp/output.log"] + ``` + +## Credential Propagation + +When `propagate-credentials: true` is set inside a shell hook's `config` section, the hook receives PostgreSQL connection details as environment variables extracted from the PPM connection URL: + +| Variable | Description | Example | +|----------|-------------|---------| +| `PGHOST` | Database hostname | `db.example.com` | +| `PGPORT` | Database port | `5432` | +| `PGDATABASE` | Database name | `production` | +| `PGUSER` | Database username | `app_user` | +| `PGPASSWORD` | Database password | `secret` | + +This allows shell hooks to connect to the same database as PPM without duplicating credentials in the configuration. + +```yaml +hooks: + before-drop: + - name: "dump-partition" + type: shell + timeout: 10m + config: + command: "pg_dump" + args: ["-t", "{{.Schema}}.{{.Table}}", "-f", "/backups/{{.Table}}.sql"] + propagate-credentials: true +``` + +!!! note + `propagate-credentials` is a `shell` hook config option. PostgreSQL hooks use the same connection parameters as PPM automatically. + +## Template Variables + +The `command`, `args`, and `env` fields all support [template variables](index.md#template-variables). Variables are resolved before execution. 
+ +```yaml +config: + command: "/usr/local/bin/archive" + args: + - "--schema" + - "{{.Schema}}" + - "--table" + - "{{.Table}}" + - "--database" + - "{{.DatabaseName}}" + env: + S3_PREFIX: "{{.DatabaseName}}/{{.ParentTable}}/{{.Table}}" + PARTITION_BOUNDS: "{{.LowerBound}}_{{.UpperBound}}" +``` + +## Examples + +### Send a Slack notification + +```yaml +hooks: + after-drop: + - name: "notify-slack" + type: shell + timeout: 15s + on_failure: continue + config: + command: "/usr/local/bin/slack-notify" + args: + - "--channel" + - "#database-ops" + - "--message" + - "Partition {{.Schema}}.{{.Table}} dropped from {{.ParentTable}}" +``` + +### Archive partition to S3 before drop + +```yaml +hooks: + before-drop: + - name: "archive-to-s3" + type: shell + timeout: 10m + on_failure: abort + retry: + attempts: 3 + backoff: exponential + initial_delay: 10s + max_delay: 120s + config: + command: "/usr/local/bin/archive-partition" + args: + - "--schema" + - "{{.Schema}}" + - "--table" + - "{{.Table}}" + env: + S3_BUCKET: "my-archive-bucket" + S3_PREFIX: "{{.DatabaseName}}/{{.ParentTable}}/{{.Table}}" + propagate-credentials: true +``` + +### Log operations to a file + +```yaml +hooks: + before-detach: + - name: "log-detach" + type: shell + timeout: 10s + on_failure: continue + config: + command: "/bin/sh" + args: ["-c", "echo '{{.Schema}}.{{.Table}} detaching at $(date)' >> /var/log/ppm-hooks.log"] +``` + +### Run pg_dump with credential propagation + +```yaml +hooks: + before-drop: + - name: "backup-partition" + type: shell + timeout: 30m + retry: + attempts: 2 + backoff: fixed + initial_delay: 30s + config: + command: "pg_dump" + args: + - "--format=custom" + - "--table={{.Schema}}.{{.Table}}" + - "--file=/backups/{{.ParentTable}}/{{.Table}}.dump" + propagate-credentials: true +``` + +### Verify backup exists before drop + +```yaml +hooks: + before-drop: + - name: "verify-backup" + type: shell + timeout: 2m + config: + command: "/usr/local/bin/verify-backup" + args: 
["--table", "{{.Schema}}.{{.Table}}", "--bucket", "my-archive-bucket"] +``` + +If this hook fails, the drop operation is cancelled for the partition. diff --git a/docs/index.md b/docs/index.md index b06301f..129c86c 100644 --- a/docs/index.md +++ b/docs/index.md @@ -11,6 +11,7 @@ The key objective of PPM is to simplify the use of PostgreSQL partitions for dev - **Automatic provisioning** — Create upcoming partitions ahead of time - **Cleanup management** — Delete or detach outdated partitions - **Configuration checking** — Verify partitions match expected configuration +- **Lifecycle hooks** — Execute custom actions (shell commands, SQL) before/after partition operations - **Multiple partition intervals** — Support for daily, weekly, monthly, quarterly, and yearly partitioning - **Flexible partition keys** — Support for `date`, `timestamp`, `timestamptz`, and `uuid` column types diff --git a/internal/infra/config/config.go b/internal/infra/config/config.go index de92acc..4d0c4fd 100644 --- a/internal/infra/config/config.go +++ b/internal/infra/config/config.go @@ -6,6 +6,7 @@ import ( "fmt" "github.com/go-playground/validator/v10" + "github.com/qonto/postgresql-partition-manager/internal/infra/hook" "github.com/qonto/postgresql-partition-manager/internal/infra/partition" ) @@ -15,6 +16,7 @@ type Config struct { ConnectionURL string `mapstructure:"connection-url"` StatementTimeout int `mapstructure:"statement-timeout" validate:"required"` LockTimeout int `mapstructure:"lock-timeout" validate:"required"` + Hooks *hook.HooksConfig `mapstructure:"hooks"` Partitions map[string]partition.Configuration `mapstructure:"partitions" validate:"required,dive,keys,endkeys,required"` } @@ -28,6 +30,22 @@ func (c *Config) Check() error { return fmt.Errorf("configuration validation failed: %w", err) } + // Validate global hooks if present + if c.Hooks != nil { + if err := c.Hooks.Validate(); err != nil { + return fmt.Errorf("global hooks validation failed: %w", err) + } + } + + // 
// LifecycleEvent represents when a hook executes relative to an operation.
type LifecycleEvent string

// Lifecycle events. The string values match the configuration keys used by
// HooksConfig's mapstructure tags.
const (
	BeforeDetach LifecycleEvent = "before-detach"
	AfterDetach  LifecycleEvent = "after-detach"
	BeforeDrop   LifecycleEvent = "before-drop"
	AfterDrop    LifecycleEvent = "after-drop"
)

// HookType identifies the runner implementation.
type HookType string

// Hook types referenced by name in this package.
const (
	ShellType      HookType = "shell"
	PostgreSQLType HookType = "postgresql"
)

// OnFailure defines behavior when a hook fails.
type OnFailure string

// Accepted on_failure values; HookEntry.Validate also accepts the empty string.
const (
	OnFailureAbort    OnFailure = "abort"
	OnFailureContinue OnFailure = "continue"
)

// BackoffStrategy defines the retry delay strategy.
type BackoffStrategy string

// Accepted retry backoff values; an empty value is replaced by
// BackoffExponential in HookEntry.ApplyDefaults.
const (
	BackoffFixed       BackoffStrategy = "fixed"
	BackoffExponential BackoffStrategy = "exponential"
)

// Default values for hook configuration, applied by HookEntry.ApplyDefaults
// when the corresponding retry delay is left at zero.
const (
	DefaultInitialDelay = 5 * time.Second
	DefaultMaxDelay     = 60 * time.Second
)

// Validation errors for hook configuration. They are sentinel values: callers
// match them with errors.Is, since Validate wraps them with the hook name.
//
// NOTE(review): ErrInvalidType spells out the accepted types by hand while
// validity is decided by a registry lookup; keep the message in sync when a
// new hook type is registered.
var (
	ErrNameRequired             = errors.New("hook name is required")
	ErrTypeRequired             = errors.New("type is required")
	ErrTimeoutRequired          = errors.New("timeout is required")
	ErrInvalidType              = errors.New("invalid type, must be one of [shell, postgresql]")
	ErrInvalidOnFailure         = errors.New("invalid on_failure, must be one of [abort, continue]")
	ErrInvalidBackoff           = errors.New("invalid retry backoff, must be one of [fixed, exponential]")
	ErrShellConfigRequired      = errors.New("config section is required for shell hooks")
	ErrShellCommandRequired     = errors.New("'command' is required in config for shell hooks")
	ErrPostgreSQLConfigRequired = errors.New("config section is required for postgresql hooks")
	ErrPostgreSQLQueryRequired  = errors.New("'sql_query' is required in config for postgresql hooks")
)

// RetryConfig defines retry behavior for a hook.
//
// Backoff, InitialDelay and MaxDelay are optional: their zero values are
// replaced by BackoffExponential, DefaultInitialDelay and DefaultMaxDelay in
// HookEntry.ApplyDefaults. Attempts is left untouched by ApplyDefaults.
type RetryConfig struct {
	Attempts     int             `mapstructure:"attempts"`
	Backoff      BackoffStrategy `mapstructure:"backoff"`
	InitialDelay time.Duration   `mapstructure:"initial_delay"`
	MaxDelay     time.Duration   `mapstructure:"max_delay"`
}

// HookEntry represents a single hook definition in configuration.
//
// Name, Type and Timeout are required by Validate. Enabled is a pointer so an
// omitted value (nil) can be told apart from an explicit false; ApplyDefaults
// turns nil into true. Config carries the type-specific settings as a raw map
// and is checked by the handler registered for Type.
type HookEntry struct {
	Name      string         `mapstructure:"name"`
	Type      HookType       `mapstructure:"type"`
	Enabled   *bool          `mapstructure:"enabled"`
	Timeout   time.Duration  `mapstructure:"timeout"`
	OnFailure OnFailure      `mapstructure:"on_failure"`
	Retry     RetryConfig    `mapstructure:"retry"`
	Config    map[string]any `mapstructure:"config"`
}

// ShellConfig holds shell hook-specific configuration.
//
// NOTE(review): PropagateCredentials presumably controls whether the PG*
// variables derived from the connection URL are injected into the command's
// environment; the runner that consumes it is not visible here.
type ShellConfig struct {
	Command              string            `mapstructure:"command"`
	Args                 []string          `mapstructure:"args"`
	Env                  map[string]string `mapstructure:"env"`
	PropagateCredentials bool              `mapstructure:"propagate-credentials"`
}

// PostgreSQLConfig holds postgresql hook-specific configuration.
type PostgreSQLConfig struct {
	SQLQuery string `mapstructure:"sql_query"`
}
+type HooksConfig struct { + BeforeDetach []HookEntry `mapstructure:"before-detach"` + AfterDetach []HookEntry `mapstructure:"after-detach"` + BeforeDrop []HookEntry `mapstructure:"before-drop"` + AfterDrop []HookEntry `mapstructure:"after-drop"` +} + +// ApplyDefaults sets default values on a HookEntry for any unset optional fields. +func (h *HookEntry) ApplyDefaults() { + if h.Enabled == nil { + enabled := true + h.Enabled = &enabled + } + + if h.Retry.Backoff == "" { + h.Retry.Backoff = BackoffExponential + } + + if h.Retry.InitialDelay == 0 { + h.Retry.InitialDelay = DefaultInitialDelay + } + + if h.Retry.MaxDelay == 0 { + h.Retry.MaxDelay = DefaultMaxDelay + } +} + +// Validate checks that a HookEntry has all required fields and valid values. +func (h *HookEntry) Validate() error { + if h.Name == "" { + return ErrNameRequired + } + + if h.Type == "" { + return fmt.Errorf("hook '%s': %w", h.Name, ErrTypeRequired) + } + + if h.Timeout == 0 { + return fmt.Errorf("hook '%s': %w", h.Name, ErrTimeoutRequired) + } + + if _, ok := registry[h.Type]; !ok { + return fmt.Errorf("hook '%s': %w", h.Name, ErrInvalidType) + } + + if h.OnFailure != "" && h.OnFailure != OnFailureAbort && h.OnFailure != OnFailureContinue { + return fmt.Errorf("hook '%s': %w", h.Name, ErrInvalidOnFailure) + } + + if h.Retry.Backoff != "" && h.Retry.Backoff != BackoffFixed && h.Retry.Backoff != BackoffExponential { + return fmt.Errorf("hook '%s': %w", h.Name, ErrInvalidBackoff) + } + + if err := h.validateTypeConfig(); err != nil { + return fmt.Errorf("hook '%s': %w", h.Name, err) + } + + return nil +} + +func (h *HookEntry) validateTypeConfig() error { + handler, ok := registry[h.Type] + if !ok { + return nil // unknown types are rejected earlier by the Type validity check + } + + return handler.validate(h.Config) +} + +// hooksFor returns the hook entries configured for the given lifecycle event. 
+func (c *HooksConfig) hooksFor(event LifecycleEvent) []HookEntry { + switch event { + case BeforeDetach: + return c.BeforeDetach + case AfterDetach: + return c.AfterDetach + case BeforeDrop: + return c.BeforeDrop + case AfterDrop: + return c.AfterDrop + default: + return nil + } +} + +// allEvents lists the lifecycle events in execution order, for iterating over every hook slice. +var allEvents = []LifecycleEvent{BeforeDetach, AfterDetach, BeforeDrop, AfterDrop} + +// ApplyDefaults sets default values on all hook entries in the configuration. +func (c *HooksConfig) ApplyDefaults() { + for _, event := range allEvents { + hooks := c.hooksFor(event) + for i := range hooks { + hooks[i].ApplyDefaults() + } + } +} + +// Validate checks all hook entries across all lifecycle events. +func (c *HooksConfig) Validate() error { + for _, event := range allEvents { + hooks := c.hooksFor(event) + for i := range hooks { + if err := hooks[i].Validate(); err != nil { + return fmt.Errorf("hooks.%s[%d]: %w", string(event), i, err) + } + } + } + + return nil +} diff --git a/internal/infra/hook/config_property_test.go b/internal/infra/hook/config_property_test.go new file mode 100644 index 0000000..4707331 --- /dev/null +++ b/internal/infra/hook/config_property_test.go @@ -0,0 +1,217 @@ +// Feature: partition-hooks, Property 2: Default Values Application +package hook + +import ( + "testing" + "time" + + "pgregory.net/rapid" +) + +// **Validates: Requirements 2.6, 2.9, 2.15, 2.16, 2.17, 2.18, 8.2** +// +// Property 2: Default Values Application +// For any hook configuration with missing optional fields, the resolved hook SHALL have: +// enabled = true, timeout = 300s, retry.attempts = 0, retry.backoff = "exponential", +// retry.initial_delay = 5s, retry.max_delay = 60s, and propagate-credentials = false. + +// genHookEntryWithMissingDefaults generates a HookEntry with required fields set +// but optional fields left at their zero values (simulating missing configuration). 
// genHookEntryWithMissingDefaults generates a HookEntry with a name, a type and
// a matching type-specific config, and every other field left at its zero
// value (simulating a configuration that omits them).
func genHookEntryWithMissingDefaults(t *rapid.T) HookEntry {
	hookType := rapid.SampledFrom([]HookType{ShellType, PostgreSQLType}).Draw(t, "hookType")
	name := rapid.StringMatching(`[a-z][a-z0-9\-]{1,20}`).Draw(t, "name")

	var config map[string]interface{}

	// Populate only the single key each hook type requires in its config.
	switch hookType {
	case ShellType:
		config = map[string]interface{}{
			"command": rapid.StringMatching(`/[a-z/]{1,30}`).Draw(t, "command"),
		}
	case PostgreSQLType:
		config = map[string]interface{}{
			"sql_query": rapid.StringMatching(`SELECT [a-z]{1,10}`).Draw(t, "sql_query"),
		}
	}

	return HookEntry{
		Name:    name,
		Type:    hookType,
		Enabled: nil, // Missing - should default to true
		Timeout: 0,   // Missing - required field: ApplyDefaults leaves it at 0 (no default is applied)
		Retry: RetryConfig{
			Attempts:     0,  // Default: 0 attempts
			Backoff:      "", // Missing - should default to "exponential"
			InitialDelay: 0,  // Missing - should default to 5s
			MaxDelay:     0,  // Missing - should default to 60s
		},
		Config: config,
	}
}
hook.Retry.Backoff) + } + + // Verify: retry.initial_delay defaults to 5s (Requirement 2.17) + if hook.Retry.InitialDelay != 5*time.Second { + t.Fatalf("Retry.InitialDelay should default to 5s, got %v", hook.Retry.InitialDelay) + } + + // Verify: retry.max_delay defaults to 60s (Requirement 2.18) + if hook.Retry.MaxDelay != 60*time.Second { + t.Fatalf("Retry.MaxDelay should default to 60s, got %v", hook.Retry.MaxDelay) + } + }) +} + +func TestProperty_DefaultValuesApplication_HooksConfig(t *testing.T) { + rapid.Check(t, func(t *rapid.T) { + // Generate a HooksConfig with hooks in various lifecycle events, + // all with missing optional fields + numBeforeDetach := rapid.IntRange(0, 3).Draw(t, "numBeforeDetach") + numAfterDetach := rapid.IntRange(0, 3).Draw(t, "numAfterDetach") + numBeforeDrop := rapid.IntRange(0, 3).Draw(t, "numBeforeDrop") + numAfterDrop := rapid.IntRange(0, 3).Draw(t, "numAfterDrop") + + config := HooksConfig{} + + for i := 0; i < numBeforeDetach; i++ { + config.BeforeDetach = append(config.BeforeDetach, genHookEntryWithMissingDefaults(t)) + } + + for i := 0; i < numAfterDetach; i++ { + config.AfterDetach = append(config.AfterDetach, genHookEntryWithMissingDefaults(t)) + } + + for i := 0; i < numBeforeDrop; i++ { + config.BeforeDrop = append(config.BeforeDrop, genHookEntryWithMissingDefaults(t)) + } + + for i := 0; i < numAfterDrop; i++ { + config.AfterDrop = append(config.AfterDrop, genHookEntryWithMissingDefaults(t)) + } + + // Apply defaults at the config level + config.ApplyDefaults() + + // Verify all hooks across all lifecycle events have correct defaults + allHooks := [][]HookEntry{ + config.BeforeDetach, + config.AfterDetach, + config.BeforeDrop, + config.AfterDrop, + } + + for _, hooks := range allHooks { + for _, hook := range hooks { + if hook.Enabled == nil || *hook.Enabled != true { + t.Fatalf("Hook %q: Enabled should default to true", hook.Name) + } + + if hook.Retry.Backoff != BackoffExponential { + t.Fatalf("Hook %q: Retry.Backoff 
should default to 'exponential', got %q", hook.Name, hook.Retry.Backoff) + } + + if hook.Retry.InitialDelay != 5*time.Second { + t.Fatalf("Hook %q: Retry.InitialDelay should default to 5s, got %v", hook.Name, hook.Retry.InitialDelay) + } + + if hook.Retry.MaxDelay != 60*time.Second { + t.Fatalf("Hook %q: Retry.MaxDelay should default to 60s, got %v", hook.Name, hook.Retry.MaxDelay) + } + } + } + }) +} + +func TestProperty_DefaultValuesApplication_PreserveExplicitValues(t *testing.T) { + rapid.Check(t, func(t *rapid.T) { + // Generate a hook with SOME explicit values set - verify defaults don't override them + hookType := rapid.SampledFrom([]HookType{ShellType, PostgreSQLType}).Draw(t, "hookType") + name := rapid.StringMatching(`[a-z][a-z0-9\-]{1,20}`).Draw(t, "name") + + var config map[string]interface{} + + switch hookType { + case ShellType: + config = map[string]interface{}{ + "command": "/usr/bin/test", + } + case PostgreSQLType: + config = map[string]interface{}{ + "sql_query": "SELECT 1", + } + } + + // Explicitly set some values + explicitEnabled := rapid.Bool().Draw(t, "explicitEnabled") + explicitTimeout := time.Duration(rapid.IntRange(1, 3600).Draw(t, "timeoutSec")) * time.Second + explicitBackoff := rapid.SampledFrom([]BackoffStrategy{BackoffFixed, BackoffExponential}).Draw(t, "backoff") + explicitInitialDelay := time.Duration(rapid.IntRange(1, 120).Draw(t, "initialDelaySec")) * time.Second + explicitMaxDelay := time.Duration(rapid.IntRange(1, 300).Draw(t, "maxDelaySec")) * time.Second + + hook := HookEntry{ + Name: name, + Type: hookType, + Enabled: &explicitEnabled, + Timeout: explicitTimeout, + Retry: RetryConfig{ + Backoff: explicitBackoff, + InitialDelay: explicitInitialDelay, + MaxDelay: explicitMaxDelay, + }, + Config: config, + } + + hook.ApplyDefaults() + + // Verify explicit values are preserved + if *hook.Enabled != explicitEnabled { + t.Fatalf("Enabled should preserve explicit value %v, got %v", explicitEnabled, *hook.Enabled) + } + + if 
hook.Timeout != explicitTimeout { + t.Fatalf("Timeout should preserve explicit value %v, got %v", explicitTimeout, hook.Timeout) + } + + if hook.Retry.Backoff != explicitBackoff { + t.Fatalf("Retry.Backoff should preserve explicit value %q, got %q", explicitBackoff, hook.Retry.Backoff) + } + + if hook.Retry.InitialDelay != explicitInitialDelay { + t.Fatalf("Retry.InitialDelay should preserve explicit value %v, got %v", explicitInitialDelay, hook.Retry.InitialDelay) + } + + if hook.Retry.MaxDelay != explicitMaxDelay { + t.Fatalf("Retry.MaxDelay should preserve explicit value %v, got %v", explicitMaxDelay, hook.Retry.MaxDelay) + } + }) +} diff --git a/internal/infra/hook/config_test.go b/internal/infra/hook/config_test.go new file mode 100644 index 0000000..7a2ff3b --- /dev/null +++ b/internal/infra/hook/config_test.go @@ -0,0 +1,418 @@ +package hook + +import ( + "errors" + "testing" + "time" +) + +// **Validates: Requirements 2.3, 2.4, 2.5, 2.15** + +func TestValidate_MissingName(t *testing.T) { + hook := HookEntry{ + Name: "", + Type: ShellType, + Config: map[string]any{ + "command": "/usr/bin/echo", + }, + } + + err := hook.Validate() + if err == nil { + t.Fatal("expected error for missing name, got nil") + } + + if !errors.Is(err, ErrNameRequired) { + t.Fatalf("expected ErrNameRequired, got: %v", err) + } +} + +func TestValidate_MissingType(t *testing.T) { + hook := HookEntry{ + Name: "my-hook", + Type: "", + Timeout: 30 * time.Second, + Config: map[string]any{ + "command": "/usr/bin/echo", + }, + } + + err := hook.Validate() + if err == nil { + t.Fatal("expected error for missing type, got nil") + } + + if !errors.Is(err, ErrTypeRequired) { + t.Fatalf("expected ErrTypeRequired, got: %v", err) + } +} + +func TestValidate_MissingTimeout(t *testing.T) { + hook := HookEntry{ + Name: "my-hook", + Type: ShellType, + Config: map[string]any{ + "command": "/usr/bin/echo", + }, + } + + err := hook.Validate() + if err == nil { + t.Fatal("expected error for missing 
timeout, got nil") + } + + if !errors.Is(err, ErrTimeoutRequired) { + t.Fatalf("expected ErrTimeoutRequired, got: %v", err) + } +} + +func TestValidate_InvalidType(t *testing.T) { + hook := HookEntry{ + Name: "my-hook", + Type: "invalid-type", + Timeout: 30 * time.Second, + Config: map[string]any{ + "command": "/usr/bin/echo", + }, + } + + err := hook.Validate() + if err == nil { + t.Fatal("expected error for invalid type, got nil") + } + + if !errors.Is(err, ErrInvalidType) { + t.Fatalf("expected ErrInvalidType, got: %v", err) + } +} + +func TestValidate_InvalidOnFailure(t *testing.T) { + hook := HookEntry{ + Name: "my-hook", + Type: ShellType, + Timeout: 30 * time.Second, + OnFailure: "invalid-policy", + Config: map[string]any{ + "command": "/usr/bin/echo", + }, + } + + err := hook.Validate() + if err == nil { + t.Fatal("expected error for invalid on_failure, got nil") + } + + if !errors.Is(err, ErrInvalidOnFailure) { + t.Fatalf("expected ErrInvalidOnFailure, got: %v", err) + } +} + +func TestValidate_InvalidBackoff(t *testing.T) { + hook := HookEntry{ + Name: "my-hook", + Type: ShellType, + Timeout: 30 * time.Second, + Retry: RetryConfig{ + Backoff: "invalid-backoff", + }, + Config: map[string]any{ + "command": "/usr/bin/echo", + }, + } + + err := hook.Validate() + if err == nil { + t.Fatal("expected error for invalid backoff, got nil") + } + + if !errors.Is(err, ErrInvalidBackoff) { + t.Fatalf("expected ErrInvalidBackoff, got: %v", err) + } +} + +func TestValidate_ValidOnFailureAbort(t *testing.T) { + hook := HookEntry{ + Name: "my-hook", + Type: ShellType, + Timeout: 30 * time.Second, + OnFailure: OnFailureAbort, + Config: map[string]any{ + "command": "/usr/bin/echo", + }, + } + + err := hook.Validate() + if err != nil { + t.Fatalf("expected no error for on_failure=abort, got: %v", err) + } +} + +func TestValidate_ValidOnFailureContinue(t *testing.T) { + hook := HookEntry{ + Name: "my-hook", + Type: ShellType, + Timeout: 30 * time.Second, + OnFailure: 
OnFailureContinue, + Config: map[string]any{ + "command": "/usr/bin/echo", + }, + } + + err := hook.Validate() + if err != nil { + t.Fatalf("expected no error for on_failure=continue, got: %v", err) + } +} + +func TestValidate_ValidBackoffFixed(t *testing.T) { + hook := HookEntry{ + Name: "my-hook", + Type: ShellType, + Timeout: 30 * time.Second, + Retry: RetryConfig{ + Backoff: BackoffFixed, + }, + Config: map[string]any{ + "command": "/usr/bin/echo", + }, + } + + err := hook.Validate() + if err != nil { + t.Fatalf("expected no error for backoff=fixed, got: %v", err) + } +} + +func TestValidate_ValidBackoffExponential(t *testing.T) { + hook := HookEntry{ + Name: "my-hook", + Type: ShellType, + Timeout: 30 * time.Second, + Retry: RetryConfig{ + Backoff: BackoffExponential, + }, + Config: map[string]any{ + "command": "/usr/bin/echo", + }, + } + + err := hook.Validate() + if err != nil { + t.Fatalf("expected no error for backoff=exponential, got: %v", err) + } +} + +// Shell config validation tests + +func TestValidate_Shell_MissingConfig(t *testing.T) { + hook := HookEntry{ + Name: "my-hook", + Type: ShellType, + Timeout: 30 * time.Second, + Config: nil, + } + + err := hook.Validate() + if err == nil { + t.Fatal("expected error for missing shell config, got nil") + } + + if !errors.Is(err, ErrShellConfigRequired) { + t.Fatalf("expected ErrShellConfigRequired, got: %v", err) + } +} + +func TestValidate_Shell_MissingCommand(t *testing.T) { + hook := HookEntry{ + Name: "my-hook", + Type: ShellType, + Timeout: 30 * time.Second, + Config: map[string]any{ + "args": []string{"--verbose"}, + }, + } + + err := hook.Validate() + if err == nil { + t.Fatal("expected error for missing command in shell config, got nil") + } + + if !errors.Is(err, ErrShellCommandRequired) { + t.Fatalf("expected ErrShellCommandRequired, got: %v", err) + } +} + +func TestValidate_Shell_ValidWithArgs(t *testing.T) { + hook := HookEntry{ + Name: "my-hook", + Type: ShellType, + Timeout: 30 * 
time.Second, + Config: map[string]any{ + "command": "/usr/local/bin/notify", + "args": []string{"--partition", "{{.Table}}"}, + }, + } + + err := hook.Validate() + if err != nil { + t.Fatalf("expected no error for valid shell config with args, got: %v", err) + } +} + +func TestValidate_Shell_ValidWithEnv(t *testing.T) { + hook := HookEntry{ + Name: "my-hook", + Type: ShellType, + Timeout: 30 * time.Second, + Config: map[string]any{ + "command": "/usr/local/bin/archive", + "env": map[string]string{ + "BUCKET": "my-bucket", + "PREFIX": "data/{{.Table}}", + }, + }, + } + + err := hook.Validate() + if err != nil { + t.Fatalf("expected no error for valid shell config with env, got: %v", err) + } +} + +func TestValidate_Shell_ValidWithArgsAndEnv(t *testing.T) { + hook := HookEntry{ + Name: "my-hook", + Type: ShellType, + Timeout: 30 * time.Second, + Config: map[string]any{ + "command": "/usr/local/bin/archive", + "args": []string{"--schema", "{{.Schema}}", "--table", "{{.Table}}"}, + "env": map[string]string{ + "BUCKET": "my-archive-bucket", + }, + }, + } + + err := hook.Validate() + if err != nil { + t.Fatalf("expected no error for valid shell config with args and env, got: %v", err) + } +} + +// PostgreSQL config validation tests + +func TestValidate_PostgreSQL_MissingConfig(t *testing.T) { + hook := HookEntry{ + Name: "my-hook", + Type: PostgreSQLType, + Timeout: 30 * time.Second, + Config: nil, + } + + err := hook.Validate() + if err == nil { + t.Fatal("expected error for missing postgresql config, got nil") + } + + if !errors.Is(err, ErrPostgreSQLConfigRequired) { + t.Fatalf("expected ErrPostgreSQLConfigRequired, got: %v", err) + } +} + +func TestValidate_PostgreSQL_MissingSQLQuery(t *testing.T) { + hook := HookEntry{ + Name: "my-hook", + Type: PostgreSQLType, + Timeout: 30 * time.Second, + Config: map[string]any{ + "other_field": "value", + }, + } + + err := hook.Validate() + if err == nil { + t.Fatal("expected error for missing sql_query in postgresql config, got 
nil") + } + + if !errors.Is(err, ErrPostgreSQLQueryRequired) { + t.Fatalf("expected ErrPostgreSQLQueryRequired, got: %v", err) + } +} + +func TestValidate_PostgreSQL_ValidConfig(t *testing.T) { + hook := HookEntry{ + Name: "vacuum-hook", + Type: PostgreSQLType, + Timeout: 30 * time.Second, + Config: map[string]any{ + "sql_query": "VACUUM ANALYZE {{.Schema}}.{{.Table}}", + }, + } + + err := hook.Validate() + if err != nil { + t.Fatalf("expected no error for valid postgresql config, got: %v", err) + } +} + +// HooksConfig.Validate tests + +func TestHooksConfig_Validate_AllValid(t *testing.T) { + config := HooksConfig{ + BeforeDetach: []HookEntry{ + { + Name: "notify", + Type: ShellType, + Timeout: 30 * time.Second, + Config: map[string]any{ + "command": "/usr/bin/notify", + }, + }, + }, + AfterDetach: []HookEntry{ + { + Name: "vacuum", + Type: PostgreSQLType, + Timeout: 30 * time.Second, + Config: map[string]any{ + "sql_query": "VACUUM ANALYZE public.events", + }, + }, + }, + } + + err := config.Validate() + if err != nil { + t.Fatalf("expected no error for valid HooksConfig, got: %v", err) + } +} + +func TestHooksConfig_Validate_InvalidEntryInBeforeDrop(t *testing.T) { + config := HooksConfig{ + BeforeDrop: []HookEntry{ + { + Name: "", + Type: ShellType, + Config: map[string]any{"command": "/bin/true"}, + }, + }, + } + + err := config.Validate() + if err == nil { + t.Fatal("expected error for invalid entry in HooksConfig, got nil") + } + + if !errors.Is(err, ErrNameRequired) { + t.Fatalf("expected ErrNameRequired, got: %v", err) + } +} + +func TestHooksConfig_Validate_EmptyConfig(t *testing.T) { + config := HooksConfig{} + + err := config.Validate() + if err != nil { + t.Fatalf("expected no error for empty HooksConfig, got: %v", err) + } +} diff --git a/internal/infra/hook/credentials.go b/internal/infra/hook/credentials.go new file mode 100644 index 0000000..e44e3b7 --- /dev/null +++ b/internal/infra/hook/credentials.go @@ -0,0 +1,58 @@ +package hook + +import ( + 
// defaultPostgreSQLPort is reported as PGPORT when the connection URL has no port.
const defaultPostgreSQLPort = "5432"

// ErrInvalidConnectionURL is returned when the connection URL cannot be parsed.
var ErrInvalidConnectionURL = errors.New("invalid PostgreSQL connection URL")

// ExtractCredentials parses a PostgreSQL connection URL and returns a map of
// environment variables (PGHOST, PGPORT, PGDATABASE, PGUSER, PGPASSWORD).
// It handles standard PostgreSQL connection URL format:
// postgresql://user:password@host:port/dbname
//
// All five keys are always present in the result; components missing from the
// URL map to the empty string, except the port, which falls back to 5432.
// User name and password are returned percent-decoded.
//
// Every failure wraps ErrInvalidConnectionURL. The raw URL is never included in
// the error text, because it may contain the password.
func ExtractCredentials(connectionURL string) (map[string]string, error) {
	parsed, err := url.Parse(connectionURL)
	if err != nil {
		// url.Parse reports failures as *url.Error, whose message embeds the
		// full input URL (credentials included). Surface only the underlying
		// cause so the password cannot leak into logs.
		var urlErr *url.Error
		if !errors.As(err, &urlErr) {
			return nil, ErrInvalidConnectionURL
		}

		return nil, fmt.Errorf("%w: %s", ErrInvalidConnectionURL, urlErr.Err)
	}

	if parsed.Scheme != "postgresql" && parsed.Scheme != "postgres" {
		return nil, fmt.Errorf("%w: unsupported scheme %q, expected \"postgresql\" or \"postgres\"", ErrInvalidConnectionURL, parsed.Scheme)
	}

	host := parsed.Hostname()
	port := parsed.Port()

	if port == "" {
		port = defaultPostgreSQLPort
	}

	// Database name is the path without the leading slash
	database := ""
	if len(parsed.Path) > 1 {
		database = parsed.Path[1:]
	}

	user := ""
	password := ""

	if parsed.User != nil {
		user = parsed.User.Username()
		// The second result only says whether a password was set; an absent
		// password is reported as the empty string.
		password, _ = parsed.User.Password()
	}

	credentials := map[string]string{
		"PGHOST":     host,
		"PGPORT":     port,
		"PGDATABASE": database,
		"PGUSER":     user,
		"PGPASSWORD": password,
	}

	return credentials, nil
}
and propagate-credentials = true, +// shell hook execution SHALL receive environment variables PGHOST, PGPORT, +// PGDATABASE, PGUSER, and PGPASSWORD with values correctly extracted from +// the connection URL. When propagate-credentials = false, these variables +// SHALL NOT be injected. + +// genAlphanumeric generates a non-empty alphanumeric string suitable for URL components. +func genAlphanumeric(t *rapid.T, label string) string { + return rapid.StringMatching(`[a-z][a-z0-9]{1,15}`).Draw(t, label) +} + +// genHost generates a valid hostname (simple alphanumeric with dots). +func genHost(t *rapid.T) string { + parts := rapid.IntRange(1, 3).Draw(t, "hostParts") + host := genAlphanumeric(t, "hostPart0") + + for i := 1; i < parts; i++ { + host += "." + genAlphanumeric(t, fmt.Sprintf("hostPart%d", i)) + } + + return host +} + +// genPort generates a valid port number as a string. +func genPort(t *rapid.T) string { + port := rapid.IntRange(1, 65535).Draw(t, "port") + return fmt.Sprintf("%d", port) +} + +// genConnectionURL builds a valid PostgreSQL connection URL from components. 
+func genConnectionURL(t *rapid.T) (connURL string, host string, port string, database string, user string, password string) { + scheme := rapid.SampledFrom([]string{"postgresql", "postgres"}).Draw(t, "scheme") + host = genHost(t) + port = genPort(t) + database = genAlphanumeric(t, "database") + user = genAlphanumeric(t, "user") + password = genAlphanumeric(t, "password") + + connURL = fmt.Sprintf("%s://%s:%s@%s:%s/%s", scheme, user, password, host, port, database) + + return connURL, host, port, database, user, password +} + +func TestProperty_CredentialPropagation_ExtractAll(t *testing.T) { + rapid.Check(t, func(t *rapid.T) { + connURL, expectedHost, expectedPort, expectedDB, expectedUser, expectedPassword := genConnectionURL(t) + + creds, err := ExtractCredentials(connURL) + if err != nil { + t.Fatalf("ExtractCredentials(%q) returned error: %v", connURL, err) + } + + // Verify all 5 environment variables are present + requiredKeys := []string{"PGHOST", "PGPORT", "PGDATABASE", "PGUSER", "PGPASSWORD"} + for _, key := range requiredKeys { + if _, ok := creds[key]; !ok { + t.Fatalf("Missing required key %q in credentials map", key) + } + } + + // Verify extracted values match the components used to construct the URL + if creds["PGHOST"] != expectedHost { + t.Fatalf("PGHOST: expected %q, got %q", expectedHost, creds["PGHOST"]) + } + + if creds["PGPORT"] != expectedPort { + t.Fatalf("PGPORT: expected %q, got %q", expectedPort, creds["PGPORT"]) + } + + if creds["PGDATABASE"] != expectedDB { + t.Fatalf("PGDATABASE: expected %q, got %q", expectedDB, creds["PGDATABASE"]) + } + + if creds["PGUSER"] != expectedUser { + t.Fatalf("PGUSER: expected %q, got %q", expectedUser, creds["PGUSER"]) + } + + if creds["PGPASSWORD"] != expectedPassword { + t.Fatalf("PGPASSWORD: expected %q, got %q", expectedPassword, creds["PGPASSWORD"]) + } + }) +} + +func TestProperty_CredentialPropagation_DefaultPort(t *testing.T) { + rapid.Check(t, func(t *rapid.T) { + // Generate a URL without a 
port - should default to 5432 + scheme := rapid.SampledFrom([]string{"postgresql", "postgres"}).Draw(t, "scheme") + host := genHost(t) + database := genAlphanumeric(t, "database") + user := genAlphanumeric(t, "user") + password := genAlphanumeric(t, "password") + + connURL := fmt.Sprintf("%s://%s:%s@%s/%s", scheme, user, password, host, database) + + creds, err := ExtractCredentials(connURL) + if err != nil { + t.Fatalf("ExtractCredentials(%q) returned error: %v", connURL, err) + } + + // When port is omitted, PGPORT should default to 5432 + if creds["PGPORT"] != defaultPostgreSQLPort { + t.Fatalf("PGPORT: expected default %q when port omitted, got %q", defaultPostgreSQLPort, creds["PGPORT"]) + } + + // Other fields should still be correctly extracted + if creds["PGHOST"] != host { + t.Fatalf("PGHOST: expected %q, got %q", host, creds["PGHOST"]) + } + + if creds["PGDATABASE"] != database { + t.Fatalf("PGDATABASE: expected %q, got %q", database, creds["PGDATABASE"]) + } + + if creds["PGUSER"] != user { + t.Fatalf("PGUSER: expected %q, got %q", user, creds["PGUSER"]) + } + + if creds["PGPASSWORD"] != password { + t.Fatalf("PGPASSWORD: expected %q, got %q", password, creds["PGPASSWORD"]) + } + }) +} + +func TestProperty_CredentialPropagation_SpecialCharsInPassword(t *testing.T) { + rapid.Check(t, func(t *rapid.T) { + // Generate a URL with special characters in the password (URL-encoded) + scheme := rapid.SampledFrom([]string{"postgresql", "postgres"}).Draw(t, "scheme") + host := genHost(t) + port := genPort(t) + database := genAlphanumeric(t, "database") + user := genAlphanumeric(t, "user") + + // Generate a password with special characters + base := genAlphanumeric(t, "passBase") + special := rapid.SampledFrom([]string{"@", "#", "$", "%", "&", "!", "=", "+"}).Draw(t, "special") + rawPassword := base + special + genAlphanumeric(t, "passSuffix") + + // URL-encode the password for the connection URL + encodedPassword := url.QueryEscape(rawPassword) + connURL := 
fmt.Sprintf("%s://%s:%s@%s:%s/%s", scheme, user, encodedPassword, host, port, database) + + creds, err := ExtractCredentials(connURL) + if err != nil { + t.Fatalf("ExtractCredentials(%q) returned error: %v", connURL, err) + } + + // The extracted password should be the decoded (raw) password + if creds["PGPASSWORD"] != rawPassword { + t.Fatalf("PGPASSWORD: expected decoded %q, got %q", rawPassword, creds["PGPASSWORD"]) + } + + if creds["PGHOST"] != host { + t.Fatalf("PGHOST: expected %q, got %q", host, creds["PGHOST"]) + } + + if creds["PGPORT"] != port { + t.Fatalf("PGPORT: expected %q, got %q", port, creds["PGPORT"]) + } + + if creds["PGDATABASE"] != database { + t.Fatalf("PGDATABASE: expected %q, got %q", database, creds["PGDATABASE"]) + } + + if creds["PGUSER"] != user { + t.Fatalf("PGUSER: expected %q, got %q", user, creds["PGUSER"]) + } + }) +} + +func TestProperty_CredentialPropagation_NoPropagationWhenDisabled(t *testing.T) { + rapid.Check(t, func(t *rapid.T) { + // When propagate-credentials is false, the shell hook should not propagate credentials. + // This tests that the ShellConfig.PropagateCredentials field correctly controls behavior. + config := ShellConfig{ + PropagateCredentials: false, + } + + // Verify the config has propagation disabled + if config.PropagateCredentials != false { + t.Fatal("PropagateCredentials should be false") + } + + // Generate a valid connection URL + connURL, _, _, _, _, _ := genConnectionURL(t) + + // Even though we can extract credentials from the URL, + // when PropagateCredentials is false, the system SHALL NOT inject them. + // We verify the flag is correctly set and that ExtractCredentials still works + // (the decision to inject or not is made at the shell runner based on this flag). 
+ creds, err := ExtractCredentials(connURL) + if err != nil { + t.Fatalf("ExtractCredentials should still parse valid URLs, got error: %v", err) + } + + // Credentials can be extracted but should NOT be injected when PropagateCredentials is false + if len(creds) != 5 { + t.Fatalf("Expected 5 credential keys, got %d", len(creds)) + } + }) +} diff --git a/internal/infra/hook/credentials_test.go b/internal/infra/hook/credentials_test.go new file mode 100644 index 0000000..c31836b --- /dev/null +++ b/internal/infra/hook/credentials_test.go @@ -0,0 +1,130 @@ +package hook + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestExtractCredentials(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + url string + expected map[string]string + expectError bool + }{ + { + name: "standard URL with all components", + url: "postgresql://myuser:mypassword@localhost:5432/mydb", + expected: map[string]string{ + "PGHOST": "localhost", + "PGPORT": "5432", + "PGDATABASE": "mydb", + "PGUSER": "myuser", + "PGPASSWORD": "mypassword", + }, + }, + { + name: "URL with custom port", + url: "postgresql://admin:secret@db.example.com:5433/production", + expected: map[string]string{ + "PGHOST": "db.example.com", + "PGPORT": "5433", + "PGDATABASE": "production", + "PGUSER": "admin", + "PGPASSWORD": "secret", + }, + }, + { + name: "URL without port defaults to 5432", + url: "postgresql://user:pass@host.example.com/testdb", + expected: map[string]string{ + "PGHOST": "host.example.com", + "PGPORT": "5432", + "PGDATABASE": "testdb", + "PGUSER": "user", + "PGPASSWORD": "pass", + }, + }, + { + name: "URL without password", + url: "postgresql://user@localhost:5432/mydb", + expected: map[string]string{ + "PGHOST": "localhost", + "PGPORT": "5432", + "PGDATABASE": "mydb", + "PGUSER": "user", + "PGPASSWORD": "", + }, + }, + { + name: "URL with special characters in password (URL-encoded)", + url: 
"postgresql://user:p%40ss%23word@localhost:5432/mydb", + expected: map[string]string{ + "PGHOST": "localhost", + "PGPORT": "5432", + "PGDATABASE": "mydb", + "PGUSER": "user", + "PGPASSWORD": "p@ss#word", + }, + }, + { + name: "URL with special characters in username (URL-encoded)", + url: "postgresql://user%40domain:password@localhost:5432/mydb", + expected: map[string]string{ + "PGHOST": "localhost", + "PGPORT": "5432", + "PGDATABASE": "mydb", + "PGUSER": "user@domain", + "PGPASSWORD": "password", + }, + }, + { + name: "postgres scheme (short form)", + url: "postgres://user:pass@localhost:5432/mydb", + expected: map[string]string{ + "PGHOST": "localhost", + "PGPORT": "5432", + "PGDATABASE": "mydb", + "PGUSER": "user", + "PGPASSWORD": "pass", + }, + }, + { + name: "invalid scheme", + url: "mysql://user:pass@localhost:3306/mydb", + expectError: true, + }, + { + name: "empty URL", + url: "", + expectError: true, + }, + { + name: "not a URL", + url: "not-a-valid-url", + expectError: true, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + result, err := ExtractCredentials(tc.url) + + if tc.expectError { + require.Error(t, err) + assert.ErrorIs(t, err, ErrInvalidConnectionURL) + + return + } + + require.NoError(t, err) + assert.Equal(t, tc.expected, result) + }) + } +} diff --git a/internal/infra/hook/dryrun_property_test.go b/internal/infra/hook/dryrun_property_test.go new file mode 100644 index 0000000..5b63d24 --- /dev/null +++ b/internal/infra/hook/dryrun_property_test.go @@ -0,0 +1,316 @@ +// Feature: partition-hooks, Property 18: Dry-Run No Side Effects +package hook + +import ( + "context" + "fmt" + "log/slog" + "os" + "testing" + "time" + + "pgregory.net/rapid" +) + +// **Validates: Requirements 17.2, 17.3, 17.4** +// +// Property 18: Dry-Run No Side Effects +// For any hook configuration in dry-run mode, template variables SHALL be resolved and logged, +// but no hook runner SHALL be invoked, no partition SHALL be 
detached, and no partition SHALL be dropped. + +// newDryRunTestOrchestrator creates an orchestrator in dry-run mode. +// It does NOT pass an executor/runner, matching the production usage of NewDryRunOrchestrator. +func newDryRunTestOrchestrator(hooks *HooksConfig, connURL string) *Orchestrator { + logger := *slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelError})) + metrics := NewMetricsCollector(logger) + + return NewDryRunOrchestrator(hooks, metrics, logger, connURL) +} + +// genDryRunPartitionContext generates a random valid PartitionContext for dry-run property testing. +func genDryRunPartitionContext(t *rapid.T) PartitionContext { + return PartitionContext{ + Schema: rapid.StringMatching(`[a-z][a-z0-9_]{1,15}`).Draw(t, "schema"), + Table: rapid.StringMatching(`[a-z][a-z0-9_]{1,15}_\d{4}_\d{2}`).Draw(t, "table"), + ParentTable: rapid.StringMatching(`[a-z][a-z0-9_]{1,15}`).Draw(t, "parentTable"), + LowerBound: rapid.StringMatching(`\d{4}-\d{2}-\d{2}`).Draw(t, "lowerBound"), + UpperBound: rapid.StringMatching(`\d{4}-\d{2}-\d{2}`).Draw(t, "upperBound"), + PartitionName: rapid.StringMatching(`[a-z][a-z0-9_]{1,15}`).Draw(t, "partitionName"), + Retention: rapid.StringMatching(`\d{1,3}`).Draw(t, "retention"), + Interval: rapid.SampledFrom([]string{"daily", "weekly", "monthly"}).Draw(t, "interval"), + DatabaseName: rapid.StringMatching(`[a-z][a-z0-9_]{1,15}`).Draw(t, "databaseName"), + Hostname: rapid.StringMatching(`[a-z][a-z0-9\-]{1,20}`).Draw(t, "hostname"), + } +} + +// genDryRunHookEntry generates a valid enabled hook entry with template variables in config. 
+func genDryRunHookEntry(t *rapid.T, label string) HookEntry { + hookType := rapid.SampledFrom([]HookType{ShellType, PostgreSQLType}).Draw(t, label+"_type") + name := rapid.StringMatching(`[a-z][a-z0-9\-]{1,20}`).Draw(t, label+"_name") + + var config map[string]interface{} + + switch hookType { + case ShellType: + // Use template variables to verify they get resolved + config = map[string]interface{}{ + "command": "/usr/bin/hook-{{.ParentTable}}", + "args": []interface{}{"--schema", "{{.Schema}}", "--table", "{{.Table}}"}, + "env": map[string]interface{}{ + "DB_HOST": "{{.Hostname}}", + "DB_NAME": "{{.DatabaseName}}", + }, + } + case PostgreSQLType: + config = map[string]interface{}{ + "sql_query": "VACUUM ANALYZE {{.Schema}}.{{.Table}}", + } + } + + return HookEntry{ + Name: name, + Type: hookType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + Config: config, + } +} + +// genDryRunHooksConfig generates a HooksConfig with hooks spread across lifecycle events. 
+func genDryRunHooksConfig(t *rapid.T) *HooksConfig { + cfg := &HooksConfig{} + + numBeforeDetach := rapid.IntRange(0, 3).Draw(t, "numBeforeDetach") + for i := 0; i < numBeforeDetach; i++ { + cfg.BeforeDetach = append(cfg.BeforeDetach, genDryRunHookEntry(t, fmt.Sprintf("bd_%d", i))) + } + + numAfterDetach := rapid.IntRange(0, 3).Draw(t, "numAfterDetach") + for i := 0; i < numAfterDetach; i++ { + cfg.AfterDetach = append(cfg.AfterDetach, genDryRunHookEntry(t, fmt.Sprintf("ad_%d", i))) + } + + numBeforeDrop := rapid.IntRange(0, 3).Draw(t, "numBeforeDrop") + for i := 0; i < numBeforeDrop; i++ { + cfg.BeforeDrop = append(cfg.BeforeDrop, genDryRunHookEntry(t, fmt.Sprintf("bdr_%d", i))) + } + + numAfterDrop := rapid.IntRange(0, 3).Draw(t, "numAfterDrop") + for i := 0; i < numAfterDrop; i++ { + cfg.AfterDrop = append(cfg.AfterDrop, genDryRunHookEntry(t, fmt.Sprintf("adr_%d", i))) + } + + return cfg +} + +// TestProperty_DryRunNoSideEffects_RunnerNeverInvoked verifies that in dry-run mode, +// no hook runner is ever invoked regardless of the hook configuration or lifecycle event. 
+func TestProperty_DryRunNoSideEffects_RunnerNeverInvoked(t *testing.T) { + // Feature: partition-hooks, Property 18: Dry-Run No Side Effects + rapid.Check(t, func(t *rapid.T) { + event := rapid.SampledFrom(allLifecycleEvents()).Draw(t, "event") + partition := genDryRunPartitionContext(t) + + // Generate hooks for the selected event + numHooks := rapid.IntRange(1, 5).Draw(t, "numHooks") + hooks := make([]HookEntry, 0, numHooks) + + for i := 0; i < numHooks; i++ { + hooks = append(hooks, genDryRunHookEntry(t, fmt.Sprintf("hook_%d", i))) + } + + cfg := buildHooksConfigForEvent(event, hooks) + connURL := "postgresql://user:pass@localhost:5432/mydb" + orch := newDryRunTestOrchestrator(cfg, connURL) + + // Execute the lifecycle event in dry-run mode + err := executeEventOnOrchestrator(context.Background(), orch, event, partition) + + // Dry-run should succeed without errors (no side effects) + if err != nil { + t.Fatalf("dry-run should not return error for valid hooks, got: %v", err) + } + + // The orchestrator in dry-run mode does not use an executor/runner at all. + // NewDryRunOrchestrator does not accept a runner, so it's structurally impossible + // for it to invoke one. We verify the orchestrator has no failures reported. + if orch.HasFailures() { + t.Fatal("dry-run should not report failures when all templates resolve successfully") + } + }) +} + +// TestProperty_DryRunNoSideEffects_TemplatesResolved verifies that in dry-run mode, +// template variables are resolved correctly (requirement 17.2). If templates contain +// undefined variables, errors are reported as in normal mode (requirement 17.7). 
+func TestProperty_DryRunNoSideEffects_TemplatesResolved(t *testing.T) { + // Feature: partition-hooks, Property 18: Dry-Run No Side Effects + rapid.Check(t, func(t *rapid.T) { + event := rapid.SampledFrom(allLifecycleEvents()).Draw(t, "event") + partition := genDryRunPartitionContext(t) + + // Generate valid hooks with template variables + numHooks := rapid.IntRange(1, 4).Draw(t, "numHooks") + hooks := make([]HookEntry, 0, numHooks) + + for i := 0; i < numHooks; i++ { + hooks = append(hooks, genDryRunHookEntry(t, fmt.Sprintf("hook_%d", i))) + } + + cfg := buildHooksConfigForEvent(event, hooks) + connURL := "postgresql://user:pass@localhost:5432/mydb" + orch := newDryRunTestOrchestrator(cfg, connURL) + + // Execute in dry-run mode + err := executeEventOnOrchestrator(context.Background(), orch, event, partition) + + // All templates should resolve without error since we use valid template variables + if err != nil { + t.Fatalf("dry-run with valid templates should not return error, got: %v", err) + } + }) +} + +// TestProperty_DryRunNoSideEffects_UndefinedVarReportsError verifies that in dry-run mode, +// undefined template variables still produce errors (requirement 17.7), matching normal mode behavior. 
+func TestProperty_DryRunNoSideEffects_UndefinedVarReportsError(t *testing.T) { + // Feature: partition-hooks, Property 18: Dry-Run No Side Effects + rapid.Check(t, func(t *rapid.T) { + event := rapid.SampledFrom(allLifecycleEvents()).Draw(t, "event") + partition := genDryRunPartitionContext(t) + + // Create a hook with an undefined template variable + hookType := rapid.SampledFrom([]HookType{ShellType, PostgreSQLType}).Draw(t, "hookType") + name := rapid.StringMatching(`[a-z][a-z0-9\-]{1,20}`).Draw(t, "name") + undefinedVar := rapid.StringMatching(`[A-Z][a-zA-Z]{3,15}`).Draw(t, "undefinedVar") + + var config map[string]interface{} + + switch hookType { + case ShellType: + config = map[string]interface{}{ + "command": fmt.Sprintf("/usr/bin/hook-{{.%s}}", undefinedVar), + } + case PostgreSQLType: + config = map[string]interface{}{ + "sql_query": fmt.Sprintf("SELECT * FROM {{.%s}}", undefinedVar), + } + } + + hook := HookEntry{ + Name: name, + Type: hookType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + Config: config, + } + + cfg := buildHooksConfigForEvent(event, []HookEntry{hook}) + connURL := "postgresql://user:pass@localhost:5432/mydb" + orch := newDryRunTestOrchestrator(cfg, connURL) + + // Execute in dry-run mode + err := executeEventOnOrchestrator(context.Background(), orch, event, partition) + + // Undefined template variables MUST produce an error even in dry-run mode + if err == nil { + t.Fatalf("dry-run with undefined template variable {{.%s}} should return error, got nil", undefinedVar) + } + + // The orchestrator should track the failure + if !orch.HasFailures() { + t.Fatal("dry-run should report failure when template variable is undefined") + } + }) +} + +// TestProperty_DryRunNoSideEffects_AllLifecycleEventsNoExecution verifies that in dry-run mode, +// executing hooks across ALL lifecycle events produces no side effects 
(no runner calls, +// no partition operations). This simulates the full cleanup flow scenario. +func TestProperty_DryRunNoSideEffects_AllLifecycleEventsNoExecution(t *testing.T) { + // Feature: partition-hooks, Property 18: Dry-Run No Side Effects + rapid.Check(t, func(t *rapid.T) { + partition := genDryRunPartitionContext(t) + cfg := genDryRunHooksConfig(t) + connURL := "postgresql://user:pass@localhost:5432/mydb" + orch := newDryRunTestOrchestrator(cfg, connURL) + + ctx := context.Background() + + // Simulate full cleanup flow in dry-run: all 4 lifecycle events + err := orch.ExecuteBeforeDetach(ctx, partition) + if err != nil { + t.Fatalf("dry-run ExecuteBeforeDetach should not error, got: %v", err) + } + + err = orch.ExecuteAfterDetach(ctx, partition) + if err != nil { + t.Fatalf("dry-run ExecuteAfterDetach should not error, got: %v", err) + } + + err = orch.ExecuteBeforeDrop(ctx, partition) + if err != nil { + t.Fatalf("dry-run ExecuteBeforeDrop should not error, got: %v", err) + } + + err = orch.ExecuteAfterDrop(ctx, partition) + if err != nil { + t.Fatalf("dry-run ExecuteAfterDrop should not error, got: %v", err) + } + + // Verify no failures (all templates use valid variables) + if orch.HasFailures() { + t.Fatal("dry-run should not report failures with valid hook configurations") + } + + // Verify the executor field is nil (structurally impossible to invoke a runner) + if orch.executor != nil { + t.Fatal("dry-run orchestrator should have nil executor") + } + }) +} + +// TestProperty_DryRunNoSideEffects_DisabledHooksSkipped verifies that in dry-run mode, +// disabled hooks are still skipped (consistent with normal mode behavior). 
+func TestProperty_DryRunNoSideEffects_DisabledHooksSkipped(t *testing.T) { + // Feature: partition-hooks, Property 18: Dry-Run No Side Effects + rapid.Check(t, func(t *rapid.T) { + event := rapid.SampledFrom(allLifecycleEvents()).Draw(t, "event") + partition := genDryRunPartitionContext(t) + + // Generate a mix of enabled and disabled hooks + numEnabled := rapid.IntRange(0, 3).Draw(t, "numEnabled") + numDisabled := rapid.IntRange(1, 3).Draw(t, "numDisabled") + + hooks := make([]HookEntry, 0, numEnabled+numDisabled) + + for i := 0; i < numEnabled; i++ { + hooks = append(hooks, genDryRunHookEntry(t, fmt.Sprintf("en_%d", i))) + } + + for i := 0; i < numDisabled; i++ { + hook := genDryRunHookEntry(t, fmt.Sprintf("dis_%d", i)) + hook.Enabled = boolPtr(false) + hooks = append(hooks, hook) + } + + cfg := buildHooksConfigForEvent(event, hooks) + connURL := "postgresql://user:pass@localhost:5432/mydb" + orch := newDryRunTestOrchestrator(cfg, connURL) + + // Execute in dry-run mode + err := executeEventOnOrchestrator(context.Background(), orch, event, partition) + + // Should succeed without errors + if err != nil { + t.Fatalf("dry-run with disabled hooks should not error, got: %v", err) + } + + // No failures should be reported + if orch.HasFailures() { + t.Fatal("dry-run should not report failures when hooks are simply disabled") + } + }) +} diff --git a/internal/infra/hook/dryrun_test.go b/internal/infra/hook/dryrun_test.go new file mode 100644 index 0000000..c3dbd42 --- /dev/null +++ b/internal/infra/hook/dryrun_test.go @@ -0,0 +1,333 @@ +package hook + +import ( + "bytes" + "context" + "log/slog" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// --- Dry-run orchestrator unit tests --- +// Validates: Requirements 17.2, 17.3, 17.4, 17.5 + +func newDryRunTestLogger(buf *bytes.Buffer) slog.Logger { + handler := slog.NewTextHandler(buf, &slog.HandlerOptions{Level: slog.LevelInfo}) + + return *slog.New(handler) +} 
+ +// TestDryRun_HooksNotExecuted verifies that in dry-run mode, no runner is invoked. +// The dry-run orchestrator has a nil executor, making it structurally impossible to call a runner. +// Validates: Requirement 17.3 +func TestDryRun_HooksNotExecuted(t *testing.T) { + t.Parallel() + + hooks := &HooksConfig{ + BeforeDetach: []HookEntry{ + { + Name: "shell-before-detach", + Type: ShellType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Config: map[string]interface{}{"command": "/usr/bin/backup", "args": []interface{}{"--table", "{{.Table}}"}}, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + }, + AfterDetach: []HookEntry{ + { + Name: "pg-after-detach", + Type: PostgreSQLType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Config: map[string]interface{}{"sql_query": "VACUUM ANALYZE {{.Schema}}.{{.Table}}"}, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + }, + BeforeDrop: []HookEntry{ + { + Name: "shell-before-drop", + Type: ShellType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Config: map[string]interface{}{"command": "/usr/bin/notify"}, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + }, + AfterDrop: []HookEntry{ + { + Name: "shell-after-drop", + Type: ShellType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Config: map[string]interface{}{"command": "/usr/bin/cleanup"}, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + }, + } + + var buf bytes.Buffer + logger := newDryRunTestLogger(&buf) + metrics := NewMetricsCollector(logger) + orch := NewDryRunOrchestrator(hooks, metrics, logger, "postgresql://user:pass@localhost:5432/mydb") + ctx := context.Background() + partition := newTestPartitionContext() + + // Execute 
all lifecycle events + require.NoError(t, orch.ExecuteBeforeDetach(ctx, partition)) + require.NoError(t, orch.ExecuteAfterDetach(ctx, partition)) + require.NoError(t, orch.ExecuteBeforeDrop(ctx, partition)) + require.NoError(t, orch.ExecuteAfterDrop(ctx, partition)) + + // Verify executor is nil (structurally cannot invoke a runner) + assert.Nil(t, orch.executor, "dry-run orchestrator should have nil executor") + assert.True(t, orch.dryRun, "dry-run flag should be set") + assert.False(t, orch.HasFailures(), "no failures should be reported") +} + +// TestDryRun_TemplateVariablesResolved verifies that template variables in shell commands, +// args, env vars, and SQL queries are resolved correctly in dry-run mode. +// Validates: Requirement 17.2 +func TestDryRun_TemplateVariablesResolved(t *testing.T) { + t.Parallel() + + hooks := &HooksConfig{ + BeforeDetach: []HookEntry{ + { + Name: "template-shell-hook", + Type: ShellType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Config: map[string]interface{}{ + "command": "/usr/bin/archive-{{.ParentTable}}", + "args": []interface{}{"--schema", "{{.Schema}}", "--table", "{{.Table}}", "--retention", "{{.Retention}}"}, + "env": map[string]interface{}{ + "DB_NAME": "{{.DatabaseName}}", + "DB_HOST": "{{.Hostname}}", + "INTERVAL": "{{.Interval}}", + }, + }, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + }, + AfterDetach: []HookEntry{ + { + Name: "template-pg-hook", + Type: PostgreSQLType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Config: map[string]interface{}{ + "sql_query": "VACUUM ANALYZE {{.Schema}}.{{.Table}}", + }, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + }, + } + + var buf bytes.Buffer + logger := newDryRunTestLogger(&buf) + metrics := NewMetricsCollector(logger) + orch := NewDryRunOrchestrator(hooks, metrics, logger, 
"postgresql://user:pass@localhost:5432/mydb") + ctx := context.Background() + partition := newTestPartitionContext() + + // Execute hooks in dry-run mode + require.NoError(t, orch.ExecuteBeforeDetach(ctx, partition)) + require.NoError(t, orch.ExecuteAfterDetach(ctx, partition)) + + // Verify resolved templates appear in the log output + logOutput := buf.String() + + // Shell hook: command should be resolved + assert.Contains(t, logOutput, "/usr/bin/archive-events", "command template should resolve ParentTable") + // Shell hook: args should be resolved + assert.Contains(t, logOutput, "public", "args template should resolve Schema") + assert.Contains(t, logOutput, "events_2024_01", "args template should resolve Table") + // Shell hook: env should be resolved + assert.Contains(t, logOutput, "mydb", "env template should resolve DatabaseName") + assert.Contains(t, logOutput, "localhost", "env template should resolve Hostname") + // PostgreSQL hook: sql_query should be resolved + assert.Contains(t, logOutput, "VACUUM ANALYZE public.events_2024_01", "sql_query template should resolve Schema and Table") +} + +// TestDryRun_ResolvedConfigLogged verifies that [DRY-RUN] log messages contain +// the expected hook details: hook name, type, lifecycle event, partition name, and resolved config. 
+// Validates: Requirement 17.5 +func TestDryRun_ResolvedConfigLogged(t *testing.T) { + t.Parallel() + + hooks := &HooksConfig{ + BeforeDetach: []HookEntry{ + { + Name: "notify-before-detach", + Type: ShellType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Config: map[string]interface{}{ + "command": "/usr/bin/notify", + "args": []interface{}{"--partition", "{{.PartitionName}}"}, + }, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + }, + AfterDrop: []HookEntry{ + { + Name: "vacuum-after-drop", + Type: PostgreSQLType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Config: map[string]interface{}{ + "sql_query": "SELECT pg_stat_reset_single_table_counters('{{.Schema}}.{{.Table}}')", + }, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + }, + } + + var buf bytes.Buffer + logger := newDryRunTestLogger(&buf) + metrics := NewMetricsCollector(logger) + orch := NewDryRunOrchestrator(hooks, metrics, logger, "postgresql://user:pass@localhost:5432/mydb") + ctx := context.Background() + partition := newTestPartitionContext() + + require.NoError(t, orch.ExecuteBeforeDetach(ctx, partition)) + require.NoError(t, orch.ExecuteAfterDrop(ctx, partition)) + + logOutput := buf.String() + + // Verify [DRY-RUN] prefix is present + assert.Contains(t, logOutput, "[DRY-RUN] Would execute hook", "log should contain dry-run prefix") + + // Verify hook name is logged + assert.Contains(t, logOutput, "notify-before-detach", "log should contain hook name") + assert.Contains(t, logOutput, "vacuum-after-drop", "log should contain hook name") + + // Verify type is logged + assert.Contains(t, logOutput, "shell", "log should contain hook type") + assert.Contains(t, logOutput, "postgresql", "log should contain hook type") + + // Verify lifecycle event is logged + assert.Contains(t, logOutput, "before-detach", "log should contain 
lifecycle event") + assert.Contains(t, logOutput, "after-drop", "log should contain lifecycle event") + + // Verify partition name is logged + assert.Contains(t, logOutput, "events", "log should contain partition name") + + // Verify resolved command/sql is logged + assert.Contains(t, logOutput, "/usr/bin/notify", "log should contain resolved command") + assert.Contains(t, logOutput, "pg_stat_reset_single_table_counters", "log should contain resolved sql") +} + +// TestDryRun_DisabledHooksSkipped verifies that disabled hooks do not produce dry-run log messages. +// Validates: Requirement 17.3 (disabled hooks are not executed, even in dry-run) +func TestDryRun_DisabledHooksSkipped(t *testing.T) { + t.Parallel() + + hooks := &HooksConfig{ + BeforeDetach: []HookEntry{ + { + Name: "disabled-hook", + Type: ShellType, + Enabled: boolPtr(false), + Timeout: 30 * time.Second, + Config: map[string]interface{}{"command": "/usr/bin/should-not-appear"}, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + { + Name: "enabled-hook", + Type: ShellType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Config: map[string]interface{}{"command": "/usr/bin/should-appear"}, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + }, + } + + var buf bytes.Buffer + logger := newDryRunTestLogger(&buf) + metrics := NewMetricsCollector(logger) + orch := NewDryRunOrchestrator(hooks, metrics, logger, "postgresql://user:pass@localhost:5432/mydb") + + require.NoError(t, orch.ExecuteBeforeDetach(context.Background(), newTestPartitionContext())) + + logOutput := buf.String() + + // Disabled hook should NOT appear in log + assert.NotContains(t, logOutput, "disabled-hook", "disabled hook should not produce dry-run log") + assert.NotContains(t, logOutput, "/usr/bin/should-not-appear", "disabled hook command should not appear") + + // Enabled hook 
SHOULD appear in log + assert.Contains(t, logOutput, "enabled-hook", "enabled hook should produce dry-run log") + assert.Contains(t, logOutput, "/usr/bin/should-appear", "enabled hook command should appear") +} + +// TestDryRun_UndefinedTemplateVariableReportsError verifies that undefined template variables +// still produce errors in dry-run mode (consistent with normal mode). +// Validates: Requirement 17.2 (template resolution errors still reported) +func TestDryRun_UndefinedTemplateVariableReportsError(t *testing.T) { + t.Parallel() + + hooks := &HooksConfig{ + BeforeDetach: []HookEntry{ + { + Name: "bad-template-hook", + Type: ShellType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Config: map[string]interface{}{ + "command": "echo {{.NonExistentVariable}}", + }, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + }, + } + + var buf bytes.Buffer + logger := newDryRunTestLogger(&buf) + metrics := NewMetricsCollector(logger) + orch := NewDryRunOrchestrator(hooks, metrics, logger, "postgresql://user:pass@localhost:5432/mydb") + + err := orch.ExecuteBeforeDetach(context.Background(), newTestPartitionContext()) + + // Should return an error for undefined template variable + require.Error(t, err) + assert.Contains(t, err.Error(), "template rendering failed") + assert.True(t, orch.HasFailures(), "orchestrator should track the failure") +} + +// TestDryRun_PostgreSQLUndefinedTemplateVariable verifies that undefined template variables +// in PostgreSQL hook SQL queries also produce errors in dry-run mode. 
+func TestDryRun_PostgreSQLUndefinedTemplateVariable(t *testing.T) { + t.Parallel() + + hooks := &HooksConfig{ + AfterDetach: []HookEntry{ + { + Name: "bad-pg-hook", + Type: PostgreSQLType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Config: map[string]interface{}{ + "sql_query": "SELECT * FROM {{.InvalidField}}", + }, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + }, + } + + var buf bytes.Buffer + logger := newDryRunTestLogger(&buf) + metrics := NewMetricsCollector(logger) + orch := NewDryRunOrchestrator(hooks, metrics, logger, "postgresql://user:pass@localhost:5432/mydb") + + err := orch.ExecuteAfterDetach(context.Background(), newTestPartitionContext()) + + require.Error(t, err) + assert.Contains(t, err.Error(), "template rendering failed") + assert.True(t, orch.HasFailures()) +} diff --git a/internal/infra/hook/executor.go b/internal/infra/hook/executor.go new file mode 100644 index 0000000..53b0cd1 --- /dev/null +++ b/internal/infra/hook/executor.go @@ -0,0 +1,98 @@ +package hook + +import ( + "context" + "fmt" + "log/slog" + "math" + "time" +) + +// Executor wraps a Runner with retry and timeout logic. +type Executor struct { + runner Runner + logger slog.Logger +} + +// NewExecutor creates a new Executor that wraps the given Runner with retry and timeout logic. +func NewExecutor(runner Runner, logger slog.Logger) *Executor { + return &Executor{ + runner: runner, + logger: logger, + } +} + +// Execute runs the hook with timeout and retry logic. +// It applies the configured timeout via context.WithTimeout and retries on failure +// up to retry.attempts times using the configured backoff strategy. 
+func (e *Executor) Execute(ctx context.Context, hook *ResolvedHook, entry HookEntry) error { + var lastErr error + + maxAttempts := entry.Retry.Attempts + 1 // 1 initial attempt + N retries + + for attempt := 1; attempt <= maxAttempts; attempt++ { + lastErr = e.executeWithTimeout(ctx, hook, entry.Timeout) + if lastErr == nil { + return nil + } + + // If this was the last attempt, don't retry + if attempt >= maxAttempts { + break + } + + // Check if the parent context is already cancelled + if ctx.Err() != nil { + return fmt.Errorf("hook %q: context cancelled during retry: %w", hook.Name, ctx.Err()) + } + + // Calculate backoff delay for the next retry + delay := calculateBackoff(entry.Retry, attempt) + + e.logger.Warn("Hook execution failed, retrying", + "hook", hook.Name, + "type", string(hook.Type), + "partition", hook.PartitionContext.PartitionName, + "attempt", attempt, + "max_attempts", maxAttempts, + "next_delay", delay.String(), + "error", lastErr.Error(), + ) + + // Wait for the backoff delay or context cancellation + select { + case <-ctx.Done(): + return fmt.Errorf("hook %q: context cancelled during backoff wait: %w", hook.Name, ctx.Err()) + case <-time.After(delay): + } + } + + return lastErr +} + +// executeWithTimeout runs the hook with a timeout derived from the hook's configured timeout. +func (e *Executor) executeWithTimeout(ctx context.Context, hook *ResolvedHook, timeout time.Duration) error { + timeoutCtx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + + return e.runner.Run(timeoutCtx, hook) +} + +// calculateBackoff computes the delay for the given retry attempt (1-based). +// For fixed backoff: returns initial_delay. +// For exponential backoff (the default): returns min(initial_delay × 2^(attempt-1), max_delay). +func calculateBackoff(retry RetryConfig, attempt int) time.Duration { + if retry.Backoff == BackoffFixed { + return retry.InitialDelay + } + + // Exponential backoff (also the default for unrecognized strategies). 
+ multiplier := math.Pow(2, float64(attempt-1)) + + delay := time.Duration(float64(retry.InitialDelay) * multiplier) + if delay > retry.MaxDelay { + return retry.MaxDelay + } + + return delay +} diff --git a/internal/infra/hook/executor_property_test.go b/internal/infra/hook/executor_property_test.go new file mode 100644 index 0000000..416306b --- /dev/null +++ b/internal/infra/hook/executor_property_test.go @@ -0,0 +1,108 @@ +// Feature: partition-hooks, Property 13: Backoff Delay Calculation +package hook + +import ( + "math" + "testing" + "time" + + "pgregory.net/rapid" +) + +// **Validates: Requirements 2.13, 2.14, 14.2, 14.3** +// +// Property 13: Backoff Delay Calculation +// For any retry configuration with `backoff` = "fixed", the delay between each retry attempt +// SHALL equal `initial_delay`. For any retry configuration with `backoff` = "exponential", +// the delay for attempt N SHALL equal min(`initial_delay` × 2^(N-1), `max_delay`). + +// genRetryConfigFixed generates a RetryConfig with fixed backoff strategy. +func genRetryConfigFixed(t *rapid.T) RetryConfig { + initialDelay := time.Duration(rapid.IntRange(1, 120).Draw(t, "initialDelaySec")) * time.Second + + return RetryConfig{ + Attempts: rapid.IntRange(1, 10).Draw(t, "attempts"), + Backoff: BackoffFixed, + InitialDelay: initialDelay, + MaxDelay: time.Duration(rapid.IntRange(60, 300).Draw(t, "maxDelaySec")) * time.Second, + } +} + +// genRetryConfigExponential generates a RetryConfig with exponential backoff strategy +// where MaxDelay >= InitialDelay. 
+func genRetryConfigExponential(t *rapid.T) RetryConfig { + initialDelaySec := rapid.IntRange(1, 30).Draw(t, "initialDelaySec") + maxDelaySec := rapid.IntRange(initialDelaySec, 300).Draw(t, "maxDelaySec") + + return RetryConfig{ + Attempts: rapid.IntRange(1, 10).Draw(t, "attempts"), + Backoff: BackoffExponential, + InitialDelay: time.Duration(initialDelaySec) * time.Second, + MaxDelay: time.Duration(maxDelaySec) * time.Second, + } +} + +func TestProperty_BackoffDelayCalculation_Fixed(t *testing.T) { + rapid.Check(t, func(t *rapid.T) { + retry := genRetryConfigFixed(t) + attempt := rapid.IntRange(1, retry.Attempts+1).Draw(t, "attempt") + + delay := calculateBackoff(retry, attempt) + + // Property: For fixed backoff, the delay SHALL always equal initial_delay + if delay != retry.InitialDelay { + t.Fatalf("Fixed backoff: expected delay %v for attempt %d, got %v", + retry.InitialDelay, attempt, delay) + } + }) +} + +func TestProperty_BackoffDelayCalculation_Exponential(t *testing.T) { + rapid.Check(t, func(t *rapid.T) { + retry := genRetryConfigExponential(t) + attempt := rapid.IntRange(1, retry.Attempts+1).Draw(t, "attempt") + + delay := calculateBackoff(retry, attempt) + + // Property: For exponential backoff, delay = min(initial_delay × 2^(attempt-1), max_delay) + multiplier := math.Pow(2, float64(attempt-1)) + expectedDelay := time.Duration(float64(retry.InitialDelay) * multiplier) + if expectedDelay > retry.MaxDelay { + expectedDelay = retry.MaxDelay + } + + if delay != expectedDelay { + t.Fatalf("Exponential backoff: expected delay %v for attempt %d (initial=%v, max=%v), got %v", + expectedDelay, attempt, retry.InitialDelay, retry.MaxDelay, delay) + } + + // Property: The result SHALL never exceed MaxDelay + if delay > retry.MaxDelay { + t.Fatalf("Exponential backoff: delay %v exceeds max_delay %v for attempt %d", + delay, retry.MaxDelay, attempt) + } + }) +} + +func TestProperty_BackoffDelayCalculation_ExponentialMonotonicity(t *testing.T) { + rapid.Check(t, 
func(t *rapid.T) { + retry := genRetryConfigExponential(t) + + // Need at least 2 attempts to test monotonicity + if retry.Attempts < 2 { + retry.Attempts = 2 + } + + attempt := rapid.IntRange(1, retry.Attempts).Draw(t, "attempt") + + delayN := calculateBackoff(retry, attempt) + delayN1 := calculateBackoff(retry, attempt+1) + + // Property: For exponential backoff, delay for attempt N+1 >= delay for attempt N + // (monotonically non-decreasing) + if delayN1 < delayN { + t.Fatalf("Exponential backoff: delay for attempt %d (%v) < delay for attempt %d (%v), violates monotonicity", + attempt+1, delayN1, attempt, delayN) + } + }) +} diff --git a/internal/infra/hook/executor_retry_property_test.go b/internal/infra/hook/executor_retry_property_test.go new file mode 100644 index 0000000..664cf2a --- /dev/null +++ b/internal/infra/hook/executor_retry_property_test.go @@ -0,0 +1,134 @@ +// Feature: partition-hooks, Property 14: Retry Execution Count +package hook + +import ( + "context" + "fmt" + "log/slog" + "os" + "sync/atomic" + "testing" + "time" + + "pgregory.net/rapid" +) + +// **Validates: Requirements 14.1, 14.4** +// +// Property 14: Retry Execution Count +// For any hook with retry.attempts = N that fails on every attempt, the hook SHALL be +// executed exactly N+1 times (1 initial + N retries). If the hook succeeds on attempt K +// (where K ≤ N+1), it SHALL be executed exactly K times. + +// countingRunner is a test runner that counts invocations and can be configured +// to succeed on a specific attempt or always fail. 
+type countingRunner struct { + callCount atomic.Int32 + succeedOnAttempt int // 0 means always fail +} + +func (r *countingRunner) Run(_ context.Context, _ *ResolvedHook) error { + current := int(r.callCount.Add(1)) + if r.succeedOnAttempt > 0 && current >= r.succeedOnAttempt { + return nil + } + + return fmt.Errorf("simulated failure on attempt %d", current) +} + +func TestProperty_RetryExecutionCount_AlwaysFails(t *testing.T) { + rapid.Check(t, func(t *rapid.T) { + // Generate a random number of retry attempts (1 to 10) + attempts := rapid.IntRange(1, 10).Draw(t, "attempts") + + runner := &countingRunner{succeedOnAttempt: 0} // always fail + logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelError})) + executor := NewExecutor(runner, *logger) + + entry := HookEntry{ + Name: "test-hook", + Type: ShellType, + Timeout: 5 * time.Second, + Retry: RetryConfig{ + Attempts: attempts, + Backoff: BackoffFixed, + InitialDelay: 1 * time.Millisecond, // minimal delay for fast tests + MaxDelay: 10 * time.Millisecond, + }, + } + + hook := &ResolvedHook{ + Name: "test-hook", + Type: ShellType, + PartitionContext: PartitionContext{ + PartitionName: "test-partition", + }, + } + + err := executor.Execute(context.Background(), hook, entry) + + // The hook should have failed + if err == nil { + t.Fatal("Expected error when hook always fails, got nil") + } + + // Property: hook SHALL be executed exactly N+1 times (1 initial + N retries) + expectedCalls := attempts + 1 + actualCalls := int(runner.callCount.Load()) + + if actualCalls != expectedCalls { + t.Fatalf("Hook with retry.attempts=%d that always fails: expected %d executions (1 initial + %d retries), got %d", + attempts, expectedCalls, attempts, actualCalls) + } + }) +} + +func TestProperty_RetryExecutionCount_SucceedsOnAttemptK(t *testing.T) { + rapid.Check(t, func(t *rapid.T) { + // Generate a random number of retry attempts (1 to 10) + attempts := rapid.IntRange(1, 10).Draw(t, "attempts") 
+ + // Generate the attempt on which the hook succeeds (1 to N+1) + maxAttempts := attempts + 1 + succeedOn := rapid.IntRange(1, maxAttempts).Draw(t, "succeedOnAttempt") + + runner := &countingRunner{succeedOnAttempt: succeedOn} + logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelError})) + executor := NewExecutor(runner, *logger) + + entry := HookEntry{ + Name: "test-hook", + Type: ShellType, + Timeout: 5 * time.Second, + Retry: RetryConfig{ + Attempts: attempts, + Backoff: BackoffFixed, + InitialDelay: 1 * time.Millisecond, // minimal delay for fast tests + MaxDelay: 10 * time.Millisecond, + }, + } + + hook := &ResolvedHook{ + Name: "test-hook", + Type: ShellType, + PartitionContext: PartitionContext{ + PartitionName: "test-partition", + }, + } + + err := executor.Execute(context.Background(), hook, entry) + + // The hook should have succeeded + if err != nil { + t.Fatalf("Expected hook to succeed on attempt %d, got error: %v", succeedOn, err) + } + + // Property: hook SHALL be executed exactly K times + actualCalls := int(runner.callCount.Load()) + + if actualCalls != succeedOn { + t.Fatalf("Hook with retry.attempts=%d that succeeds on attempt %d: expected %d executions, got %d", + attempts, succeedOn, succeedOn, actualCalls) + } + }) +} diff --git a/internal/infra/hook/executor_test.go b/internal/infra/hook/executor_test.go new file mode 100644 index 0000000..fe8ca94 --- /dev/null +++ b/internal/infra/hook/executor_test.go @@ -0,0 +1,457 @@ +package hook + +import ( + "context" + "fmt" + "log/slog" + "os" + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// mockRunner is a configurable test runner for executor unit tests. +type mockRunner struct { + callCount atomic.Int32 + // delay simulates how long the hook takes to execute. + delay time.Duration + // failUntilAttempt: fail on attempts < this value, succeed on >= this value. 
+ // 0 means always fail. + failUntilAttempt int +} + +func (r *mockRunner) Run(ctx context.Context, _ *ResolvedHook) error { + current := int(r.callCount.Add(1)) + + if r.delay > 0 { + select { + case <-ctx.Done(): + return fmt.Errorf("hook execution timed out: %w", ctx.Err()) + case <-time.After(r.delay): + } + } + + if r.failUntilAttempt > 0 && current >= r.failUntilAttempt { + return nil + } + + return fmt.Errorf("simulated failure on attempt %d", current) +} + +func newTestLogger() slog.Logger { + return *slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelError})) +} + +func newTestHook(name string) *ResolvedHook { + return &ResolvedHook{ + Name: name, + Type: ShellType, + PartitionContext: PartitionContext{ + PartitionName: "test-partition", + }, + } +} + +// --- Timeout Enforcement Tests --- +// Validates: Requirements 13.1, 13.2 + +func TestExecutor_TimeoutEnforcement_HookExceedsTimeout(t *testing.T) { + t.Parallel() + + // Runner takes 500ms but timeout is 50ms + runner := &mockRunner{delay: 500 * time.Millisecond, failUntilAttempt: 0} + logger := newTestLogger() + executor := NewExecutor(runner, logger) + + entry := HookEntry{ + Name: "slow-hook", + Type: ShellType, + Timeout: 50 * time.Millisecond, + Retry: RetryConfig{ + Attempts: 0, + Backoff: BackoffFixed, + InitialDelay: 1 * time.Millisecond, + MaxDelay: 10 * time.Millisecond, + }, + } + + hook := newTestHook("slow-hook") + + err := executor.Execute(context.Background(), hook, entry) + + // Requirement 13.1: hook exceeding timeout SHALL be terminated and treated as failed + require.Error(t, err) + assert.Contains(t, err.Error(), "timed out") +} + +func TestExecutor_TimeoutEnforcement_HookCompletesWithinTimeout(t *testing.T) { + t.Parallel() + + // Runner takes 10ms and timeout is 1s - should succeed + runner := &mockRunner{delay: 10 * time.Millisecond, failUntilAttempt: 1} + logger := newTestLogger() + executor := NewExecutor(runner, logger) + + entry := HookEntry{ + Name: 
"fast-hook", + Type: ShellType, + Timeout: 1 * time.Second, + Retry: RetryConfig{ + Attempts: 0, + Backoff: BackoffFixed, + InitialDelay: 1 * time.Millisecond, + MaxDelay: 10 * time.Millisecond, + }, + } + + hook := newTestHook("fast-hook") + + err := executor.Execute(context.Background(), hook, entry) + require.NoError(t, err) +} + +func TestExecutor_TimeoutEnforcement_TimeoutOnRetry(t *testing.T) { + t.Parallel() + + // Runner takes 200ms, timeout is 50ms, with 2 retries + // All attempts should time out + runner := &mockRunner{delay: 200 * time.Millisecond, failUntilAttempt: 0} + logger := newTestLogger() + executor := NewExecutor(runner, logger) + + entry := HookEntry{ + Name: "timeout-retry-hook", + Type: ShellType, + Timeout: 50 * time.Millisecond, + Retry: RetryConfig{ + Attempts: 2, + Backoff: BackoffFixed, + InitialDelay: 1 * time.Millisecond, + MaxDelay: 10 * time.Millisecond, + }, + } + + hook := newTestHook("timeout-retry-hook") + + err := executor.Execute(context.Background(), hook, entry) + + require.Error(t, err) + // Should have attempted 3 times (1 initial + 2 retries) + assert.Equal(t, int32(3), runner.callCount.Load()) +} + +// --- Fixed Backoff Delay Tests --- +// Validates: Requirement 14.2 + +func TestExecutor_FixedBackoff_DelayIsConstant(t *testing.T) { + t.Parallel() + + initialDelay := 50 * time.Millisecond + + // Verify calculateBackoff returns initial_delay for all attempts with fixed strategy + retry := RetryConfig{ + Attempts: 5, + Backoff: BackoffFixed, + InitialDelay: initialDelay, + MaxDelay: 1 * time.Second, + } + + for attempt := 1; attempt <= 5; attempt++ { + delay := calculateBackoff(retry, attempt) + assert.Equal(t, initialDelay, delay, "Fixed backoff should return initial_delay for attempt %d", attempt) + } +} + +func TestExecutor_FixedBackoff_ExecutionWithRetries(t *testing.T) { + t.Parallel() + + // Runner fails first 2 attempts, succeeds on 3rd + runner := &mockRunner{failUntilAttempt: 3} + logger := newTestLogger() + 
executor := NewExecutor(runner, logger) + + initialDelay := 20 * time.Millisecond + + entry := HookEntry{ + Name: "fixed-backoff-hook", + Type: ShellType, + Timeout: 1 * time.Second, + Retry: RetryConfig{ + Attempts: 3, + Backoff: BackoffFixed, + InitialDelay: initialDelay, + MaxDelay: 1 * time.Second, + }, + } + + hook := newTestHook("fixed-backoff-hook") + + start := time.Now() + err := executor.Execute(context.Background(), hook, entry) + elapsed := time.Since(start) + + require.NoError(t, err) + assert.Equal(t, int32(3), runner.callCount.Load()) + + // With fixed backoff of 20ms and 2 waits (between attempt 1→2 and 2→3), + // total delay should be at least 40ms + expectedMinDelay := 2 * initialDelay + assert.GreaterOrEqual(t, elapsed, expectedMinDelay, + "Expected at least %v of backoff delay, got %v", expectedMinDelay, elapsed) +} + +// --- Exponential Backoff with max_delay Cap Tests --- +// Validates: Requirement 14.3 + +func TestExecutor_ExponentialBackoff_DelayDoubles(t *testing.T) { + t.Parallel() + + initialDelay := 10 * time.Millisecond + maxDelay := 1 * time.Second + + retry := RetryConfig{ + Attempts: 5, + Backoff: BackoffExponential, + InitialDelay: initialDelay, + MaxDelay: maxDelay, + } + + // Verify delay = initial_delay × 2^(N-1) for each attempt + expectedDelays := []time.Duration{ + 10 * time.Millisecond, // attempt 1: 10ms × 2^0 = 10ms + 20 * time.Millisecond, // attempt 2: 10ms × 2^1 = 20ms + 40 * time.Millisecond, // attempt 3: 10ms × 2^2 = 40ms + 80 * time.Millisecond, // attempt 4: 10ms × 2^3 = 80ms + 160 * time.Millisecond, // attempt 5: 10ms × 2^4 = 160ms + } + + for i, expected := range expectedDelays { + attempt := i + 1 + delay := calculateBackoff(retry, attempt) + assert.Equal(t, expected, delay, "Exponential backoff for attempt %d", attempt) + } +} + +func TestExecutor_ExponentialBackoff_CappedAtMaxDelay(t *testing.T) { + t.Parallel() + + initialDelay := 10 * time.Millisecond + maxDelay := 50 * time.Millisecond + + retry := 
RetryConfig{ + Attempts: 10, + Backoff: BackoffExponential, + InitialDelay: initialDelay, + MaxDelay: maxDelay, + } + + // attempt 1: 10ms, attempt 2: 20ms, attempt 3: 40ms, attempt 4: 80ms → capped at 50ms + assert.Equal(t, 10*time.Millisecond, calculateBackoff(retry, 1)) + assert.Equal(t, 20*time.Millisecond, calculateBackoff(retry, 2)) + assert.Equal(t, 40*time.Millisecond, calculateBackoff(retry, 3)) + assert.Equal(t, maxDelay, calculateBackoff(retry, 4)) // 80ms capped to 50ms + assert.Equal(t, maxDelay, calculateBackoff(retry, 5)) // 160ms capped to 50ms + assert.Equal(t, maxDelay, calculateBackoff(retry, 10)) +} + +func TestExecutor_ExponentialBackoff_ExecutionWithRetries(t *testing.T) { + t.Parallel() + + // Runner always fails - we want to verify the exponential delay behavior + runner := &mockRunner{failUntilAttempt: 0} + logger := newTestLogger() + executor := NewExecutor(runner, logger) + + initialDelay := 20 * time.Millisecond + maxDelay := 100 * time.Millisecond + + entry := HookEntry{ + Name: "exp-backoff-hook", + Type: ShellType, + Timeout: 1 * time.Second, + Retry: RetryConfig{ + Attempts: 3, + Backoff: BackoffExponential, + InitialDelay: initialDelay, + MaxDelay: maxDelay, + }, + } + + hook := newTestHook("exp-backoff-hook") + + start := time.Now() + err := executor.Execute(context.Background(), hook, entry) + elapsed := time.Since(start) + + require.Error(t, err) + assert.Equal(t, int32(4), runner.callCount.Load()) // 1 initial + 3 retries + + // Expected delays: 20ms (attempt 1→2) + 40ms (attempt 2→3) + 80ms (attempt 3→4) = 140ms + expectedMinDelay := 20*time.Millisecond + 40*time.Millisecond + 80*time.Millisecond + assert.GreaterOrEqual(t, elapsed, expectedMinDelay, + "Expected at least %v of exponential backoff delay, got %v", expectedMinDelay, elapsed) +} + +func TestExecutor_ExponentialBackoff_MaxDelayCapsExecution(t *testing.T) { + t.Parallel() + + // Runner always fails + runner := &mockRunner{failUntilAttempt: 0} + logger := 
newTestLogger() + executor := NewExecutor(runner, logger) + + initialDelay := 20 * time.Millisecond + maxDelay := 30 * time.Millisecond // Cap at 30ms (less than 2nd exponential step of 40ms) + + entry := HookEntry{ + Name: "capped-exp-hook", + Type: ShellType, + Timeout: 1 * time.Second, + Retry: RetryConfig{ + Attempts: 3, + Backoff: BackoffExponential, + InitialDelay: initialDelay, + MaxDelay: maxDelay, + }, + } + + hook := newTestHook("capped-exp-hook") + + start := time.Now() + err := executor.Execute(context.Background(), hook, entry) + elapsed := time.Since(start) + + require.Error(t, err) + assert.Equal(t, int32(4), runner.callCount.Load()) + + // Expected delays: 20ms (attempt 1→2) + 30ms (capped, attempt 2→3) + 30ms (capped, attempt 3→4) = 80ms + expectedMinDelay := 20*time.Millisecond + 30*time.Millisecond + 30*time.Millisecond + assert.GreaterOrEqual(t, elapsed, expectedMinDelay, + "Expected at least %v with max_delay cap, got %v", expectedMinDelay, elapsed) + + // Should not take much longer than expected (allow some tolerance for scheduling) + maxExpected := expectedMinDelay + 100*time.Millisecond + assert.LessOrEqual(t, elapsed, maxExpected, + "Execution took too long (%v), max_delay cap may not be working", elapsed) +} + +// --- Retry Behavior Tests --- +// Validates: Requirement 14.1 + +func TestExecutor_Retry_SucceedsOnFirstAttempt(t *testing.T) { + t.Parallel() + + runner := &mockRunner{failUntilAttempt: 1} // succeed immediately + logger := newTestLogger() + executor := NewExecutor(runner, logger) + + entry := HookEntry{ + Name: "first-attempt-hook", + Type: ShellType, + Timeout: 1 * time.Second, + Retry: RetryConfig{ + Attempts: 3, + Backoff: BackoffFixed, + InitialDelay: 50 * time.Millisecond, + MaxDelay: 1 * time.Second, + }, + } + + hook := newTestHook("first-attempt-hook") + + err := executor.Execute(context.Background(), hook, entry) + + require.NoError(t, err) + assert.Equal(t, int32(1), runner.callCount.Load(), "Should only execute once 
when first attempt succeeds") +} + +func TestExecutor_Retry_SucceedsOnLastAttempt(t *testing.T) { + t.Parallel() + + // Succeed on attempt 4 (1 initial + 3 retries) + runner := &mockRunner{failUntilAttempt: 4} + logger := newTestLogger() + executor := NewExecutor(runner, logger) + + entry := HookEntry{ + Name: "last-attempt-hook", + Type: ShellType, + Timeout: 1 * time.Second, + Retry: RetryConfig{ + Attempts: 3, + Backoff: BackoffFixed, + InitialDelay: 1 * time.Millisecond, + MaxDelay: 10 * time.Millisecond, + }, + } + + hook := newTestHook("last-attempt-hook") + + err := executor.Execute(context.Background(), hook, entry) + + require.NoError(t, err) + assert.Equal(t, int32(4), runner.callCount.Load(), "Should execute 4 times (1 initial + 3 retries)") +} + +func TestExecutor_Retry_NoRetriesConfigured(t *testing.T) { + t.Parallel() + + runner := &mockRunner{failUntilAttempt: 0} // always fail + logger := newTestLogger() + executor := NewExecutor(runner, logger) + + entry := HookEntry{ + Name: "no-retry-hook", + Type: ShellType, + Timeout: 1 * time.Second, + Retry: RetryConfig{ + Attempts: 0, + Backoff: BackoffFixed, + InitialDelay: 1 * time.Millisecond, + MaxDelay: 10 * time.Millisecond, + }, + } + + hook := newTestHook("no-retry-hook") + + err := executor.Execute(context.Background(), hook, entry) + + require.Error(t, err) + assert.Equal(t, int32(1), runner.callCount.Load(), "Should only execute once with 0 retry attempts") +} + +func TestExecutor_Retry_ContextCancelledDuringBackoff(t *testing.T) { + t.Parallel() + + runner := &mockRunner{failUntilAttempt: 0} // always fail + logger := newTestLogger() + executor := NewExecutor(runner, logger) + + entry := HookEntry{ + Name: "cancel-during-backoff", + Type: ShellType, + Timeout: 1 * time.Second, + Retry: RetryConfig{ + Attempts: 5, + Backoff: BackoffFixed, + InitialDelay: 5 * time.Second, // long delay so we can cancel during it + MaxDelay: 10 * time.Second, + }, + } + + hook := newTestHook("cancel-during-backoff") 
+ + // Cancel context after 100ms (during the first backoff wait) + ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) + defer cancel() + + err := executor.Execute(ctx, hook, entry) + + require.Error(t, err) + assert.Contains(t, err.Error(), "context cancelled during backoff wait") + // Should have executed only once before the backoff wait was interrupted + assert.Equal(t, int32(1), runner.callCount.Load()) +} diff --git a/internal/infra/hook/metrics.go b/internal/infra/hook/metrics.go new file mode 100644 index 0000000..c39dfe0 --- /dev/null +++ b/internal/infra/hook/metrics.go @@ -0,0 +1,105 @@ +package hook + +import ( + "log/slog" + "sync" + "time" +) + +// ExecutionSummary holds aggregate metrics for the cleanup run. +type ExecutionSummary struct { + TotalExecuted int + TotalSuccess int + TotalFailures int + TotalRetries int +} + +// HookMetric records a single hook execution. +type HookMetric struct { + HookName string + HookType HookType + LifecycleEvent LifecycleEvent + PartitionName string + Duration time.Duration + Outcome string // "success" or "failure" + RetryAttempts int +} + +// MetricsCollector records hook execution metrics and provides a summary. +type MetricsCollector struct { + mu sync.Mutex + metrics []HookMetric + logger slog.Logger +} + +// NewMetricsCollector creates a new MetricsCollector with the given logger. +func NewMetricsCollector(logger slog.Logger) *MetricsCollector { + return &MetricsCollector{ + metrics: make([]HookMetric, 0), + logger: logger, + } +} + +// Record adds a hook execution metric to the collector and logs it as structured JSON output. 
+func (m *MetricsCollector) Record(metric HookMetric) {
+	// Guard only the slice mutation. The log handler performs I/O, and keeping
+	// the mutex held across it would stall concurrent Summary/Metrics callers.
+	m.mu.Lock()
+	m.metrics = append(m.metrics, metric)
+	m.mu.Unlock()
+
+	m.logger.Info("Hook execution metric",
+		"hook", metric.HookName,
+		"type", string(metric.HookType),
+		"lifecycle_event", string(metric.LifecycleEvent),
+		"partition", metric.PartitionName,
+		"duration_ms", metric.Duration.Milliseconds(),
+		"outcome", metric.Outcome,
+		"retry_attempts", metric.RetryAttempts,
+	)
+}
+
+// Summary returns the aggregate execution summary across all recorded metrics.
+//
+// TotalExecuted counts every recorded metric. TotalSuccess and TotalFailures
+// count only metrics whose Outcome is exactly "success" or "failure"; any other
+// outcome still contributes to TotalExecuted and TotalRetries.
+func (m *MetricsCollector) Summary() ExecutionSummary {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	summary := ExecutionSummary{
+		TotalExecuted: len(m.metrics),
+	}
+
+	// Index the slice rather than ranging by value to avoid copying each HookMetric.
+	for i := range m.metrics {
+		recorded := &m.metrics[i]
+
+		switch recorded.Outcome {
+		case "success":
+			summary.TotalSuccess++
+		case "failure":
+			summary.TotalFailures++
+		}
+
+		summary.TotalRetries += recorded.RetryAttempts
+	}
+
+	return summary
+}
+
+// LogSummary logs the execution summary at info level for end-of-cleanup reporting.
+// The totals are computed by Summary, which takes the collector lock itself.
+func (m *MetricsCollector) LogSummary() {
+	summary := m.Summary()
+
+	m.logger.Info("Hook execution summary",
+		"total_executed", summary.TotalExecuted,
+		"total_success", summary.TotalSuccess,
+		"total_failures", summary.TotalFailures,
+		"total_retries", summary.TotalRetries,
+	)
+}
+
+// Metrics returns a copy of all recorded metrics.
+func (m *MetricsCollector) Metrics() []HookMetric { + m.mu.Lock() + defer m.mu.Unlock() + + result := make([]HookMetric, len(m.metrics)) + copy(result, m.metrics) + + return result +} diff --git a/internal/infra/hook/metrics_test.go b/internal/infra/hook/metrics_test.go new file mode 100644 index 0000000..d89a83c --- /dev/null +++ b/internal/infra/hook/metrics_test.go @@ -0,0 +1,152 @@ +package hook + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func TestNewMetricsCollector(t *testing.T) { + logger := newTestLogger() + collector := NewMetricsCollector(logger) + + assert.NotNil(t, collector) + assert.Empty(t, collector.Metrics()) + + summary := collector.Summary() + assert.Equal(t, 0, summary.TotalExecuted) + assert.Equal(t, 0, summary.TotalSuccess) + assert.Equal(t, 0, summary.TotalFailures) + assert.Equal(t, 0, summary.TotalRetries) +} + +func TestMetricsCollector_Record(t *testing.T) { + logger := newTestLogger() + collector := NewMetricsCollector(logger) + + metric := HookMetric{ + HookName: "notify-detach", + HookType: ShellType, + LifecycleEvent: BeforeDetach, + PartitionName: "events_2024_01", + Duration: 150 * time.Millisecond, + Outcome: "success", + RetryAttempts: 0, + } + + collector.Record(metric) + + metrics := collector.Metrics() + assert.Len(t, metrics, 1) + assert.Equal(t, metric, metrics[0]) +} + +func TestMetricsCollector_Summary_MixedOutcomes(t *testing.T) { + logger := newTestLogger() + collector := NewMetricsCollector(logger) + + collector.Record(HookMetric{ + HookName: "hook-1", + HookType: ShellType, + LifecycleEvent: BeforeDetach, + PartitionName: "part_1", + Duration: 100 * time.Millisecond, + Outcome: "success", + RetryAttempts: 0, + }) + + collector.Record(HookMetric{ + HookName: "hook-2", + HookType: PostgreSQLType, + LifecycleEvent: AfterDetach, + PartitionName: "part_1", + Duration: 200 * time.Millisecond, + Outcome: "failure", + RetryAttempts: 3, + }) + + collector.Record(HookMetric{ + HookName: 
"hook-3", + HookType: ShellType, + LifecycleEvent: BeforeDrop, + PartitionName: "part_2", + Duration: 50 * time.Millisecond, + Outcome: "success", + RetryAttempts: 1, + }) + + summary := collector.Summary() + assert.Equal(t, 3, summary.TotalExecuted) + assert.Equal(t, 2, summary.TotalSuccess) + assert.Equal(t, 1, summary.TotalFailures) + assert.Equal(t, 4, summary.TotalRetries) +} + +func TestMetricsCollector_Summary_AllSuccess(t *testing.T) { + logger := newTestLogger() + collector := NewMetricsCollector(logger) + + for i := 0; i < 5; i++ { + collector.Record(HookMetric{ + HookName: "hook", + HookType: ShellType, + LifecycleEvent: AfterDrop, + PartitionName: "part", + Duration: 10 * time.Millisecond, + Outcome: "success", + RetryAttempts: 0, + }) + } + + summary := collector.Summary() + assert.Equal(t, 5, summary.TotalExecuted) + assert.Equal(t, 5, summary.TotalSuccess) + assert.Equal(t, 0, summary.TotalFailures) + assert.Equal(t, 0, summary.TotalRetries) +} + +func TestMetricsCollector_Summary_AllFailures(t *testing.T) { + logger := newTestLogger() + collector := NewMetricsCollector(logger) + + for i := 0; i < 3; i++ { + collector.Record(HookMetric{ + HookName: "failing-hook", + HookType: PostgreSQLType, + LifecycleEvent: BeforeDetach, + PartitionName: "part", + Duration: 500 * time.Millisecond, + Outcome: "failure", + RetryAttempts: 2, + }) + } + + summary := collector.Summary() + assert.Equal(t, 3, summary.TotalExecuted) + assert.Equal(t, 0, summary.TotalSuccess) + assert.Equal(t, 3, summary.TotalFailures) + assert.Equal(t, 6, summary.TotalRetries) +} + +func TestMetricsCollector_Metrics_ReturnsCopy(t *testing.T) { + logger := newTestLogger() + collector := NewMetricsCollector(logger) + + collector.Record(HookMetric{ + HookName: "hook-1", + HookType: ShellType, + LifecycleEvent: BeforeDetach, + PartitionName: "part_1", + Duration: 100 * time.Millisecond, + Outcome: "success", + RetryAttempts: 0, + }) + + metrics := collector.Metrics() + metrics[0].Outcome = 
"modified" + + // Original should be unchanged + original := collector.Metrics() + assert.Equal(t, "success", original[0].Outcome) +} diff --git a/internal/infra/hook/orchestrator.go b/internal/infra/hook/orchestrator.go new file mode 100644 index 0000000..a325059 --- /dev/null +++ b/internal/infra/hook/orchestrator.go @@ -0,0 +1,294 @@ +package hook + +import ( + "context" + "errors" + "fmt" + "log/slog" + "time" +) + +// ErrAbort is returned when a hook with on_failure=abort fails, signaling +// that the entire cleanup process should stop immediately. +var ErrAbort = errors.New("hook abort: stopping entire cleanup process") + +// Orchestrator coordinates hook execution around cleanup operations. +// It executes hooks sequentially, handles failure policies, and tracks metrics. +type Orchestrator struct { + hooks *HooksConfig + executor *Executor + metrics *MetricsCollector + logger slog.Logger + connURL string + hasFailure bool + dryRun bool +} + +// NewOrchestrator creates a new Orchestrator with the given configuration. +// If hooks is nil, all Execute* methods become no-ops. +func NewOrchestrator(hooks *HooksConfig, executor *Executor, metrics *MetricsCollector, logger slog.Logger, connectionURL string) *Orchestrator { + return &Orchestrator{ + hooks: hooks, + executor: executor, + metrics: metrics, + logger: logger, + connURL: connectionURL, + } +} + +// NewDryRunOrchestrator creates an Orchestrator that resolves templates and logs +// resolved hook configurations without actually executing hooks. +func NewDryRunOrchestrator(hooks *HooksConfig, metrics *MetricsCollector, logger slog.Logger, connectionURL string) *Orchestrator { + return &Orchestrator{ + hooks: hooks, + metrics: metrics, + logger: logger, + connURL: connectionURL, + dryRun: true, + } +} + +// ExecuteBeforeDetach runs before-detach hooks for a partition. +// Returns an error if hooks fail and the detach operation should be cancelled (default behavior). 
+// Returns an error wrapping ErrAbort if a hook with on_failure=abort fails
+// (match it with errors.Is, not ==).
+// Returns nil if on_failure=continue is set on the failing hook.
+func (o *Orchestrator) ExecuteBeforeDetach(ctx context.Context, partition PartitionContext) error {
+	return o.Execute(ctx, BeforeDetach, partition)
+}
+
+// ExecuteAfterDetach runs after-detach hooks for a partition.
+// Returns an error if hooks fail (caller decides whether to skip drop).
+// Returns an error wrapping ErrAbort if a hook with on_failure=abort fails
+// (match it with errors.Is, not ==).
+// Returns nil if on_failure=continue is set on the failing hook.
+func (o *Orchestrator) ExecuteAfterDetach(ctx context.Context, partition PartitionContext) error {
+	return o.Execute(ctx, AfterDetach, partition)
+}
+
+// ExecuteBeforeDrop runs before-drop hooks for a partition.
+// Returns an error if hooks fail and the drop operation should be cancelled (default behavior).
+// Returns an error wrapping ErrAbort if a hook with on_failure=abort fails
+// (match it with errors.Is, not ==).
+// Returns nil if on_failure=continue is set on the failing hook.
+func (o *Orchestrator) ExecuteBeforeDrop(ctx context.Context, partition PartitionContext) error {
+	return o.Execute(ctx, BeforeDrop, partition)
+}
+
+// ExecuteAfterDrop runs after-drop hooks for a partition.
+// Returns an error if hooks fail (informational, operation already done).
+// Returns an error wrapping ErrAbort if a hook with on_failure=abort fails
+// (match it with errors.Is, not ==).
+// Returns nil if on_failure=continue is set on the failing hook.
+func (o *Orchestrator) ExecuteAfterDrop(ctx context.Context, partition PartitionContext) error {
+	return o.Execute(ctx, AfterDrop, partition)
+}
+
+// Execute runs the hooks configured for the given lifecycle event against a partition.
+// It is a no-op (returns nil) when no hooks are configured.
+// Otherwise it hands the hook list selected by hooksFor(event) to executeHooks
+// and returns that call's result unchanged.
+func (o *Orchestrator) Execute(ctx context.Context, event LifecycleEvent, partition PartitionContext) error {
+	// A nil HooksConfig means the hooks feature is not configured at all.
+	if o.hooks == nil {
+		return nil
+	}
+
+	return o.executeHooks(ctx, partition, o.hooks.hooksFor(event), event)
+}
+
+// HasFailures returns true if any hook failed during the cleanup process.
+// The flag is set on every template-rendering or execution failure, including
+// failures whose error was suppressed by on_failure=continue, and it is never reset.
+func (o *Orchestrator) HasFailures() bool {
+	return o.hasFailure
+}
+
+// Summary returns the aggregate execution metrics.
+func (o *Orchestrator) Summary() ExecutionSummary {
+	return o.metrics.Summary()
+}
+
+// executeHooks walks the hooks of one lifecycle event in order for a single partition.
+// Hooks explicitly disabled are skipped silently. The first hook that fails to
+// render or to execute ends the walk, and the value returned is whatever
+// handleFailure decides for that hook's on_failure policy (which may be nil).
+// In dry-run mode every hook is resolved and logged, but none is executed.
+func (o *Orchestrator) executeHooks(ctx context.Context, partition PartitionContext, hooks []HookEntry, event LifecycleEvent) error {
+	for _, entry := range hooks {
+		// Only an explicit `enabled: false` disables a hook; a nil pointer means enabled (Requirement 2.7).
+		if enabled := entry.Enabled; enabled != nil && !*enabled {
+			o.logger.Debug("Skipping disabled hook",
+				"hook", entry.Name,
+				"lifecycle_event", string(event),
+				"partition", partition.PartitionName,
+			)
+
+			continue
+		}
+
+		// Template problems surface in both normal and dry-run mode (Requirement 17.7).
+		resolved, resolveErr := o.resolveHook(entry, event, partition)
+		if resolveErr != nil {
+			o.hasFailure = true
+			o.recordMetric(entry, event, partition, 0, "failure", 0)
+
+			return o.handleFailure(entry, event, fmt.Errorf("template rendering failed for hook %q: %w", entry.Name, resolveErr))
+		}
+
+		// Info-level announcement in dry-run mode (Requirements 17.2, 17.3, 17.5),
+		// debug-level dump of the resolved configuration otherwise (Requirement 16.4).
+		o.logHookConfig(entry, event, partition, resolved, o.dryRun)
+
+		if o.dryRun {
+			continue
+		}
+
+		// Attributes shared by the start, success and failure log records.
+		attrs := []any{
+			"hook", entry.Name,
+			"type", string(entry.Type),
+			"lifecycle_event", string(event),
+			"partition", partition.PartitionName,
+		}
+
+		// Hook start (Requirement 16.1).
+		o.logger.Info("Executing hook", attrs...)
+
+		// Retry and timeout handling live in the Executor; only the total wall time is measured here.
+		startedAt := time.Now()
+		runErr := o.executor.Execute(ctx, resolved, entry)
+		elapsed := time.Since(startedAt)
+
+		if runErr == nil {
+			// NOTE(review): retries that preceded a success are not visible here, so 0 is recorded.
+			o.recordMetric(entry, event, partition, elapsed, "success", 0)
+
+			o.logger.Info("Hook execution succeeded",
+				append(attrs, "duration_ms", elapsed.Milliseconds())...)
+
+			continue
+		}
+
+		o.hasFailure = true
+		// NOTE(review): this records the configured retry budget, not the number of retries actually performed.
+		o.recordMetric(entry, event, partition, elapsed, "failure", entry.Retry.Attempts)
+
+		o.logger.Error("Hook execution failed",
+			append(attrs, "error", runErr.Error(), "duration_ms", elapsed.Milliseconds())...)
+
+		// Remaining hooks of this event are skipped (Requirements 6.2, 6.6).
+		return o.handleFailure(entry, event, runErr)
+	}
+
+	return nil
+}
+
+// handleFailure translates a hook failure into the error reported to the caller,
+// according to the hook's on_failure policy:
+//   - abort: an error wrapping both ErrAbort and the cause, so the whole cleanup stops
+//   - continue: a warning is logged and nil is returned, so the operation proceeds
+//   - unset: an error wrapping the cause; the message says the operation is being
+//     cancelled for before-* events and merely reports the failure for after-* events
+func (o *Orchestrator) handleFailure(entry HookEntry, event LifecycleEvent, execErr error) error {
+	if entry.OnFailure == OnFailureAbort {
+		// Requirement 6.8: stop the entire cleanup process immediately.
+		return fmt.Errorf("%w: hook %q failed: %w", ErrAbort, entry.Name, execErr)
+	}
+
+	if entry.OnFailure == OnFailureContinue {
+		// Requirement 6.9: carry on with the operation despite the failure.
+		o.logger.Warn("Hook failed but on_failure=continue, proceeding",
+			"hook", entry.Name,
+			"lifecycle_event", string(event),
+			"error", execErr.Error(),
+		)
+
+		return nil
+	}
+
+	if !isBeforeHook(event) {
+		// After-hook: the operation has already happened (Requirement 6.4). The error is
+		// still returned so the caller can react, e.g. skip the drop after an after-detach failure.
+		return fmt.Errorf("hook %q failed during %s: %w", entry.Name, string(event), execErr)
+	}
+
+	// Before-hook: the associated operation is cancelled (Requirements 6.1, 9.2, 11.2).
+	return fmt.Errorf("hook %q failed, cancelling %s operation: %w", entry.Name, string(event), execErr)
+}
+
+// resolveHook looks up the handler registered for the hook's type, lets it render
+// the hook configuration for the given partition, and returns the ResolvedHook
+// that is passed to the Executor. An unregistered type yields ErrUnsupportedHookType.
+func (o *Orchestrator) resolveHook(entry HookEntry, event LifecycleEvent, partition PartitionContext) (*ResolvedHook, error) {
+	handler, known := registry[entry.Type]
+	if !known {
+		return nil, fmt.Errorf("%w: %q for hook %q", ErrUnsupportedHookType, entry.Type, entry.Name)
+	}
+
+	config, err := handler.resolve(entry.Config, partition)
+	if err != nil {
+		return nil, err
+	}
+
+	return &ResolvedHook{
+		Name:             entry.Name,
+		Type:             entry.Type,
+		LifecycleEvent:   event,
+		PartitionContext: partition,
+		ConnectionURL:    o.connURL,
+		Config:           config,
+	}, nil
+}
+
+// recordMetric assembles a HookMetric for one hook run and hands it to the MetricsCollector.
+func (o *Orchestrator) recordMetric(entry HookEntry, event LifecycleEvent, partition PartitionContext, duration time.Duration, outcome string, retryAttempts int) {
+	metric := HookMetric{
+		HookName:       entry.Name,
+		HookType:       entry.Type,
+		LifecycleEvent: event,
+		PartitionName:  partition.PartitionName,
+		Duration:       duration,
+		Outcome:        outcome,
+		RetryAttempts:  retryAttempts,
+	}
+
+	o.metrics.Record(metric)
+}
+
+// isBeforeHook reports whether the lifecycle event is one of the before-* events.
+func isBeforeHook(event LifecycleEvent) bool {
+	switch event {
+	case BeforeDetach, BeforeDrop:
+		return true
+	default:
+		return false
+	}
+}
+
+// logHookConfig logs the fully resolved hook configuration (with template variables substituted).
+// When dryRun is true, it logs at info level to announce what would be executed (Requirements 17.2, 17.5).
+// Otherwise it logs at debug level so operators can inspect the resolved configuration (Requirement 16.4).
+func (o *Orchestrator) logHookConfig(entry HookEntry, event LifecycleEvent, partition PartitionContext, resolved *ResolvedHook, dryRun bool) { + logArgs := []any{ + "hook", entry.Name, + "type", string(entry.Type), + "lifecycle_event", string(event), + "partition", partition.PartitionName, + } + + if resolved.Config != nil { + logArgs = append(logArgs, resolved.Config.LogAttrs()...) + } + + if entry.Timeout > 0 { + logArgs = append(logArgs, "timeout", entry.Timeout.String()) + } + + if entry.OnFailure != "" { + logArgs = append(logArgs, "on_failure", string(entry.OnFailure)) + } + + if entry.Retry.Attempts > 0 { + logArgs = append(logArgs, + "retry_attempts", entry.Retry.Attempts, + "retry_backoff", string(entry.Retry.Backoff), + ) + } + + if dryRun { + o.logger.Info("[DRY-RUN] Would execute hook", append(logArgs, "dry_run", true)...) + + return + } + + o.logger.Debug("Resolved hook configuration", logArgs...) +} diff --git a/internal/infra/hook/orchestrator_property_test.go b/internal/infra/hook/orchestrator_property_test.go new file mode 100644 index 0000000..2124a59 --- /dev/null +++ b/internal/infra/hook/orchestrator_property_test.go @@ -0,0 +1,1965 @@ +// Feature: partition-hooks, Property 3: Disabled Hook Skipping +package hook + +import ( + "context" + "errors" + "fmt" + "testing" + "time" + + "pgregory.net/rapid" +) + +// **Validates: Requirements 2.7** +// +// Property 3: Disabled Hook Skipping +// For any hook with `enabled` set to false, the hook runner SHALL NOT be invoked, +// and the hook execution SHALL not produce an error or affect subsequent hooks in the same lifecycle event. + +// allLifecycleEvents returns all lifecycle events for property testing. +func allLifecycleEvents() []LifecycleEvent { + return []LifecycleEvent{BeforeDetach, AfterDetach, BeforeDrop, AfterDrop} +} + +// genValidHookEntry generates a valid HookEntry with all required fields for orchestrator testing. 
+func genValidHookEntry(t *rapid.T, label string, enabled bool) HookEntry { + hookType := rapid.SampledFrom([]HookType{ShellType, PostgreSQLType}).Draw(t, label+"_type") + name := rapid.StringMatching(`[a-z][a-z0-9\-]{1,20}`).Draw(t, label+"_name") + + var config map[string]interface{} + + switch hookType { + case ShellType: + config = map[string]interface{}{ + "command": "/usr/bin/echo", + } + case PostgreSQLType: + config = map[string]interface{}{ + "sql_query": "SELECT 1", + } + } + + return HookEntry{ + Name: name, + Type: hookType, + Enabled: boolPtr(enabled), + Timeout: 30 * time.Second, + OnFailure: "", + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + Config: config, + } +} + +// buildHooksConfigForEvent creates a HooksConfig with hooks placed in the specified lifecycle event. +func buildHooksConfigForEvent(event LifecycleEvent, hooks []HookEntry) *HooksConfig { + cfg := &HooksConfig{} + + switch event { + case BeforeDetach: + cfg.BeforeDetach = hooks + case AfterDetach: + cfg.AfterDetach = hooks + case BeforeDrop: + cfg.BeforeDrop = hooks + case AfterDrop: + cfg.AfterDrop = hooks + } + + return cfg +} + +// executeEventOnOrchestrator calls the appropriate Execute method based on the lifecycle event. +func executeEventOnOrchestrator(ctx context.Context, orch *Orchestrator, event LifecycleEvent, partition PartitionContext) error { + switch event { + case BeforeDetach: + return orch.ExecuteBeforeDetach(ctx, partition) + case AfterDetach: + return orch.ExecuteAfterDetach(ctx, partition) + case BeforeDrop: + return orch.ExecuteBeforeDrop(ctx, partition) + case AfterDrop: + return orch.ExecuteAfterDrop(ctx, partition) + } + + return nil +} + +// TestProperty_DisabledHookSkipping_NeverExecuted verifies that disabled hooks are never executed +// (the runner is never called for them) across all lifecycle events. 
+func TestProperty_DisabledHookSkipping_NeverExecuted(t *testing.T) { + // Feature: partition-hooks, Property 3: Disabled Hook Skipping + rapid.Check(t, func(t *rapid.T) { + event := rapid.SampledFrom(allLifecycleEvents()).Draw(t, "event") + + // Generate a list of hooks where ALL are disabled + numHooks := rapid.IntRange(1, 5).Draw(t, "numHooks") + hooks := make([]HookEntry, 0, numHooks) + + for i := 0; i < numHooks; i++ { + hooks = append(hooks, genValidHookEntry(t, "hook", false)) + } + + runner := &orchestratorMockRunner{} + cfg := buildHooksConfigForEvent(event, hooks) + orch := newTestOrchestrator(cfg, runner) + + err := executeEventOnOrchestrator(context.Background(), orch, event, newTestPartitionContext()) + + // Disabled hooks must not produce errors + if err != nil { + t.Fatalf("disabled hooks should not produce errors, got: %v", err) + } + + // Runner must never be called for disabled hooks + if runner.callCount.Load() != 0 { + t.Fatalf("runner should not be called for disabled hooks, got %d calls", runner.callCount.Load()) + } + }) +} + +// TestProperty_DisabledHookSkipping_NoError verifies that disabled hooks don't produce errors +// regardless of the lifecycle event. 
+func TestProperty_DisabledHookSkipping_NoError(t *testing.T) { + // Feature: partition-hooks, Property 3: Disabled Hook Skipping + rapid.Check(t, func(t *rapid.T) { + event := rapid.SampledFrom(allLifecycleEvents()).Draw(t, "event") + + // Mix of disabled hooks with various configurations + numHooks := rapid.IntRange(1, 5).Draw(t, "numHooks") + hooks := make([]HookEntry, 0, numHooks) + + for i := 0; i < numHooks; i++ { + hook := genValidHookEntry(t, "hook", false) + // Randomize on_failure to ensure disabled hooks never trigger failure handling + hook.OnFailure = rapid.SampledFrom([]OnFailure{"", OnFailureAbort, OnFailureContinue}).Draw(t, "onFailure") + hooks = append(hooks, hook) + } + + runner := &orchestratorMockRunner{} + cfg := buildHooksConfigForEvent(event, hooks) + orch := newTestOrchestrator(cfg, runner) + + err := executeEventOnOrchestrator(context.Background(), orch, event, newTestPartitionContext()) + + // Disabled hooks must not produce errors + if err != nil { + t.Fatalf("disabled hooks should not produce errors regardless of on_failure setting, got: %v", err) + } + + // Orchestrator must not report failures for disabled hooks + if orch.HasFailures() { + t.Fatal("orchestrator should not report failures when only disabled hooks are present") + } + }) +} + +// TestProperty_DisabledHookSkipping_SubsequentHooksStillExecute verifies that disabled hooks +// don't affect subsequent enabled hooks in the same lifecycle event. 
+func TestProperty_DisabledHookSkipping_SubsequentHooksStillExecute(t *testing.T) { + // Feature: partition-hooks, Property 3: Disabled Hook Skipping + rapid.Check(t, func(t *rapid.T) { + event := rapid.SampledFrom(allLifecycleEvents()).Draw(t, "event") + + // Generate a mixed list: some disabled, some enabled + // At least 1 disabled and 1 enabled hook + numDisabled := rapid.IntRange(1, 3).Draw(t, "numDisabled") + numEnabled := rapid.IntRange(1, 3).Draw(t, "numEnabled") + + hooks := make([]HookEntry, 0, numDisabled+numEnabled) + expectedExecuted := make([]string, 0, numEnabled) + + // Interleave disabled and enabled hooks randomly + disabledIdx := 0 + enabledIdx := 0 + totalHooks := numDisabled + numEnabled + + for i := 0; i < totalHooks; i++ { + // Decide whether to place a disabled or enabled hook + placeDisabled := false + if disabledIdx < numDisabled && enabledIdx < numEnabled { + placeDisabled = rapid.Bool().Draw(t, "placeDisabled") + } else if disabledIdx < numDisabled { + placeDisabled = true + } + + if placeDisabled { + hook := genValidHookEntry(t, "disabled", false) + // Ensure unique name for tracking + hook.Name = "disabled-" + hook.Name + hooks = append(hooks, hook) + disabledIdx++ + } else { + hook := genValidHookEntry(t, "enabled", true) + // Ensure unique name for tracking + hook.Name = "enabled-" + hook.Name + hooks = append(hooks, hook) + expectedExecuted = append(expectedExecuted, hook.Name) + enabledIdx++ + } + } + + runner := &orchestratorMockRunner{} + cfg := buildHooksConfigForEvent(event, hooks) + orch := newTestOrchestrator(cfg, runner) + + err := executeEventOnOrchestrator(context.Background(), orch, event, newTestPartitionContext()) + + // No errors expected (all enabled hooks succeed with mock runner) + if err != nil { + t.Fatalf("expected no error, got: %v", err) + } + + // Verify only enabled hooks were executed + if int(runner.callCount.Load()) != numEnabled { + t.Fatalf("expected %d enabled hooks to be executed, got %d", numEnabled, 
runner.callCount.Load()) + } + + // Verify the execution order matches the order of enabled hooks + if len(runner.executedHooks) != len(expectedExecuted) { + t.Fatalf("expected %d executed hooks, got %d", len(expectedExecuted), len(runner.executedHooks)) + } + + for i, name := range expectedExecuted { + if runner.executedHooks[i] != name { + t.Fatalf("expected hook at position %d to be %q, got %q", i, name, runner.executedHooks[i]) + } + } + + // Orchestrator must not report failures + if orch.HasFailures() { + t.Fatal("orchestrator should not report failures when all enabled hooks succeed") + } + }) +} + +// TestProperty_DisabledHookSkipping_DisabledBeforeEnabled verifies that disabled hooks +// placed before enabled hooks don't prevent the enabled hooks from executing. +func TestProperty_DisabledHookSkipping_DisabledBeforeEnabled(t *testing.T) { + // Feature: partition-hooks, Property 3: Disabled Hook Skipping + rapid.Check(t, func(t *rapid.T) { + event := rapid.SampledFrom(allLifecycleEvents()).Draw(t, "event") + + // Place disabled hooks first, then enabled hooks + numDisabledBefore := rapid.IntRange(1, 4).Draw(t, "numDisabledBefore") + numEnabledAfter := rapid.IntRange(1, 4).Draw(t, "numEnabledAfter") + + hooks := make([]HookEntry, 0, numDisabledBefore+numEnabledAfter) + + // Add disabled hooks first + for i := 0; i < numDisabledBefore; i++ { + hook := genValidHookEntry(t, "dis", false) + hook.Name = "disabled-" + hook.Name + hooks = append(hooks, hook) + } + + // Add enabled hooks after + expectedNames := make([]string, 0, numEnabledAfter) + + for i := 0; i < numEnabledAfter; i++ { + hook := genValidHookEntry(t, "en", true) + hook.Name = "enabled-" + hook.Name + hooks = append(hooks, hook) + expectedNames = append(expectedNames, hook.Name) + } + + runner := &orchestratorMockRunner{} + cfg := buildHooksConfigForEvent(event, hooks) + orch := newTestOrchestrator(cfg, runner) + + err := executeEventOnOrchestrator(context.Background(), orch, event, 
newTestPartitionContext()) + + // No errors expected + if err != nil { + t.Fatalf("expected no error, got: %v", err) + } + + // All enabled hooks must have been executed + if int(runner.callCount.Load()) != numEnabledAfter { + t.Fatalf("expected %d enabled hooks to execute, got %d", numEnabledAfter, runner.callCount.Load()) + } + + // Verify execution order + for i, name := range expectedNames { + if runner.executedHooks[i] != name { + t.Fatalf("expected hook at position %d to be %q, got %q", i, name, runner.executedHooks[i]) + } + } + }) +} + +// Feature: partition-hooks, Property 4: Sequential Execution Order + +// **Validates: Requirements 2.14, 9.1, 10.1, 11.1, 12.1** +// +// Property 4: Sequential Execution Order +// For any list of enabled hooks within a lifecycle event, the hooks SHALL be executed +// in the exact order they are defined in the configuration, and each hook SHALL complete +// (success or failure) before the next hook begins. + +// TestProperty_SequentialExecution_OrderMatchesDefinition verifies that for any randomly +// generated list of N enabled hooks (N >= 2) in any lifecycle event, the execution order +// matches the definition order. 
+func TestProperty_SequentialExecution_OrderMatchesDefinition(t *testing.T) { + // Feature: partition-hooks, Property 4: Sequential Execution Order + rapid.Check(t, func(t *rapid.T) { + event := rapid.SampledFrom(allLifecycleEvents()).Draw(t, "event") + + // Generate N >= 2 enabled hooks with unique names + numHooks := rapid.IntRange(2, 8).Draw(t, "numHooks") + hooks := make([]HookEntry, 0, numHooks) + expectedOrder := make([]string, 0, numHooks) + + for i := 0; i < numHooks; i++ { + hook := genValidHookEntry(t, "hook", true) + // Ensure unique names by prefixing with index + hook.Name = rapid.StringMatching(`[a-z][a-z0-9]{2,10}`).Draw(t, "name") + "-" + string(rune('a'+i)) + hooks = append(hooks, hook) + expectedOrder = append(expectedOrder, hook.Name) + } + + runner := &orchestratorMockRunner{} + cfg := buildHooksConfigForEvent(event, hooks) + orch := newTestOrchestrator(cfg, runner) + + err := executeEventOnOrchestrator(context.Background(), orch, event, newTestPartitionContext()) + + // All hooks succeed with mock runner, so no error expected + if err != nil { + t.Fatalf("expected no error, got: %v", err) + } + + // Verify execution order matches definition order + if len(runner.executedHooks) != numHooks { + t.Fatalf("expected %d hooks executed, got %d", numHooks, len(runner.executedHooks)) + } + + for i, expectedName := range expectedOrder { + if runner.executedHooks[i] != expectedName { + t.Fatalf("execution order mismatch at position %d: expected %q, got %q\nExpected order: %v\nActual order: %v", + i, expectedName, runner.executedHooks[i], expectedOrder, runner.executedHooks) + } + } + }) +} + +// TestProperty_SequentialExecution_CountEqualsEnabled verifies that the number of executions +// equals the number of enabled hooks, confirming each hook completes before the next begins. 
+func TestProperty_SequentialExecution_CountEqualsEnabled(t *testing.T) { + // Feature: partition-hooks, Property 4: Sequential Execution Order + rapid.Check(t, func(t *rapid.T) { + event := rapid.SampledFrom(allLifecycleEvents()).Draw(t, "event") + + // Generate a mix of enabled and disabled hooks (at least 2 enabled) + numEnabled := rapid.IntRange(2, 6).Draw(t, "numEnabled") + numDisabled := rapid.IntRange(0, 4).Draw(t, "numDisabled") + totalHooks := numEnabled + numDisabled + + hooks := make([]HookEntry, 0, totalHooks) + enabledCount := 0 + disabledCount := 0 + + // Interleave enabled and disabled hooks randomly + for i := 0; i < totalHooks; i++ { + placeDisabled := false + if disabledCount < numDisabled && enabledCount < numEnabled { + placeDisabled = rapid.Bool().Draw(t, "placeDisabled") + } else if disabledCount < numDisabled { + placeDisabled = true + } + + if placeDisabled { + hook := genValidHookEntry(t, "dis", false) + hook.Name = "disabled-" + hook.Name + "-" + string(rune('a'+i)) + hooks = append(hooks, hook) + disabledCount++ + } else { + hook := genValidHookEntry(t, "en", true) + hook.Name = "enabled-" + hook.Name + "-" + string(rune('a'+i)) + hooks = append(hooks, hook) + enabledCount++ + } + } + + runner := &orchestratorMockRunner{} + cfg := buildHooksConfigForEvent(event, hooks) + orch := newTestOrchestrator(cfg, runner) + + err := executeEventOnOrchestrator(context.Background(), orch, event, newTestPartitionContext()) + + // No error expected (all enabled hooks succeed with mock runner) + if err != nil { + t.Fatalf("expected no error, got: %v", err) + } + + // The number of executions must equal the number of enabled hooks + if int(runner.callCount.Load()) != numEnabled { + t.Fatalf("expected %d executions (one per enabled hook), got %d", numEnabled, runner.callCount.Load()) + } + + // executedHooks slice length must match callCount (sequential, no concurrent writes) + if len(runner.executedHooks) != int(runner.callCount.Load()) { + 
t.Fatalf("executedHooks length (%d) does not match callCount (%d), suggesting non-sequential execution", + len(runner.executedHooks), runner.callCount.Load()) + } + }) +} + +// TestProperty_SequentialExecution_AllLifecycleEvents verifies that sequential execution +// holds across all lifecycle events (before-detach, after-detach, before-drop, after-drop). +func TestProperty_SequentialExecution_AllLifecycleEvents(t *testing.T) { + // Feature: partition-hooks, Property 4: Sequential Execution Order + rapid.Check(t, func(t *rapid.T) { + // Test each lifecycle event explicitly + for _, event := range allLifecycleEvents() { + numHooks := rapid.IntRange(2, 5).Draw(t, "numHooks_"+string(event)) + hooks := make([]HookEntry, 0, numHooks) + expectedOrder := make([]string, 0, numHooks) + + for i := 0; i < numHooks; i++ { + hook := genValidHookEntry(t, "hook_"+string(event), true) + hook.Name = string(event) + "-hook-" + string(rune('a'+i)) + hooks = append(hooks, hook) + expectedOrder = append(expectedOrder, hook.Name) + } + + runner := &orchestratorMockRunner{} + cfg := buildHooksConfigForEvent(event, hooks) + orch := newTestOrchestrator(cfg, runner) + + err := executeEventOnOrchestrator(context.Background(), orch, event, newTestPartitionContext()) + + if err != nil { + t.Fatalf("event %s: expected no error, got: %v", event, err) + } + + // Verify count matches + if int(runner.callCount.Load()) != numHooks { + t.Fatalf("event %s: expected %d executions, got %d", event, numHooks, runner.callCount.Load()) + } + + // Verify order matches definition order + for i, expectedName := range expectedOrder { + if runner.executedHooks[i] != expectedName { + t.Fatalf("event %s: order mismatch at position %d: expected %q, got %q", + event, i, expectedName, runner.executedHooks[i]) + } + } + } + }) +} + +// Feature: partition-hooks, Property 5: Before-Hook Failure Cancels Operation + +// **Validates: Requirements 6.1, 9.2, 11.2** +// +// Property 5: Before-Hook Failure Cancels Operation 
+// For any before-hook (before-detach or before-drop) that fails after all retry attempts +// with default on_failure behavior, the associated operation (detach or drop) SHALL NOT +// be executed for the affected partition. + +// beforeHookEvents returns only the before-* lifecycle events. +func beforeHookEvents() []LifecycleEvent { + return []LifecycleEvent{BeforeDetach, BeforeDrop} +} + +// TestProperty_BeforeHookFailureCancelsOperation_ReturnsError verifies that for any +// before-hook (before-detach or before-drop) that fails with default on_failure behavior, +// the orchestrator returns a non-nil error signaling the operation should be cancelled. +func TestProperty_BeforeHookFailureCancelsOperation_ReturnsError(t *testing.T) { + // Feature: partition-hooks, Property 5: Before-Hook Failure Cancels Operation + rapid.Check(t, func(t *rapid.T) { + event := rapid.SampledFrom(beforeHookEvents()).Draw(t, "event") + + // Generate a random number of hooks with the failing hook at a random position + numHooks := rapid.IntRange(1, 5).Draw(t, "numHooks") + failIdx := rapid.IntRange(0, numHooks-1).Draw(t, "failIdx") + + hooks := make([]HookEntry, 0, numHooks) + var failingHookName string + + for i := 0; i < numHooks; i++ { + hook := genValidHookEntry(t, "hook", true) + hook.Name = "hook-" + string(rune('a'+i)) + // Ensure default on_failure (empty string) for all hooks + hook.OnFailure = "" + hooks = append(hooks, hook) + + if i == failIdx { + failingHookName = hook.Name + } + } + + runner := &orchestratorMockRunner{failOnHook: failingHookName} + cfg := buildHooksConfigForEvent(event, hooks) + orch := newTestOrchestrator(cfg, runner) + + err := executeEventOnOrchestrator(context.Background(), orch, event, newTestPartitionContext()) + + // Before-hook failure with default on_failure MUST return a non-nil error + // signaling the associated operation should be cancelled + if err == nil { + t.Fatalf("expected non-nil error when before-hook %q fails with default on_failure, 
got nil (event=%s)", + failingHookName, event) + } + }) +} + +// TestProperty_BeforeHookFailureCancelsOperation_NotAbort verifies that the error returned +// by a failing before-hook with default on_failure is NOT an ErrAbort. The default behavior +// is cancellation of the single operation, not aborting the entire cleanup process. +func TestProperty_BeforeHookFailureCancelsOperation_NotAbort(t *testing.T) { + // Feature: partition-hooks, Property 5: Before-Hook Failure Cancels Operation + rapid.Check(t, func(t *rapid.T) { + event := rapid.SampledFrom(beforeHookEvents()).Draw(t, "event") + + // Generate hooks with the failing hook at a random position + numHooks := rapid.IntRange(1, 5).Draw(t, "numHooks") + failIdx := rapid.IntRange(0, numHooks-1).Draw(t, "failIdx") + + hooks := make([]HookEntry, 0, numHooks) + var failingHookName string + + for i := 0; i < numHooks; i++ { + hook := genValidHookEntry(t, "hook", true) + hook.Name = "hook-" + string(rune('a'+i)) + // Ensure default on_failure (empty string) — NOT abort, NOT continue + hook.OnFailure = "" + hooks = append(hooks, hook) + + if i == failIdx { + failingHookName = hook.Name + } + } + + runner := &orchestratorMockRunner{failOnHook: failingHookName} + cfg := buildHooksConfigForEvent(event, hooks) + orch := newTestOrchestrator(cfg, runner) + + err := executeEventOnOrchestrator(context.Background(), orch, event, newTestPartitionContext()) + + // The error must NOT be an ErrAbort — default cancellation is different from abort + if errors.Is(err, ErrAbort) { + t.Fatalf("expected non-abort error for default on_failure, but got ErrAbort (event=%s, hook=%s)", + event, failingHookName) + } + }) +} + +// TestProperty_BeforeHookFailureCancelsOperation_TracksFailure verifies that when a +// before-hook fails with default on_failure, the orchestrator tracks the failure via HasFailures(). 
+func TestProperty_BeforeHookFailureCancelsOperation_TracksFailure(t *testing.T) { + // Feature: partition-hooks, Property 5: Before-Hook Failure Cancels Operation + rapid.Check(t, func(t *rapid.T) { + event := rapid.SampledFrom(beforeHookEvents()).Draw(t, "event") + + // Generate hooks with the failing hook at a random position + numHooks := rapid.IntRange(1, 5).Draw(t, "numHooks") + failIdx := rapid.IntRange(0, numHooks-1).Draw(t, "failIdx") + + hooks := make([]HookEntry, 0, numHooks) + var failingHookName string + + for i := 0; i < numHooks; i++ { + hook := genValidHookEntry(t, "hook", true) + hook.Name = "hook-" + string(rune('a'+i)) + // Ensure default on_failure (empty string) + hook.OnFailure = "" + hooks = append(hooks, hook) + + if i == failIdx { + failingHookName = hook.Name + } + } + + runner := &orchestratorMockRunner{failOnHook: failingHookName} + cfg := buildHooksConfigForEvent(event, hooks) + orch := newTestOrchestrator(cfg, runner) + + _ = executeEventOnOrchestrator(context.Background(), orch, event, newTestPartitionContext()) + + // The orchestrator MUST track the failure + if !orch.HasFailures() { + t.Fatalf("expected HasFailures()=true after before-hook %q failed (event=%s)", + failingHookName, event) + } + }) +} + +// Feature: partition-hooks, Property 6: Hook Failure Short-Circuits Event + +// **Validates: Requirements 6.2, 6.6, 9.3, 10.4, 11.3, 12.3** +// +// Property 6: Hook Failure Short-Circuits Event +// For any list of hooks in a lifecycle event where hook at position N fails, +// all hooks at positions N+1 through the end of the list SHALL NOT be executed. + +// TestProperty_HookFailureShortCircuitsEvent_OnlyHooksUpToFailAreExecuted verifies that +// when a hook at position N fails, only hooks at positions 0..N are executed and hooks +// at positions N+1..end are NOT executed. 
+func TestProperty_HookFailureShortCircuitsEvent_OnlyHooksUpToFailAreExecuted(t *testing.T) { + // Feature: partition-hooks, Property 6: Hook Failure Short-Circuits Event + rapid.Check(t, func(t *rapid.T) { + event := rapid.SampledFrom(allLifecycleEvents()).Draw(t, "event") + + // Generate N >= 2 hooks with unique names + numHooks := rapid.IntRange(2, 8).Draw(t, "numHooks") + failIdx := rapid.IntRange(0, numHooks-1).Draw(t, "failIdx") + + hooks := make([]HookEntry, 0, numHooks) + hookNames := make([]string, 0, numHooks) + + for i := 0; i < numHooks; i++ { + hook := genValidHookEntry(t, "hook", true) + hook.Name = "hook-" + string(rune('a'+i)) + // Default on_failure (empty string) to test default short-circuit behavior + hook.OnFailure = "" + hooks = append(hooks, hook) + hookNames = append(hookNames, hook.Name) + } + + failingHookName := hookNames[failIdx] + runner := &orchestratorMockRunner{failOnHook: failingHookName} + cfg := buildHooksConfigForEvent(event, hooks) + orch := newTestOrchestrator(cfg, runner) + + _ = executeEventOnOrchestrator(context.Background(), orch, event, newTestPartitionContext()) + + // Only hooks 0..failIdx should have been executed + expectedExecuted := hookNames[:failIdx+1] + + if len(runner.executedHooks) != len(expectedExecuted) { + t.Fatalf("expected %d hooks executed (0..%d), got %d: %v (event=%s)", + len(expectedExecuted), failIdx, len(runner.executedHooks), runner.executedHooks, event) + } + + for i, name := range expectedExecuted { + if runner.executedHooks[i] != name { + t.Fatalf("expected hook at position %d to be %q, got %q (event=%s)", + i, name, runner.executedHooks[i], event) + } + } + + // Verify hooks after failIdx are NOT in executedHooks + skippedHooks := hookNames[failIdx+1:] + for _, skipped := range skippedHooks { + for _, executed := range runner.executedHooks { + if executed == skipped { + t.Fatalf("hook %q at position after fail index should NOT have been executed (event=%s)", + skipped, event) + } + } + } + }) +} 
+ +// TestProperty_HookFailureShortCircuitsEvent_ExecutionCountEqualsFailIdxPlusOne verifies that +// the number of executed hooks equals N+1 where N is the fail position (0-indexed). +func TestProperty_HookFailureShortCircuitsEvent_ExecutionCountEqualsFailIdxPlusOne(t *testing.T) { + // Feature: partition-hooks, Property 6: Hook Failure Short-Circuits Event + rapid.Check(t, func(t *rapid.T) { + event := rapid.SampledFrom(allLifecycleEvents()).Draw(t, "event") + + // Generate N >= 2 hooks with unique names + numHooks := rapid.IntRange(2, 8).Draw(t, "numHooks") + failIdx := rapid.IntRange(0, numHooks-1).Draw(t, "failIdx") + + hooks := make([]HookEntry, 0, numHooks) + + for i := 0; i < numHooks; i++ { + hook := genValidHookEntry(t, "hook", true) + hook.Name = "hook-" + string(rune('a'+i)) + hook.OnFailure = "" + hooks = append(hooks, hook) + } + + failingHookName := hooks[failIdx].Name + runner := &orchestratorMockRunner{failOnHook: failingHookName} + cfg := buildHooksConfigForEvent(event, hooks) + orch := newTestOrchestrator(cfg, runner) + + _ = executeEventOnOrchestrator(context.Background(), orch, event, newTestPartitionContext()) + + // The number of executed hooks must equal failIdx + 1 + expectedCount := int32(failIdx + 1) + if runner.callCount.Load() != expectedCount { + t.Fatalf("expected callCount=%d (failIdx=%d + 1), got %d (event=%s)", + expectedCount, failIdx, runner.callCount.Load(), event) + } + }) +} + +// TestProperty_HookFailureShortCircuitsEvent_AllLifecycleEvents verifies that the +// short-circuit behavior applies to ALL lifecycle events (before-detach, after-detach, +// before-drop, after-drop). 
+func TestProperty_HookFailureShortCircuitsEvent_AllLifecycleEvents(t *testing.T) { + // Feature: partition-hooks, Property 6: Hook Failure Short-Circuits Event + rapid.Check(t, func(t *rapid.T) { + // Test each lifecycle event explicitly + for _, event := range allLifecycleEvents() { + numHooks := rapid.IntRange(2, 5).Draw(t, "numHooks_"+string(event)) + failIdx := rapid.IntRange(0, numHooks-1).Draw(t, "failIdx_"+string(event)) + + hooks := make([]HookEntry, 0, numHooks) + + for i := 0; i < numHooks; i++ { + hook := genValidHookEntry(t, "hook_"+string(event), true) + hook.Name = string(event) + "-hook-" + string(rune('a'+i)) + hook.OnFailure = "" + hooks = append(hooks, hook) + } + + failingHookName := hooks[failIdx].Name + runner := &orchestratorMockRunner{failOnHook: failingHookName} + cfg := buildHooksConfigForEvent(event, hooks) + orch := newTestOrchestrator(cfg, runner) + + _ = executeEventOnOrchestrator(context.Background(), orch, event, newTestPartitionContext()) + + // Verify short-circuit: only failIdx+1 hooks executed + expectedCount := int32(failIdx + 1) + if runner.callCount.Load() != expectedCount { + t.Fatalf("event %s: expected %d hooks executed, got %d (failIdx=%d)", + event, expectedCount, runner.callCount.Load(), failIdx) + } + + // Verify the last executed hook is the failing one + if len(runner.executedHooks) == 0 { + t.Fatalf("event %s: no hooks were executed", event) + } + + lastExecuted := runner.executedHooks[len(runner.executedHooks)-1] + if lastExecuted != failingHookName { + t.Fatalf("event %s: expected last executed hook to be %q (the failing one), got %q", + event, failingHookName, lastExecuted) + } + } + }) +} + +// TestProperty_HookFailureShortCircuitsEvent_HasFailuresTrue verifies that the orchestrator +// reports HasFailures() = true after a hook failure causes short-circuit. 
+func TestProperty_HookFailureShortCircuitsEvent_HasFailuresTrue(t *testing.T) { + // Feature: partition-hooks, Property 6: Hook Failure Short-Circuits Event + rapid.Check(t, func(t *rapid.T) { + event := rapid.SampledFrom(allLifecycleEvents()).Draw(t, "event") + + // Generate N >= 2 hooks with unique names + numHooks := rapid.IntRange(2, 8).Draw(t, "numHooks") + failIdx := rapid.IntRange(0, numHooks-1).Draw(t, "failIdx") + + hooks := make([]HookEntry, 0, numHooks) + + for i := 0; i < numHooks; i++ { + hook := genValidHookEntry(t, "hook", true) + hook.Name = "hook-" + string(rune('a'+i)) + hook.OnFailure = "" + hooks = append(hooks, hook) + } + + failingHookName := hooks[failIdx].Name + runner := &orchestratorMockRunner{failOnHook: failingHookName} + cfg := buildHooksConfigForEvent(event, hooks) + orch := newTestOrchestrator(cfg, runner) + + _ = executeEventOnOrchestrator(context.Background(), orch, event, newTestPartitionContext()) + + // The orchestrator MUST report HasFailures() = true after short-circuit + if !orch.HasFailures() { + t.Fatalf("expected HasFailures()=true after hook %q failed and short-circuited (event=%s, failIdx=%d)", + failingHookName, event, failIdx) + } + }) +} + +// Feature: partition-hooks, Property 7: After-Detach Failure Skips Drop + +// **Validates: Requirements 6.5, 10.3** +// +// Property 7: After-Detach Failure Skips Drop +// For any partition with cleanup policy "drop", if any after-detach hook fails after all retry attempts, +// the drop operation and all drop-related hooks (before-drop, after-drop) SHALL NOT be executed for that partition. +// +// The orchestrator's contract: ExecuteAfterDetach returns a non-nil error when an after-detach hook fails, +// signaling the caller (cleanup flow) to skip the drop operation. 
+ +// TestProperty_AfterDetachFailureSkipsDrop_ReturnsError verifies that when any after-detach hook +// fails with default on_failure behavior, ExecuteAfterDetach returns a non-nil error (which the +// caller uses to skip the drop operation). +func TestProperty_AfterDetachFailureSkipsDrop_ReturnsError(t *testing.T) { + // Feature: partition-hooks, Property 7: After-Detach Failure Skips Drop + rapid.Check(t, func(t *rapid.T) { + // Generate a random number of after-detach hooks with the failing hook at a random position + numHooks := rapid.IntRange(1, 5).Draw(t, "numHooks") + failIdx := rapid.IntRange(0, numHooks-1).Draw(t, "failIdx") + + hooks := make([]HookEntry, 0, numHooks) + var failingHookName string + + for i := 0; i < numHooks; i++ { + hook := genValidHookEntry(t, "hook", true) + hook.Name = "hook-" + string(rune('a'+i)) + // Ensure default on_failure (empty string) for all hooks + hook.OnFailure = "" + hooks = append(hooks, hook) + + if i == failIdx { + failingHookName = hook.Name + } + } + + runner := &orchestratorMockRunner{failOnHook: failingHookName} + cfg := buildHooksConfigForEvent(AfterDetach, hooks) + orch := newTestOrchestrator(cfg, runner) + + err := orch.ExecuteAfterDetach(context.Background(), newTestPartitionContext()) + + // After-detach hook failure MUST return a non-nil error + // signaling the caller to skip the drop operation + if err == nil { + t.Fatalf("expected non-nil error when after-detach hook %q fails with default on_failure, got nil", + failingHookName) + } + }) +} + +// TestProperty_AfterDetachFailureSkipsDrop_NotAbort verifies that the error returned by a +// failing after-detach hook with default on_failure is NOT an ErrAbort. The default behavior +// for after-hooks is informational (skip drop for this partition), not aborting the entire process. 
+func TestProperty_AfterDetachFailureSkipsDrop_NotAbort(t *testing.T) { + // Feature: partition-hooks, Property 7: After-Detach Failure Skips Drop + rapid.Check(t, func(t *rapid.T) { + // Generate hooks with the failing hook at a random position + numHooks := rapid.IntRange(1, 5).Draw(t, "numHooks") + failIdx := rapid.IntRange(0, numHooks-1).Draw(t, "failIdx") + + hooks := make([]HookEntry, 0, numHooks) + var failingHookName string + + for i := 0; i < numHooks; i++ { + hook := genValidHookEntry(t, "hook", true) + hook.Name = "hook-" + string(rune('a'+i)) + // Ensure default on_failure (empty string) — NOT abort, NOT continue + hook.OnFailure = "" + hooks = append(hooks, hook) + + if i == failIdx { + failingHookName = hook.Name + } + } + + runner := &orchestratorMockRunner{failOnHook: failingHookName} + cfg := buildHooksConfigForEvent(AfterDetach, hooks) + orch := newTestOrchestrator(cfg, runner) + + err := orch.ExecuteAfterDetach(context.Background(), newTestPartitionContext()) + + // The error must NOT be an ErrAbort — default after-hook failure is informational + if errors.Is(err, ErrAbort) { + t.Fatalf("expected non-abort error for default on_failure on after-detach, but got ErrAbort (hook=%s)", + failingHookName) + } + }) +} + +// TestProperty_AfterDetachFailureSkipsDrop_TracksFailure verifies that when an after-detach +// hook fails with default on_failure, the orchestrator tracks the failure via HasFailures(). 
+func TestProperty_AfterDetachFailureSkipsDrop_TracksFailure(t *testing.T) { + // Feature: partition-hooks, Property 7: After-Detach Failure Skips Drop + rapid.Check(t, func(t *rapid.T) { + // Generate hooks with the failing hook at a random position + numHooks := rapid.IntRange(1, 5).Draw(t, "numHooks") + failIdx := rapid.IntRange(0, numHooks-1).Draw(t, "failIdx") + + hooks := make([]HookEntry, 0, numHooks) + var failingHookName string + + for i := 0; i < numHooks; i++ { + hook := genValidHookEntry(t, "hook", true) + hook.Name = "hook-" + string(rune('a'+i)) + // Ensure default on_failure (empty string) + hook.OnFailure = "" + hooks = append(hooks, hook) + + if i == failIdx { + failingHookName = hook.Name + } + } + + runner := &orchestratorMockRunner{failOnHook: failingHookName} + cfg := buildHooksConfigForEvent(AfterDetach, hooks) + orch := newTestOrchestrator(cfg, runner) + + _ = orch.ExecuteAfterDetach(context.Background(), newTestPartitionContext()) + + // The orchestrator MUST track the failure + if !orch.HasFailures() { + t.Fatalf("expected HasFailures()=true after after-detach hook %q failed", + failingHookName) + } + }) +} + +// Feature: partition-hooks, Property 8: Operation Failure Skips After-Hooks + +// **Validates: Requirements 10.2, 12.2** +// +// Property 8: Operation Failure Skips After-Hooks +// For any partition where the detach operation fails, all after-detach hooks SHALL NOT be executed. +// Similarly, for any partition where the drop operation fails, all after-drop hooks SHALL NOT be executed. +// +// This property tests the CALLER's contract: the orchestrator's ExecuteAfterDetach and ExecuteAfterDrop +// methods are only called by the cleanup flow when the operation succeeds. When the operation fails, +// the caller simply does NOT call these methods. The test verifies that after-hooks configured in the +// HooksConfig are NOT automatically executed; they require explicit invocation. 
+ +// TestProperty_OperationFailureSkipsAfterHooks_AfterDetachNotCalledMeansNoExecution verifies that +// when ExecuteAfterDetach is NOT called (simulating a failed detach operation), no after-detach hooks +// are executed, even though they are configured in the HooksConfig. +func TestProperty_OperationFailureSkipsAfterHooks_AfterDetachNotCalledMeansNoExecution(t *testing.T) { + // Feature: partition-hooks, Property 8: Operation Failure Skips After-Hooks + rapid.Check(t, func(t *rapid.T) { + // Generate a random number of after-detach hooks + numHooks := rapid.IntRange(1, 5).Draw(t, "numHooks") + hooks := make([]HookEntry, 0, numHooks) + + for i := 0; i < numHooks; i++ { + hook := genValidHookEntry(t, "hook", true) + hook.Name = "after-detach-hook-" + string(rune('a'+i)) + hooks = append(hooks, hook) + } + + runner := &orchestratorMockRunner{} + cfg := &HooksConfig{ + AfterDetach: hooks, + } + orch := newTestOrchestrator(cfg, runner) + + // Simulate a failed detach operation: the caller does NOT call ExecuteAfterDetach. + // We only call ExecuteBeforeDetach (which has no hooks configured) to show the + // orchestrator is active but after-detach hooks are not auto-triggered. 
+ err := orch.ExecuteBeforeDetach(context.Background(), newTestPartitionContext()) + + // No error expected from before-detach (no hooks configured for that event) + if err != nil { + t.Fatalf("expected no error from ExecuteBeforeDetach with no before-detach hooks, got: %v", err) + } + + // The key assertion: no after-detach hooks were executed because ExecuteAfterDetach was never called + if runner.callCount.Load() != 0 { + t.Fatalf("expected 0 hook executions (simulating failed detach, ExecuteAfterDetach not called), got %d: %v", + runner.callCount.Load(), runner.executedHooks) + } + }) +} + +// TestProperty_OperationFailureSkipsAfterHooks_AfterDropNotCalledMeansNoExecution verifies that +// when ExecuteAfterDrop is NOT called (simulating a failed drop operation), no after-drop hooks +// are executed, even though they are configured in the HooksConfig. +func TestProperty_OperationFailureSkipsAfterHooks_AfterDropNotCalledMeansNoExecution(t *testing.T) { + // Feature: partition-hooks, Property 8: Operation Failure Skips After-Hooks + rapid.Check(t, func(t *rapid.T) { + // Generate a random number of after-drop hooks + numHooks := rapid.IntRange(1, 5).Draw(t, "numHooks") + hooks := make([]HookEntry, 0, numHooks) + + for i := 0; i < numHooks; i++ { + hook := genValidHookEntry(t, "hook", true) + hook.Name = "after-drop-hook-" + string(rune('a'+i)) + hooks = append(hooks, hook) + } + + runner := &orchestratorMockRunner{} + cfg := &HooksConfig{ + AfterDrop: hooks, + } + orch := newTestOrchestrator(cfg, runner) + + // Simulate a failed drop operation: the caller does NOT call ExecuteAfterDrop. + // We only call ExecuteBeforeDrop (which has no hooks configured) to show the + // orchestrator is active but after-drop hooks are not auto-triggered. 
+ err := orch.ExecuteBeforeDrop(context.Background(), newTestPartitionContext()) + + // No error expected from before-drop (no hooks configured for that event) + if err != nil { + t.Fatalf("expected no error from ExecuteBeforeDrop with no before-drop hooks, got: %v", err) + } + + // The key assertion: no after-drop hooks were executed because ExecuteAfterDrop was never called + if runner.callCount.Load() != 0 { + t.Fatalf("expected 0 hook executions (simulating failed drop, ExecuteAfterDrop not called), got %d: %v", + runner.callCount.Load(), runner.executedHooks) + } + }) +} + +// TestProperty_OperationFailureSkipsAfterHooks_ExplicitCallRequired verifies that after-hooks +// require explicit invocation — they are NOT automatically triggered by the orchestrator. +// This confirms the orchestrator correctly supports the pattern where the caller controls +// whether after-hooks run based on operation success/failure. +func TestProperty_OperationFailureSkipsAfterHooks_ExplicitCallRequired(t *testing.T) { + // Feature: partition-hooks, Property 8: Operation Failure Skips After-Hooks + rapid.Check(t, func(t *rapid.T) { + // Generate hooks for ALL lifecycle events + numAfterDetach := rapid.IntRange(1, 4).Draw(t, "numAfterDetach") + numAfterDrop := rapid.IntRange(1, 4).Draw(t, "numAfterDrop") + + afterDetachHooks := make([]HookEntry, 0, numAfterDetach) + for i := 0; i < numAfterDetach; i++ { + hook := genValidHookEntry(t, "ad_hook", true) + hook.Name = "after-detach-" + string(rune('a'+i)) + afterDetachHooks = append(afterDetachHooks, hook) + } + + afterDropHooks := make([]HookEntry, 0, numAfterDrop) + for i := 0; i < numAfterDrop; i++ { + hook := genValidHookEntry(t, "adrop_hook", true) + hook.Name = "after-drop-" + string(rune('a'+i)) + afterDropHooks = append(afterDropHooks, hook) + } + + runner := &orchestratorMockRunner{} + cfg := &HooksConfig{ + AfterDetach: afterDetachHooks, + AfterDrop: afterDropHooks, + } + orch := newTestOrchestrator(cfg, runner) + + // 
Simulate: both detach and drop operations fail. + // The caller does NOT call ExecuteAfterDetach or ExecuteAfterDrop. + // Instead, only before-* methods are called (which have no hooks configured). + _ = orch.ExecuteBeforeDetach(context.Background(), newTestPartitionContext()) + _ = orch.ExecuteBeforeDrop(context.Background(), newTestPartitionContext()) + + // No after-hooks should have been executed + if runner.callCount.Load() != 0 { + t.Fatalf("expected 0 hook executions when after-hook methods are not called (simulating operation failures), got %d: %v", + runner.callCount.Load(), runner.executedHooks) + } + + // Now verify that calling ExecuteAfterDetach DOES execute the hooks (proving explicit call is required) + err := orch.ExecuteAfterDetach(context.Background(), newTestPartitionContext()) + if err != nil { + t.Fatalf("expected no error from ExecuteAfterDetach, got: %v", err) + } + + // After explicit call, after-detach hooks should have been executed + if int(runner.callCount.Load()) != numAfterDetach { + t.Fatalf("expected %d after-detach hooks executed after explicit call, got %d: %v", + numAfterDetach, runner.callCount.Load(), runner.executedHooks) + } + }) +} + +// Feature: partition-hooks, Property 10: Partition Isolation + +// **Validates: Requirements 6.3** +// +// Property 10: Partition Isolation +// For any set of partitions being cleaned up, a hook failure for one partition SHALL NOT +// prevent the processing of other partitions (unless `on_failure` is set to "abort"). + +// TestProperty_PartitionIsolation_FailureForOneDoesNotPreventOthers verifies that when a hook +// fails for one partition, the orchestrator can still successfully execute hooks for other partitions. +// This simulates the cleanup flow iterating over multiple partitions. 
+func TestProperty_PartitionIsolation_FailureForOneDoesNotPreventOthers(t *testing.T) { + // Feature: partition-hooks, Property 10: Partition Isolation + rapid.Check(t, func(t *rapid.T) { + event := rapid.SampledFrom(allLifecycleEvents()).Draw(t, "event") + + // Generate multiple partitions (at least 2) + numPartitions := rapid.IntRange(2, 6).Draw(t, "numPartitions") + + // Pick one partition to fail + failPartitionIdx := rapid.IntRange(0, numPartitions-1).Draw(t, "failPartitionIdx") + + // Generate partition contexts with unique names + partitions := make([]PartitionContext, 0, numPartitions) + for i := 0; i < numPartitions; i++ { + p := PartitionContext{ + Schema: "public", + Table: fmt.Sprintf("events_%d", i), + ParentTable: "events", + LowerBound: "2024-01-01", + UpperBound: "2024-02-01", + PartitionName: fmt.Sprintf("partition_%d", i), + Retention: "30", + Interval: "daily", + DatabaseName: "mydb", + Hostname: "localhost", + } + partitions = append(partitions, p) + } + + // Generate hooks — use a hook name that includes the partition name so we can + // configure the mock runner to fail only for the specific partition's hook + numHooks := rapid.IntRange(1, 3).Draw(t, "numHooks") + + // Track which partitions were successfully processed + successfulPartitions := 0 + + // Simulate the cleanup flow: iterate over partitions, calling the orchestrator for each + for i, partition := range partitions { + // Create a fresh orchestrator for each partition (as the cleanup flow would do + // after resolving hooks per partition) + hooks := make([]HookEntry, 0, numHooks) + var failingHookName string + + for j := 0; j < numHooks; j++ { + hookName := fmt.Sprintf("hook-%s-%d", partition.PartitionName, j) + hook := HookEntry{ + Name: hookName, + Type: ShellType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + OnFailure: "", // default behavior (NOT abort) + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: 
DefaultMaxDelay}, + Config: map[string]interface{}{"command": "/usr/bin/echo"}, + } + hooks = append(hooks, hook) + + // Make the first hook of the failing partition fail + if i == failPartitionIdx && j == 0 { + failingHookName = hookName + } + } + + runner := &orchestratorMockRunner{failOnHook: failingHookName} + cfg := buildHooksConfigForEvent(event, hooks) + orch := newTestOrchestrator(cfg, runner) + + err := executeEventOnOrchestrator(context.Background(), orch, event, partition) + + if i == failPartitionIdx { + // This partition's hook should fail + if err == nil { + t.Fatalf("expected error for failing partition %d, got nil", i) + } + // But it should NOT be an abort error (default on_failure) + if errors.Is(err, ErrAbort) { + t.Fatalf("expected non-abort error for partition %d with default on_failure, got ErrAbort", i) + } + } else { + // Other partitions should succeed + if err != nil { + t.Fatalf("partition %d should not be affected by partition %d's failure, but got error: %v", + i, failPartitionIdx, err) + } + successfulPartitions++ + } + } + + // Verify that all non-failing partitions were successfully processed + expectedSuccessful := numPartitions - 1 + if successfulPartitions != expectedSuccessful { + t.Fatalf("expected %d successful partitions, got %d", expectedSuccessful, successfulPartitions) + } + }) +} + +// partitionAwareMockRunner fails only when executing hooks for a specific partition. 
+type partitionAwareMockRunner struct { + failForPartition string // partition name that should trigger failure + executedForPartitions []string +} + +func (r *partitionAwareMockRunner) Run(_ context.Context, hook *ResolvedHook) error { + r.executedForPartitions = append(r.executedForPartitions, hook.PartitionContext.PartitionName) + + if r.failForPartition != "" && hook.PartitionContext.PartitionName == r.failForPartition { + return fmt.Errorf("hook %q for partition %q: %w", hook.Name, hook.PartitionContext.PartitionName, errSimulatedFailure) + } + + return nil +} + +// TestProperty_PartitionIsolation_SharedOrchestratorStillProcesses verifies that even when +// using a shared orchestrator instance (which tracks hasFailure state), subsequent partition +// hook executions still proceed. The hasFailure flag is informational and does NOT block execution. +func TestProperty_PartitionIsolation_SharedOrchestratorStillProcesses(t *testing.T) { + // Feature: partition-hooks, Property 10: Partition Isolation + rapid.Check(t, func(t *rapid.T) { + event := rapid.SampledFrom(allLifecycleEvents()).Draw(t, "event") + + // Generate multiple partitions (at least 3 to have partitions before and after the failure) + numPartitions := rapid.IntRange(3, 6).Draw(t, "numPartitions") + + // Pick one partition to fail (not the last one, so we can verify subsequent processing) + failPartitionIdx := rapid.IntRange(0, numPartitions-2).Draw(t, "failPartitionIdx") + + // Generate partition contexts + partitions := make([]PartitionContext, 0, numPartitions) + for i := 0; i < numPartitions; i++ { + p := PartitionContext{ + Schema: "public", + Table: fmt.Sprintf("events_%d", i), + ParentTable: "events", + LowerBound: "2024-01-01", + UpperBound: "2024-02-01", + PartitionName: fmt.Sprintf("partition_%d", i), + Retention: "30", + Interval: "daily", + DatabaseName: "mydb", + Hostname: "localhost", + } + partitions = append(partitions, p) + } + + // Create a partition-aware runner that only fails for 
the specific partition + failingPartitionName := fmt.Sprintf("partition_%d", failPartitionIdx) + runner := &partitionAwareMockRunner{failForPartition: failingPartitionName} + + // Create hooks config with a single hook + hookEntry := HookEntry{ + Name: "test-hook", + Type: ShellType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + OnFailure: "", // default behavior (NOT abort) + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + Config: map[string]interface{}{"command": "/usr/bin/echo"}, + } + + cfg := buildHooksConfigForEvent(event, []HookEntry{hookEntry}) + + logger := newOrchestratorTestLogger() + executor := NewExecutor(runner, logger) + metrics := NewMetricsCollector(logger) + orch := NewOrchestrator(cfg, executor, metrics, logger, "postgresql://user:pass@localhost:5432/mydb") + + // Simulate the cleanup flow with a shared orchestrator + processedAfterFailure := 0 + + for i, partition := range partitions { + err := executeEventOnOrchestrator(context.Background(), orch, event, partition) + + if i == failPartitionIdx { + // This partition should fail + if err == nil { + t.Fatalf("expected error for failing partition %d, got nil", i) + } + } else if i > failPartitionIdx { + // Partitions AFTER the failure should still be processed + if err != nil { + t.Fatalf("partition %d (after failure at %d) should still be processed, but got error: %v", + i, failPartitionIdx, err) + } + processedAfterFailure++ + } + } + + // Verify that partitions after the failure were still processed + expectedAfterFailure := numPartitions - failPartitionIdx - 1 + if processedAfterFailure != expectedAfterFailure { + t.Fatalf("expected %d partitions processed after failure, got %d", + expectedAfterFailure, processedAfterFailure) + } + + // The orchestrator should report failures (from the failed partition) + if !orch.HasFailures() { + t.Fatal("expected HasFailures()=true after one partition's hook failed") + } + + 
// Verify that the runner was called for ALL partitions (not just the ones before failure) + if len(runner.executedForPartitions) != numPartitions { + t.Fatalf("expected runner to be called for all %d partitions, got %d calls", + numPartitions, len(runner.executedForPartitions)) + } + }) +} + +// TestProperty_PartitionIsolation_AbortPreventsSubsequentPartitions verifies the exception: +// when on_failure=abort is set and a hook fails, subsequent partitions SHALL NOT be processed. +// This confirms the "unless on_failure is set to abort" clause in the property. +func TestProperty_PartitionIsolation_AbortPreventsSubsequentPartitions(t *testing.T) { + // Feature: partition-hooks, Property 10: Partition Isolation + rapid.Check(t, func(t *rapid.T) { + event := rapid.SampledFrom(allLifecycleEvents()).Draw(t, "event") + + // Generate multiple partitions + numPartitions := rapid.IntRange(2, 5).Draw(t, "numPartitions") + + // Pick one partition to fail (not the last one) + failPartitionIdx := rapid.IntRange(0, numPartitions-2).Draw(t, "failPartitionIdx") + + // Generate partition contexts + partitions := make([]PartitionContext, 0, numPartitions) + for i := 0; i < numPartitions; i++ { + p := PartitionContext{ + Schema: "public", + Table: fmt.Sprintf("events_%d", i), + ParentTable: "events", + LowerBound: "2024-01-01", + UpperBound: "2024-02-01", + PartitionName: fmt.Sprintf("partition_%d", i), + Retention: "30", + Interval: "daily", + DatabaseName: "mydb", + Hostname: "localhost", + } + partitions = append(partitions, p) + } + + // Simulate the cleanup flow: when abort is received, stop processing + abortReceived := false + processedAfterAbort := 0 + + for i, partition := range partitions { + if abortReceived { + // After abort, the cleanup flow should NOT process any more partitions + processedAfterAbort++ + continue + } + + // Create hooks for this partition + hookName := fmt.Sprintf("hook-partition_%d", i) + hookEntry := HookEntry{ + Name: hookName, + Type: ShellType, + 
Enabled: boolPtr(true), + Timeout: 30 * time.Second, + OnFailure: OnFailureAbort, // ABORT on failure + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + Config: map[string]interface{}{"command": "/usr/bin/echo"}, + } + + // Only the failing partition's hook will fail + failOnHook := "" + if i == failPartitionIdx { + failOnHook = hookName + } + + runner := &orchestratorMockRunner{failOnHook: failOnHook} + cfg := buildHooksConfigForEvent(event, []HookEntry{hookEntry}) + orch := newTestOrchestrator(cfg, runner) + + err := executeEventOnOrchestrator(context.Background(), orch, event, partition) + + if i == failPartitionIdx { + // This partition should fail with ErrAbort + if err == nil { + t.Fatalf("expected error for failing partition %d with on_failure=abort, got nil", i) + } + if !errors.Is(err, ErrAbort) { + t.Fatalf("expected ErrAbort for partition %d with on_failure=abort, got: %v", i, err) + } + // Signal that the cleanup flow should stop + abortReceived = true + } else if i < failPartitionIdx { + // Partitions before the failure should succeed + if err != nil { + t.Fatalf("partition %d (before failure) should succeed, got error: %v", i, err) + } + } + } + + // Verify that NO partitions were processed after the abort + if processedAfterAbort == 0 && failPartitionIdx == numPartitions-1 { + // If the failing partition is the last one, there are no subsequent partitions + // This is fine — the property still holds + } else if !abortReceived { + t.Fatal("expected abort to be received") + } + + // The key assertion: partitions after the abort were NOT processed + // (they were skipped in the loop above via the `continue` statement) + // This verifies the "unless on_failure is set to abort" exception + }) +} + +// Feature: partition-hooks, Property 11: Abort Stops Entire Process + +// **Validates: Requirements 6.8** +// +// Property 11: Abort Stops Entire Process +// For any hook with `on_failure` 
set to "abort" that fails after all retry attempts, +// the entire cleanup process SHALL stop immediately without processing any remaining partitions. + +// TestProperty_AbortStopsEntireProcess_ReturnsErrAbort verifies that for any hook with +// on_failure=abort that fails, the orchestrator returns an ErrAbort error across all lifecycle events. +func TestProperty_AbortStopsEntireProcess_ReturnsErrAbort(t *testing.T) { + // Feature: partition-hooks, Property 11: Abort Stops Entire Process + rapid.Check(t, func(t *rapid.T) { + event := rapid.SampledFrom(allLifecycleEvents()).Draw(t, "event") + + // Generate hooks with the abort hook at a random position + numHooks := rapid.IntRange(1, 5).Draw(t, "numHooks") + abortIdx := rapid.IntRange(0, numHooks-1).Draw(t, "abortIdx") + + hooks := make([]HookEntry, 0, numHooks) + var abortHookName string + + for i := 0; i < numHooks; i++ { + hook := genValidHookEntry(t, "hook", true) + hook.Name = "hook-" + string(rune('a'+i)) + + if i == abortIdx { + hook.OnFailure = OnFailureAbort + abortHookName = hook.Name + } else { + // Other hooks have default on_failure + hook.OnFailure = "" + } + + hooks = append(hooks, hook) + } + + runner := &orchestratorMockRunner{failOnHook: abortHookName} + cfg := buildHooksConfigForEvent(event, hooks) + orch := newTestOrchestrator(cfg, runner) + + err := executeEventOnOrchestrator(context.Background(), orch, event, newTestPartitionContext()) + + // The error MUST be non-nil + if err == nil { + t.Fatalf("expected non-nil error when abort hook %q fails (event=%s), got nil", + abortHookName, event) + } + + // The error MUST be ErrAbort + if !errors.Is(err, ErrAbort) { + t.Fatalf("expected ErrAbort when hook %q with on_failure=abort fails (event=%s), got: %v", + abortHookName, event, err) + } + }) +} + +// TestProperty_AbortStopsEntireProcess_StopsRemainingPartitions verifies that when a hook +// with on_failure=abort fails, the cleanup flow stops immediately and does NOT process +// any remaining 
partitions. +func TestProperty_AbortStopsEntireProcess_StopsRemainingPartitions(t *testing.T) { + // Feature: partition-hooks, Property 11: Abort Stops Entire Process + rapid.Check(t, func(t *rapid.T) { + event := rapid.SampledFrom(allLifecycleEvents()).Draw(t, "event") + + // Generate multiple partitions (at least 3 to have partitions before and after) + numPartitions := rapid.IntRange(3, 8).Draw(t, "numPartitions") + + // Pick a partition to fail (not the last one, so we can verify subsequent ones are skipped) + failPartitionIdx := rapid.IntRange(0, numPartitions-2).Draw(t, "failPartitionIdx") + + // Generate partition contexts + partitions := make([]PartitionContext, 0, numPartitions) + for i := 0; i < numPartitions; i++ { + p := PartitionContext{ + Schema: "public", + Table: fmt.Sprintf("events_%d", i), + ParentTable: "events", + LowerBound: "2024-01-01", + UpperBound: "2024-02-01", + PartitionName: fmt.Sprintf("partition_%d", i), + Retention: "30", + Interval: "daily", + DatabaseName: "mydb", + Hostname: "localhost", + } + partitions = append(partitions, p) + } + + // Simulate the cleanup flow: iterate over partitions, stop on ErrAbort + processedPartitions := 0 + abortReceived := false + + for i, partition := range partitions { + if abortReceived { + // After abort, the cleanup flow MUST NOT process any more partitions + break + } + + // Create a hook with on_failure=abort + hookName := fmt.Sprintf("abort-hook-partition_%d", i) + hookEntry := HookEntry{ + Name: hookName, + Type: ShellType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + OnFailure: OnFailureAbort, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + Config: map[string]interface{}{"command": "/usr/bin/echo"}, + } + + // Only the failing partition's hook will fail + failOnHook := "" + if i == failPartitionIdx { + failOnHook = hookName + } + + runner := &orchestratorMockRunner{failOnHook: failOnHook} + cfg := 
buildHooksConfigForEvent(event, []HookEntry{hookEntry}) + orch := newTestOrchestrator(cfg, runner) + + err := executeEventOnOrchestrator(context.Background(), orch, event, partition) + + processedPartitions++ + + if errors.Is(err, ErrAbort) { + abortReceived = true + } + } + + // Abort MUST have been received + if !abortReceived { + t.Fatalf("expected ErrAbort to be received at partition %d, but it was not", failPartitionIdx) + } + + // Only partitions up to and including the failing one should have been processed + expectedProcessed := failPartitionIdx + 1 + if processedPartitions != expectedProcessed { + t.Fatalf("expected %d partitions processed before abort (0..%d), got %d (event=%s)", + expectedProcessed, failPartitionIdx, processedPartitions, event) + } + }) +} + +// TestProperty_AbortStopsEntireProcess_ShortCircuitsHooksInEvent verifies that when a hook +// with on_failure=abort fails, remaining hooks in the same lifecycle event are also skipped +// (short-circuit behavior combined with abort). 
+func TestProperty_AbortStopsEntireProcess_ShortCircuitsHooksInEvent(t *testing.T) { + // Feature: partition-hooks, Property 11: Abort Stops Entire Process + rapid.Check(t, func(t *rapid.T) { + event := rapid.SampledFrom(allLifecycleEvents()).Draw(t, "event") + + // Generate multiple hooks with the abort hook at a random position + numHooks := rapid.IntRange(2, 6).Draw(t, "numHooks") + abortIdx := rapid.IntRange(0, numHooks-1).Draw(t, "abortIdx") + + hooks := make([]HookEntry, 0, numHooks) + var abortHookName string + + for i := 0; i < numHooks; i++ { + hook := genValidHookEntry(t, "hook", true) + hook.Name = "hook-" + string(rune('a'+i)) + + if i == abortIdx { + hook.OnFailure = OnFailureAbort + abortHookName = hook.Name + } else { + hook.OnFailure = "" + } + + hooks = append(hooks, hook) + } + + runner := &orchestratorMockRunner{failOnHook: abortHookName} + cfg := buildHooksConfigForEvent(event, hooks) + orch := newTestOrchestrator(cfg, runner) + + err := executeEventOnOrchestrator(context.Background(), orch, event, newTestPartitionContext()) + + // Must return ErrAbort + if !errors.Is(err, ErrAbort) { + t.Fatalf("expected ErrAbort, got: %v (event=%s, abortHook=%s)", err, event, abortHookName) + } + + // Only hooks up to and including the abort hook should have been executed + expectedExecuted := abortIdx + 1 + if int(runner.callCount.Load()) != expectedExecuted { + t.Fatalf("expected %d hooks executed (0..%d), got %d (event=%s)", + expectedExecuted, abortIdx, runner.callCount.Load(), event) + } + + // Verify hooks after abortIdx were NOT executed + for _, executed := range runner.executedHooks { + for j := abortIdx + 1; j < numHooks; j++ { + skippedName := "hook-" + string(rune('a'+j)) + if executed == skippedName { + t.Fatalf("hook %q (after abort at idx %d) should NOT have been executed (event=%s)", + skippedName, abortIdx, event) + } + } + } + }) +} + +// TestProperty_AbortStopsEntireProcess_TracksFailure verifies that when a hook with +// on_failure=abort 
fails, the orchestrator tracks the failure via HasFailures(). +func TestProperty_AbortStopsEntireProcess_TracksFailure(t *testing.T) { + // Feature: partition-hooks, Property 11: Abort Stops Entire Process + rapid.Check(t, func(t *rapid.T) { + event := rapid.SampledFrom(allLifecycleEvents()).Draw(t, "event") + + // Generate a single hook with on_failure=abort that fails + hook := genValidHookEntry(t, "hook", true) + hook.OnFailure = OnFailureAbort + + runner := &orchestratorMockRunner{failOnHook: hook.Name} + cfg := buildHooksConfigForEvent(event, []HookEntry{hook}) + orch := newTestOrchestrator(cfg, runner) + + err := executeEventOnOrchestrator(context.Background(), orch, event, newTestPartitionContext()) + + // Must return ErrAbort + if !errors.Is(err, ErrAbort) { + t.Fatalf("expected ErrAbort, got: %v (event=%s)", err, event) + } + + // The orchestrator MUST track the failure + if !orch.HasFailures() { + t.Fatalf("expected HasFailures()=true after abort hook failed (event=%s)", event) + } + }) +} + +// Feature: partition-hooks, Property 12: Continue Overrides Default Cancel + +// **Validates: Requirements 6.9** +// +// Property 12: Continue Overrides Default Cancel +// For any before-hook with `on_failure` set to "continue" that fails, the associated +// operation (detach or drop) SHALL still be executed. + +// TestProperty_ContinueOverridesDefaultCancel_ReturnsNilError verifies that for any +// before-hook (before-detach or before-drop) with on_failure=continue that fails, +// the orchestrator returns nil (no error), meaning the associated operation proceeds. 
+func TestProperty_ContinueOverridesDefaultCancel_ReturnsNilError(t *testing.T) { + // Feature: partition-hooks, Property 12: Continue Overrides Default Cancel + rapid.Check(t, func(t *rapid.T) { + event := rapid.SampledFrom(beforeHookEvents()).Draw(t, "event") + + // Generate hooks with the continue hook at a random position + numHooks := rapid.IntRange(1, 5).Draw(t, "numHooks") + continueIdx := rapid.IntRange(0, numHooks-1).Draw(t, "continueIdx") + + hooks := make([]HookEntry, 0, numHooks) + var continueHookName string + + for i := 0; i < numHooks; i++ { + hook := genValidHookEntry(t, "hook", true) + hook.Name = "hook-" + string(rune('a'+i)) + + if i == continueIdx { + hook.OnFailure = OnFailureContinue + continueHookName = hook.Name + } else { + // Other hooks have default on_failure + hook.OnFailure = "" + } + + hooks = append(hooks, hook) + } + + runner := &orchestratorMockRunner{failOnHook: continueHookName} + cfg := buildHooksConfigForEvent(event, hooks) + orch := newTestOrchestrator(cfg, runner) + + err := executeEventOnOrchestrator(context.Background(), orch, event, newTestPartitionContext()) + + // on_failure=continue MUST return nil error, meaning the operation proceeds + if err != nil { + t.Fatalf("expected nil error when before-hook %q with on_failure=continue fails (event=%s), got: %v", + continueHookName, event, err) + } + }) +} + +// TestProperty_ContinueOverridesDefaultCancel_TracksFailure verifies that even though +// on_failure=continue allows the operation to proceed, the failure is still tracked +// via HasFailures() for exit code purposes (Requirement 6.7). 
+func TestProperty_ContinueOverridesDefaultCancel_TracksFailure(t *testing.T) { + // Feature: partition-hooks, Property 12: Continue Overrides Default Cancel + rapid.Check(t, func(t *rapid.T) { + event := rapid.SampledFrom(beforeHookEvents()).Draw(t, "event") + + // Generate hooks with the continue hook at a random position + numHooks := rapid.IntRange(1, 5).Draw(t, "numHooks") + continueIdx := rapid.IntRange(0, numHooks-1).Draw(t, "continueIdx") + + hooks := make([]HookEntry, 0, numHooks) + var continueHookName string + + for i := 0; i < numHooks; i++ { + hook := genValidHookEntry(t, "hook", true) + hook.Name = "hook-" + string(rune('a'+i)) + + if i == continueIdx { + hook.OnFailure = OnFailureContinue + continueHookName = hook.Name + } else { + hook.OnFailure = "" + } + + hooks = append(hooks, hook) + } + + runner := &orchestratorMockRunner{failOnHook: continueHookName} + cfg := buildHooksConfigForEvent(event, hooks) + orch := newTestOrchestrator(cfg, runner) + + _ = executeEventOnOrchestrator(context.Background(), orch, event, newTestPartitionContext()) + + // The failure MUST still be tracked even though the operation proceeds + if !orch.HasFailures() { + t.Fatalf("expected HasFailures()=true after hook %q with on_failure=continue failed (event=%s)", + continueHookName, event) + } + }) +} + +// TestProperty_ContinueOverridesDefaultCancel_NotErrAbort verifies that the behavior of +// on_failure=continue is distinct from on_failure=abort — it does NOT return ErrAbort. 
+func TestProperty_ContinueOverridesDefaultCancel_NotErrAbort(t *testing.T) { + // Feature: partition-hooks, Property 12: Continue Overrides Default Cancel + rapid.Check(t, func(t *rapid.T) { + event := rapid.SampledFrom(beforeHookEvents()).Draw(t, "event") + + // Generate hooks with the continue hook at a random position + numHooks := rapid.IntRange(1, 5).Draw(t, "numHooks") + continueIdx := rapid.IntRange(0, numHooks-1).Draw(t, "continueIdx") + + hooks := make([]HookEntry, 0, numHooks) + var continueHookName string + + for i := 0; i < numHooks; i++ { + hook := genValidHookEntry(t, "hook", true) + hook.Name = "hook-" + string(rune('a'+i)) + + if i == continueIdx { + hook.OnFailure = OnFailureContinue + continueHookName = hook.Name + } else { + hook.OnFailure = "" + } + + hooks = append(hooks, hook) + } + + runner := &orchestratorMockRunner{failOnHook: continueHookName} + cfg := buildHooksConfigForEvent(event, hooks) + orch := newTestOrchestrator(cfg, runner) + + err := executeEventOnOrchestrator(context.Background(), orch, event, newTestPartitionContext()) + + // on_failure=continue returns nil, so it cannot be ErrAbort + if err != nil && errors.Is(err, ErrAbort) { + t.Fatalf("on_failure=continue must NOT return ErrAbort (event=%s, hook=%s), got: %v", + event, continueHookName, err) + } + }) +} + +// TestProperty_ContinueOverridesDefaultCancel_ContrastWithDefault verifies the contrast: +// the same hook configuration with default on_failure (empty) WOULD cancel the operation, +// but with on_failure=continue it does NOT. This proves "continue overrides default cancel". 
+func TestProperty_ContinueOverridesDefaultCancel_ContrastWithDefault(t *testing.T) { + // Feature: partition-hooks, Property 12: Continue Overrides Default Cancel + rapid.Check(t, func(t *rapid.T) { + event := rapid.SampledFrom(beforeHookEvents()).Draw(t, "event") + + // Generate a single hook that will fail + hook := genValidHookEntry(t, "hook", true) + hook.Name = "contrast-hook" + + // Test 1: Default on_failure — should return error (cancel operation) + hook.OnFailure = "" + runner1 := &orchestratorMockRunner{failOnHook: hook.Name} + cfg1 := buildHooksConfigForEvent(event, []HookEntry{hook}) + orch1 := newTestOrchestrator(cfg1, runner1) + + errDefault := executeEventOnOrchestrator(context.Background(), orch1, event, newTestPartitionContext()) + + // Default behavior: before-hook failure cancels the operation (returns error) + if errDefault == nil { + t.Fatalf("expected error with default on_failure for before-hook %q (event=%s), got nil", + hook.Name, event) + } + + // Test 2: on_failure=continue — should return nil (operation proceeds) + hook.OnFailure = OnFailureContinue + runner2 := &orchestratorMockRunner{failOnHook: hook.Name} + cfg2 := buildHooksConfigForEvent(event, []HookEntry{hook}) + orch2 := newTestOrchestrator(cfg2, runner2) + + errContinue := executeEventOnOrchestrator(context.Background(), orch2, event, newTestPartitionContext()) + + // Continue behavior: operation proceeds despite hook failure (returns nil) + if errContinue != nil { + t.Fatalf("expected nil error with on_failure=continue for before-hook %q (event=%s), got: %v", + hook.Name, event, errContinue) + } + }) +} + +// Feature: partition-hooks, Property 19: Non-Zero Exit on Hook Failure + +// **Validates: Requirements 6.7** +// +// Property 19: Non-Zero Exit on Hook Failure +// For any cleanup run where at least one hook failed (after all retries), the cleanup process +// SHALL return a non-nil error (resulting in non-zero exit code), even if all partition operations +// themselves 
succeeded. The mechanism is: HasFailures() returns true when any hook has failed, +// and the cleanup flow checks HasFailures() to determine the exit code. + +// TestProperty_NonZeroExitOnHookFailure_HasFailuresTrueWhenHookFails verifies that for any +// lifecycle event, when a hook fails with default on_failure, HasFailures() must be true. +func TestProperty_NonZeroExitOnHookFailure_HasFailuresTrueWhenHookFails(t *testing.T) { + // Feature: partition-hooks, Property 19: Non-Zero Exit on Hook Failure + rapid.Check(t, func(t *rapid.T) { + event := rapid.SampledFrom(allLifecycleEvents()).Draw(t, "event") + + // Generate a random number of hooks with the failing hook at a random position + numHooks := rapid.IntRange(1, 5).Draw(t, "numHooks") + failIdx := rapid.IntRange(0, numHooks-1).Draw(t, "failIdx") + + hooks := make([]HookEntry, 0, numHooks) + var failingHookName string + + for i := 0; i < numHooks; i++ { + hook := genValidHookEntry(t, "hook", true) + hook.Name = "hook-" + string(rune('a'+i)) + hook.OnFailure = "" // default on_failure + hooks = append(hooks, hook) + + if i == failIdx { + failingHookName = hook.Name + } + } + + runner := &orchestratorMockRunner{failOnHook: failingHookName} + cfg := buildHooksConfigForEvent(event, hooks) + orch := newTestOrchestrator(cfg, runner) + + _ = executeEventOnOrchestrator(context.Background(), orch, event, newTestPartitionContext()) + + // HasFailures() MUST be true when any hook has failed + if !orch.HasFailures() { + t.Fatalf("expected HasFailures()=true after hook %q failed with default on_failure (event=%s)", + failingHookName, event) + } + }) +} + +// TestProperty_NonZeroExitOnHookFailure_HasFailuresTrueWithOnFailureContinue verifies that +// even when on_failure=continue (which returns nil error so the operation proceeds), +// HasFailures() must still be true because the hook DID fail. 
+func TestProperty_NonZeroExitOnHookFailure_HasFailuresTrueWithOnFailureContinue(t *testing.T) { + // Feature: partition-hooks, Property 19: Non-Zero Exit on Hook Failure + rapid.Check(t, func(t *rapid.T) { + event := rapid.SampledFrom(allLifecycleEvents()).Draw(t, "event") + + // Generate a random number of hooks with the failing hook at a random position + numHooks := rapid.IntRange(1, 5).Draw(t, "numHooks") + failIdx := rapid.IntRange(0, numHooks-1).Draw(t, "failIdx") + + hooks := make([]HookEntry, 0, numHooks) + var failingHookName string + + for i := 0; i < numHooks; i++ { + hook := genValidHookEntry(t, "hook", true) + hook.Name = "hook-" + string(rune('a'+i)) + + if i == failIdx { + hook.OnFailure = OnFailureContinue + failingHookName = hook.Name + } else { + hook.OnFailure = "" + } + + hooks = append(hooks, hook) + } + + runner := &orchestratorMockRunner{failOnHook: failingHookName} + cfg := buildHooksConfigForEvent(event, hooks) + orch := newTestOrchestrator(cfg, runner) + + err := executeEventOnOrchestrator(context.Background(), orch, event, newTestPartitionContext()) + + // on_failure=continue means the Execute* method returns nil (operation proceeds) + if err != nil { + t.Fatalf("expected nil error with on_failure=continue for hook %q (event=%s), got: %v", + failingHookName, event, err) + } + + // But HasFailures() MUST still be true because the hook DID fail + if !orch.HasFailures() { + t.Fatalf("expected HasFailures()=true after hook %q failed with on_failure=continue (event=%s): "+ + "the hook failed even though the operation proceeded", + failingHookName, event) + } + }) +} + +// TestProperty_NonZeroExitOnHookFailure_HasFailuresTrueWithOnFailureAbort verifies that +// when on_failure=abort triggers, HasFailures() must be true. 
+func TestProperty_NonZeroExitOnHookFailure_HasFailuresTrueWithOnFailureAbort(t *testing.T) { + // Feature: partition-hooks, Property 19: Non-Zero Exit on Hook Failure + rapid.Check(t, func(t *rapid.T) { + event := rapid.SampledFrom(allLifecycleEvents()).Draw(t, "event") + + // Generate a random number of hooks with the abort hook at a random position + numHooks := rapid.IntRange(1, 5).Draw(t, "numHooks") + abortIdx := rapid.IntRange(0, numHooks-1).Draw(t, "abortIdx") + + hooks := make([]HookEntry, 0, numHooks) + var abortHookName string + + for i := 0; i < numHooks; i++ { + hook := genValidHookEntry(t, "hook", true) + hook.Name = "hook-" + string(rune('a'+i)) + + if i == abortIdx { + hook.OnFailure = OnFailureAbort + abortHookName = hook.Name + } else { + hook.OnFailure = "" + } + + hooks = append(hooks, hook) + } + + runner := &orchestratorMockRunner{failOnHook: abortHookName} + cfg := buildHooksConfigForEvent(event, hooks) + orch := newTestOrchestrator(cfg, runner) + + err := executeEventOnOrchestrator(context.Background(), orch, event, newTestPartitionContext()) + + // on_failure=abort returns ErrAbort + if !errors.Is(err, ErrAbort) { + t.Fatalf("expected ErrAbort when hook %q with on_failure=abort fails (event=%s), got: %v", + abortHookName, event, err) + } + + // HasFailures() MUST be true + if !orch.HasFailures() { + t.Fatalf("expected HasFailures()=true after hook %q failed with on_failure=abort (event=%s)", + abortHookName, event) + } + }) +} + +// TestProperty_NonZeroExitOnHookFailure_HasFailuresFalseWhenAllSucceed verifies the contrast: +// when no hooks fail, HasFailures() must be false. 
+func TestProperty_NonZeroExitOnHookFailure_HasFailuresFalseWhenAllSucceed(t *testing.T) { + // Feature: partition-hooks, Property 19: Non-Zero Exit on Hook Failure + rapid.Check(t, func(t *rapid.T) { + event := rapid.SampledFrom(allLifecycleEvents()).Draw(t, "event") + + // Generate a random number of hooks — all will succeed + numHooks := rapid.IntRange(1, 5).Draw(t, "numHooks") + hooks := make([]HookEntry, 0, numHooks) + + for i := 0; i < numHooks; i++ { + hook := genValidHookEntry(t, "hook", true) + hook.Name = "hook-" + string(rune('a'+i)) + // Randomize on_failure to ensure it doesn't matter when hooks succeed + hook.OnFailure = rapid.SampledFrom([]OnFailure{"", OnFailureAbort, OnFailureContinue}).Draw(t, "onFailure") + hooks = append(hooks, hook) + } + + // No hook will fail (failOnHook is empty) + runner := &orchestratorMockRunner{} + cfg := buildHooksConfigForEvent(event, hooks) + orch := newTestOrchestrator(cfg, runner) + + err := executeEventOnOrchestrator(context.Background(), orch, event, newTestPartitionContext()) + + // No error expected when all hooks succeed + if err != nil { + t.Fatalf("expected no error when all hooks succeed (event=%s), got: %v", event, err) + } + + // HasFailures() MUST be false when no hooks failed + if orch.HasFailures() { + t.Fatalf("expected HasFailures()=false when all hooks succeed (event=%s)", event) + } + }) +} diff --git a/internal/infra/hook/orchestrator_test.go b/internal/infra/hook/orchestrator_test.go new file mode 100644 index 0000000..d7f9582 --- /dev/null +++ b/internal/infra/hook/orchestrator_test.go @@ -0,0 +1,639 @@ +package hook + +import ( + "context" + "errors" + "fmt" + "log/slog" + "os" + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// errSimulatedFailure is a static error used by the mock runner. 
+var errSimulatedFailure = errors.New("simulated hook failure") + +// orchestratorMockRunner tracks execution order and can be configured to fail on specific hooks. +type orchestratorMockRunner struct { + callCount atomic.Int32 + executedHooks []string + failOnHook string // hook name that should fail +} + +func (r *orchestratorMockRunner) Run(_ context.Context, hook *ResolvedHook) error { + r.callCount.Add(1) + r.executedHooks = append(r.executedHooks, hook.Name) + + if r.failOnHook != "" && hook.Name == r.failOnHook { + return fmt.Errorf("hook %q: %w", hook.Name, errSimulatedFailure) + } + + return nil +} + +func newOrchestratorTestLogger() slog.Logger { + return *slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelError})) +} + +func newTestPartitionContext() PartitionContext { + return PartitionContext{ + Schema: "public", + Table: "events_2024_01", + ParentTable: "events", + LowerBound: "2024-01-01", + UpperBound: "2024-02-01", + PartitionName: "events", + Retention: "30", + Interval: "daily", + DatabaseName: "mydb", + Hostname: "localhost", + } +} + +func boolPtr(b bool) *bool { + return &b +} + +func newTestOrchestrator(hooks *HooksConfig, runner Runner) *Orchestrator { + logger := newOrchestratorTestLogger() + executor := NewExecutor(runner, logger) + metrics := NewMetricsCollector(logger) + + return NewOrchestrator(hooks, executor, metrics, logger, "postgresql://user:pass@localhost:5432/mydb") +} + +// --- Nil hooks (no-op) --- + +func TestOrchestrator_NilHooks_NoOp(t *testing.T) { + t.Parallel() + + runner := &orchestratorMockRunner{} + orch := newTestOrchestrator(nil, runner) + ctx := context.Background() + partition := newTestPartitionContext() + + assert.NoError(t, orch.ExecuteBeforeDetach(ctx, partition)) + assert.NoError(t, orch.ExecuteAfterDetach(ctx, partition)) + assert.NoError(t, orch.ExecuteBeforeDrop(ctx, partition)) + assert.NoError(t, orch.ExecuteAfterDrop(ctx, partition)) + assert.False(t, orch.HasFailures()) + 
assert.Equal(t, int32(0), runner.callCount.Load()) +} + +// --- Disabled hook skipping --- +// Validates: Requirement 2.7 + +func TestOrchestrator_DisabledHookSkipped(t *testing.T) { + t.Parallel() + + runner := &orchestratorMockRunner{} + hooks := &HooksConfig{ + BeforeDetach: []HookEntry{ + { + Name: "disabled-hook", + Type: ShellType, + Enabled: boolPtr(false), + Timeout: 30 * time.Second, + Config: map[string]interface{}{"command": "echo hello"}, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + { + Name: "enabled-hook", + Type: ShellType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Config: map[string]interface{}{"command": "echo world"}, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + }, + } + + orch := newTestOrchestrator(hooks, runner) + + err := orch.ExecuteBeforeDetach(context.Background(), newTestPartitionContext()) + + require.NoError(t, err) + assert.Equal(t, int32(1), runner.callCount.Load()) + assert.Equal(t, []string{"enabled-hook"}, runner.executedHooks) + assert.False(t, orch.HasFailures()) +} + +func TestOrchestrator_AllDisabledHooksSkipped(t *testing.T) { + t.Parallel() + + runner := &orchestratorMockRunner{} + hooks := &HooksConfig{ + BeforeDetach: []HookEntry{ + { + Name: "hook-1", + Type: ShellType, + Enabled: boolPtr(false), + Timeout: 30 * time.Second, + Config: map[string]interface{}{"command": "echo 1"}, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + { + Name: "hook-2", + Type: ShellType, + Enabled: boolPtr(false), + Timeout: 30 * time.Second, + Config: map[string]interface{}{"command": "echo 2"}, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + }, + } + + orch := newTestOrchestrator(hooks, runner) + + err := 
orch.ExecuteBeforeDetach(context.Background(), newTestPartitionContext()) + + require.NoError(t, err) + assert.Equal(t, int32(0), runner.callCount.Load()) + assert.False(t, orch.HasFailures()) +} + +// --- Sequential execution order --- +// Validates: Requirements 2.14, 9.1, 10.1, 11.1, 12.1 + +func TestOrchestrator_SequentialExecution(t *testing.T) { + t.Parallel() + + runner := &orchestratorMockRunner{} + hooks := &HooksConfig{ + BeforeDetach: []HookEntry{ + { + Name: "hook-a", + Type: ShellType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Config: map[string]interface{}{"command": "echo a"}, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + { + Name: "hook-b", + Type: ShellType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Config: map[string]interface{}{"command": "echo b"}, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + { + Name: "hook-c", + Type: ShellType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Config: map[string]interface{}{"command": "echo c"}, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + }, + } + + orch := newTestOrchestrator(hooks, runner) + + err := orch.ExecuteBeforeDetach(context.Background(), newTestPartitionContext()) + + require.NoError(t, err) + assert.Equal(t, []string{"hook-a", "hook-b", "hook-c"}, runner.executedHooks) +} + +// --- Before-hook failure cancels operation --- +// Validates: Requirements 6.1, 9.2, 11.2 + +func TestOrchestrator_BeforeDetachFailure_CancelsOperation(t *testing.T) { + t.Parallel() + + runner := &orchestratorMockRunner{failOnHook: "failing-hook"} + hooks := &HooksConfig{ + BeforeDetach: []HookEntry{ + { + Name: "failing-hook", + Type: ShellType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Config: map[string]interface{}{"command": 
"echo fail"}, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + }, + } + + orch := newTestOrchestrator(hooks, runner) + + err := orch.ExecuteBeforeDetach(context.Background(), newTestPartitionContext()) + + require.Error(t, err) + assert.Contains(t, err.Error(), "cancelling") + assert.True(t, orch.HasFailures()) +} + +func TestOrchestrator_BeforeDropFailure_CancelsOperation(t *testing.T) { + t.Parallel() + + runner := &orchestratorMockRunner{failOnHook: "failing-hook"} + hooks := &HooksConfig{ + BeforeDrop: []HookEntry{ + { + Name: "failing-hook", + Type: ShellType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Config: map[string]interface{}{"command": "echo fail"}, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + }, + } + + orch := newTestOrchestrator(hooks, runner) + + err := orch.ExecuteBeforeDrop(context.Background(), newTestPartitionContext()) + + require.Error(t, err) + assert.Contains(t, err.Error(), "cancelling") + assert.True(t, orch.HasFailures()) +} + +// --- Hook failure short-circuits event --- +// Validates: Requirements 6.2, 6.6, 9.3, 10.4, 11.3, 12.3 + +func TestOrchestrator_FailureShortCircuits_RemainingHooks(t *testing.T) { + t.Parallel() + + runner := &orchestratorMockRunner{failOnHook: "hook-b"} + hooks := &HooksConfig{ + BeforeDetach: []HookEntry{ + { + Name: "hook-a", + Type: ShellType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Config: map[string]interface{}{"command": "echo a"}, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + { + Name: "hook-b", + Type: ShellType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Config: map[string]interface{}{"command": "echo b"}, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: 
DefaultMaxDelay}, + }, + { + Name: "hook-c", + Type: ShellType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Config: map[string]interface{}{"command": "echo c"}, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + }, + } + + orch := newTestOrchestrator(hooks, runner) + + err := orch.ExecuteBeforeDetach(context.Background(), newTestPartitionContext()) + + require.Error(t, err) + // hook-a executed, hook-b failed, hook-c should NOT be executed + assert.Equal(t, []string{"hook-a", "hook-b"}, runner.executedHooks) + assert.True(t, orch.HasFailures()) +} + +// --- on_failure=abort stops entire process --- +// Validates: Requirement 6.8 + +func TestOrchestrator_OnFailureAbort_ReturnsAbortError(t *testing.T) { + t.Parallel() + + runner := &orchestratorMockRunner{failOnHook: "abort-hook"} + hooks := &HooksConfig{ + BeforeDetach: []HookEntry{ + { + Name: "abort-hook", + Type: ShellType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + OnFailure: OnFailureAbort, + Config: map[string]interface{}{"command": "echo abort"}, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + }, + } + + orch := newTestOrchestrator(hooks, runner) + + err := orch.ExecuteBeforeDetach(context.Background(), newTestPartitionContext()) + + require.Error(t, err) + assert.True(t, errors.Is(err, ErrAbort)) + assert.True(t, orch.HasFailures()) +} + +// --- on_failure=continue proceeds with operation --- +// Validates: Requirement 6.9 + +func TestOrchestrator_OnFailureContinue_ReturnsNil(t *testing.T) { + t.Parallel() + + runner := &orchestratorMockRunner{failOnHook: "continue-hook"} + hooks := &HooksConfig{ + BeforeDetach: []HookEntry{ + { + Name: "continue-hook", + Type: ShellType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + OnFailure: OnFailureContinue, + Config: map[string]interface{}{"command": "echo continue"}, + Retry: 
RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + }, + } + + orch := newTestOrchestrator(hooks, runner) + + err := orch.ExecuteBeforeDetach(context.Background(), newTestPartitionContext()) + + // on_failure=continue means the operation should proceed (nil error) + require.NoError(t, err) + // But the failure is still tracked + assert.True(t, orch.HasFailures()) +} + +// --- After-hook failure returns error (informational) --- +// Validates: Requirement 6.4 + +func TestOrchestrator_AfterDetachFailure_ReturnsError(t *testing.T) { + t.Parallel() + + runner := &orchestratorMockRunner{failOnHook: "after-hook"} + hooks := &HooksConfig{ + AfterDetach: []HookEntry{ + { + Name: "after-hook", + Type: ShellType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Config: map[string]interface{}{"command": "echo after"}, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + }, + } + + orch := newTestOrchestrator(hooks, runner) + + err := orch.ExecuteAfterDetach(context.Background(), newTestPartitionContext()) + + // After-hook failure returns error (caller decides what to do) + require.Error(t, err) + assert.True(t, orch.HasFailures()) + // Should NOT be an abort error + assert.False(t, errors.Is(err, ErrAbort)) +} + +func TestOrchestrator_AfterDropFailure_ReturnsError(t *testing.T) { + t.Parallel() + + runner := &orchestratorMockRunner{failOnHook: "after-drop-hook"} + hooks := &HooksConfig{ + AfterDrop: []HookEntry{ + { + Name: "after-drop-hook", + Type: ShellType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Config: map[string]interface{}{"command": "echo after-drop"}, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + }, + } + + orch := newTestOrchestrator(hooks, runner) + + err := orch.ExecuteAfterDrop(context.Background(), 
newTestPartitionContext()) + + require.Error(t, err) + assert.True(t, orch.HasFailures()) + assert.False(t, errors.Is(err, ErrAbort)) +} + +// --- Template rendering in hooks --- + +func TestOrchestrator_TemplateRendering_ShellHook(t *testing.T) { + t.Parallel() + + runner := &orchestratorMockRunner{} + hooks := &HooksConfig{ + BeforeDetach: []HookEntry{ + { + Name: "template-hook", + Type: ShellType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Config: map[string]interface{}{ + "command": "/usr/bin/archive", + "args": []interface{}{"--schema", "{{.Schema}}", "--table", "{{.Table}}"}, + "env": map[string]interface{}{ + "DB_NAME": "{{.DatabaseName}}", + }, + }, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + }, + } + + orch := newTestOrchestrator(hooks, runner) + + err := orch.ExecuteBeforeDetach(context.Background(), newTestPartitionContext()) + + require.NoError(t, err) + assert.Equal(t, int32(1), runner.callCount.Load()) +} + +func TestOrchestrator_TemplateRendering_UndefinedVariable(t *testing.T) { + t.Parallel() + + runner := &orchestratorMockRunner{} + hooks := &HooksConfig{ + BeforeDetach: []HookEntry{ + { + Name: "bad-template-hook", + Type: ShellType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Config: map[string]interface{}{ + "command": "echo {{.UndefinedVar}}", + }, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + }, + } + + orch := newTestOrchestrator(hooks, runner) + + err := orch.ExecuteBeforeDetach(context.Background(), newTestPartitionContext()) + + require.Error(t, err) + assert.Equal(t, int32(0), runner.callCount.Load()) + assert.True(t, orch.HasFailures()) +} + +// --- PostgreSQL hook template rendering --- + +func TestOrchestrator_TemplateRendering_PostgreSQLHook(t *testing.T) { + t.Parallel() + + runner := &orchestratorMockRunner{} + hooks := &HooksConfig{ + 
AfterDetach: []HookEntry{ + { + Name: "vacuum-hook", + Type: PostgreSQLType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Config: map[string]interface{}{ + "sql_query": "VACUUM ANALYZE {{.Schema}}.{{.Table}}", + }, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + }, + } + + orch := newTestOrchestrator(hooks, runner) + + err := orch.ExecuteAfterDetach(context.Background(), newTestPartitionContext()) + + require.NoError(t, err) + assert.Equal(t, int32(1), runner.callCount.Load()) +} + +// --- Summary and metrics --- + +func TestOrchestrator_Summary_TracksMetrics(t *testing.T) { + t.Parallel() + + runner := &orchestratorMockRunner{failOnHook: "hook-2"} + hooks := &HooksConfig{ + BeforeDetach: []HookEntry{ + { + Name: "hook-1", + Type: ShellType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Config: map[string]interface{}{"command": "echo 1"}, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + { + Name: "hook-2", + Type: ShellType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Config: map[string]interface{}{"command": "echo 2"}, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + }, + } + + orch := newTestOrchestrator(hooks, runner) + + _ = orch.ExecuteBeforeDetach(context.Background(), newTestPartitionContext()) + + summary := orch.Summary() + assert.Equal(t, 2, summary.TotalExecuted) + assert.Equal(t, 1, summary.TotalSuccess) + assert.Equal(t, 1, summary.TotalFailures) +} + +// --- HasFailures tracks across multiple lifecycle events --- + +func TestOrchestrator_HasFailures_AcrossEvents(t *testing.T) { + t.Parallel() + + runner := &orchestratorMockRunner{failOnHook: "after-hook"} + hooks := &HooksConfig{ + BeforeDetach: []HookEntry{ + { + Name: "before-hook", + Type: ShellType, + Enabled: boolPtr(true), + 
Timeout: 30 * time.Second, + Config: map[string]interface{}{"command": "echo before"}, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + }, + AfterDetach: []HookEntry{ + { + Name: "after-hook", + Type: ShellType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Config: map[string]interface{}{"command": "echo after"}, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + }, + } + + orch := newTestOrchestrator(hooks, runner) + ctx := context.Background() + partition := newTestPartitionContext() + + // Before-detach succeeds + err := orch.ExecuteBeforeDetach(ctx, partition) + require.NoError(t, err) + assert.False(t, orch.HasFailures()) + + // After-detach fails + err = orch.ExecuteAfterDetach(ctx, partition) + require.Error(t, err) + assert.True(t, orch.HasFailures()) +} + +// --- Credential propagation flag --- + +func TestOrchestrator_PropagateCredentials_SetOnResolvedHook(t *testing.T) { + t.Parallel() + + // Custom runner that captures the resolved hook + var capturedHook *ResolvedHook + + capturingRunner := &capturingMockRunner{capturedHook: &capturedHook} + hooks := &HooksConfig{ + BeforeDetach: []HookEntry{ + { + Name: "cred-hook", + Type: ShellType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Config: map[string]interface{}{"command": "echo creds", "propagate-credentials": true}, + Retry: RetryConfig{Attempts: 0, Backoff: BackoffFixed, InitialDelay: DefaultInitialDelay, MaxDelay: DefaultMaxDelay}, + }, + }, + } + + logger := newOrchestratorTestLogger() + executor := NewExecutor(capturingRunner, logger) + metrics := NewMetricsCollector(logger) + orch := NewOrchestrator(hooks, executor, metrics, logger, "postgresql://user:pass@localhost:5432/mydb") + + err := orch.ExecuteBeforeDetach(context.Background(), newTestPartitionContext()) + + require.NoError(t, err) + require.NotNil(t, capturedHook) + 
shellCfg, ok := capturedHook.Config.(*ShellConfig) + require.True(t, ok, "resolved config should be *ShellConfig") + assert.True(t, shellCfg.PropagateCredentials) + assert.Equal(t, "postgresql://user:pass@localhost:5432/mydb", capturedHook.ConnectionURL) +} + +// capturingMockRunner captures the resolved hook for inspection. +type capturingMockRunner struct { + capturedHook **ResolvedHook +} + +func (r *capturingMockRunner) Run(_ context.Context, hook *ResolvedHook) error { + *r.capturedHook = hook + + return nil +} diff --git a/internal/infra/hook/postgresql_runner.go b/internal/infra/hook/postgresql_runner.go new file mode 100644 index 0000000..317d750 --- /dev/null +++ b/internal/infra/hook/postgresql_runner.go @@ -0,0 +1,118 @@ +package hook + +import ( + "context" + "fmt" + "log/slog" + + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgconn" +) + +// Compile-time checks. +var ( + _ Runner = (*PostgreSQLRunner)(nil) + _ RenderedConfig = (*PostgreSQLConfig)(nil) +) + +// LogAttrs implements RenderedConfig, returning the resolved SQL query for structured logging. +func (c *PostgreSQLConfig) LogAttrs() []any { + return []any{"sql_query", c.SQLQuery} +} + +// validatePostgreSQLConfig checks that a postgresql hook's raw config has the required fields. +func validatePostgreSQLConfig(config map[string]interface{}) error { + if config == nil { + return ErrPostgreSQLConfigRequired + } + + if _, ok := config["sql_query"]; !ok { + return ErrPostgreSQLQueryRequired + } + + return nil +} + +// resolvePostgreSQLConfig renders template variables in postgresql hook configuration fields. 
+func resolvePostgreSQLConfig(config map[string]interface{}, partition PartitionContext) (RenderedConfig, error) { + pgCfg := &PostgreSQLConfig{} + + if query, ok := config["sql_query"]; ok { + rendered, err := Render(fmt.Sprintf("%v", query), partition) + if err != nil { + return nil, fmt.Errorf("rendering sql_query: %w", err) + } + + pgCfg.SQLQuery = rendered + } + + return pgCfg, nil +} + +// PgxConn defines the minimal interface needed for executing SQL statements. +type PgxConn interface { + Exec(ctx context.Context, sql string, arguments ...any) (pgconn.CommandTag, error) + Close(ctx context.Context) error +} + +// ConnectorFunc is a function that creates a new database connection. +// It takes a context and a connection URL, and returns a PgxConn or an error. +type ConnectorFunc func(ctx context.Context, connectionURL string) (PgxConn, error) + +// DefaultConnector creates a real pgx connection. +func DefaultConnector(ctx context.Context, connectionURL string) (PgxConn, error) { + return pgx.Connect(ctx, connectionURL) +} + +// PostgreSQLRunner executes SQL statements against a PostgreSQL database. +type PostgreSQLRunner struct { + logger slog.Logger + connector ConnectorFunc +} + +// NewPostgreSQLRunner creates a new PostgreSQLRunner with the given logger. +// It uses the default pgx connector for database connections. +func NewPostgreSQLRunner(logger slog.Logger) *PostgreSQLRunner { + return &PostgreSQLRunner{ + logger: logger, + connector: DefaultConnector, + } +} + +// NewPostgreSQLRunnerWithConnector creates a new PostgreSQLRunner with a custom connector. +// This is primarily used for testing to inject mock connections. +func NewPostgreSQLRunnerWithConnector(logger slog.Logger, connector ConnectorFunc) *PostgreSQLRunner { + return &PostgreSQLRunner{ + logger: logger, + connector: connector, + } +} + +// Run executes the SQL query defined in the resolved hook. 
+// It opens a separate database connection (not reusing the main PPM connection), +// executes the query using the provided context for timeout support, +// and closes the connection after execution. +func (r *PostgreSQLRunner) Run(ctx context.Context, hook *ResolvedHook) error { + pgCfg, ok := hook.Config.(*PostgreSQLConfig) + if !ok { + return fmt.Errorf("postgresql configuration is nil for hook %q", hook.Name) + } + + r.logger.Debug("Executing postgresql hook", + "hook", hook.Name, + "sql_query", pgCfg.SQLQuery, + ) + + conn, err := r.connector(ctx, hook.ConnectionURL) + if err != nil { + return fmt.Errorf("postgresql hook %q failed to connect: %w", hook.Name, err) + } + defer conn.Close(ctx) + + _, err = conn.Exec(ctx, pgCfg.SQLQuery) + if err != nil { + return fmt.Errorf("postgresql hook %q failed to execute query: %w", hook.Name, err) + } + + return nil +} diff --git a/internal/infra/hook/postgresql_runner_test.go b/internal/infra/hook/postgresql_runner_test.go new file mode 100644 index 0000000..7631e09 --- /dev/null +++ b/internal/infra/hook/postgresql_runner_test.go @@ -0,0 +1,211 @@ +package hook + +import ( + "context" + "errors" + "log/slog" + "os" + "testing" + + "github.com/jackc/pgx/v5/pgconn" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// mockPgxConn implements PgxConn for testing. +type mockPgxConn struct { + execFunc func(ctx context.Context, sql string, arguments ...any) (pgconn.CommandTag, error) + closeFunc func(ctx context.Context) error + closed bool +} + +func (m *mockPgxConn) Exec(ctx context.Context, sql string, arguments ...any) (pgconn.CommandTag, error) { + if m.execFunc != nil { + return m.execFunc(ctx, sql, arguments...) 
+ } + + return pgconn.NewCommandTag(""), nil +} + +func (m *mockPgxConn) Close(ctx context.Context) error { + m.closed = true + + if m.closeFunc != nil { + return m.closeFunc(ctx) + } + + return nil +} + +func TestPostgreSQLRunner_Run(t *testing.T) { + t.Parallel() + + logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelDebug})) + + tests := []struct { + name string + hook *ResolvedHook + connector ConnectorFunc + expectError bool + errContains string + }{ + { + name: "successful SQL execution", + hook: &ResolvedHook{ + Name: "test-success", + Type: PostgreSQLType, + ConnectionURL: "postgresql://user:pass@localhost:5432/testdb", + Config: &PostgreSQLConfig{ + SQLQuery: "VACUUM ANALYZE public.events", + }, + }, + connector: func(ctx context.Context, connectionURL string) (PgxConn, error) { + return &mockPgxConn{ + execFunc: func(ctx context.Context, sql string, arguments ...any) (pgconn.CommandTag, error) { + return pgconn.NewCommandTag("VACUUM"), nil + }, + }, nil + }, + expectError: false, + }, + { + name: "failing SQL execution", + hook: &ResolvedHook{ + Name: "test-sql-failure", + Type: PostgreSQLType, + ConnectionURL: "postgresql://user:pass@localhost:5432/testdb", + Config: &PostgreSQLConfig{ + SQLQuery: "INVALID SQL STATEMENT", + }, + }, + connector: func(ctx context.Context, connectionURL string) (PgxConn, error) { + return &mockPgxConn{ + execFunc: func(ctx context.Context, sql string, arguments ...any) (pgconn.CommandTag, error) { + return pgconn.NewCommandTag(""), errors.New("syntax error at or near \"INVALID\"") + }, + }, nil + }, + expectError: true, + errContains: "failed to execute query", + }, + { + name: "nil postgresql config", + hook: &ResolvedHook{ + Name: "test-nil-config", + Type: PostgreSQLType, + ConnectionURL: "postgresql://user:pass@localhost:5432/testdb", + Config: nil, + }, + connector: func(ctx context.Context, connectionURL string) (PgxConn, error) { + return &mockPgxConn{}, nil + }, + expectError: 
true, + errContains: "postgresql configuration is nil", + }, + { + name: "connection failure", + hook: &ResolvedHook{ + Name: "test-conn-failure", + Type: PostgreSQLType, + ConnectionURL: "postgresql://user:pass@unreachable:5432/testdb", + Config: &PostgreSQLConfig{ + SQLQuery: "SELECT 1", + }, + }, + connector: func(ctx context.Context, connectionURL string) (PgxConn, error) { + return nil, errors.New("connection refused") + }, + expectError: true, + errContains: "failed to connect", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + runner := NewPostgreSQLRunnerWithConnector(*logger, tc.connector) + err := runner.Run(context.Background(), tc.hook) + + if tc.expectError { + require.Error(t, err) + assert.Contains(t, err.Error(), tc.errContains) + } else { + require.NoError(t, err) + } + }) + } +} + +func TestPostgreSQLRunner_SeparateConnection(t *testing.T) { + t.Parallel() + + logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelDebug})) + + expectedURL := "postgresql://hookuser:hookpass@hookhost:5433/hookdb" + var receivedURL string + + mock := &mockPgxConn{ + execFunc: func(ctx context.Context, sql string, arguments ...any) (pgconn.CommandTag, error) { + return pgconn.NewCommandTag("SELECT 1"), nil + }, + } + + connector := func(ctx context.Context, connectionURL string) (PgxConn, error) { + receivedURL = connectionURL + return mock, nil + } + + runner := NewPostgreSQLRunnerWithConnector(*logger, connector) + + hook := &ResolvedHook{ + Name: "test-separate-connection", + Type: PostgreSQLType, + ConnectionURL: expectedURL, + Config: &PostgreSQLConfig{ + SQLQuery: "SELECT 1", + }, + } + + err := runner.Run(context.Background(), hook) + require.NoError(t, err) + + // Verify the connector was called with the hook's ConnectionURL + assert.Equal(t, expectedURL, receivedURL) + + // Verify the connection was closed after execution + assert.True(t, mock.closed, "connection should be closed 
after execution") +} + +func TestPostgreSQLRunner_ConnectionClosedOnFailure(t *testing.T) { + t.Parallel() + + logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelDebug})) + + mock := &mockPgxConn{ + execFunc: func(ctx context.Context, sql string, arguments ...any) (pgconn.CommandTag, error) { + return pgconn.NewCommandTag(""), errors.New("query failed") + }, + } + + connector := func(ctx context.Context, connectionURL string) (PgxConn, error) { + return mock, nil + } + + runner := NewPostgreSQLRunnerWithConnector(*logger, connector) + + hook := &ResolvedHook{ + Name: "test-close-on-failure", + Type: PostgreSQLType, + ConnectionURL: "postgresql://user:pass@localhost:5432/testdb", + Config: &PostgreSQLConfig{ + SQLQuery: "INVALID", + }, + } + + err := runner.Run(context.Background(), hook) + require.Error(t, err) + + // Verify the connection was still closed even on query failure + assert.True(t, mock.closed, "connection should be closed even when query fails") +} diff --git a/internal/infra/hook/registry.go b/internal/infra/hook/registry.go new file mode 100644 index 0000000..87e58bc --- /dev/null +++ b/internal/infra/hook/registry.go @@ -0,0 +1,81 @@ +package hook + +import ( + "context" + "errors" + "fmt" + "log/slog" +) + +// ErrUnsupportedHookType is returned when a hook has a type with no registered handler. +var ErrUnsupportedHookType = errors.New("unsupported hook type") + +// RenderedConfig is a type-specific hook configuration with all template variables already +// rendered. It knows how to describe itself for structured logging. Each hook type (shell, +// postgresql, ...) provides an implementation. +type RenderedConfig interface { + // LogAttrs returns key/value pairs describing the resolved configuration for logging. + LogAttrs() []any +} + +// typeHandler bundles everything the framework needs to support a hook type. Registering a new +// handler is the only change required to add a new hook type (e.g. 
s3); the orchestrator, +// executor and dispatcher are type-agnostic. +type typeHandler struct { + // validate checks the raw config map at configuration-load time. + validate func(config map[string]interface{}) error + + // resolve renders template variables in the raw config map and returns a RenderedConfig. + resolve func(config map[string]interface{}, partition PartitionContext) (RenderedConfig, error) + + // newRunner builds the Runner that executes this hook type. + newRunner func(logger slog.Logger) Runner +} + +// registry maps each supported hook type to its handler. To add a new hook type, implement a +// runner + RenderedConfig in its own file and register it here. +var registry = map[HookType]typeHandler{ + ShellType: { + validate: validateShellConfig, + resolve: resolveShellConfig, + newRunner: func(logger slog.Logger) Runner { return NewShellRunner(logger) }, + }, + PostgreSQLType: { + validate: validatePostgreSQLConfig, + resolve: resolvePostgreSQLConfig, + newRunner: func(logger slog.Logger) Runner { return NewPostgreSQLRunner(logger) }, + }, +} + +// RegistryRunner dispatches hook execution to the runner registered for the hook's type. +// It replaces hand-written type switches: support for a new type comes from the registry. +type RegistryRunner struct { + runners map[HookType]Runner +} + +// Compile-time check that RegistryRunner implements Runner. +var _ Runner = (*RegistryRunner)(nil) + +// NewRegistryRunner builds a RegistryRunner with one runner instance per registered hook type. +func NewRegistryRunner(logger slog.Logger) *RegistryRunner { + runners := make(map[HookType]Runner, len(registry)) + for hookType, handler := range registry { + runners[hookType] = handler.newRunner(logger) + } + + return &RegistryRunner{runners: runners} +} + +// Run executes the hook using the runner registered for its type. 
+func (r *RegistryRunner) Run(ctx context.Context, hook *ResolvedHook) error { + runner, ok := r.runners[hook.Type] + if !ok { + return fmt.Errorf("%w: %q for hook %q", ErrUnsupportedHookType, hook.Type, hook.Name) + } + + if err := runner.Run(ctx, hook); err != nil { + return fmt.Errorf("running %q hook %q: %w", hook.Type, hook.Name, err) + } + + return nil +} diff --git a/internal/infra/hook/resolver.go b/internal/infra/hook/resolver.go new file mode 100644 index 0000000..b2cb961 --- /dev/null +++ b/internal/infra/hook/resolver.go @@ -0,0 +1,17 @@ +package hook + +// Resolve determines which hooks apply to a given partition based on the resolution rules: +// 1. If partition-level hooks are defined (non-nil), use them (full override of global) +// 2. If only global hooks are defined, use global hooks +// 3. If neither is defined, return nil (no hooks to execute) +func Resolve(partitionName string, globalHooks *HooksConfig, partitionHooks *HooksConfig) *HooksConfig { + if partitionHooks != nil { + return partitionHooks + } + + if globalHooks != nil { + return globalHooks + } + + return nil +} diff --git a/internal/infra/hook/resolver_property_test.go b/internal/infra/hook/resolver_property_test.go new file mode 100644 index 0000000..61e069c --- /dev/null +++ b/internal/infra/hook/resolver_property_test.go @@ -0,0 +1,125 @@ +// Feature: partition-hooks, Property 1: Hook Resolution Override +package hook + +import ( + "testing" + + "pgregory.net/rapid" +) + +// **Validates: Requirements 1.3, 1.4** +// +// Property 1: Hook Resolution Override +// For any partition configuration with hooks defined at both global and partition levels, +// the resolved hooks for that partition SHALL equal the partition-level hooks, completely +// overriding the global hooks. Conversely, for any partition without its own hooks section, +// the resolved hooks SHALL equal the global hooks. + +// genHooksConfig generates a random non-nil HooksConfig with random lifecycle hooks. 
+func genHooksConfig(t *rapid.T, label string) *HooksConfig { + numBeforeDetach := rapid.IntRange(0, 3).Draw(t, label+"_numBeforeDetach") + numAfterDetach := rapid.IntRange(0, 3).Draw(t, label+"_numAfterDetach") + numBeforeDrop := rapid.IntRange(0, 3).Draw(t, label+"_numBeforeDrop") + numAfterDrop := rapid.IntRange(0, 3).Draw(t, label+"_numAfterDrop") + + config := &HooksConfig{} + + for i := 0; i < numBeforeDetach; i++ { + config.BeforeDetach = append(config.BeforeDetach, genRandomHookEntry(t, label+"_bd")) + } + + for i := 0; i < numAfterDetach; i++ { + config.AfterDetach = append(config.AfterDetach, genRandomHookEntry(t, label+"_ad")) + } + + for i := 0; i < numBeforeDrop; i++ { + config.BeforeDrop = append(config.BeforeDrop, genRandomHookEntry(t, label+"_bdr")) + } + + for i := 0; i < numAfterDrop; i++ { + config.AfterDrop = append(config.AfterDrop, genRandomHookEntry(t, label+"_adr")) + } + + return config +} + +// genRandomHookEntry generates a random HookEntry with valid required fields. 
+func genRandomHookEntry(t *rapid.T, label string) HookEntry { + hookType := rapid.SampledFrom([]HookType{ShellType, PostgreSQLType}).Draw(t, label+"_type") + name := rapid.StringMatching(`[a-z][a-z0-9\-]{1,20}`).Draw(t, label+"_name") + + var config map[string]any + + switch hookType { + case ShellType: + config = map[string]any{ + "command": rapid.StringMatching(`/[a-z/]{1,30}`).Draw(t, label+"_cmd"), + } + case PostgreSQLType: + config = map[string]any{ + "sql_query": rapid.StringMatching(`SELECT [a-z]{1,10}`).Draw(t, label+"_sql"), + } + } + + return HookEntry{ + Name: name, + Type: hookType, + Config: config, + } +} + +func TestProperty_HookResolutionOverride_BothDefined(t *testing.T) { + rapid.Check(t, func(t *rapid.T) { + partitionName := rapid.StringMatching(`[a-z][a-z0-9_]{1,20}`).Draw(t, "partitionName") + globalHooks := genHooksConfig(t, "global") + partitionHooks := genHooksConfig(t, "partition") + + result := Resolve(partitionName, globalHooks, partitionHooks) + + // When both global and partition hooks are non-nil, Resolve must return partition hooks + if result != partitionHooks { + t.Fatalf("expected partition hooks to fully override global hooks, got different pointer") + } + }) +} + +func TestProperty_HookResolutionOverride_GlobalOnly(t *testing.T) { + rapid.Check(t, func(t *rapid.T) { + partitionName := rapid.StringMatching(`[a-z][a-z0-9_]{1,20}`).Draw(t, "partitionName") + globalHooks := genHooksConfig(t, "global") + + result := Resolve(partitionName, globalHooks, nil) + + // When only global hooks are defined, Resolve must return global hooks + if result != globalHooks { + t.Fatalf("expected global hooks to be returned when partition hooks is nil, got different pointer") + } + }) +} + +func TestProperty_HookResolutionOverride_PartitionOnly(t *testing.T) { + rapid.Check(t, func(t *rapid.T) { + partitionName := rapid.StringMatching(`[a-z][a-z0-9_]{1,20}`).Draw(t, "partitionName") + partitionHooks := genHooksConfig(t, "partition") + + result := 
Resolve(partitionName, nil, partitionHooks) + + // When only partition hooks are defined, Resolve must return partition hooks + if result != partitionHooks { + t.Fatalf("expected partition hooks to be returned when global hooks is nil, got different pointer") + } + }) +} + +func TestProperty_HookResolutionOverride_NeitherDefined(t *testing.T) { + rapid.Check(t, func(t *rapid.T) { + partitionName := rapid.StringMatching(`[a-z][a-z0-9_]{1,20}`).Draw(t, "partitionName") + + result := Resolve(partitionName, nil, nil) + + // When neither is defined, Resolve must return nil + if result != nil { + t.Fatalf("expected nil when both global and partition hooks are nil, got %v", result) + } + }) +} diff --git a/internal/infra/hook/resolver_test.go b/internal/infra/hook/resolver_test.go new file mode 100644 index 0000000..74a7c16 --- /dev/null +++ b/internal/infra/hook/resolver_test.go @@ -0,0 +1,130 @@ +package hook + +import "testing" + +// **Validates: Requirements 1.1, 1.2, 1.3, 1.4, 1.5** + +func TestResolve_GlobalOnly(t *testing.T) { + globalHooks := &HooksConfig{ + BeforeDetach: []HookEntry{ + { + Name: "notify", + Type: ShellType, + Config: map[string]any{ + "command": "/usr/bin/notify", + }, + }, + }, + } + + result := Resolve("events_2024_01", globalHooks, nil) + + if result != globalHooks { + t.Fatal("expected global hooks to be returned when partition hooks is nil") + } +} + +func TestResolve_PartitionOnly(t *testing.T) { + partitionHooks := &HooksConfig{ + BeforeDrop: []HookEntry{ + { + Name: "archive", + Type: ShellType, + Config: map[string]any{ + "command": "/usr/local/bin/archive", + }, + }, + }, + } + + result := Resolve("events_2024_01", nil, partitionHooks) + + if result != partitionHooks { + t.Fatal("expected partition hooks to be returned when global hooks is nil") + } +} + +func TestResolve_BothDefined_PartitionOverridesGlobal(t *testing.T) { + globalHooks := &HooksConfig{ + BeforeDetach: []HookEntry{ + { + Name: "global-notify", + Type: ShellType, + 
Config: map[string]any{ + "command": "/usr/bin/global-notify", + }, + }, + }, + AfterDetach: []HookEntry{ + { + Name: "vacuum", + Type: PostgreSQLType, + Config: map[string]any{ + "sql_query": "VACUUM ANALYZE public.events", + }, + }, + }, + } + + partitionHooks := &HooksConfig{ + BeforeDrop: []HookEntry{ + { + Name: "partition-archive", + Type: ShellType, + Config: map[string]any{ + "command": "/usr/local/bin/archive-partition", + }, + }, + }, + } + + result := Resolve("events_2024_01", globalHooks, partitionHooks) + + if result != partitionHooks { + t.Fatal("expected partition hooks to fully override global hooks") + } + + if result == globalHooks { + t.Fatal("expected global hooks to be overridden by partition hooks") + } +} + +func TestResolve_NeitherDefined(t *testing.T) { + result := Resolve("events_2024_01", nil, nil) + + if result != nil { + t.Fatalf("expected nil when no hooks are defined, got %v", result) + } +} + +func TestResolve_EmptyGlobalHooks(t *testing.T) { + globalHooks := &HooksConfig{} + + result := Resolve("events_2024_01", globalHooks, nil) + + if result != globalHooks { + t.Fatal("expected empty global hooks to be returned (non-nil pointer)") + } +} + +func TestResolve_EmptyPartitionHooks(t *testing.T) { + globalHooks := &HooksConfig{ + BeforeDetach: []HookEntry{ + { + Name: "global-hook", + Type: ShellType, + Config: map[string]any{ + "command": "/usr/bin/echo", + }, + }, + }, + } + + partitionHooks := &HooksConfig{} + + result := Resolve("events_2024_01", globalHooks, partitionHooks) + + if result != partitionHooks { + t.Fatal("expected empty partition hooks to override global hooks (partition pointer returned)") + } +} diff --git a/internal/infra/hook/runner.go b/internal/infra/hook/runner.go new file mode 100644 index 0000000..37f2a65 --- /dev/null +++ b/internal/infra/hook/runner.go @@ -0,0 +1,34 @@ +package hook + +import "context" + +// ResolvedHook contains a hook entry with all template variables already rendered, +// ready for 
execution by a Runner. +type ResolvedHook struct { + // Name is the hook name for identification in logs. + Name string + + // Type identifies which runner implementation to use. + Type HookType + + // LifecycleEvent indicates when this hook executes (before-detach, after-detach, etc.). + LifecycleEvent LifecycleEvent + + // Config holds the rendered, type-specific configuration. Its concrete type matches the + // hook Type (e.g. *ShellConfig for ShellType). Runners type-assert it to their own type. + Config RenderedConfig + + // PartitionContext holds the partition metadata used for template rendering. + PartitionContext PartitionContext + + // ConnectionURL is the PostgreSQL connection URL used by the PostgreSQL runner + // and for credential propagation to shell hooks. + ConnectionURL string +} + +// Runner executes a specific hook type. +type Runner interface { + // Run executes the hook and returns an error if it fails. + // The context carries the timeout deadline. + Run(ctx context.Context, hook *ResolvedHook) error +} diff --git a/internal/infra/hook/shell_runner.go b/internal/infra/hook/shell_runner.go new file mode 100644 index 0000000..f08595b --- /dev/null +++ b/internal/infra/hook/shell_runner.go @@ -0,0 +1,165 @@ +package hook + +import ( + "bytes" + "context" + "fmt" + "log/slog" + "os" + "os/exec" +) + +// Compile-time checks. +var ( + _ Runner = (*ShellRunner)(nil) + _ RenderedConfig = (*ShellConfig)(nil) +) + +// LogAttrs implements RenderedConfig, returning the resolved shell fields for structured logging. +func (c *ShellConfig) LogAttrs() []any { + attrs := []any{ + "command", c.Command, + "args", c.Args, + "propagate_credentials", c.PropagateCredentials, + } + + if len(c.Env) > 0 { + attrs = append(attrs, "env", c.Env) + } + + return attrs +} + +// validateShellConfig checks that a shell hook's raw config has the required fields. 
+func validateShellConfig(config map[string]interface{}) error { + if config == nil { + return ErrShellConfigRequired + } + + if _, ok := config["command"]; !ok { + return ErrShellCommandRequired + } + + return nil +} + +// resolveShellConfig renders template variables in shell hook configuration fields. +func resolveShellConfig(config map[string]interface{}, partition PartitionContext) (RenderedConfig, error) { + shell := &ShellConfig{} + + if cmd, ok := config["command"]; ok { + rendered, err := Render(fmt.Sprintf("%v", cmd), partition) + if err != nil { + return nil, fmt.Errorf("rendering command: %w", err) + } + + shell.Command = rendered + } + + if args, ok := config["args"].([]interface{}); ok { + shell.Args = make([]string, 0, len(args)) + + for _, arg := range args { + rendered, err := Render(fmt.Sprintf("%v", arg), partition) + if err != nil { + return nil, fmt.Errorf("rendering arg: %w", err) + } + + shell.Args = append(shell.Args, rendered) + } + } + + if envMap, ok := config["env"].(map[string]interface{}); ok { + shell.Env = make(map[string]string, len(envMap)) + + for k, v := range envMap { + rendered, err := Render(fmt.Sprintf("%v", v), partition) + if err != nil { + return nil, fmt.Errorf("rendering env var %q: %w", k, err) + } + + shell.Env[k] = rendered + } + } + + if propagate, ok := config["propagate-credentials"].(bool); ok { + shell.PropagateCredentials = propagate + } + + return shell, nil +} + +// ShellRunner executes shell hook commands using exec.CommandContext. +type ShellRunner struct { + logger slog.Logger +} + +// NewShellRunner creates a new ShellRunner with the given logger. +func NewShellRunner(logger slog.Logger) *ShellRunner { + return &ShellRunner{ + logger: logger, + } +} + +// Run executes the shell command defined in the resolved hook. +// It uses exec.CommandContext to support timeout via context cancellation. +// The command inherits the parent process environment variables, with +// hook-specific env vars merged on top. 
When PropagateCredentials is true, +// PG* environment variables are injected from the connection URL. +func (r *ShellRunner) Run(ctx context.Context, hook *ResolvedHook) error { + shell, ok := hook.Config.(*ShellConfig) + if !ok { + return fmt.Errorf("shell configuration is nil for hook %q", hook.Name) + } + + cmd := exec.CommandContext(ctx, shell.Command, shell.Args...) + + // Start with parent process environment variables + env := os.Environ() + + // Merge additional env vars from hook config + for key, value := range shell.Env { + env = append(env, fmt.Sprintf("%s=%s", key, value)) + } + + // When propagateCredentials is true, inject PG* env vars + if shell.PropagateCredentials { + credentials, err := ExtractCredentials(hook.ConnectionURL) + if err != nil { + return fmt.Errorf("failed to extract credentials for hook %q: %w", hook.Name, err) + } + + for key, value := range credentials { + env = append(env, fmt.Sprintf("%s=%s", key, value)) + } + } + + cmd.Env = env + + // Capture stdout and stderr for debug logging + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + r.logger.Debug("Executing shell hook", + "hook", hook.Name, + "command", shell.Command, + "args", shell.Args, + ) + + err := cmd.Run() + + if stdout.Len() > 0 { + r.logger.Debug("Shell hook stdout", "hook", hook.Name, "stdout", stdout.String()) + } + + if stderr.Len() > 0 { + r.logger.Debug("Shell hook stderr", "hook", hook.Name, "stderr", stderr.String()) + } + + if err != nil { + return fmt.Errorf("shell hook %q failed: %w", hook.Name, err) + } + + return nil +} diff --git a/internal/infra/hook/shell_runner_test.go b/internal/infra/hook/shell_runner_test.go new file mode 100644 index 0000000..63ea1cf --- /dev/null +++ b/internal/infra/hook/shell_runner_test.go @@ -0,0 +1,203 @@ +package hook + +import ( + "context" + "log/slog" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" 
+) + +func TestShellRunner_Run(t *testing.T) { + t.Parallel() + + logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelDebug})) + runner := NewShellRunner(*logger) + + tests := []struct { + name string + hook *ResolvedHook + expectError bool + }{ + { + name: "successful command", + hook: &ResolvedHook{ + Name: "test-success", + Type: ShellType, + Config: &ShellConfig{ + Command: "true", + }, + }, + expectError: false, + }, + { + name: "failing command", + hook: &ResolvedHook{ + Name: "test-failure", + Type: ShellType, + Config: &ShellConfig{ + Command: "false", + }, + }, + expectError: true, + }, + { + name: "nil shell config", + hook: &ResolvedHook{ + Name: "test-nil-shell", + Type: ShellType, + Config: nil, + }, + expectError: true, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + err := runner.Run(context.Background(), tc.hook) + + if tc.expectError { + require.Error(t, err) + } else { + require.NoError(t, err) + } + }) + } +} + +func TestShellRunner_EnvPropagation(t *testing.T) { + t.Parallel() + + logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelDebug})) + runner := NewShellRunner(*logger) + + tmpDir := t.TempDir() + outFile := filepath.Join(tmpDir, "env_output.txt") + + hook := &ResolvedHook{ + Name: "test-env-propagation", + Type: ShellType, + Config: &ShellConfig{ + Command: "sh", + Args: []string{"-c", "echo $MY_CUSTOM_VAR > " + outFile}, + Env: map[string]string{ + "MY_CUSTOM_VAR": "hello_from_hook", + }, + }, + } + + err := runner.Run(context.Background(), hook) + require.NoError(t, err) + + content, err := os.ReadFile(outFile) + require.NoError(t, err) + assert.Equal(t, "hello_from_hook", strings.TrimSpace(string(content))) +} + +func TestShellRunner_CredentialInjection(t *testing.T) { + t.Parallel() + + logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelDebug})) + runner := 
NewShellRunner(*logger) + + tmpDir := t.TempDir() + outFile := filepath.Join(tmpDir, "creds_output.txt") + + hook := &ResolvedHook{ + Name: "test-credential-injection", + Type: ShellType, + Config: &ShellConfig{ + Command: "sh", + Args: []string{"-c", "echo $PGHOST:$PGPORT:$PGDATABASE:$PGUSER:$PGPASSWORD > " + outFile}, + PropagateCredentials: true, + }, + ConnectionURL: "postgresql://myuser:mypassword@dbhost:5433/mydb", + } + + err := runner.Run(context.Background(), hook) + require.NoError(t, err) + + content, err := os.ReadFile(outFile) + require.NoError(t, err) + assert.Equal(t, "dbhost:5433:mydb:myuser:mypassword", strings.TrimSpace(string(content))) +} + +func TestShellRunner_CredentialInjectionDisabled(t *testing.T) { + t.Parallel() + + logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelDebug})) + runner := NewShellRunner(*logger) + + tmpDir := t.TempDir() + outFile := filepath.Join(tmpDir, "no_creds_output.txt") + + hook := &ResolvedHook{ + Name: "test-no-credentials", + Type: ShellType, + Config: &ShellConfig{ + Command: "sh", + Args: []string{"-c", "echo \"PGHOST=${PGHOST:-unset}\" > " + outFile}, + PropagateCredentials: false, + }, + ConnectionURL: "postgresql://myuser:mypassword@dbhost:5433/mydb", + } + + err := runner.Run(context.Background(), hook) + require.NoError(t, err) + + content, err := os.ReadFile(outFile) + require.NoError(t, err) + assert.Equal(t, "PGHOST=unset", strings.TrimSpace(string(content))) +} + +func TestShellRunner_InvalidConnectionURLWithPropagateCredentials(t *testing.T) { + t.Parallel() + + logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelDebug})) + runner := NewShellRunner(*logger) + + hook := &ResolvedHook{ + Name: "test-invalid-url", + Type: ShellType, + Config: &ShellConfig{ + Command: "echo", + Args: []string{"hello"}, + PropagateCredentials: true, + }, + ConnectionURL: "not-a-valid-url", + } + + err := runner.Run(context.Background(), hook) + 
// PartitionContext is the data exposed to hook templates. Each exported field can be
// referenced from a template as {{.FieldName}}.
type PartitionContext struct {
	// Partition metadata
	Schema      string // Partition schema
	Table       string // Partition table name (child)
	ParentTable string // Parent table name
	LowerBound  string // Partition lower bound (formatted)
	UpperBound  string // Partition upper bound (formatted)

	// Configuration metadata
	PartitionName string // Partition identifier from config file
	Retention     string // Configured retention value
	Interval      string // Configured interval value

	// Connection metadata
	DatabaseName string // Database name from connection URL
	Hostname     string // Database hostname from connection URL
}

// Render parses templateStr as a Go text/template and executes it against ctx.
//
// It returns the rendered text. The parse error is returned as-is when the template is
// malformed, and the execution error is returned as-is when the template cannot be
// evaluated, for instance because it references a field PartitionContext does not have.
// In both error cases the returned string is empty.
func Render(templateStr string, ctx PartitionContext) (string, error) {
	parsed, parseErr := template.New("hook").Option("missingkey=error").Parse(templateStr)
	if parseErr != nil {
		return "", parseErr
	}

	rendered := new(bytes.Buffer)

	if execErr := parsed.Execute(rendered, ctx); execErr != nil {
		return "", execErr
	}

	return rendered.String(), nil
}
+func genPartitionContext(t *rapid.T) PartitionContext { + return PartitionContext{ + Schema: rapid.StringMatching(`[a-z][a-z0-9_]{0,15}`).Draw(t, "schema"), + Table: rapid.StringMatching(`[a-z][a-z0-9_]{0,20}`).Draw(t, "table"), + ParentTable: rapid.StringMatching(`[a-z][a-z0-9_]{0,20}`).Draw(t, "parentTable"), + LowerBound: rapid.StringMatching(`\d{4}-\d{2}-\d{2}`).Draw(t, "lowerBound"), + UpperBound: rapid.StringMatching(`\d{4}-\d{2}-\d{2}`).Draw(t, "upperBound"), + PartitionName: rapid.StringMatching(`[a-z][a-z0-9_]{0,15}`).Draw(t, "partitionName"), + Retention: rapid.StringMatching(`\d{1,3}`).Draw(t, "retention"), + Interval: rapid.SampledFrom([]string{"daily", "weekly", "monthly"}).Draw(t, "interval"), + DatabaseName: rapid.StringMatching(`[a-z][a-z0-9_]{0,15}`).Draw(t, "databaseName"), + Hostname: rapid.StringMatching(`[a-z][a-z0-9\-]{0,20}\.example\.com`).Draw(t, "hostname"), + } +} + +func TestProperty_UndefinedTemplateVariableError(t *testing.T) { + rapid.Check(t, func(t *rapid.T) { + // Pick a random undefined field name + undefinedField := rapid.SampledFrom(undefinedFieldNames).Draw(t, "undefinedField") + + // Generate a template string that references the undefined field + // Optionally include some valid fields mixed in + includeValidPrefix := rapid.Bool().Draw(t, "includeValidPrefix") + + var templateStr string + if includeValidPrefix { + templateStr = "{{.Schema}}.{{." + undefinedField + "}}" + } else { + templateStr = "{{." 
+ undefinedField + "}}" + } + + ctx := genPartitionContext(t) + + // Render must return an error for undefined variables + result, err := Render(templateStr, ctx) + if err == nil { + t.Fatalf("expected error for undefined template variable %q, got result: %q", undefinedField, result) + } + }) +} + +func TestProperty_UndefinedTemplateVariableError_InComplexTemplate(t *testing.T) { + rapid.Check(t, func(t *rapid.T) { + // Generate a more complex template with an undefined variable embedded + undefinedField := rapid.SampledFrom(undefinedFieldNames).Draw(t, "undefinedField") + + // Build a template with a prefix, the undefined variable, and a suffix + prefix := rapid.SampledFrom([]string{ + "pg_dump --host={{.Hostname}} --dbname={{.DatabaseName}} ", + "VACUUM ANALYZE {{.Schema}}.", + "/usr/local/bin/archive --table=", + "SELECT * FROM {{.Schema}}.", + }).Draw(t, "prefix") + + suffix := rapid.SampledFrom([]string{ + " --output=/tmp/backup", + "", + " WHERE 1=1", + " | gzip > /tmp/out.gz", + }).Draw(t, "suffix") + + templateStr := prefix + "{{." 
+ undefinedField + "}}" + suffix + + ctx := genPartitionContext(t) + + // Render must return an error for undefined variables + result, err := Render(templateStr, ctx) + if err == nil { + t.Fatalf("expected error for undefined template variable %q in complex template %q, got result: %q", + undefinedField, templateStr, result) + } + }) +} diff --git a/internal/infra/hook/template_rendering_property_test.go b/internal/infra/hook/template_rendering_property_test.go new file mode 100644 index 0000000..104afbe --- /dev/null +++ b/internal/infra/hook/template_rendering_property_test.go @@ -0,0 +1,213 @@ +// Feature: partition-hooks, Property 15: Template Variable Rendering +package hook + +import ( + "fmt" + "strings" + "testing" + + "pgregory.net/rapid" +) + +// **Validates: Requirements 7.1, 7.2, 7.8, 3.5, 4.3** +// +// Property 15: Template Variable Rendering +// For any valid partition context and any template string containing {{.VariableName}} +// references to defined variables (Schema, Table, ParentTable, LowerBound, UpperBound, +// PartitionName, DatabaseName, Hostname, Retention, Interval), the rendered output SHALL +// contain the actual values substituted for each variable reference. + +// allTemplateVariables lists all valid template variable names in PartitionContext. +var allTemplateVariables = []string{ + "Schema", + "Table", + "ParentTable", + "LowerBound", + "UpperBound", + "PartitionName", + "DatabaseName", + "Hostname", + "Retention", + "Interval", +} + +// genRenderingPartitionContext generates a random PartitionContext with non-empty values for all fields. 
+func genRenderingPartitionContext(t *rapid.T) PartitionContext { + return PartitionContext{ + Schema: rapid.StringMatching(`[a-zA-Z][a-zA-Z0-9_]{0,15}`).Draw(t, "schema"), + Table: rapid.StringMatching(`[a-zA-Z][a-zA-Z0-9_]{0,20}`).Draw(t, "table"), + ParentTable: rapid.StringMatching(`[a-zA-Z][a-zA-Z0-9_]{0,20}`).Draw(t, "parentTable"), + LowerBound: rapid.StringMatching(`\d{4}-\d{2}-\d{2}`).Draw(t, "lowerBound"), + UpperBound: rapid.StringMatching(`\d{4}-\d{2}-\d{2}`).Draw(t, "upperBound"), + PartitionName: rapid.StringMatching(`[a-zA-Z][a-zA-Z0-9_]{0,15}`).Draw(t, "partitionName"), + DatabaseName: rapid.StringMatching(`[a-zA-Z][a-zA-Z0-9_]{0,15}`).Draw(t, "databaseName"), + Hostname: rapid.StringMatching(`[a-z][a-z0-9\-]{0,10}\.[a-z]{2,5}`).Draw(t, "hostname"), + Retention: rapid.StringMatching(`\d{1,4}`).Draw(t, "retention"), + Interval: rapid.SampledFrom([]string{"daily", "weekly", "monthly", "quarterly", "yearly"}).Draw(t, "interval"), + } +} + +// getContextFieldValue returns the value of a PartitionContext field by name. 
+func getContextFieldValue(ctx PartitionContext, fieldName string) string { + switch fieldName { + case "Schema": + return ctx.Schema + case "Table": + return ctx.Table + case "ParentTable": + return ctx.ParentTable + case "LowerBound": + return ctx.LowerBound + case "UpperBound": + return ctx.UpperBound + case "PartitionName": + return ctx.PartitionName + case "DatabaseName": + return ctx.DatabaseName + case "Hostname": + return ctx.Hostname + case "Retention": + return ctx.Retention + case "Interval": + return ctx.Interval + default: + return "" + } +} + +func TestProperty_TemplateVariableRendering_SingleVariable(t *testing.T) { + rapid.Check(t, func(t *rapid.T) { + ctx := genRenderingPartitionContext(t) + + // Pick a random variable to test + varName := rapid.SampledFrom(allTemplateVariables).Draw(t, "varName") + templateStr := fmt.Sprintf("prefix-{{.%s}}-suffix", varName) + + result, err := Render(templateStr, ctx) + if err != nil { + t.Fatalf("Render returned unexpected error: %v", err) + } + + expectedValue := getContextFieldValue(ctx, varName) + expectedOutput := fmt.Sprintf("prefix-%s-suffix", expectedValue) + + if result != expectedOutput { + t.Fatalf("expected %q, got %q", expectedOutput, result) + } + }) +} + +func TestProperty_TemplateVariableRendering_MultipleVariables(t *testing.T) { + rapid.Check(t, func(t *rapid.T) { + ctx := genRenderingPartitionContext(t) + + // Pick a random number of variables (2 to all) + numVars := rapid.IntRange(2, len(allTemplateVariables)).Draw(t, "numVars") + + // Select a random subset of variables + selectedVars := make([]string, numVars) + used := make(map[int]bool) + + for i := 0; i < numVars; i++ { + for { + idx := rapid.IntRange(0, len(allTemplateVariables)-1).Draw(t, fmt.Sprintf("idx_%d", i)) + if !used[idx] { + used[idx] = true + selectedVars[i] = allTemplateVariables[idx] + + break + } + } + } + + // Build a template string with all selected variables separated by "/" + var templateParts []string + for _, varName 
:= range selectedVars { + templateParts = append(templateParts, fmt.Sprintf("{{.%s}}", varName)) + } + + templateStr := strings.Join(templateParts, "/") + + result, err := Render(templateStr, ctx) + if err != nil { + t.Fatalf("Render returned unexpected error: %v", err) + } + + // Verify each variable value appears in the output + for _, varName := range selectedVars { + expectedValue := getContextFieldValue(ctx, varName) + if !strings.Contains(result, expectedValue) { + t.Fatalf("rendered output %q does not contain value %q for variable %s", + result, expectedValue, varName) + } + } + + // Verify the full expected output matches exactly + var expectedParts []string + for _, varName := range selectedVars { + expectedParts = append(expectedParts, getContextFieldValue(ctx, varName)) + } + + expectedOutput := strings.Join(expectedParts, "/") + if result != expectedOutput { + t.Fatalf("expected %q, got %q", expectedOutput, result) + } + }) +} + +func TestProperty_TemplateVariableRendering_AllVariables(t *testing.T) { + rapid.Check(t, func(t *rapid.T) { + ctx := genRenderingPartitionContext(t) + + // Build a template using all variables in a realistic format + templateStr := "{{.Schema}}.{{.Table}} parent={{.ParentTable}} bounds=[{{.LowerBound}},{{.UpperBound}}) partition={{.PartitionName}} db={{.DatabaseName}} host={{.Hostname}} retention={{.Retention}} interval={{.Interval}}" + + result, err := Render(templateStr, ctx) + if err != nil { + t.Fatalf("Render returned unexpected error: %v", err) + } + + // Verify each variable value is present in the output + for _, varName := range allTemplateVariables { + expectedValue := getContextFieldValue(ctx, varName) + if !strings.Contains(result, expectedValue) { + t.Fatalf("rendered output %q does not contain value %q for variable %s", + result, expectedValue, varName) + } + } + + // Verify exact expected output + expectedOutput := fmt.Sprintf("%s.%s parent=%s bounds=[%s,%s) partition=%s db=%s host=%s retention=%s interval=%s", 
+ ctx.Schema, ctx.Table, ctx.ParentTable, ctx.LowerBound, ctx.UpperBound, + ctx.PartitionName, ctx.DatabaseName, ctx.Hostname, ctx.Retention, ctx.Interval) + + if result != expectedOutput { + t.Fatalf("expected %q, got %q", expectedOutput, result) + } + }) +} + +func TestProperty_TemplateVariableRendering_WithStaticText(t *testing.T) { + rapid.Check(t, func(t *rapid.T) { + ctx := genRenderingPartitionContext(t) + + // Generate random static prefix and suffix + prefix := rapid.StringMatching(`[a-zA-Z0-9/\-_]{1,20}`).Draw(t, "prefix") + suffix := rapid.StringMatching(`[a-zA-Z0-9/\-_]{1,20}`).Draw(t, "suffix") + varName := rapid.SampledFrom(allTemplateVariables).Draw(t, "varName") + + templateStr := fmt.Sprintf("%s{{.%s}}%s", prefix, varName, suffix) + + result, err := Render(templateStr, ctx) + if err != nil { + t.Fatalf("Render returned unexpected error: %v", err) + } + + expectedValue := getContextFieldValue(ctx, varName) + expectedOutput := fmt.Sprintf("%s%s%s", prefix, expectedValue, suffix) + + if result != expectedOutput { + t.Fatalf("expected %q, got %q", expectedOutput, result) + } + }) +} diff --git a/internal/infra/hook/template_test.go b/internal/infra/hook/template_test.go new file mode 100644 index 0000000..b2e4c66 --- /dev/null +++ b/internal/infra/hook/template_test.go @@ -0,0 +1,160 @@ +package hook + +import ( + "strings" + "testing" +) + +// **Validates: Requirements 7.1, 7.2, 7.8, 7.9** + +func TestRender_AllVariablesPresent(t *testing.T) { + ctx := PartitionContext{ + Schema: "public", + Table: "events_2024_01", + ParentTable: "events", + LowerBound: "2024-01-01", + UpperBound: "2024-02-01", + PartitionName: "events", + Retention: "30", + Interval: "daily", + DatabaseName: "mydb", + Hostname: "db.example.com", + } + + templateStr := "{{.Schema}}.{{.Table}} parent={{.ParentTable}} bounds=[{{.LowerBound}},{{.UpperBound}}) partition={{.PartitionName}} retention={{.Retention}} interval={{.Interval}} db={{.DatabaseName}} host={{.Hostname}}" + + result, 
err := Render(templateStr, ctx) + if err != nil { + t.Fatalf("expected no error, got: %v", err) + } + + expected := "public.events_2024_01 parent=events bounds=[2024-01-01,2024-02-01) partition=events retention=30 interval=daily db=mydb host=db.example.com" + if result != expected { + t.Fatalf("expected %q, got %q", expected, result) + } +} + +func TestRender_PartialVariables(t *testing.T) { + ctx := PartitionContext{ + Schema: "analytics", + Table: "metrics_2024_03", + } + + templateStr := "VACUUM ANALYZE {{.Schema}}.{{.Table}}" + + result, err := Render(templateStr, ctx) + if err != nil { + t.Fatalf("expected no error, got: %v", err) + } + + expected := "VACUUM ANALYZE analytics.metrics_2024_03" + if result != expected { + t.Fatalf("expected %q, got %q", expected, result) + } +} + +func TestRender_SpecialCharactersInValues(t *testing.T) { + tests := []struct { + name string + ctx PartitionContext + template string + expected string + }{ + { + name: "quotes in values", + ctx: PartitionContext{ + Schema: `"public"`, + Table: `events'2024`, + }, + template: "{{.Schema}}.{{.Table}}", + expected: `"public".events'2024`, + }, + { + name: "slashes and dots", + ctx: PartitionContext{ + Hostname: "db.prod.example.com/path", + DatabaseName: "my.db/name", + }, + template: "host={{.Hostname}} db={{.DatabaseName}}", + expected: "host=db.prod.example.com/path db=my.db/name", + }, + { + name: "special characters in bounds", + ctx: PartitionContext{ + LowerBound: "2024-01-01 00:00:00+00", + UpperBound: "2024-02-01 00:00:00+00", + }, + template: "[{{.LowerBound}}, {{.UpperBound}})", + expected: "[2024-01-01 00:00:00+00, 2024-02-01 00:00:00+00)", + }, + { + name: "backslashes and dollar signs", + ctx: PartitionContext{ + Schema: `my\schema`, + Table: `table$name`, + }, + template: "{{.Schema}}.{{.Table}}", + expected: `my\schema.table$name`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := Render(tt.template, tt.ctx) + if err != nil { 
+ t.Fatalf("expected no error, got: %v", err) + } + + if result != tt.expected { + t.Fatalf("expected %q, got %q", tt.expected, result) + } + }) + } +} + +func TestRender_UndefinedVariableError(t *testing.T) { + ctx := PartitionContext{ + Schema: "public", + Table: "events_2024_01", + } + + templateStr := "{{.Schema}}.{{.NonExistent}}" + + _, err := Render(templateStr, ctx) + if err == nil { + t.Fatal("expected error for undefined variable, got nil") + } + + if !strings.Contains(err.Error(), "NonExistent") { + t.Fatalf("expected error to mention 'NonExistent', got: %v", err) + } +} + +func TestRender_InvalidTemplateSyntax(t *testing.T) { + ctx := PartitionContext{ + Schema: "public", + } + + templateStr := "{{.Schema}.{{invalid" + + _, err := Render(templateStr, ctx) + if err == nil { + t.Fatal("expected error for invalid template syntax, got nil") + } +} + +func TestRender_EmptyTemplateString(t *testing.T) { + ctx := PartitionContext{ + Schema: "public", + Table: "events_2024_01", + } + + result, err := Render("", ctx) + if err != nil { + t.Fatalf("expected no error for empty template, got: %v", err) + } + + if result != "" { + t.Fatalf("expected empty string, got %q", result) + } +} diff --git a/internal/infra/hook/testdata/rapid/TestProperty_PartitionIsolation_SharedOrchestratorStillProcesses/TestProperty_PartitionIsolation_SharedOrchestratorStillProcesses-20260529095617-62134.fail b/internal/infra/hook/testdata/rapid/TestProperty_PartitionIsolation_SharedOrchestratorStillProcesses/TestProperty_PartitionIsolation_SharedOrchestratorStillProcesses-20260529095617-62134.fail new file mode 100644 index 0000000..6dc53b9 --- /dev/null +++ b/internal/infra/hook/testdata/rapid/TestProperty_PartitionIsolation_SharedOrchestratorStillProcesses/TestProperty_PartitionIsolation_SharedOrchestratorStillProcesses-20260529095617-62134.fail @@ -0,0 +1,14 @@ +# 2026/05/29 09:56:17.551564 [TestProperty_PartitionIsolation_SharedOrchestratorStillProcesses] [rapid] draw event: 
"before-detach" +# 2026/05/29 09:56:17.551570 [TestProperty_PartitionIsolation_SharedOrchestratorStillProcesses] [rapid] draw numPartitions: 3 +# 2026/05/29 09:56:17.551571 [TestProperty_PartitionIsolation_SharedOrchestratorStillProcesses] [rapid] draw failPartitionIdx: 0 +# 2026/05/29 09:56:17.551574 [TestProperty_PartitionIsolation_SharedOrchestratorStillProcesses] expected error for failing partition 0, got nil +# +v0.4.8#4657605549209261549 +0x0 +0x0 +0x0 +0x0 +0x0 +0x0 +0x0 +0x0 \ No newline at end of file diff --git a/internal/infra/partition/configuration.go b/internal/infra/partition/configuration.go index f1240ff..5befe34 100644 --- a/internal/infra/partition/configuration.go +++ b/internal/infra/partition/configuration.go @@ -3,6 +3,8 @@ package partition import ( "fmt" "time" + + "github.com/qonto/postgresql-partition-manager/internal/infra/hook" ) type ( @@ -15,13 +17,14 @@ const ( ) type Configuration struct { - Schema string `mapstructure:"schema" validate:"required"` - Table string `mapstructure:"table" validate:"required"` - PartitionKey string `mapstructure:"partitionKey" validate:"required"` - Interval Interval `mapstructure:"interval" validate:"required,oneof=daily weekly monthly quarterly yearly"` - Retention int `mapstructure:"retention" validate:"required,gt=0"` - PreProvisioned int `mapstructure:"preProvisioned" validate:"required,gt=0"` - CleanupPolicy CleanupPolicy `mapstructure:"cleanupPolicy" validate:"required,oneof=drop detach"` + Schema string `mapstructure:"schema" validate:"required"` + Table string `mapstructure:"table" validate:"required"` + PartitionKey string `mapstructure:"partitionKey" validate:"required"` + Interval Interval `mapstructure:"interval" validate:"required,oneof=daily weekly monthly quarterly yearly"` + Retention int `mapstructure:"retention" validate:"required,gt=0"` + PreProvisioned int `mapstructure:"preProvisioned" validate:"required,gt=0"` + CleanupPolicy CleanupPolicy `mapstructure:"cleanupPolicy" 
validate:"required,oneof=drop detach"` + Hooks *hook.HooksConfig `mapstructure:"hooks"` } func (p Configuration) GeneratePartition(forDate time.Time) (Partition, error) { diff --git a/mkdocs.yml b/mkdocs.yml index bc9ed96..010f7c8 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -12,8 +12,6 @@ theme: name: material features: - content.code.copy - - navigation.sections - - navigation.expand - search.suggest - search.highlight @@ -41,4 +39,11 @@ nav: - Configuration: configuration.md - CLI Reference: cli-reference.md - Examples: examples.md + - Hooks: + - Overview: hooks/index.md + - Shell: hooks/shell.md + - PostgreSQL: hooks/postgresql.md + - Development: + - Overview: development/index.md + - Developing a Hook Type: development/hook-types.md - Troubleshooting: troubleshooting.md diff --git a/pkg/ppm/checkpartition_test.go b/pkg/ppm/checkpartition_test.go index 04b93c8..e426462 100644 --- a/pkg/ppm/checkpartition_test.go +++ b/pkg/ppm/checkpartition_test.go @@ -105,7 +105,7 @@ func TestCheckPartitions(t *testing.T) { postgreSQLMock.On("ListPartitions", p.Schema, p.Table).Return(convertedTables, nil).Once() } - checker := ppm.New(context.TODO(), *logger, postgreSQLMock, partitions, time.Now()) + checker := ppm.New(context.TODO(), *logger, postgreSQLMock, partitions, time.Now(), "", nil, false) assert.NilError(t, checker.CheckPartitions(), "Partitions should succeed") } @@ -161,7 +161,7 @@ func TestCheckMissingPartitions(t *testing.T) { tables := partitionResultToPartition(t, tc.tables) postgreSQLMock.On("ListPartitions", config.Schema, config.Table).Return(tables, nil).Once() - checker := ppm.New(context.TODO(), *logger, postgreSQLMock, map[string]partition.Configuration{"test": config}, time.Now()) + checker := ppm.New(context.TODO(), *logger, postgreSQLMock, map[string]partition.Configuration{"test": config}, time.Now(), "", nil, false) assert.Error(t, checker.CheckPartitions(), "at least one partition contains an invalid configuration") }) } @@ -201,7 +201,7 @@ func 
TestUnsupportedPartitionsStrategy(t *testing.T) { postgreSQLMock.On("GetColumnDataType", config.Schema, config.Table, config.PartitionKey).Return(postgresql.Date, nil).Once() postgreSQLMock.On("GetPartitionSettings", config.Schema, config.Table).Return(string(tc.strategy), tc.key, nil).Once() - checker := ppm.New(context.TODO(), *logger, postgreSQLMock, map[string]partition.Configuration{"test": config}, time.Now()) + checker := ppm.New(context.TODO(), *logger, postgreSQLMock, map[string]partition.Configuration{"test": config}, time.Now(), "", nil, false) assert.Error(t, checker.CheckPartitions(), "at least one partition contains an invalid configuration") }) } diff --git a/pkg/ppm/checkserver_test.go b/pkg/ppm/checkserver_test.go index 15cf8de..a6ed19d 100644 --- a/pkg/ppm/checkserver_test.go +++ b/pkg/ppm/checkserver_test.go @@ -49,7 +49,7 @@ func TestServerRequirements(t *testing.T) { t.Run(tc.name, func(t *testing.T) { // Reset mock on every test case logger, postgreSQLMock := setupMocks(t) - checker := ppm.New(context.TODO(), *logger, postgreSQLMock, nil, time.Now()) + checker := ppm.New(context.TODO(), *logger, postgreSQLMock, nil, time.Now(), "", nil, false) postgreSQLMock.On("GetEngineVersion").Return(tc.serverVersion, nil).Once() postgreSQLMock.On("GetServerTime").Return(tc.serverTime, nil).Once() diff --git a/pkg/ppm/cleanup.go b/pkg/ppm/cleanup.go index a96de82..e8e1273 100644 --- a/pkg/ppm/cleanup.go +++ b/pkg/ppm/cleanup.go @@ -4,88 +4,197 @@ import ( "errors" "fmt" + "github.com/qonto/postgresql-partition-manager/internal/infra/hook" partition_pkg "github.com/qonto/postgresql-partition-manager/internal/infra/partition" "github.com/qonto/postgresql-partition-manager/internal/infra/retry" ) var ErrPartitionCleanupFailed = errors.New("at least one partition could not be cleaned") +// cleanupState accumulates failure signals across the whole cleanup run to compute the exit code. 
+type cleanupState struct { + hookFailure bool + partitionError bool +} + func (p PPM) CleanupPartitions() error { - partitionContainAnError := false + metrics := hook.NewMetricsCollector(p.logger) + state := &cleanupState{} - for name, config := range p.partitions { - p.logger.Info("Cleaning partition", "partition", name) + if p.dryRun { + p.logger.Info("[DRY-RUN] Starting dry-run cleanup - no partitions will be modified, no hooks will be executed") + } - // Existing - foundPartitions, err := p.ListPartitions(config.Schema, config.Table) + for name, config := range p.partitions { + aborted, err := p.cleanupPartitionSet(name, config, metrics, state) if err != nil { - return fmt.Errorf("could not list partitions: %w", err) + return err } - currentRange, err := p.getGlobalRange(foundPartitions) - if err != nil { - return fmt.Errorf("could not evaluate existing ranges: %w", err) + if aborted { + metrics.LogSummary() + + return fmt.Errorf("%w: %w", ErrPartitionCleanupFailed, hook.ErrAbort) } + } + + // Log execution summary at end of cleanup + if metrics.Summary().TotalExecuted > 0 { + metrics.LogSummary() + } - p.logger.Info("Current ", "c_range", currentRange.String()) + // Return error if any hook or operation failed during the run (for non-zero exit code). + // In dry-run mode, only template/configuration errors set hookFailure (Requirement 17.6). 
+ if state.hookFailure || state.partitionError { + return ErrPartitionCleanupFailed + } - // Expected - expectedPartitions, err := getExpectedPartitions(config, p.workDate) - if err != nil { - return fmt.Errorf("could not generate expected partitions: %w", err) - } + if p.dryRun { + p.logger.Info("[DRY-RUN] Dry-run cleanup complete - no changes were made") + } else { + p.logger.Info("All partitions are cleaned") + } - expectedRange, err := p.getGlobalRange(expectedPartitions) - if err != nil { - return fmt.Errorf("could not evaluate ranges to create: %w", err) + return nil +} + +// cleanupPartitionSet processes one partition configuration: it computes which existing +// partitions fall outside the expected retention range and removes each of them. +// It returns true if a hook requested an abort of the entire cleanup process. +func (p PPM) cleanupPartitionSet(name string, config partition_pkg.Configuration, metrics *hook.MetricsCollector, state *cleanupState) (abort bool, err error) { + p.logger.Info("Cleaning partition", "partition", name) + + foundPartitions, err := p.ListPartitions(config.Schema, config.Table) + if err != nil { + return false, fmt.Errorf("could not list partitions: %w", err) + } + + currentRange, err := p.getGlobalRange(foundPartitions) + if err != nil { + return false, fmt.Errorf("could not evaluate existing ranges: %w", err) + } + + p.logger.Info("Current ", "c_range", currentRange.String()) + + expectedPartitions, err := getExpectedPartitions(config, p.workDate) + if err != nil { + return false, fmt.Errorf("could not generate expected partitions: %w", err) + } + + expectedRange, err := p.getGlobalRange(expectedPartitions) + if err != nil { + return false, fmt.Errorf("could not evaluate ranges to create: %w", err) + } + + p.logger.Info("Expected", "e_range", expectedRange) + + if expectedRange.IsEqual(currentRange) { + return false, nil // nothing to do on this partition set + } + + orchestrator := p.newHookOrchestrator(name, config, metrics) 
+ + for _, part := range foundPartitions { + if !isOutsideExpectedRange(part, expectedRange) { + continue } - p.logger.Info("Expected", "e_range", expectedRange) + p.logger.Info("No intersection", "remove-range", partition_pkg.Bounds(part.LowerBound, part.UpperBound)) - if expectedRange.IsEqual(currentRange) { - continue // nothing to do on this partition set + if p.removePartition(name, config, part, orchestrator, state) == outcomeAbort { + return true, nil } + } - // Each partition whose bounds are entirely outside of expectedRange can be removed + return false, nil +} - for _, part := range foundPartitions { - if !part.UpperBound.After(expectedRange.LowerBound) || !part.LowerBound.Before(expectedRange.UpperBound) { - p.logger.Info("No intersection", "remove-range", partition_pkg.Bounds(part.LowerBound, part.UpperBound)) +// removePartition runs the full detach/drop lifecycle (with surrounding hooks) for a single +// partition. Hooks run outside any PostgreSQL transaction. +func (p PPM) removePartition(name string, config partition_pkg.Configuration, part partition_pkg.Partition, orchestrator *hook.Orchestrator, state *cleanupState) partitionOutcome { + partCtx := p.buildPartitionContext(name, config, part) - err := p.DetachPartition(part) - if err != nil { - partitionContainAnError = true + if o := p.runHook(func() error { return orchestrator.ExecuteBeforeDetach(p.ctx, partCtx) }, &state.hookFailure, part, "Before-detach hook failed, skipping detach"); o != outcomeCompleted { + return o + } - p.logger.Error("Failed to detach partition", "schema", part.Schema, "table", part.Name, "error", err) + if !p.performDetach(part, state) { + return outcomeSkipped + } - continue - } + if o := p.runHook(func() error { return orchestrator.ExecuteAfterDetach(p.ctx, partCtx) }, &state.hookFailure, part, "After-detach hook failed, skipping drop"); o != outcomeCompleted { + return o + } - p.logger.Info("Partition detached", "schema", part.Schema, "table", part.Name, 
"parent_table", part.ParentTable) + // Drop-related operations only when cleanup policy is drop + if config.CleanupPolicy != partition_pkg.Drop { + return outcomeCompleted + } - if config.CleanupPolicy == partition_pkg.Drop { - err := p.DeletePartition(part) - if err != nil { - partitionContainAnError = true + if o := p.runHook(func() error { return orchestrator.ExecuteBeforeDrop(p.ctx, partCtx) }, &state.hookFailure, part, "Before-drop hook failed, skipping drop"); o != outcomeCompleted { + return o + } - p.logger.Error("Failed to delete partition", "schema", part.Schema, "table", part.Name, "error", err) + if !p.performDrop(part, state) { + return outcomeSkipped + } - continue - } + // After-drop failure: log warning, operation already done + if o := p.runHook(func() error { return orchestrator.ExecuteAfterDrop(p.ctx, partCtx) }, &state.hookFailure, part, "After-drop hook failed"); o == outcomeAbort { + return outcomeAbort + } - p.logger.Info("Partition deleted", "schema", part.Schema, "table", part.Name, "parent_table", part.ParentTable) - } - } - } + return outcomeCompleted +} + +// performDetach detaches the partition, or logs the intended action in dry-run mode. +// It returns false if the detach failed and the partition should be skipped. 
+func (p PPM) performDetach(part partition_pkg.Partition, state *cleanupState) bool { + if p.dryRun { + p.logger.Info("[DRY-RUN] Would detach partition", "schema", part.Schema, "table", part.Name, "parent_table", part.ParentTable) + + return true } - if partitionContainAnError { - return ErrPartitionCleanupFailed + if err := p.DetachPartition(part); err != nil { + state.partitionError = true + + p.logger.Error("Failed to detach partition", "schema", part.Schema, "table", part.Name, "error", err) + + return false } - p.logger.Info("All partitions are cleaned") + p.logger.Info("Partition detached", "schema", part.Schema, "table", part.Name, "parent_table", part.ParentTable) - return nil + return true +} + +// performDrop drops the partition, or logs the intended action in dry-run mode. +// It returns false if the drop failed and the partition should be skipped. +func (p PPM) performDrop(part partition_pkg.Partition, state *cleanupState) bool { + if p.dryRun { + p.logger.Info("[DRY-RUN] Would drop partition", "schema", part.Schema, "table", part.Name, "parent_table", part.ParentTable) + + return true + } + + if err := p.DeletePartition(part); err != nil { + state.partitionError = true + + p.logger.Error("Failed to delete partition", "schema", part.Schema, "table", part.Name, "error", err) + + return false + } + + p.logger.Info("Partition deleted", "schema", part.Schema, "table", part.Name, "parent_table", part.ParentTable) + + return true +} + +// isOutsideExpectedRange reports whether the partition's bounds fall entirely outside the +// expected retention range, meaning the partition can be removed. 
+func isOutsideExpectedRange(part partition_pkg.Partition, expectedRange partition_pkg.PartitionRange) bool { + return !part.UpperBound.After(expectedRange.LowerBound) || !part.LowerBound.Before(expectedRange.UpperBound) } func (p PPM) DetachPartition(partition partition_pkg.Partition) error { diff --git a/pkg/ppm/cleanup_dryrun_test.go b/pkg/ppm/cleanup_dryrun_test.go new file mode 100644 index 0000000..62f98b5 --- /dev/null +++ b/pkg/ppm/cleanup_dryrun_test.go @@ -0,0 +1,280 @@ +package ppm_test + +import ( + "context" + "testing" + "time" + + "github.com/qonto/postgresql-partition-manager/internal/infra/hook" + "github.com/qonto/postgresql-partition-manager/internal/infra/partition" + "github.com/qonto/postgresql-partition-manager/pkg/ppm" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// --- Cleanup dry-run integration tests --- +// Validates: Requirements 17.2, 17.3, 17.4, 17.5 + +// TestCleanupDryRun_PartitionsNotModified verifies that with dryRun=true, +// DetachPartitionConcurrently and DropTable are never called. 
+// Validates: Requirement 17.4 +func TestCleanupDryRun_PartitionsNotModified(t *testing.T) { + config := partition.Configuration{ + Schema: "public", + Table: "my_table", + PartitionKey: "created_at", + Interval: "daily", + Retention: 1, + PreProvisioned: 1, + CleanupPolicy: partition.Drop, + } + + partitions := map[string]partition.Configuration{ + "unittest": config, + } + + logger, postgreSQLMock := setupMocks(t) + + // Generate partitions: dayBeforeYesterday should be cleaned in normal mode + dayBeforeYesterdayPartition, _ := config.GeneratePartition(dayBeforeYesterday) + yesterdayPartition, _ := config.GeneratePartition(yesterday) + currentPartition, _ := config.GeneratePartition(today) + tomorrowPartition, _ := config.GeneratePartition(tomorrow) + + existingPartitions := []partition.Partition{ + dayBeforeYesterdayPartition, + yesterdayPartition, + currentPartition, + tomorrowPartition, + } + + postgreSQLMock.On("ListPartitions", config.Schema, config.Table). + Return(partitionResultToPartition(t, existingPartitions), nil).Once() + + // Create PPM with dryRun=true + checker := ppm.New(context.TODO(), *logger, postgreSQLMock, partitions, time.Now(), "", nil, true) + err := checker.CleanupPartitions() + + // Should succeed without error + assert.Nil(t, err, "CleanupPartitions in dry-run mode should succeed") + postgreSQLMock.AssertExpectations(t) + + // Verify no partition modifications occurred + postgreSQLMock.AssertNotCalled(t, "DetachPartitionConcurrently") + postgreSQLMock.AssertNotCalled(t, "DropTable") +} + +// TestCleanupDryRun_WithHooksNoExecution verifies that the full cleanup flow +// works correctly in dry-run mode with hooks configured (no errors, no execution). 
+// Validates: Requirements 17.3, 17.5 +func TestCleanupDryRun_WithHooksNoExecution(t *testing.T) { + globalHooks := &hook.HooksConfig{ + BeforeDetach: []hook.HookEntry{ + { + Name: "backup-before-detach", + Type: hook.ShellType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Config: map[string]interface{}{ + "command": "/usr/bin/backup", + "args": []interface{}{"--schema", "{{.Schema}}", "--table", "{{.Table}}"}, + "propagate-credentials": true, + }, + Retry: hook.RetryConfig{Attempts: 0, Backoff: hook.BackoffFixed, InitialDelay: hook.DefaultInitialDelay, MaxDelay: hook.DefaultMaxDelay}, + }, + }, + AfterDetach: []hook.HookEntry{ + { + Name: "vacuum-after-detach", + Type: hook.PostgreSQLType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Config: map[string]interface{}{ + "sql_query": "VACUUM ANALYZE {{.Schema}}.{{.Table}}", + }, + Retry: hook.RetryConfig{Attempts: 0, Backoff: hook.BackoffFixed, InitialDelay: hook.DefaultInitialDelay, MaxDelay: hook.DefaultMaxDelay}, + }, + }, + BeforeDrop: []hook.HookEntry{ + { + Name: "notify-before-drop", + Type: hook.ShellType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Config: map[string]interface{}{ + "command": "/usr/bin/notify", + "args": []interface{}{"--partition", "{{.PartitionName}}"}, + }, + Retry: hook.RetryConfig{Attempts: 0, Backoff: hook.BackoffFixed, InitialDelay: hook.DefaultInitialDelay, MaxDelay: hook.DefaultMaxDelay}, + }, + }, + AfterDrop: []hook.HookEntry{ + { + Name: "cleanup-after-drop", + Type: hook.ShellType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Config: map[string]interface{}{ + "command": "/usr/bin/cleanup-metadata", + }, + Retry: hook.RetryConfig{Attempts: 0, Backoff: hook.BackoffFixed, InitialDelay: hook.DefaultInitialDelay, MaxDelay: hook.DefaultMaxDelay}, + }, + }, + } + + config := partition.Configuration{ + Schema: "public", + Table: "my_table", + PartitionKey: "created_at", + Interval: "daily", + Retention: 1, + PreProvisioned: 1, + 
CleanupPolicy: partition.Drop, + } + + partitions := map[string]partition.Configuration{ + "unittest": config, + } + + logger, postgreSQLMock := setupMocks(t) + + dayBeforeYesterdayPartition, _ := config.GeneratePartition(dayBeforeYesterday) + yesterdayPartition, _ := config.GeneratePartition(yesterday) + currentPartition, _ := config.GeneratePartition(today) + tomorrowPartition, _ := config.GeneratePartition(tomorrow) + + existingPartitions := []partition.Partition{ + dayBeforeYesterdayPartition, + yesterdayPartition, + currentPartition, + tomorrowPartition, + } + + postgreSQLMock.On("ListPartitions", config.Schema, config.Table). + Return(partitionResultToPartition(t, existingPartitions), nil).Once() + + connURL := "postgresql://user:pass@localhost:5432/mydb" + checker := ppm.New(context.TODO(), *logger, postgreSQLMock, partitions, time.Now(), connURL, globalHooks, true) + err := checker.CleanupPartitions() + + // Should succeed without error (all templates resolve correctly) + assert.Nil(t, err, "CleanupPartitions in dry-run mode with hooks should succeed") + postgreSQLMock.AssertExpectations(t) + + // Verify no partition modifications occurred + postgreSQLMock.AssertNotCalled(t, "DetachPartitionConcurrently") + postgreSQLMock.AssertNotCalled(t, "DropTable") +} + +// TestCleanupDryRun_ExitsZeroWithValidConfig verifies that CleanupPartitions returns nil +// when dry-run is enabled and the configuration is valid. 
+// Validates: Requirement 17.3 +func TestCleanupDryRun_ExitsZeroWithValidConfig(t *testing.T) { + config := partition.Configuration{ + Schema: "public", + Table: "my_table", + PartitionKey: "created_at", + Interval: "daily", + Retention: 1, + PreProvisioned: 1, + CleanupPolicy: partition.Detach, + } + + partitions := map[string]partition.Configuration{ + "unittest": config, + } + + logger, postgreSQLMock := setupMocks(t) + + dayBeforeYesterdayPartition, _ := config.GeneratePartition(dayBeforeYesterday) + yesterdayPartition, _ := config.GeneratePartition(yesterday) + currentPartition, _ := config.GeneratePartition(today) + tomorrowPartition, _ := config.GeneratePartition(tomorrow) + + existingPartitions := []partition.Partition{ + dayBeforeYesterdayPartition, + yesterdayPartition, + currentPartition, + tomorrowPartition, + } + + postgreSQLMock.On("ListPartitions", config.Schema, config.Table). + Return(partitionResultToPartition(t, existingPartitions), nil).Once() + + checker := ppm.New(context.TODO(), *logger, postgreSQLMock, partitions, time.Now(), "", nil, true) + err := checker.CleanupPartitions() + + // Must return nil (exit code 0) + require.NoError(t, err, "CleanupPartitions in dry-run should return nil for valid config") + postgreSQLMock.AssertExpectations(t) + + // Verify no detach occurred (detach policy, but dry-run skips it) + postgreSQLMock.AssertNotCalled(t, "DetachPartitionConcurrently") +} + +// TestCleanupDryRun_TemplateErrorReturnsError verifies that when hooks have undefined +// template variables, dry-run still returns an error (non-zero exit). 
+// Validates: Requirement 17.2 (template errors are still reported) +func TestCleanupDryRun_TemplateErrorReturnsError(t *testing.T) { + globalHooks := &hook.HooksConfig{ + BeforeDetach: []hook.HookEntry{ + { + Name: "bad-template-hook", + Type: hook.ShellType, + Enabled: boolPtr(true), + Timeout: 30 * time.Second, + Config: map[string]interface{}{ + "command": "echo {{.UndefinedVariable}}", + }, + Retry: hook.RetryConfig{Attempts: 0, Backoff: hook.BackoffFixed, InitialDelay: hook.DefaultInitialDelay, MaxDelay: hook.DefaultMaxDelay}, + }, + }, + } + + config := partition.Configuration{ + Schema: "public", + Table: "my_table", + PartitionKey: "created_at", + Interval: "daily", + Retention: 1, + PreProvisioned: 1, + CleanupPolicy: partition.Drop, + } + + partitions := map[string]partition.Configuration{ + "unittest": config, + } + + logger, postgreSQLMock := setupMocks(t) + + dayBeforeYesterdayPartition, _ := config.GeneratePartition(dayBeforeYesterday) + yesterdayPartition, _ := config.GeneratePartition(yesterday) + currentPartition, _ := config.GeneratePartition(today) + tomorrowPartition, _ := config.GeneratePartition(tomorrow) + + existingPartitions := []partition.Partition{ + dayBeforeYesterdayPartition, + yesterdayPartition, + currentPartition, + tomorrowPartition, + } + + postgreSQLMock.On("ListPartitions", config.Schema, config.Table). 
+ Return(partitionResultToPartition(t, existingPartitions), nil).Once() + + checker := ppm.New(context.TODO(), *logger, postgreSQLMock, partitions, time.Now(), "", globalHooks, true) + err := checker.CleanupPartitions() + + // Should return an error due to undefined template variable + require.Error(t, err, "CleanupPartitions in dry-run should return error for undefined template variables") + postgreSQLMock.AssertExpectations(t) + + // Verify no partition modifications occurred + postgreSQLMock.AssertNotCalled(t, "DetachPartitionConcurrently") + postgreSQLMock.AssertNotCalled(t, "DropTable") +} + +// boolPtr is a helper to create a pointer to a bool value. +func boolPtr(b bool) *bool { + return &b +} diff --git a/pkg/ppm/cleanup_hooks_test.go b/pkg/ppm/cleanup_hooks_test.go new file mode 100644 index 0000000..9b98145 --- /dev/null +++ b/pkg/ppm/cleanup_hooks_test.go @@ -0,0 +1,318 @@ +package ppm_test + +import ( + "context" + "errors" + "testing" + "time" + + "github.com/qonto/postgresql-partition-manager/internal/infra/hook" + "github.com/qonto/postgresql-partition-manager/internal/infra/partition" + "github.com/qonto/postgresql-partition-manager/pkg/ppm" + "github.com/stretchr/testify/assert" +) + +// hookPartitionConfig returns a partition configuration with the given cleanup policy and hooks. +func hookPartitionConfig(cleanupPolicy partition.CleanupPolicy, hooks *hook.HooksConfig) partition.Configuration { + return partition.Configuration{ + Schema: "public", + Table: "my_table", + PartitionKey: "created_at", + Interval: "daily", + Retention: 1, + PreProvisioned: 1, + CleanupPolicy: cleanupPolicy, + Hooks: hooks, + } +} + +// failingShellHook returns a HookEntry with a shell command that always fails. 
+func failingShellHook(name string, onFailure hook.OnFailure) hook.HookEntry { + return hook.HookEntry{ + Name: name, + Type: hook.ShellType, + Timeout: 30 * time.Second, + OnFailure: onFailure, + Config: map[string]interface{}{ + "command": "/usr/bin/false", + }, + } +} + +// succeedingShellHook returns a HookEntry with a shell command that always succeeds. +func succeedingShellHook(name string) hook.HookEntry { + return hook.HookEntry{ + Name: name, + Type: hook.ShellType, + Timeout: 30 * time.Second, + Config: map[string]interface{}{ + "command": "/usr/bin/true", + }, + } +} + +// TestCleanupWithBeforeDetachHookFailure verifies that when a before-detach hook fails, +// the detach operation is skipped for that partition but the process continues. +// Requirements: 6.1 +func TestCleanupWithBeforeDetachHookFailure(t *testing.T) { + config := hookPartitionConfig(partition.Drop, &hook.HooksConfig{ + BeforeDetach: []hook.HookEntry{ + failingShellHook("fail-before-detach", ""), + }, + }) + + partitions := map[string]partition.Configuration{ + "unittest": config, + } + + logger, postgreSQLMock := setupMocks(t) + + // Generate partitions: dayBeforeYesterday should be cleaned + dayBeforeYesterdayPartition, _ := config.GeneratePartition(dayBeforeYesterday) + yesterdayPartition, _ := config.GeneratePartition(yesterday) + currentPartition, _ := config.GeneratePartition(today) + tomorrowPartition, _ := config.GeneratePartition(tomorrow) + + existingPartitions := []partition.Partition{ + dayBeforeYesterdayPartition, + yesterdayPartition, + currentPartition, + tomorrowPartition, + } + + postgreSQLMock.On("ListPartitions", config.Schema, config.Table). 
+ Return(partitionResultToPartition(t, existingPartitions), nil).Once() + + // DetachPartitionConcurrently should NOT be called because before-detach hook fails + // DropTable should NOT be called either + + checker := ppm.New(context.TODO(), *logger, postgreSQLMock, partitions, time.Now(), "", nil, false) + err := checker.CleanupPartitions() + + // Should return error because hook failure occurred + assert.NotNil(t, err, "CleanupPartitions should report an error due to hook failure") + assert.True(t, errors.Is(err, ppm.ErrPartitionCleanupFailed), "Error should be ErrPartitionCleanupFailed") + postgreSQLMock.AssertExpectations(t) + + // Verify DetachPartitionConcurrently was never called (detach skipped) + postgreSQLMock.AssertNotCalled(t, "DetachPartitionConcurrently") + postgreSQLMock.AssertNotCalled(t, "DropTable") +} + +// TestCleanupWithAfterDetachHookFailure verifies that when an after-detach hook fails, +// the drop operation is skipped for that partition. +// Requirements: 6.5 +func TestCleanupWithAfterDetachHookFailure(t *testing.T) { + config := hookPartitionConfig(partition.Drop, &hook.HooksConfig{ + AfterDetach: []hook.HookEntry{ + failingShellHook("fail-after-detach", ""), + }, + }) + + partitions := map[string]partition.Configuration{ + "unittest": config, + } + + logger, postgreSQLMock := setupMocks(t) + + dayBeforeYesterdayPartition, _ := config.GeneratePartition(dayBeforeYesterday) + yesterdayPartition, _ := config.GeneratePartition(yesterday) + currentPartition, _ := config.GeneratePartition(today) + tomorrowPartition, _ := config.GeneratePartition(tomorrow) + + existingPartitions := []partition.Partition{ + dayBeforeYesterdayPartition, + yesterdayPartition, + currentPartition, + tomorrowPartition, + } + + postgreSQLMock.On("ListPartitions", config.Schema, config.Table). 
+ Return(partitionResultToPartition(t, existingPartitions), nil).Once() + + // Detach should succeed + postgreSQLMock.On("DetachPartitionConcurrently", + dayBeforeYesterdayPartition.Schema, + dayBeforeYesterdayPartition.Name, + dayBeforeYesterdayPartition.ParentTable, + ).Return(nil).Once() + + // DropTable should NOT be called because after-detach hook fails + + checker := ppm.New(context.TODO(), *logger, postgreSQLMock, partitions, time.Now(), "", nil, false) + err := checker.CleanupPartitions() + + // Should return error because hook failure occurred + assert.NotNil(t, err, "CleanupPartitions should report an error due to hook failure") + assert.True(t, errors.Is(err, ppm.ErrPartitionCleanupFailed), "Error should be ErrPartitionCleanupFailed") + postgreSQLMock.AssertExpectations(t) + + // Verify DropTable was never called (drop skipped due to after-detach failure) + postgreSQLMock.AssertNotCalled(t, "DropTable") +} + +// TestCleanupWithOnFailureAbort verifies that when a hook with on_failure=abort fails, +// the entire cleanup process stops immediately. +// Requirements: 6.8 +func TestCleanupWithOnFailureAbort(t *testing.T) { + config := hookPartitionConfig(partition.Drop, &hook.HooksConfig{ + BeforeDetach: []hook.HookEntry{ + failingShellHook("abort-hook", hook.OnFailureAbort), + }, + }) + + partitions := map[string]partition.Configuration{ + "unittest": config, + } + + logger, postgreSQLMock := setupMocks(t) + + dayBeforeYesterdayPartition, _ := config.GeneratePartition(dayBeforeYesterday) + yesterdayPartition, _ := config.GeneratePartition(yesterday) + currentPartition, _ := config.GeneratePartition(today) + tomorrowPartition, _ := config.GeneratePartition(tomorrow) + + existingPartitions := []partition.Partition{ + dayBeforeYesterdayPartition, + yesterdayPartition, + currentPartition, + tomorrowPartition, + } + + postgreSQLMock.On("ListPartitions", config.Schema, config.Table). 
+ Return(partitionResultToPartition(t, existingPartitions), nil).Once() + + // No detach or drop operations should be attempted + + checker := ppm.New(context.TODO(), *logger, postgreSQLMock, partitions, time.Now(), "", nil, false) + err := checker.CleanupPartitions() + + // Should return error containing ErrAbort + assert.NotNil(t, err, "CleanupPartitions should report an error due to abort") + assert.True(t, errors.Is(err, ppm.ErrPartitionCleanupFailed), "Error should be ErrPartitionCleanupFailed") + assert.True(t, errors.Is(err, hook.ErrAbort), "Error should contain ErrAbort") + postgreSQLMock.AssertExpectations(t) + + // Verify no partition operations were attempted + postgreSQLMock.AssertNotCalled(t, "DetachPartitionConcurrently") + postgreSQLMock.AssertNotCalled(t, "DropTable") +} + +// TestCleanupWithOnFailureContinue verifies that when a before-hook with on_failure=continue +// fails, the operation still proceeds. +// Requirements: 6.9 +func TestCleanupWithOnFailureContinue(t *testing.T) { + config := hookPartitionConfig(partition.Drop, &hook.HooksConfig{ + BeforeDetach: []hook.HookEntry{ + failingShellHook("continue-hook", hook.OnFailureContinue), + }, + }) + + partitions := map[string]partition.Configuration{ + "unittest": config, + } + + logger, postgreSQLMock := setupMocks(t) + + dayBeforeYesterdayPartition, _ := config.GeneratePartition(dayBeforeYesterday) + yesterdayPartition, _ := config.GeneratePartition(yesterday) + currentPartition, _ := config.GeneratePartition(today) + tomorrowPartition, _ := config.GeneratePartition(tomorrow) + + existingPartitions := []partition.Partition{ + dayBeforeYesterdayPartition, + yesterdayPartition, + currentPartition, + tomorrowPartition, + } + + postgreSQLMock.On("ListPartitions", config.Schema, config.Table). 
+ Return(partitionResultToPartition(t, existingPartitions), nil).Once() + + // Detach should still happen because on_failure=continue + postgreSQLMock.On("DetachPartitionConcurrently", + dayBeforeYesterdayPartition.Schema, + dayBeforeYesterdayPartition.Name, + dayBeforeYesterdayPartition.ParentTable, + ).Return(nil).Once() + + // Drop should also happen since detach succeeded and no after-detach hook failure + postgreSQLMock.On("DropTable", + dayBeforeYesterdayPartition.Schema, + dayBeforeYesterdayPartition.Name, + ).Return(nil).Once() + + checker := ppm.New(context.TODO(), *logger, postgreSQLMock, partitions, time.Now(), "", nil, false) + err := checker.CleanupPartitions() + + // With on_failure=continue a failing before-detach hook must not block the partition: + // the detach and drop expectations registered above must still be met, and + // CleanupPartitions must return nil instead of ErrPartitionCleanupFailed. + // NOTE(review): per the original author, the orchestrator's handleFailure returns nil + // for OnFailureContinue, so hookErr is nil in CleanupPartitions and hookFailureOccurred + // is not set; confirm against the orchestrator implementation. + assert.Nil(t, err, "CleanupPartitions should succeed when on_failure=continue") + postgreSQLMock.AssertExpectations(t) +} + +// TestCleanupWithDetachPolicy verifies that when cleanup policy is "detach", +// drop-related hooks (before-drop, after-drop) are never executed. 
+// Requirements: 11.4 +func TestCleanupWithDetachPolicy(t *testing.T) { + config := hookPartitionConfig(partition.Detach, &hook.HooksConfig{ + BeforeDetach: []hook.HookEntry{ + succeedingShellHook("pre-detach"), + }, + AfterDetach: []hook.HookEntry{ + succeedingShellHook("post-detach"), + }, + BeforeDrop: []hook.HookEntry{ + // This hook should cause test failure if it were actually executed + // (since it uses /usr/bin/false), but it should be ignored with detach policy + failingShellHook("should-not-run-before-drop", ""), + }, + AfterDrop: []hook.HookEntry{ + failingShellHook("should-not-run-after-drop", ""), + }, + }) + + partitions := map[string]partition.Configuration{ + "unittest": config, + } + + logger, postgreSQLMock := setupMocks(t) + + dayBeforeYesterdayPartition, _ := config.GeneratePartition(dayBeforeYesterday) + yesterdayPartition, _ := config.GeneratePartition(yesterday) + currentPartition, _ := config.GeneratePartition(today) + tomorrowPartition, _ := config.GeneratePartition(tomorrow) + + existingPartitions := []partition.Partition{ + dayBeforeYesterdayPartition, + yesterdayPartition, + currentPartition, + tomorrowPartition, + } + + postgreSQLMock.On("ListPartitions", config.Schema, config.Table). 
+ Return(partitionResultToPartition(t, existingPartitions), nil).Once() + + // Detach should be called (detach policy still detaches) + postgreSQLMock.On("DetachPartitionConcurrently", + dayBeforeYesterdayPartition.Schema, + dayBeforeYesterdayPartition.Name, + dayBeforeYesterdayPartition.ParentTable, + ).Return(nil).Once() + + // DropTable should NOT be called (detach policy, no drop) + + checker := ppm.New(context.TODO(), *logger, postgreSQLMock, partitions, time.Now(), "", nil, false) + err := checker.CleanupPartitions() + + // Should succeed: before-detach and after-detach hooks succeed, drop hooks are ignored + assert.Nil(t, err, "CleanupPartitions should succeed with detach policy") + postgreSQLMock.AssertExpectations(t) + + // Verify drop was never attempted + postgreSQLMock.AssertNotCalled(t, "DropTable") +} diff --git a/pkg/ppm/cleanup_test.go b/pkg/ppm/cleanup_test.go index fc863a1..6d76c11 100644 --- a/pkg/ppm/cleanup_test.go +++ b/pkg/ppm/cleanup_test.go @@ -85,7 +85,7 @@ func TestCleanupPartitions(t *testing.T) { } } - checker := ppm.New(context.TODO(), *logger, postgreSQLMock, tc.partitions, time.Now()) + checker := ppm.New(context.TODO(), *logger, postgreSQLMock, tc.partitions, time.Now(), "", nil, false) err := checker.CleanupPartitions() assert.Nil(t, err, "CleanupPartitions should succeed") @@ -157,7 +157,7 @@ func TestCleanupPartitionsFailover(t *testing.T) { postgreSQLMock.On("DetachPartitionConcurrently", successPartition.Schema, successPartition.Name, successPartition.ParentTable).Return(nil).Once() postgreSQLMock.On("DropTable", successPartition.Schema, successPartition.Name).Return(nil).Once() - checker := ppm.New(context.TODO(), *logger, postgreSQLMock, configuration, time.Now()) + checker := ppm.New(context.TODO(), *logger, postgreSQLMock, configuration, time.Now(), "", nil, false) err := checker.CleanupPartitions() assert.NotNil(t, err, "CleanupPartitions should report an error") diff --git a/pkg/ppm/hooks.go b/pkg/ppm/hooks.go new file 
mode 100644 index 0000000..a9ddf81 --- /dev/null +++ b/pkg/ppm/hooks.go @@ -0,0 +1,94 @@ +package ppm + +import ( + "errors" + "strconv" + + "github.com/qonto/postgresql-partition-manager/internal/infra/hook" + partition_pkg "github.com/qonto/postgresql-partition-manager/internal/infra/partition" +) + +// partitionOutcome describes the result of running a hook around a partition operation. +type partitionOutcome int + +const ( + // outcomeCompleted means the hook succeeded and the operation may proceed. + outcomeCompleted partitionOutcome = iota + // outcomeSkipped means the remaining steps for this partition were skipped, but the run continues. + outcomeSkipped + // outcomeAbort means a hook requested that the entire process stop immediately. + outcomeAbort +) + +// isHookAbort reports whether the hook error signals an abort of the entire process +// (on_failure=abort). It logs the abort event so callers only need to act on the result. +func (p PPM) isHookAbort(hookErr error) bool { + if errors.Is(hookErr, hook.ErrAbort) { + p.logger.Error("Hook abort triggered, stopping entire run", "error", hookErr) + + return true + } + + return false +} + +// runHook executes a lifecycle hook and classifies the result against the on_failure policy. +// On failure it flips hookFailure and returns outcomeAbort (stop everything) or, for a +// non-abort failure, logs skipMsg and returns outcomeSkipped (skip remaining steps). +func (p PPM) runHook(exec func() error, hookFailure *bool, part partition_pkg.Partition, skipMsg string) partitionOutcome { + hookErr := exec() + if hookErr == nil { + return outcomeCompleted + } + + *hookFailure = true + + if p.isHookAbort(hookErr) { + return outcomeAbort + } + + p.logger.Warn(skipMsg, "partition", part.Name, "error", hookErr) + + return outcomeSkipped +} + +// newHookOrchestrator builds the hook orchestrator for a partition set, resolving global vs +// partition-level hooks and wiring the runners/executor (or a no-op dry-run orchestrator). 
+func (p PPM) newHookOrchestrator(name string, config partition_pkg.Configuration, metrics *hook.MetricsCollector) *hook.Orchestrator { + resolvedHooks := hook.Resolve(name, p.globalHooks, config.Hooks) + if resolvedHooks != nil { + resolvedHooks.ApplyDefaults() + } + + if p.dryRun { + return hook.NewDryRunOrchestrator(resolvedHooks, metrics, p.logger, p.connectionURL) + } + + executor := hook.NewExecutor(hook.NewRegistryRunner(p.logger), p.logger) + + return hook.NewOrchestrator(resolvedHooks, executor, metrics, p.logger, p.connectionURL) +} + +// buildPartitionContext assembles the template context exposed to hooks for a partition. +func (p PPM) buildPartitionContext(name string, config partition_pkg.Configuration, part partition_pkg.Partition) hook.PartitionContext { + partCtx := hook.PartitionContext{ + Schema: part.Schema, + Table: part.Name, + ParentTable: part.ParentTable, + LowerBound: part.LowerBound.Format("2006-01-02"), + UpperBound: part.UpperBound.Format("2006-01-02"), + PartitionName: name, + Retention: strconv.Itoa(config.Retention), + Interval: string(config.Interval), + } + + // Best-effort: populate connection metadata when a connection URL is set; extraction errors are ignored, leaving DatabaseName and Hostname unset + if p.connectionURL != "" { + if creds, credErr := hook.ExtractCredentials(p.connectionURL); credErr == nil { + partCtx.DatabaseName = creds["PGDATABASE"] + partCtx.Hostname = creds["PGHOST"] + } + } + + return partCtx +} diff --git a/pkg/ppm/ppm.go b/pkg/ppm/ppm.go index b87d217..af7ebfe 100644 --- a/pkg/ppm/ppm.go +++ b/pkg/ppm/ppm.go @@ -7,6 +7,7 @@ import ( "log/slog" "time" + "github.com/qonto/postgresql-partition-manager/internal/infra/hook" "github.com/qonto/postgresql-partition-manager/internal/infra/partition" "github.com/qonto/postgresql-partition-manager/internal/infra/postgresql" ) @@ -28,20 +29,26 @@ type PostgreSQLClient interface { } type PPM struct { - ctx context.Context - db PostgreSQLClient - partitions 
map[string]partition.Configuration - logger slog.Logger - workDate time.Time + ctx 
context.Context + db PostgreSQLClient + partitions map[string]partition.Configuration + logger slog.Logger + workDate time.Time + connectionURL string + globalHooks *hook.HooksConfig + dryRun bool } -func New(context context.Context, logger slog.Logger, db PostgreSQLClient, partitions map[string]partition.Configuration, workDate time.Time) *PPM { +func New(context context.Context, logger slog.Logger, db PostgreSQLClient, partitions map[string]partition.Configuration, workDate time.Time, connectionURL string, globalHooks *hook.HooksConfig, dryRun bool) *PPM { return &PPM{ - partitions: partitions, - ctx: context, - db: db, - logger: logger, - workDate: workDate, + partitions: partitions, + ctx: context, + db: db, + logger: logger, + workDate: workDate, + connectionURL: connectionURL, + globalHooks: globalHooks, + dryRun: dryRun, } }