Skip to content

Commit 6443da3

Browse files
committed
db/import: Consolidate PostgreSQL memory config with dynamic scaling
- Add session-level import optimization function to migration (admin.set_optimal_import_session_settings) that boosts work_mem and maintenance_work_mem during batch imports
- Consolidate all PostgreSQL memory settings into single source of truth:
  - DB_MEM_LIMIT in .env.config drives all derived values
  - CLI calculates: shared_buffers (25%), effective_cache_size (75%), maintenance_work_mem (25%), work_mem (1%), temp_buffers (12.5%), wal_buffers (1.5%)
  - start-postgres.sh applies settings via command-line flags
- Fix temp_buffers runtime error: move from session-level SET to server startup (cannot be changed after temp tables are accessed)
- Add wal_buffers to dynamic scaling (was hardcoded at 64MB)
- Remove obsolete hotpatch files (now integrated into migrations)
- Delete redundant migrations that were created with future timestamps
1 parent 6d76c73 commit 6443da3

13 files changed

+127
-464
lines changed

cli/src/manage.cr

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -192,14 +192,18 @@ module Statbus
192192
db_mem_limit : String
193193

194194
# Type-safe derived memory configuration structure
195+
# All memory settings are derived from DB_MEM_LIMIT for consistent scaling.
196+
# See tmp/db-memory-todo.md for tuning rationale.
195197
record DbMemoryEnv,
196198
db_shm_size : String,
197199
db_mem_limit : String,
198200
db_mem_reservation : String,
199201
db_shared_buffers : String,
200202
db_maintenance_work_mem : String,
201203
db_effective_cache_size : String,
202-
db_work_mem : String
204+
db_work_mem : String,
205+
db_temp_buffers : String,
206+
db_wal_buffers : String
203207

204208
# Configuration values that are derived from other settings
205209
record DerivedEnv,
@@ -396,12 +400,28 @@ module Statbus
396400
end
397401

398402
# Calculate derived memory values based on DB_MEM_LIMIT
399-
# See tmp/db-memory-todo.md for rationale.
403+
# All PostgreSQL memory settings scale from this single source of truth.
404+
# See tmp/db-memory-todo.md for detailed rationale.
405+
#
406+
# Memory allocation strategy (for 4GB total):
407+
# shared_buffers: 25% = 1GB (main buffer cache)
408+
# effective_cache_size: 75% = 3GB (planner hint for OS cache)
409+
# maintenance_work_mem: 25% = 1GB (VACUUM, CREATE INDEX, etc.)
410+
# work_mem: 1% = 40MB (per-operation sorts/hashes, multiplied by queries)
411+
# temp_buffers: 12.5% = 512MB (temporary tables, min 256MB for imports)
412+
# wal_buffers: ~1.5% = ~61MB (WAL write buffering, min 16MB, max 256MB)
413+
#
400414
mem_limit_mb = parse_mem_size_to_mb(config.db_mem_limit)
401415
shared_buffers_mb = (mem_limit_mb * 0.25).to_i64
402416
maintenance_work_mem_mb = (mem_limit_mb * 0.25).to_i64
403417
effective_cache_size_mb = (mem_limit_mb * 0.75).to_i64
404418
work_mem_mb = Math.max(4_i64, mem_limit_mb // 100) # Min 4MB for safety
419+
# temp_buffers: ~12.5% of memory, min 256MB for import temp tables
420+
# Must be set at server startup (can't be changed after temp tables are accessed)
421+
temp_buffers_mb = Math.max(256_i64, mem_limit_mb // 8)
422+
# wal_buffers: ~1.5% of memory, clamped between 16MB and 256MB
423+
# Larger values reduce disk I/O by buffering more WAL before write
424+
wal_buffers_mb = Math.min(256_i64, Math.max(16_i64, (mem_limit_mb * 0.015).to_i64))
405425
reservation_mb = (mem_limit_mb // 2).to_i64
406426

407427
db_mem = DbMemoryEnv.new(
@@ -411,7 +431,9 @@ module Statbus
411431
db_shared_buffers: format_mb_for_pg(shared_buffers_mb),
412432
db_maintenance_work_mem: format_mb_for_pg(maintenance_work_mem_mb),
413433
db_effective_cache_size: format_mb_for_pg(effective_cache_size_mb),
414-
db_work_mem: format_mb_for_pg(work_mem_mb)
434+
db_work_mem: format_mb_for_pg(work_mem_mb),
435+
db_temp_buffers: format_mb_for_pg(temp_buffers_mb),
436+
db_wal_buffers: format_mb_for_pg(wal_buffers_mb)
415437
)
416438

417439
# Calculate derived values
@@ -606,14 +628,16 @@ module Statbus
606628

607629
# PostgreSQL memory configuration
608630
# These control the docker container resource limits and postgresql.conf settings.
609-
# They are derived from DB_MEM_LIMIT in .env.config.
631+
# All values are derived from DB_MEM_LIMIT in .env.config (single source of truth).
610632
env.set("DB_MEM_LIMIT", db_mem.db_mem_limit)
611633
env.set("DB_SHM_SIZE", db_mem.db_shm_size)
612634
env.set("DB_MEM_RESERVATION", db_mem.db_mem_reservation)
613635
env.set("DB_SHARED_BUFFERS", db_mem.db_shared_buffers)
614636
env.set("DB_MAINTENANCE_WORK_MEM", db_mem.db_maintenance_work_mem)
615637
env.set("DB_EFFECTIVE_CACHE_SIZE", db_mem.db_effective_cache_size)
616638
env.set("DB_WORK_MEM", db_mem.db_work_mem)
639+
env.set("DB_TEMP_BUFFERS", db_mem.db_temp_buffers)
640+
env.set("DB_WAL_BUFFERS", db_mem.db_wal_buffers)
617641

618642
env.set("ACCESS_JWT_EXPIRY", config.access_jwt_expiry)
619643
env.set("REFRESH_JWT_EXPIRY", config.refresh_jwt_expiry)

doc/performance/hotpatches-applied.md

Lines changed: 0 additions & 78 deletions
This file was deleted.

migrations/20250423000000_add_import_jobs.down.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ DROP FUNCTION IF EXISTS admin.import_job_notify();
108108
DROP FUNCTION IF EXISTS admin.import_job_derive();
109109
DROP PROCEDURE IF EXISTS worker.notify_is_importing_start();
110110
DROP PROCEDURE IF EXISTS worker.notify_is_importing_stop();
111+
DROP FUNCTION IF EXISTS admin.set_optimal_import_session_settings();
111112
DROP FUNCTION IF EXISTS admin.trigger_validate_import_definition();
112113
DROP FUNCTION IF EXISTS admin.validate_import_definition(INT);
113114
DROP FUNCTION IF EXISTS admin.import_job_next_state(public.import_job);

migrations/20250423000000_add_import_jobs.up.sql

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,10 +175,73 @@ COMMENT ON TABLE public.import_definition_step IS 'Connects an import definition
175175

176176
-- Removed trigger prevent_non_draft_definition_step_changes
177177

178+
-- Session-Level Import Optimization Function
179+
-- Called by worker when processing import tasks to temporarily boost PostgreSQL settings
180+
-- for heavy import operations. These are transaction-scoped overrides (SET LOCAL) that revert after the
181+
-- transaction completes, allowing aggressive memory usage during imports without
182+
-- affecting other concurrent operations.
183+
--
184+
-- Server-level memory settings (configured via DB_MEM_LIMIT in .env.config):
185+
-- work_mem, maintenance_work_mem, temp_buffers, wal_buffers, etc.
186+
-- are set conservatively to allow multiple concurrent operations.
187+
--
188+
-- This function boosts memory settings specifically for import batch processing.
189+
CREATE OR REPLACE FUNCTION admin.set_optimal_import_session_settings()
190+
RETURNS void
191+
LANGUAGE plpgsql
192+
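SECURITY DEFINER -- NOTE(review): the settings changed below (work_mem, maintenance_work_mem, enable_*, planner costs, collapse limits) are ordinarily user-settable, so definer rights should not be needed for the SETs themselves; confirm why this is required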
193+
SET search_path = public, admin, pg_temp
194+
AS $set_optimal_import_session_settings$
195+
BEGIN
196+
-- Memory boosts for import operations (transaction-scoped via SET LOCAL; reverts when the transaction ends)
197+
-- These override the conservative server defaults during batch imports.
198+
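-- Note: temp_buffers cannot be changed once the session has accessed temporary tables, and wal_buffers can only be set at server start; both are therefore not set here.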
199+
SET LOCAL work_mem = '1GB'; -- Boost for large hash joins and sorts
200+
SET LOCAL maintenance_work_mem = '2GB'; -- Boost for index operations during temporal_merge
201+
202+
-- Join strategy optimization (transaction-scoped via SET LOCAL; reverts when the transaction ends)
203+
SET LOCAL enable_hashjoin = on; -- Prefer hash joins for large lookups
204+
SET LOCAL enable_nestloop = off; -- Avoid nested loops for large datasets
205+
SET LOCAL enable_mergejoin = off; -- Avoid expensive sort-based merge joins
206+
207+
-- Query optimizer hints for import workloads (transaction-scoped via SET LOCAL)
208+
SET LOCAL random_page_cost = 1.1; -- Optimize for modern storage (SSD)
209+
SET LOCAL cpu_tuple_cost = 0.01; -- Slight preference for CPU over I/O
210+
SET LOCAL hash_mem_multiplier = 8.0; -- Allow very large hash tables
211+
212+
-- Enable more aggressive query optimization for complex import operations
213+
SET LOCAL from_collapse_limit = 20; -- Allow more complex query flattening
214+
SET LOCAL join_collapse_limit = 20; -- Allow more join reordering for optimization
215+
216+
-- Log the optimization application for debugging
217+
RAISE DEBUG 'Import session optimization applied: work_mem=1GB, maintenance_work_mem=2GB, hash_mem_multiplier=8x';
218+
END;
219+
$set_optimal_import_session_settings$;
220+
221+
-- Grant execution to application user role (worker processes run as statbus_dev)
222+
GRANT EXECUTE ON FUNCTION admin.set_optimal_import_session_settings() TO statbus_dev;
223+
224+
-- Comment explaining the function's purpose and usage
225+
COMMENT ON FUNCTION admin.set_optimal_import_session_settings() IS
226+
'Applies session-level PostgreSQL optimizations for import operations.
227+
Called by worker when processing import queue tasks. Boosts work_mem and
228+
maintenance_work_mem beyond server defaults for batch processing. Uses
229+
SECURITY DEFINER since regular users cannot change PostgreSQL settings.
230+
All settings use SET LOCAL and automatically revert after the transaction completes.';
231+
178232
-- Procedure to notify about import_job_process start
179233
CREATE PROCEDURE worker.notify_is_importing_start()
180234
LANGUAGE plpgsql AS $procedure$
181235
BEGIN
236+
-- Apply transaction-scoped (SET LOCAL) PostgreSQL optimizations for import operations
237+
-- This ensures all subsequent queries in this transaction benefit from:
238+
-- - Increased work_mem (1GB) for large hash tables and sorts
239+
-- - Optimized join strategies (hash joins preferred over merge/nested loops)
240+
-- - Large hash_mem_multiplier (8x) for complex operations
241+
-- Settings automatically revert when transaction completes
242+
PERFORM admin.set_optimal_import_session_settings();
243+
244+
-- Notify that importing has started
182245
PERFORM pg_notify('worker_status', json_build_object('type', 'is_importing', 'status', true)::text);
183246
END;
184247
$procedure$;

postgres/postgresql.conf

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ client_connection_check_interval = 5s
2525
# These are now set dynamically in start-postgres.sh based on environment variables.
2626

2727
# Write-ahead log settings
28-
wal_buffers = 64MB # Set WAL buffers to 64MB, allowing more WAL data to be stored in memory before being written to disk, reducing write frequency.
28+
# wal_buffers is now set dynamically in start-postgres.sh based on DB_WAL_BUFFERS env var.
2929
synchronous_commit = off # Disable synchronous_commit to avoid waiting for WAL flush to disk, increasing performance at the cost of durability.
3030
wal_writer_delay = 500ms # Delay WAL writes to disk by 500ms to batch more transactions and reduce disk I/O pressure.
3131

@@ -49,7 +49,8 @@ log_min_messages = fatal
4949
log_min_duration_statement = 1000 # Log queries slower than 1000ms by default.
5050

5151
# Other settings
52-
temp_buffers = 256MB # Allocate memory for temporary tables operations.
52+
# temp_buffers is now set dynamically in start-postgres.sh based on DB_TEMP_BUFFERS env var.
53+
# Note: temp_buffers cannot be changed after any temporary tables have been accessed in a session.
5354
max_stack_depth = 7MB # Increase stack depth for complex recursive queries (7MB is below the 7680kB limit).
5455
max_locks_per_transaction = 2048 # Increased to handle batch processing
5556

postgres/start-postgres.sh

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,17 @@ PG_PARAMS="$PG_PARAMS -c logging_collector=off"
1313
PG_PARAMS="$PG_PARAMS -c log_destination=stderr"
1414

1515
# Dynamic memory configuration (overrides postgresql.conf)
16-
# These can be set in the .env file. See tmp/db-memory-todo.md for tuning guidance.
16+
# All memory settings are derived from DB_MEM_LIMIT in .env.config (single source of truth).
17+
# These are calculated by the CLI and passed via environment variables.
18+
# See tmp/db-memory-todo.md for tuning rationale.
1719
PG_PARAMS="$PG_PARAMS -c shared_buffers=${DB_SHARED_BUFFERS:-1GB}"
1820
PG_PARAMS="$PG_PARAMS -c maintenance_work_mem=${DB_MAINTENANCE_WORK_MEM:-1GB}"
1921
PG_PARAMS="$PG_PARAMS -c effective_cache_size=${DB_EFFECTIVE_CACHE_SIZE:-3GB}"
20-
PG_PARAMS="$PG_PARAMS -c work_mem=${DB_WORK_MEM:-100MB}"
22+
PG_PARAMS="$PG_PARAMS -c work_mem=${DB_WORK_MEM:-40MB}"
23+
# temp_buffers must be set at server startup; cannot be changed after temp tables are accessed
24+
PG_PARAMS="$PG_PARAMS -c temp_buffers=${DB_TEMP_BUFFERS:-512MB}"
25+
# wal_buffers controls WAL write buffering; larger values reduce disk I/O
26+
PG_PARAMS="$PG_PARAMS -c wal_buffers=${DB_WAL_BUFFERS:-64MB}"
2127

2228
# Default logging levels (these match postgresql.conf but will be overridden if DEBUG=true)
2329
# These values are set here to be passed as command-line arguments,

0 commit comments

Comments
 (0)