Skip to content

Commit 20138ce

Browse files
committed
horreum db analysis
1 parent d13fdc3 commit 20138ce

1 file changed

Lines changed: 123 additions & 0 deletions

File tree

docs/horreum-storage-analysis.sql

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
-- Horreum Storage Analysis for a specific test
2+
-- Usage: psql -h <host> -p <port> -U <user> -d horreum -v testid=339 -v run_limit=10 -f horreum-storage-analysis.sql
3+
4+
-- Parameters (both are required; the script sets no defaults, so pass them via -v):
5+
-- :testid - the Horreum test ID
6+
-- :run_limit - how many recent runs to analyze (use 0 for all)
7+
8+
-- Report banner, printed once before any query output.
\echo '============================================='
\echo 'Horreum Storage Analysis'
\echo '============================================='
\echo ''
12+
13+
-- Select the runs to analyze: the newest :run_limit non-trashed runs of test
-- :testid, or every such run when :run_limit is 0 (or negative).
--
-- Drop any leftover session copy first. psql keeps executing after an error
-- unless ON_ERROR_STOP is set, so if an earlier invocation in this session was
-- interrupted before its cleanup, a failing CREATE would leave the rest of the
-- report reading the stale table. The pg_temp qualification ensures only the
-- session-local temporary table can ever be dropped.
DROP TABLE IF EXISTS pg_temp.analyzed_runs;

CREATE TEMP TABLE analyzed_runs AS
SELECT r.id
FROM run AS r
WHERE r.testid = :testid
  AND r.trashed = false
ORDER BY r.id DESC
-- A NULL limit means "no limit", so no sentinel row cap is needed.
LIMIT CASE WHEN :run_limit > 0 THEN :run_limit ELSE NULL END;
19+
20+
-- Datasets that belong to the selected runs. The dataset-keyed tables in the
-- report are filtered through this list.
--
-- Drop any leftover session copy first: psql keeps executing after an error
-- unless ON_ERROR_STOP is set, so a failing CREATE would otherwise leave the
-- report reading a stale table from an earlier, interrupted invocation.
-- The pg_temp qualification ensures only the temporary table can be dropped.
DROP TABLE IF EXISTS pg_temp.analyzed_datasets;

CREATE TEMP TABLE analyzed_datasets AS
SELECT d.id
FROM dataset AS d
WHERE d.runid IN (SELECT ar.id FROM analyzed_runs AS ar);
23+
24+
-- Show which test is being analyzed and how many runs were actually selected.
\echo '--- Scope ---'
SELECT
    :testid  AS test_id,
    count(*) AS runs_analyzed
FROM analyzed_runs;
26+
27+
\echo ''
\echo '--- Per-table storage ---'

-- One row per table: row count, summed whole-row size, and (where the table
-- has a payload column) the summed size of that column alone.
--
-- Every sum is coalesced to 0 so that a table with no matching rows reports
-- 0 bytes instead of NULL. Without that, NULL totals would sort *first* under
-- ORDER BY ... DESC and empty tables would appear at the top of the report.
-- data_bytes is left NULL for tables with no payload column and rendered as '-'.
WITH per_table (table_name, row_count, total_bytes, data_bytes) AS (
    SELECT 'run (raw upload data)',
           count(*),
           coalesce(sum(pg_column_size(r.*)), 0),
           coalesce(sum(pg_column_size(r.data)), 0)
    FROM run AS r
    WHERE r.id IN (SELECT id FROM analyzed_runs)

    UNION ALL
    SELECT 'dataset (extracted data)',
           count(*),
           coalesce(sum(pg_column_size(d.*)), 0),
           coalesce(sum(pg_column_size(d.data)), 0)
    FROM dataset AS d
    WHERE d.runid IN (SELECT id FROM analyzed_runs)

    UNION ALL
    SELECT 'label_values (computed)',
           count(*),
           coalesce(sum(pg_column_size(lv.*)), 0),
           coalesce(sum(pg_column_size(lv.value)), 0)
    FROM label_values AS lv
    WHERE lv.dataset_id IN (SELECT id FROM analyzed_datasets)

    UNION ALL
    SELECT 'fingerprint',
           count(*),
           coalesce(sum(pg_column_size(fp.*)), 0),
           coalesce(sum(pg_column_size(fp.fingerprint)), 0)
    FROM fingerprint AS fp
    WHERE fp.dataset_id IN (SELECT id FROM analyzed_datasets)

    UNION ALL
    SELECT 'datapoint',
           count(*),
           coalesce(sum(pg_column_size(dp.*)), 0),
           NULL
    FROM datapoint AS dp
    WHERE dp.dataset_id IN (SELECT id FROM analyzed_datasets)

    UNION ALL
    SELECT 'change (detected changes)',
           count(*),
           coalesce(sum(pg_column_size(c.*)), 0),
           NULL
    FROM change AS c
    WHERE c.dataset_id IN (SELECT id FROM analyzed_datasets)

    UNION ALL
    -- Change-detection config is keyed by the test's variables, not by dataset.
    SELECT 'changedetection (config)',
           count(*),
           coalesce(sum(pg_column_size(cd.*)), 0),
           NULL
    FROM changedetection AS cd
    WHERE cd.variable_id IN (SELECT v.id FROM variable AS v WHERE v.testid = :testid)
)
SELECT
    table_name,
    row_count                                           AS rows,
    pg_size_pretty(total_bytes::bigint)                 AS total_size,
    coalesce(pg_size_pretty(data_bytes::bigint), '-')   AS data_column,
    total_bytes
FROM per_table
-- table_name breaks ties so the row order is deterministic.
ORDER BY total_bytes DESC, table_name;
87+
88+
\echo ''
\echo '--- Total ---'

-- Grand total: the summed whole-row sizes of every table in the report.
-- Each per-table sum is coalesced to 0 so an empty table contributes nothing.
WITH per_table (bytes) AS (
    SELECT coalesce(sum(pg_column_size(r.*)), 0)
    FROM run AS r
    WHERE r.id IN (SELECT id FROM analyzed_runs)

    UNION ALL
    SELECT coalesce(sum(pg_column_size(d.*)), 0)
    FROM dataset AS d
    WHERE d.runid IN (SELECT id FROM analyzed_runs)

    UNION ALL
    SELECT coalesce(sum(pg_column_size(lv.*)), 0)
    FROM label_values AS lv
    WHERE lv.dataset_id IN (SELECT id FROM analyzed_datasets)

    UNION ALL
    SELECT coalesce(sum(pg_column_size(fp.*)), 0)
    FROM fingerprint AS fp
    WHERE fp.dataset_id IN (SELECT id FROM analyzed_datasets)

    UNION ALL
    SELECT coalesce(sum(pg_column_size(dp.*)), 0)
    FROM datapoint AS dp
    WHERE dp.dataset_id IN (SELECT id FROM analyzed_datasets)

    UNION ALL
    SELECT coalesce(sum(pg_column_size(c.*)), 0)
    FROM change AS c
    WHERE c.dataset_id IN (SELECT id FROM analyzed_datasets)

    UNION ALL
    SELECT coalesce(sum(pg_column_size(cd.*)), 0)
    FROM changedetection AS cd
    WHERE cd.variable_id IN (SELECT v.id FROM variable AS v WHERE v.testid = :testid)
)
SELECT
    pg_size_pretty(sum(bytes)::bigint) AS total_storage,
    sum(bytes)::bigint                 AS total_bytes
FROM per_table;
102+
103+
\echo ''
\echo '--- Raw data vs computed ---'

-- Compare the size of the uploaded run payloads (run.data) with everything
-- stored for the same runs, and report the ratio between the two.
--
-- NOTE(review): raw_bytes is measured on the data column alone while
-- total_bytes uses whole-row sizes; confirm both measure stored size the same
-- way (e.g. with respect to TOAST compression) before relying on the ratio.
WITH sizes AS (
    SELECT
        coalesce((SELECT sum(pg_column_size(r.data)) FROM run r WHERE r.id IN (SELECT id FROM analyzed_runs)), 0)
            AS raw_bytes,
        coalesce((SELECT sum(pg_column_size(r.*)) FROM run r WHERE r.id IN (SELECT id FROM analyzed_runs)), 0) +
        coalesce((SELECT sum(pg_column_size(d.*)) FROM dataset d WHERE d.runid IN (SELECT id FROM analyzed_runs)), 0) +
        coalesce((SELECT sum(pg_column_size(lv.*)) FROM label_values lv WHERE lv.dataset_id IN (SELECT id FROM analyzed_datasets)), 0) +
        coalesce((SELECT sum(pg_column_size(fp.*)) FROM fingerprint fp WHERE fp.dataset_id IN (SELECT id FROM analyzed_datasets)), 0) +
        coalesce((SELECT sum(pg_column_size(dp.*)) FROM datapoint dp WHERE dp.dataset_id IN (SELECT id FROM analyzed_datasets)), 0) +
        coalesce((SELECT sum(pg_column_size(c.*)) FROM change c WHERE c.dataset_id IN (SELECT id FROM analyzed_datasets)), 0) +
        coalesce((SELECT sum(pg_column_size(cd.*)) FROM changedetection cd WHERE cd.variable_id IN (SELECT v.id FROM variable v WHERE v.testid = :testid)), 0)
            AS total_bytes
)
SELECT
    pg_size_pretty(raw_bytes::bigint)                 AS raw_upload_data,
    pg_size_pretty((total_bytes - raw_bytes)::bigint) AS computed_data,
    -- raw_bytes is 0 when no runs matched (or their data is empty); NULLIF
    -- turns the ratio into NULL instead of raising a division-by-zero error.
    round(total_bytes::numeric / NULLIF(raw_bytes, 0), 1) AS overhead_ratio
FROM sizes;
121+
122+
-- Clean up the session-local work tables.
-- IF EXISTS keeps cleanup quiet when an earlier CREATE failed, and the pg_temp
-- qualification guarantees that only the temporary tables can be dropped,
-- never a permanent table that happens to share the name.
DROP TABLE IF EXISTS pg_temp.analyzed_runs;
DROP TABLE IF EXISTS pg_temp.analyzed_datasets;

0 commit comments

Comments
 (0)