Skip to content

Commit 59e4289

Browse files
jhfclaude
andcommitted
perf: Optimize statistical_unit_stats and relevant_statistical_units
Eliminate full-table CTE materialization that caused O(n) overhead: - relevant_statistical_units: resolve enterprise via temporal PK index, then join back once for full rows instead of materializing all valid units (100+ columns each) in a CTE scanned repeatedly - statistical_unit_stats: bypass relevant_statistical_units entirely, fetching only the 6 needed columns with the same identity-resolution pattern Add EXPLAIN ANALYZE benchmarks to test 109 writing to perf file. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent b052514 commit 59e4289

5 files changed

+652
-20
lines changed
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
-- Down Migration 20260224113002: optimize_statistical_unit_stats_and_relevant_statistical_units
2+
BEGIN;
3+
4+
-- Restore original relevant_statistical_units: full-table CTE approach
5+
CREATE OR REPLACE FUNCTION public.relevant_statistical_units(unit_type statistical_unit_type, unit_id integer, valid_on date DEFAULT CURRENT_DATE)
6+
RETURNS SETOF statistical_unit
7+
LANGUAGE sql
8+
STABLE
9+
AS $relevant_statistical_units$
10+
WITH valid_units AS (
11+
SELECT * FROM public.statistical_unit
12+
WHERE valid_from <= $3 AND $3 < valid_until
13+
), root_unit AS (
14+
SELECT * FROM valid_units
15+
WHERE unit_type = 'enterprise'
16+
AND unit_id = public.statistical_unit_enterprise_id($1, $2, $3)
17+
), related_units AS (
18+
SELECT * FROM valid_units
19+
WHERE unit_type = 'legal_unit'
20+
AND unit_id IN (SELECT unnest(related_legal_unit_ids) FROM root_unit)
21+
UNION ALL
22+
SELECT * FROM valid_units
23+
WHERE unit_type = 'establishment'
24+
AND unit_id IN (SELECT unnest(related_establishment_ids) FROM root_unit)
25+
), relevant_units AS (
26+
SELECT * FROM root_unit
27+
UNION ALL
28+
SELECT * FROM related_units
29+
), ordered_units AS (
30+
SELECT ru.*
31+
, first_external.ident AS first_external_ident
32+
FROM relevant_units ru
33+
LEFT JOIN LATERAL (
34+
SELECT eit.code, (ru.external_idents->>eit.code)::text AS ident
35+
FROM public.external_ident_type eit
36+
ORDER BY eit.priority
37+
LIMIT 1
38+
) first_external ON true
39+
ORDER BY unit_type, first_external_ident NULLS LAST, unit_id
40+
)
41+
SELECT unit_type
42+
, unit_id
43+
, valid_from
44+
, valid_to
45+
, valid_until
46+
, external_idents
47+
, name
48+
, birth_date
49+
, death_date
50+
, search
51+
, primary_activity_category_id
52+
, primary_activity_category_path
53+
, primary_activity_category_code
54+
, secondary_activity_category_id
55+
, secondary_activity_category_path
56+
, secondary_activity_category_code
57+
, activity_category_paths
58+
, sector_id
59+
, sector_path
60+
, sector_code
61+
, sector_name
62+
, data_source_ids
63+
, data_source_codes
64+
, legal_form_id
65+
, legal_form_code
66+
, legal_form_name
67+
--
68+
, physical_address_part1
69+
, physical_address_part2
70+
, physical_address_part3
71+
, physical_postcode
72+
, physical_postplace
73+
, physical_region_id
74+
, physical_region_path
75+
, physical_region_code
76+
, physical_country_id
77+
, physical_country_iso_2
78+
, physical_latitude
79+
, physical_longitude
80+
, physical_altitude
81+
--
82+
, domestic
83+
--
84+
, postal_address_part1
85+
, postal_address_part2
86+
, postal_address_part3
87+
, postal_postcode
88+
, postal_postplace
89+
, postal_region_id
90+
, postal_region_path
91+
, postal_region_code
92+
, postal_country_id
93+
, postal_country_iso_2
94+
, postal_latitude
95+
, postal_longitude
96+
, postal_altitude
97+
--
98+
, web_address
99+
, email_address
100+
, phone_number
101+
, landline
102+
, mobile_number
103+
, fax_number
104+
--
105+
, unit_size_id
106+
, unit_size_code
107+
--
108+
, status_id
109+
, status_code
110+
, used_for_counting
111+
--
112+
, last_edit_comment
113+
, last_edit_by_user_id
114+
, last_edit_at
115+
--
116+
, has_legal_unit
117+
, related_establishment_ids
118+
, excluded_establishment_ids
119+
, included_establishment_ids
120+
, related_legal_unit_ids
121+
, excluded_legal_unit_ids
122+
, included_legal_unit_ids
123+
, related_enterprise_ids
124+
, excluded_enterprise_ids
125+
, included_enterprise_ids
126+
, stats
127+
, stats_summary
128+
, included_establishment_count
129+
, included_legal_unit_count
130+
, included_enterprise_count
131+
, tag_paths
132+
, daterange(valid_from, valid_until) AS valid_range
133+
, report_partition_seq
134+
FROM ordered_units;
135+
$relevant_statistical_units$;
136+
137+
-- Restore original statistical_unit_stats: delegates to relevant_statistical_units
138+
CREATE OR REPLACE FUNCTION public.statistical_unit_stats(
139+
unit_type public.statistical_unit_type,
140+
unit_id INTEGER,
141+
valid_on DATE DEFAULT current_date
142+
) RETURNS SETOF public.statistical_unit_stats LANGUAGE sql STABLE AS $statistical_unit_stats$
143+
SELECT unit_type, unit_id, valid_from, valid_to, stats, stats_summary FROM public.relevant_statistical_units(unit_type, unit_id, valid_on);
144+
$statistical_unit_stats$;
145+
146+
END;
Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
-- Migration 20260224113002: optimize_statistical_unit_stats_and_relevant_statistical_units
2+
--
3+
-- Performance optimization: eliminate full-table CTE materialization in
4+
-- relevant_statistical_units and statistical_unit_stats.
5+
--
6+
-- Before: valid_units CTE materializes ALL valid rows (100+ columns, width ~6KB each),
7+
-- then scans every row calling statistical_unit_enterprise_id() per row = O(n).
8+
--
9+
-- After: Call enterprise_id() once, look up root by temporal PK index,
10+
-- resolve related IDs from arrays, join back once for full rows.
11+
-- statistical_unit_stats bypasses relevant_statistical_units entirely,
12+
-- fetching only 6 needed columns.
13+
BEGIN;
14+
15+
-- Optimized relevant_statistical_units: targeted PK lookups instead of full-table CTE
16+
CREATE OR REPLACE FUNCTION public.relevant_statistical_units(unit_type statistical_unit_type, unit_id integer, valid_on date DEFAULT CURRENT_DATE)
17+
RETURNS SETOF statistical_unit
18+
LANGUAGE sql
19+
STABLE
20+
AS $relevant_statistical_units$
21+
-- Step 1: Find the enterprise row directly via temporal PK index
22+
WITH root_unit AS (
23+
SELECT su.unit_type, su.unit_id,
24+
su.related_legal_unit_ids,
25+
su.related_establishment_ids,
26+
su.external_idents
27+
FROM public.statistical_unit AS su
28+
WHERE su.unit_type = 'enterprise'
29+
AND su.unit_id = public.statistical_unit_enterprise_id($1, $2, $3)
30+
AND su.valid_from <= $3 AND $3 < su.valid_until
31+
-- Step 2: Collect all relevant (unit_type, unit_id) pairs from arrays
32+
), relevant_ids AS (
33+
SELECT 'enterprise'::statistical_unit_type AS unit_type, ru.unit_id FROM root_unit AS ru
34+
UNION ALL
35+
SELECT 'legal_unit'::statistical_unit_type, unnest(ru.related_legal_unit_ids) FROM root_unit AS ru
36+
UNION ALL
37+
SELECT 'establishment'::statistical_unit_type, unnest(ru.related_establishment_ids) FROM root_unit AS ru
38+
-- Step 3: Single join back to get full rows, ordered by external ident priority
39+
), full_units AS (
40+
SELECT su.*
41+
, first_external.ident AS first_external_ident
42+
FROM relevant_ids AS ri
43+
JOIN public.statistical_unit AS su
44+
ON su.unit_type = ri.unit_type
45+
AND su.unit_id = ri.unit_id
46+
AND su.valid_from <= $3 AND $3 < su.valid_until
47+
LEFT JOIN LATERAL (
48+
SELECT eit.code, (su.external_idents->>eit.code)::text AS ident
49+
FROM public.external_ident_type AS eit
50+
ORDER BY eit.priority
51+
LIMIT 1
52+
) first_external ON true
53+
ORDER BY su.unit_type, first_external_ident NULLS LAST, su.unit_id
54+
)
55+
SELECT unit_type
56+
, unit_id
57+
, valid_from
58+
, valid_to
59+
, valid_until
60+
, external_idents
61+
, name
62+
, birth_date
63+
, death_date
64+
, search
65+
, primary_activity_category_id
66+
, primary_activity_category_path
67+
, primary_activity_category_code
68+
, secondary_activity_category_id
69+
, secondary_activity_category_path
70+
, secondary_activity_category_code
71+
, activity_category_paths
72+
, sector_id
73+
, sector_path
74+
, sector_code
75+
, sector_name
76+
, data_source_ids
77+
, data_source_codes
78+
, legal_form_id
79+
, legal_form_code
80+
, legal_form_name
81+
--
82+
, physical_address_part1
83+
, physical_address_part2
84+
, physical_address_part3
85+
, physical_postcode
86+
, physical_postplace
87+
, physical_region_id
88+
, physical_region_path
89+
, physical_region_code
90+
, physical_country_id
91+
, physical_country_iso_2
92+
, physical_latitude
93+
, physical_longitude
94+
, physical_altitude
95+
--
96+
, domestic
97+
--
98+
, postal_address_part1
99+
, postal_address_part2
100+
, postal_address_part3
101+
, postal_postcode
102+
, postal_postplace
103+
, postal_region_id
104+
, postal_region_path
105+
, postal_region_code
106+
, postal_country_id
107+
, postal_country_iso_2
108+
, postal_latitude
109+
, postal_longitude
110+
, postal_altitude
111+
--
112+
, web_address
113+
, email_address
114+
, phone_number
115+
, landline
116+
, mobile_number
117+
, fax_number
118+
--
119+
, unit_size_id
120+
, unit_size_code
121+
--
122+
, status_id
123+
, status_code
124+
, used_for_counting
125+
--
126+
, last_edit_comment
127+
, last_edit_by_user_id
128+
, last_edit_at
129+
--
130+
, has_legal_unit
131+
, related_establishment_ids
132+
, excluded_establishment_ids
133+
, included_establishment_ids
134+
, related_legal_unit_ids
135+
, excluded_legal_unit_ids
136+
, included_legal_unit_ids
137+
, related_enterprise_ids
138+
, excluded_enterprise_ids
139+
, included_enterprise_ids
140+
, stats
141+
, stats_summary
142+
, included_establishment_count
143+
, included_legal_unit_count
144+
, included_enterprise_count
145+
, tag_paths
146+
, daterange(valid_from, valid_until) AS valid_range
147+
, report_partition_seq
148+
FROM full_units;
149+
$relevant_statistical_units$;
150+
151+
-- Optimized statistical_unit_stats: bypass relevant_statistical_units entirely,
152+
-- fetching only the 6 columns needed instead of 100+.
153+
CREATE OR REPLACE FUNCTION public.statistical_unit_stats(
154+
unit_type public.statistical_unit_type,
155+
unit_id INTEGER,
156+
valid_on DATE DEFAULT current_date
157+
) RETURNS SETOF public.statistical_unit_stats LANGUAGE sql STABLE AS $statistical_unit_stats$
158+
WITH root_unit AS (
159+
SELECT su.unit_id,
160+
su.related_legal_unit_ids,
161+
su.related_establishment_ids
162+
FROM public.statistical_unit AS su
163+
WHERE su.unit_type = 'enterprise'
164+
AND su.unit_id = public.statistical_unit_enterprise_id($1, $2, $3)
165+
AND su.valid_from <= $3 AND $3 < su.valid_until
166+
), relevant_ids AS (
167+
SELECT 'enterprise'::statistical_unit_type AS unit_type, ru.unit_id FROM root_unit AS ru
168+
UNION ALL
169+
SELECT 'legal_unit'::statistical_unit_type, unnest(ru.related_legal_unit_ids) FROM root_unit AS ru
170+
UNION ALL
171+
SELECT 'establishment'::statistical_unit_type, unnest(ru.related_establishment_ids) FROM root_unit AS ru
172+
)
173+
SELECT su.unit_type, su.unit_id, su.valid_from, su.valid_to, su.stats, su.stats_summary
174+
FROM relevant_ids AS ri
175+
JOIN public.statistical_unit AS su
176+
ON su.unit_type = ri.unit_type
177+
AND su.unit_id = ri.unit_id
178+
AND su.valid_from <= $3 AND $3 < su.valid_until
179+
ORDER BY su.unit_type, su.unit_id;
180+
$statistical_unit_stats$;
181+
182+
END;

0 commit comments

Comments
 (0)