This repository was archived by the owner on Dec 16, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathfmtm_algorithm.sql
More file actions
466 lines (419 loc) · 15.4 KB
/
fmtm_algorithm.sql
File metadata and controls
466 lines (419 loc) · 15.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
DROP TABLE IF EXISTS polygonsnocount;
-- Create a new polygon layer of splits by lines
DO $$
DECLARE
lines_count INTEGER;
BEGIN
-- Check if ways_line has any data
SELECT COUNT(*) INTO lines_count
FROM "ways_line" l
WHERE (
(l.tags->>'highway' IS NOT NULL AND
l.tags->>'highway' NOT IN ('unclassified', 'residential', 'service', 'pedestrian', 'track', 'bus_guideway'))
OR l.tags->>'waterway' IS NOT NULL
OR l.tags->>'railway' IS NOT NULL
);
IF lines_count > 0 THEN
CREATE TABLE polygonsnocount AS (
-- The Area of Interest provided by the person creating the project
WITH aoi AS (
SELECT * FROM "project_aoi"
)
-- Extract all lines to be used as splitlines from a table of lines
-- with the schema from Underpass (all tags as jsonb column called 'tags')
-- TODO: add polygons (closed ways in OSM) with a 'highway' tag;
-- some features such as roundabouts appear as polygons.
-- TODO: add waterway polygons; now a beach doesn't show up as a splitline.
-- TODO: these tags should come from another table rather than hardcoded
-- so that they're easily configured during project creation.
, splitlines AS (
SELECT ST_Intersection(a.geom, l.geom) AS geom
FROM aoi a
JOIN "ways_line" l ON ST_Intersects(a.geom, l.geom)
WHERE (
(l.tags->>'highway' IS NOT NULL AND
l.tags->>'highway' NOT IN ('unclassified', 'residential', 'service', 'pedestrian', 'track', 'bus_guideway')) -- TODO: update(add/remove) this based on the requirements later
OR l.tags->>'waterway' IS NOT NULL
OR l.tags->>'railway' IS NOT NULL
)
)
-- SELECT * from lines_view l
-- WHERE (
-- (l.tags->>'highway' IS NOT NULL AND
-- l.tags->>'highway' NOT IN ('unclassified', 'residential', 'service', 'pedestrian', 'track', 'bus_guideway'))
-- OR l.tags->>'waterway' IS NOT NULL
-- OR l.tags->>'railway' IS NOT NULL
-- )
-- )
-- Merge all lines, necessary so that the polygonize function works later
,merged AS (
SELECT ST_LineMerge(ST_Union(splitlines.geom)) AS geom
FROM splitlines
)
-- Combine the boundary of the AOI with the splitlines
-- First extract the Area of Interest boundary as a line
,boundary AS (
SELECT ST_Boundary(geom) AS geom
FROM aoi
)
-- Then combine it with the splitlines
,comb AS (
SELECT ST_Union(boundary.geom, merged.geom) AS geom
FROM boundary, merged
)
-- TODO add closed ways from OSM to lines (roundabouts etc)
-- Create a polygon for each area enclosed by the splitlines
,splitpolysnoindex AS (
SELECT (ST_Dump(ST_Polygonize(comb.geom))).geom as geom
FROM comb
)
-- Add an index column to the split polygons
,splitpolygons AS(
SELECT
row_number () over () as polyid,
ST_Transform(spni.geom,4326)::geography AS geog,
spni.*
from splitpolysnoindex spni
)
SELECT * FROM splitpolygons
);
ELSE
-- Calculate number of buildings per cluster
CREATE TABLE polygonsnocount AS (
WITH aoi AS (
SELECT * FROM "project_aoi"
)
,transformed_aoi AS(
SELECT
row_number () over () as polyid,
ST_Transform(aoi.geom,4326)::geography AS geog,
aoi.geom
from aoi
)
SELECT * FROM transformed_aoi
);
END IF;
END $$;
-- Make that index column a primary key
ALTER TABLE polygonsnocount ADD PRIMARY KEY (polyid);
-- Properly register geometry column (makes QGIS happy)
SELECT POPULATE_GEOMETRY_COLUMNS('public.polygonsnocount'::regclass);
-- Add a spatial index (vastly improves performance for a lot of operations)
CREATE INDEX polygonsnocount_idx
ON polygonsnocount
USING gist (geom);
-- Clean up the table which may have gaps and stuff from spatial indexing
-- VACUUM ANALYZE polygonsnocount;
DROP TABLE IF EXISTS buildings;
CREATE TABLE buildings AS (
SELECT
b.*,
polys.polyid
FROM ways_poly AS b, polygonsnocount AS polys
WHERE
ST_INTERSECTS(polys.geom, ST_CENTROID(b.geom))
AND b.tags ->> 'building' IS NOT NULL
);
-- ALTER TABLE buildings ADD PRIMARY KEY(osm_id);
-- Properly register geometry column (makes QGIS happy)
SELECT POPULATE_GEOMETRY_COLUMNS('public.buildings'::regclass);
-- Add a spatial index (vastly improves performance for a lot of operations)
CREATE INDEX buildings_idx
ON buildings
USING gist (geom);
-- Clean up the table which may have gaps and stuff from spatial indexing
-- VACUUM ANALYZE buildings;
DROP TABLE IF EXISTS splitpolygons;
CREATE TABLE splitpolygons AS (
WITH polygonsfeaturecount AS (
SELECT
sp.polyid,
sp.geom,
sp.geog,
COUNT(b.geom) AS numfeatures,
ST_AREA(sp.geog) AS area
FROM polygonsnocount AS sp
LEFT JOIN buildings AS b
ON sp.polyid = b.polyid
GROUP BY sp.polyid, sp.geom
)
SELECT * FROM polygonsfeaturecount
);
ALTER TABLE splitpolygons ADD PRIMARY KEY (polyid);
SELECT POPULATE_GEOMETRY_COLUMNS('public.splitpolygons'::regclass);
CREATE INDEX splitpolygons_idx
ON splitpolygons
USING gist (geom);
-- VACUUM ANALYZE splitpolygons;
DROP TABLE polygonsnocount;
-- DROP TABLE IF EXISTS lowfeaturecountpolygons;
-- CREATE TABLE lowfeaturecountpolygons AS (
-- -- Grab the polygons with fewer than the requisite number of features
-- WITH lowfeaturecountpolys AS (
-- SELECT *
-- FROM splitpolygons AS p
-- -- TODO: feature count should not be hard-coded
-- WHERE p.numfeatures < %(num_buildings)s
-- ),
-- -- Find the neighbors of the low-feature-count polygons
-- -- Store their ids as n_polyid, numfeatures as n_numfeatures, etc
-- allneighborlist AS (
-- SELECT
-- p.*,
-- pf.polyid AS n_polyid,
-- pf.area AS n_area,
-- p.numfeatures AS n_numfeatures,
-- -- length of shared boundary to make nice merge decisions
-- ST_LENGTH2D(ST_INTERSECTION(p.geom, pf.geom)) AS sharedbound
-- FROM lowfeaturecountpolys AS p
-- INNER JOIN splitpolygons AS pf
-- -- Anything that touches
-- ON ST_TOUCHES(p.geom, pf.geom)
-- -- But eliminate those whose intersection is a point, because
-- -- polygons that only touch at a corner shouldn't be merged
-- AND ST_GEOMETRYTYPE(ST_INTERSECTION(p.geom, pf.geom)) != 'ST_Point'
-- -- Sort first by polyid of the low-feature-count polygons
-- -- Then by descending featurecount and area of the
-- -- high-feature-count neighbors (area is in case of equal
-- -- featurecounts, we'll just pick the biggest to add to)
-- ORDER BY p.polyid ASC, p.numfeatures DESC, pf.area DESC
-- -- OR, maybe for more aesthetic merges:
-- -- order by p.polyid, sharedbound desc
-- )
-- SELECT DISTINCT ON (a.polyid) * FROM allneighborlist AS a
-- );
-- ALTER TABLE lowfeaturecountpolygons ADD PRIMARY KEY (polyid);
-- SELECT POPULATE_GEOMETRY_COLUMNS('public.lowfeaturecountpolygons'::regclass);
-- CREATE INDEX lowfeaturecountpolygons_idx
-- ON lowfeaturecountpolygons
-- USING gist (geom);
-- VACUUM ANALYZE lowfeaturecountpolygons;
DROP TABLE IF EXISTS clusteredbuildings;
CREATE TABLE clusteredbuildings AS (
WITH splitpolygonswithcontents AS (
SELECT *
FROM splitpolygons AS sp
WHERE sp.numfeatures > 0
),
-- Add the count of features in the splitpolygon each building belongs to
-- to the buildings table; sets us up to be able to run the clustering.
buildingswithcount AS (
SELECT
b.*,
p.numfeatures
FROM buildings AS b
LEFT JOIN splitpolygons AS p
ON b.polyid = p.polyid
),
-- Cluster the buildings within each splitpolygon. The second term in the
-- call to the ST_ClusterKMeans function is the number of clusters to
-- create, so we're dividing the number of features by a constant
-- (10 in this case) to get the number of clusters required to get close
-- to the right number of features per cluster.
-- TODO: This should certainly not be a hardcoded, the number of features
-- per cluster should come from a project configuration table
buildingstocluster AS (
SELECT * FROM buildingswithcount AS bc
WHERE bc.numfeatures > 0
),
clusteredbuildingsnocombineduid AS (
SELECT
*,
ST_CLUSTERKMEANS(
b.geom,
CAST((b.numfeatures / b.%(num_buildings)s) + 1 AS integer)
)
OVER (PARTITION BY b.polyid)
AS cid
FROM buildingstocluster AS b
),
-- uid combining the id of the outer splitpolygon and inner cluster
clusteredbuildings AS (
SELECT
*,
polyid::text || '-' || cid AS clusteruid
FROM clusteredbuildingsnocombineduid
)
SELECT * FROM clusteredbuildings
);
-- ALTER TABLE clusteredbuildings ADD PRIMARY KEY(osm_id);
SELECT POPULATE_GEOMETRY_COLUMNS('public.clusteredbuildings'::regclass);
CREATE INDEX clusteredbuildings_idx
ON clusteredbuildings
USING gist (geom);
-- VACUUM ANALYZE clusteredbuildings;
DROP TABLE IF EXISTS dumpedpoints;
CREATE TABLE dumpedpoints AS (
SELECT
cb.osm_id,
cb.polyid,
cb.cid,
cb.clusteruid,
-- POSSIBLE BUG: PostGIS' Voronoi implementation seems to panic
-- with segments less than 0.00004 degrees.
-- Should probably use geography instead of geometry
(ST_DUMPPOINTS(ST_SEGMENTIZE(cb.geom, 0.00004))).geom
FROM clusteredbuildings AS cb
);
SELECT POPULATE_GEOMETRY_COLUMNS('public.dumpedpoints'::regclass);
CREATE INDEX dumpedpoints_idx
ON dumpedpoints
USING gist (geom);
-- VACUUM ANALYZE dumpedpoints;
DROP TABLE IF EXISTS voronoids;
CREATE TABLE voronoids AS (
SELECT
ST_INTERSECTION((ST_DUMP(ST_VORONOIPOLYGONS(
ST_COLLECT(points.geom)
))).geom,
sp.geom) AS geom
FROM dumpedpoints AS points,
splitpolygons AS sp
WHERE ST_CONTAINS(sp.geom, points.geom)
GROUP BY sp.geom
);
CREATE INDEX voronoids_idx
ON voronoids
USING gist (geom);
-- VACUUM ANALYZE voronoids;
DROP TABLE IF EXISTS voronois;
CREATE TABLE voronois AS (
SELECT
p.clusteruid,
v.geom
FROM voronoids AS v, dumpedpoints AS p
WHERE ST_WITHIN(p.geom, v.geom)
);
CREATE INDEX voronois_idx
ON voronois
USING gist (geom);
-- VACUUM ANALYZE voronois;
DROP TABLE voronoids;
DROP TABLE IF EXISTS unsimplifiedtaskpolygons;
CREATE TABLE unsimplifiedtaskpolygons AS (
SELECT
clusteruid,
ST_UNION(geom) AS geom
FROM voronois
GROUP BY clusteruid
);
CREATE INDEX unsimplifiedtaskpolygons_idx
ON unsimplifiedtaskpolygons
USING gist (geom);
--VACUUM ANALYZE unsimplifiedtaskpolygons;
--*****************************Simplify*******************************
-- Extract unique line segments
DROP TABLE IF EXISTS taskpolygons;
CREATE TABLE taskpolygons AS (
--Convert task polygon boundaries to linestrings
WITH rawlines AS (
SELECT
utp.clusteruid,
ST_BOUNDARY(utp.geom) AS geom
FROM unsimplifiedtaskpolygons AS utp
),
-- Union, which eliminates duplicates from adjacent polygon boundaries
unionlines AS (
SELECT ST_UNION(l.geom) AS geom FROM rawlines AS l
),
-- Dump, which gives unique segments.
segments AS (
SELECT (ST_DUMP(l.geom)).geom AS geom
FROM unionlines AS l
),
agglomerated AS (
SELECT ST_LINEMERGE(ST_UNARYUNION(ST_COLLECT(s.geom))) AS geom
FROM segments AS s
),
simplifiedlines AS (
SELECT ST_SIMPLIFY(a.geom, 0.000075) AS geom
FROM agglomerated AS a
),
taskpolygonsnoindex AS (
SELECT (ST_DUMP(ST_POLYGONIZE(s.geom))).geom AS geom
FROM simplifiedlines AS s
)
SELECT
tpni.*,
ROW_NUMBER() OVER () AS taskid
FROM taskpolygonsnoindex AS tpni
);
ALTER TABLE taskpolygons ADD PRIMARY KEY (taskid);
SELECT POPULATE_GEOMETRY_COLUMNS('public.taskpolygons'::regclass);
CREATE INDEX taskpolygons_idx
ON taskpolygons
USING gist (geom);
-- Merge least feature polygons with neighbouring polygons
DO $$
DECLARE
num_buildings INTEGER := %(num_buildings)s;
min_area NUMERIC; -- Set the minimum area threshold
mean_area NUMERIC;
stddev_area NUMERIC; -- Set the standard deviation
min_buildings INTEGER; -- Set the minimum number of buildings threshold
small_polygon RECORD; -- set small_polygon and nearest_neighbor as record
nearest_neighbor RECORD; -- in order to use them in the loop
BEGIN
min_buildings := num_buildings * 0.5;
-- Find the mean and standard deviation of the area
SELECT
AVG(ST_Area(geom)),
STDDEV_POP(ST_Area(geom))
INTO mean_area, stddev_area
FROM taskpolygons;
-- Set the threshold as mean - standard deviation
min_area := mean_area - stddev_area;
DROP TABLE IF EXISTS leastfeaturepolygons;
CREATE TABLE leastfeaturepolygons AS
SELECT taskid, geom
FROM taskpolygons
WHERE ST_Area(geom) < min_area OR (
SELECT COUNT(b.id) FROM buildings b
WHERE ST_Contains(taskpolygons.geom, b.geom)
) < min_buildings; -- find least feature polygons based on the area and feature
FOR small_polygon IN
SELECT * FROM leastfeaturepolygons
LOOP
-- Find the nearest neighbor to merge the small polygon with
FOR nearest_neighbor IN
SELECT taskid, geom, ST_LENGTH2D(ST_Intersection(small_polygon.geom, geom)) as shared_bound
FROM taskpolygons
WHERE taskid NOT IN (SELECT taskid FROM leastfeaturepolygons)
AND ST_Touches(small_polygon.geom, geom)
AND ST_GEOMETRYTYPE(ST_INTERSECTION(small_polygon.geom, geom)) != 'ST_Point'
ORDER BY shared_bound DESC -- Find neighbor polygon based on shared boundary distance
LIMIT 1
LOOP
-- Merge the small polygon into the neighboring polygon
UPDATE taskpolygons
SET geom = ST_Union(geom, small_polygon.geom)
WHERE taskid = nearest_neighbor.taskid;
DELETE FROM taskpolygons WHERE taskid = small_polygon.taskid;
-- Exit the neighboring polygon loop after one successful merge
EXIT;
END LOOP;
END LOOP;
END $$;
DROP TABLE IF EXISTS leastfeaturepolygons;
-- VACUUM ANALYZE taskpolygons;
-- Generate GeoJSON output
SELECT
JSONB_BUILD_OBJECT(
'type', 'FeatureCollection',
'features', JSONB_AGG(feature)
)
FROM (
SELECT
JSONB_BUILD_OBJECT(
'type', 'Feature',
'geometry', ST_ASGEOJSON(t.geom)::jsonb,
'properties', JSONB_BUILD_OBJECT(
'building_count', (
SELECT COUNT(b.id)
FROM buildings AS b
WHERE ST_CONTAINS(t.geom, b.geom)
)
)
) AS feature
FROM taskpolygons AS t
) AS features;