Skip to content

Commit a4a2aea

Browse files
Merge branch '150-baden-würtenberg-lod2' into 'develop'
Resolve "Baden Würtenberg" Closes #150 See merge request need/NEED-infdb!121
2 parents 9ecfec4 + 513d47f commit a4a2aea

4 files changed

Lines changed: 257 additions & 43 deletions

File tree

configs/config-infdb-import.yml.template

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,15 @@ infdb-import:
105105
tile_size_m: 2000
106106
filename_template: "{e_km}_{n_km}.gml"
107107

108+
baden_wuerttemberg:
109+
status: active
110+
state_prefix: "08"
111+
base_url: "https://opengeodata.lgl-bw.de/data/lod2/"
112+
tile_size_m: 2000
113+
tile_origin_offset_x_m: 1000
114+
tile_origin_offset_y_m: 0
115+
filename_template: "LoD2_32_{e_km}_{n_km}_2_bw.zip"
116+
108117
bkg:
109118
status: active
110119
path:

services/infdb-import/sql/bld.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ WITH base_buildings AS (
4545
GROUP BY f.id, f.objectclass_id, f.objectid
4646
HAVING MAX(CASE WHEN p.name = 'function' THEN p.val_string END) >= '31001_'
4747
AND MAX(CASE WHEN p.name = 'function' THEN p.val_string END) < '31002'
48-
-- AND MAX(CASE WHEN p.name = 'Gemeindeschluessel' THEN p.val_string END) IN ({ags})
48+
AND MAX(CASE WHEN p.name = 'Gemeindeschluessel' THEN p.val_string END) IN ({ags})
4949
)
5050
SELECT
5151
bb.ags_id,

services/infdb-import/src/lod2.py

Lines changed: 242 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,212 @@
11
import os
22
import sys
3+
from zipfile import ZipFile, BadZipFile
34

45
from infdb import InfDB
56
from shapely import wkt as shapely_wkt
67
from shapely.geometry import box
8+
import multiprocessing as mp
79

810
from . import utils
911

1012

11-
def _iter_tile_origins_for_geom(geom, tile_size_m: int):
12-
"""Yield lower-left tile origin coordinates (in meters) for all tiles
13+
def _urls_to_local_citygml_paths(urls: list[str], gml_path: str, log) -> list[str]:
14+
"""Resolve current-run URLs to existing local CityGML file paths.
15+
16+
Supports:
17+
- direct .gml files
18+
- .zip files containing .gml files
19+
20+
Important:
21+
This function only resolves files belonging to the current URL list.
22+
It does not import every file in the shared CityGML folder.
23+
"""
24+
local_files = []
25+
missing_files = []
26+
27+
for url in urls:
28+
filename = os.path.basename(url)
29+
local_path = os.path.join(gml_path, filename)
30+
31+
if not os.path.isfile(local_path):
32+
missing_files.append(local_path)
33+
continue
34+
35+
if filename.lower().endswith(".gml"):
36+
local_files.append(local_path)
37+
continue
38+
39+
if filename.lower().endswith(".zip"):
40+
try:
41+
with ZipFile(local_path, "r") as zf:
42+
gml_members = [
43+
member
44+
for member in zf.namelist()
45+
if member.lower().endswith(".gml")
46+
]
47+
48+
if not gml_members:
49+
log.warning("LoD2 ZIP contains no GML files: %s", local_path)
50+
continue
51+
52+
for member in gml_members:
53+
extracted_filename = os.path.basename(member)
54+
55+
if not extracted_filename:
56+
continue
57+
58+
extracted_path = os.path.join(gml_path, extracted_filename)
59+
60+
if not os.path.isfile(extracted_path):
61+
with zf.open(member) as src, open(extracted_path, "wb") as dst:
62+
dst.write(src.read())
63+
64+
local_files.append(extracted_path)
65+
66+
except BadZipFile:
67+
log.warning("LoD2 invalid ZIP file: %s", local_path)
68+
69+
continue
70+
71+
log.warning("LoD2 unsupported file type: %s", local_path)
72+
73+
if missing_files:
74+
log.warning("LoD2: %d expected files are missing after download.", len(missing_files))
75+
for path in missing_files[:20]:
76+
log.warning("Missing file: %s", path)
77+
if len(missing_files) > 20:
78+
log.warning("... and %d more missing files.", len(missing_files) - 20)
79+
80+
return sorted(set(local_files))
81+
82+
83+
def _chunk_list(items: list[str], chunk_size: int) -> list[list[str]]:
84+
"""Split items into fixed-size chunks."""
85+
if chunk_size <= 0:
86+
raise ValueError("chunk_size must be > 0")
87+
88+
return [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]
89+
90+
91+
def _import_lod2_batch(
92+
batch_files: list[str],
93+
tool_name: str,
94+
batch_index: int,
95+
total_batches: int,
96+
) -> bool:
97+
"""Import one batch of GML files directly with citydb."""
98+
try:
99+
infdb = InfDB(tool_name=tool_name, config_path="../configs/config-infdb-import.yml")
100+
log = infdb.get_worker_logger()
101+
102+
if not batch_files:
103+
log.info("LoD2 batch %d/%d: empty batch, skipping.", batch_index, total_batches)
104+
return True
105+
106+
source_cfg = [infdb.get_toolname(), "sources", "lod2"]
107+
params = infdb.get_db_parameters_dict()
108+
import_mode = infdb.get_config_value(source_cfg + ["import-mode"]) or "skip"
109+
110+
log.info(
111+
"LoD2 batch %d/%d: importing %d files.",
112+
batch_index,
113+
total_batches,
114+
len(batch_files),
115+
)
116+
117+
cmd_parts = [
118+
"citydb",
119+
"import",
120+
"citygml",
121+
"-H",
122+
params["host"],
123+
"-d",
124+
params["db"],
125+
"-u",
126+
params["user"],
127+
"-p",
128+
params["password"],
129+
"-P",
130+
str(params["exposed_port"]),
131+
f"--import-mode={import_mode}",
132+
*batch_files,
133+
]
134+
135+
return_code = utils.do_cmd(infdb, cmd_parts)
136+
137+
if return_code != 0:
138+
log.error(
139+
"LoD2 batch %d/%d failed with return code %d.",
140+
batch_index,
141+
total_batches,
142+
return_code,
143+
)
144+
return False
145+
146+
log.info("LoD2 batch %d/%d imported successfully.", batch_index, total_batches)
147+
return True
148+
149+
except Exception:
150+
if "log" in locals():
151+
log.exception("LoD2 batch %d/%d failed unexpectedly.", batch_index, total_batches)
152+
return False
153+
154+
155+
def _import_lod2_files_in_parallel(
156+
infdb: InfDB,
157+
gml_files: list[str],
158+
batch_size: int = 200,
159+
processes: int | None = None,
160+
) -> bool:
161+
"""Import current-run GML files in parallel batches."""
162+
log = infdb.get_worker_logger()
163+
164+
if not gml_files:
165+
log.warning("LoD2: no GML files to import.")
166+
return True
167+
168+
batches = _chunk_list(gml_files, batch_size)
169+
total_batches = len(batches)
170+
171+
if processes is None:
172+
processes = utils.get_number_processes(infdb)
173+
174+
processes = max(1, min(processes, total_batches))
175+
176+
log.info(
177+
"LoD2: importing %d files in %d batches with %d worker(s).",
178+
len(gml_files),
179+
total_batches,
180+
processes,
181+
)
182+
183+
with mp.Pool(processes=processes) as pool:
184+
results = pool.starmap(
185+
_import_lod2_batch,
186+
[
187+
(batch, infdb.get_toolname(), i + 1, total_batches)
188+
for i, batch in enumerate(batches)
189+
],
190+
)
191+
192+
return all(results)
193+
194+
195+
def _iter_tile_origins_for_geom(
196+
geom,
197+
tile_size_m: int,
198+
offset_x_m: int = 0,
199+
offset_y_m: int = 0,
200+
):
201+
"""Yield lower-left tile origin coordinates in meters for all tiles
13202
intersecting the given geometry.
14203
"""
15204
minx, miny, maxx, maxy = geom.bounds
16-
start_x = int(minx // tile_size_m) * tile_size_m
17-
start_y = int(miny // tile_size_m) * tile_size_m
18-
end_x = int(maxx // tile_size_m) * tile_size_m
19-
end_y = int(maxy // tile_size_m) * tile_size_m
205+
206+
start_x = int((minx - offset_x_m) // tile_size_m) * tile_size_m + offset_x_m
207+
start_y = int((miny - offset_y_m) // tile_size_m) * tile_size_m + offset_y_m
208+
end_x = int((maxx - offset_x_m) // tile_size_m) * tile_size_m + offset_x_m
209+
end_y = int((maxy - offset_y_m) // tile_size_m) * tile_size_m + offset_y_m
20210

21211
for x in range(start_x, end_x + tile_size_m, tile_size_m):
22212
for y in range(start_y, end_y + tile_size_m, tile_size_m):
@@ -32,21 +222,20 @@ def _build_urls_for_region(region_name: str, region_cfg: dict, infdb: InfDB, log
32222
tiled OpenData sources, for example:
33223
- LoD2 NRW
34224
- LoD2 Bavaria
35-
- DGM1 Bavaria
225+
- LoD2 Baden-Württemberg
36226
37227
Expected config keys in region_cfg:
38228
- status: "active" / "not-active"
39229
- state_prefix: AGS prefix used to resolve the clip geometry
40230
- base_url: URL prefix of the tiled dataset
41-
- tile_size_m: tile size in meters (e.g. 1000 or 2000)
231+
- tile_size_m: tile size in meters
42232
- filename_template: filename pattern using:
43233
{e_km} = easting in km
44234
{n_km} = northing in km
45235
46-
Example filename_template values:
47-
- LoD2 NRW: "LoD2_32_{e_km}_{n_km}_1_NW.gml"
48-
- LoD2 Bavaria: "{e_km}_{n_km}.gml"
49-
- DGM1 Bavaria: "{e_km}_{n_km}.tif"
236+
Optional config keys:
237+
- tile_origin_offset_x_m
238+
- tile_origin_offset_y_m
50239
"""
51240
if region_cfg.get("status") != "active":
52241
log.info("%s: inactive, skipping.", region_name)
@@ -55,26 +244,37 @@ def _build_urls_for_region(region_name: str, region_cfg: dict, infdb: InfDB, log
55244
state_prefix = region_cfg.get("state_prefix")
56245
base_url = str(region_cfg.get("base_url", "")).rstrip("/") + "/"
57246
tile_size_m = int(region_cfg.get("tile_size_m") or 0)
247+
offset_x_m = int(region_cfg.get("tile_origin_offset_x_m") or 0)
248+
offset_y_m = int(region_cfg.get("tile_origin_offset_y_m") or 0)
58249
template = region_cfg.get("filename_template")
59250

60251
if not state_prefix or not base_url or not tile_size_m or not template:
61252
log.warning("%s: incomplete tiled dataset configuration, skipping.", region_name)
62253
return []
63254

64255
# Resolve the scoped geometry once for the configured state/region.
65-
# We use EPSG:25832 because the tile grids for these Bavaria/NRW datasets
66-
# are aligned in meter-based projected coordinates.
67-
clip_wkt, _, _ = utils.get_clip_geometry(target_crs=25832, infdb=infdb, state_prefix=state_prefix)
256+
# We use EPSG:25832 because these datasets use meter-based projected tiles.
257+
clip_wkt, _, _ = utils.get_clip_geometry(
258+
target_crs=25832,
259+
infdb=infdb,
260+
state_prefix=state_prefix,
261+
)
262+
68263
if not clip_wkt:
69264
log.info("%s: no scope geometry resolved for state prefix %s, skipping.", region_name, state_prefix)
70265
return []
71266

72267
scope_geom = shapely_wkt.loads(clip_wkt)
73268

74269
urls = []
75-
for x, y in _iter_tile_origins_for_geom(scope_geom, tile_size_m=tile_size_m):
270+
for x, y in _iter_tile_origins_for_geom(
271+
scope_geom,
272+
tile_size_m=tile_size_m,
273+
offset_x_m=offset_x_m,
274+
offset_y_m=offset_y_m,
275+
):
76276
# Convert tile origin coordinates from meters to kilometer indices,
77-
# because Bavaria/NRW filenames are based on km grid references.
277+
# because Bavaria/NRW/BW filenames are based on km grid references.
78278
fname = template.format(
79279
e_km=x // 1000,
80280
n_km=y // 1000,
@@ -86,6 +286,7 @@ def _build_urls_for_region(region_name: str, region_cfg: dict, infdb: InfDB, log
86286
return urls
87287

88288

289+
89290
def load(infdb: InfDB) -> bool:
90291
"""Download LoD2 CityGML tiles for all active configured regions, import them via citydb,
91292
then create the flat LoD2 building table.
@@ -95,6 +296,8 @@ def load(infdb: InfDB) -> bool:
95296
- Resolves scope geometry per region/state in EPSG:25832.
96297
- Computes intersecting tiles using regular grid logic.
97298
- Deduplicates URLs globally, so the same file is not downloaded twice.
299+
- Supports direct .gml files and .zip files containing .gml files.
300+
- Imports only the current-run files, not the whole folder.
98301
"""
99302
log = infdb.get_worker_logger()
100303

@@ -109,10 +312,12 @@ def load(infdb: InfDB) -> bool:
109312

110313
nrw_cfg = infdb.get_config_value(source_cfg + ["nrw"]) or {}
111314
bavaria_cfg = infdb.get_config_value(source_cfg + ["bavaria"]) or {}
315+
bw_cfg = infdb.get_config_value(source_cfg + ["baden_wuerttemberg"]) or {}
112316

113317
urls = []
114318
urls.extend(_build_urls_for_region("NRW", nrw_cfg, infdb, log))
115319
urls.extend(_build_urls_for_region("Bavaria", bavaria_cfg, infdb, log))
320+
urls.extend(_build_urls_for_region("Baden-Württemberg", bw_cfg, infdb, log))
116321

117322
urls = sorted(set(urls))
118323
log.info("LoD2: %d unique tiles to download across all active regions.", len(urls))
@@ -121,32 +326,29 @@ def load(infdb: InfDB) -> bool:
121326
log.warning("LoD2: no tiles resolved for any active region; skipping import.")
122327
return True
123328

124-
# Download all unique tiles into one shared folder
329+
# Download all unique tiles into one shared folder.
330+
# NRW / Bavaria download .gml files.
331+
# Baden-Württemberg downloads .zip files.
125332
utils.download_aria2c_many(infdb, urls, output_dir=gml_path)
126333

127-
# Import all downloaded CityGML files from the shared folder
128-
params = infdb.get_db_parameters_dict()
129-
import_mode = infdb.get_config_value(source_cfg + ["import-mode"]) or "skip"
334+
# Resolve only the files for the current run / current scope.
335+
# ZIP files are extracted into the same CityGML folder and their
336+
# extracted .gml files are returned.
337+
gml_files = _urls_to_local_citygml_paths(urls, gml_path, log)
130338

131-
cmd_parts = [
132-
"citydb",
133-
"import",
134-
"citygml",
135-
"-H",
136-
params["host"],
137-
"-d",
138-
params["db"],
139-
"-u",
140-
params["user"],
141-
"-p",
142-
params["password"],
143-
"-P",
144-
str(params["exposed_port"]),
145-
f"--import-mode={import_mode}",
146-
# "--log-level=warn",
147-
str(gml_path),
148-
]
149-
utils.do_cmd(infdb, cmd_parts)
339+
if not gml_files:
340+
log.warning("LoD2: no downloaded/extracted GML files found for current scope; skipping import.")
341+
return True
342+
343+
success = _import_lod2_files_in_parallel(
344+
infdb=infdb,
345+
gml_files=gml_files,
346+
batch_size=200,
347+
processes=utils.get_number_processes(infdb),
348+
)
349+
350+
if not success:
351+
raise RuntimeError("LoD2: one or more import batches failed")
150352

151353
# Create flat building table
152354
object_id_prefix = infdb.get_config_value(source_cfg + ["object_id_prefix"]) or "DE"

0 commit comments

Comments
 (0)