11import os
22import sys
3+ from zipfile import ZipFile , BadZipFile
34
45from infdb import InfDB
56from shapely import wkt as shapely_wkt
67from shapely .geometry import box
8+ import multiprocessing as mp
79
810from . import utils
911
1012
11- def _iter_tile_origins_for_geom (geom , tile_size_m : int ):
12- """Yield lower-left tile origin coordinates (in meters) for all tiles
def _extract_zip_member(zf: ZipFile, member: str, dest_path: str) -> None:
    """Stream one ZIP member to ``dest_path`` without leaving partial output.

    The member is copied into ``dest_path + ".part"`` and only renamed to
    ``dest_path`` once the copy (including the CRC check done by ``zipfile``)
    has finished. On any failure the temporary file is removed and the
    exception is re-raised.
    """
    import shutil

    tmp_path = dest_path + ".part"
    try:
        with zf.open(member) as src, open(tmp_path, "wb") as dst:
            # Chunked copy: avoids holding a whole CityGML tile in memory.
            shutil.copyfileobj(src, dst)
        os.replace(tmp_path, dest_path)
    except BaseException:
        # Never leave a truncated/empty file behind: callers skip extraction
        # when the destination already exists, so a stale partial file would
        # be imported as if it were complete.
        try:
            os.remove(tmp_path)
        except FileNotFoundError:
            pass
        raise


def _urls_to_local_citygml_paths(urls: list[str], gml_path: str, log) -> list[str]:
    """Resolve current-run URLs to existing local CityGML file paths.

    Supports:
    - direct .gml files
    - .zip files containing .gml files (members are extracted flat into
      ``gml_path`` using only their base name)

    Important:
    This function only resolves files belonging to the current URL list.
    It does not import every file in the shared CityGML folder.

    Args:
        urls: Download URLs of the current run; the local file name is the
            URL's base name.
        gml_path: Folder the files were downloaded to and ZIPs are extracted to.
        log: Logger-like object; only ``warning`` is used.

    Returns:
        Sorted, de-duplicated list of local ``.gml`` paths. Missing files,
        invalid ZIPs, ZIPs without GML members and unsupported file types are
        logged as warnings and left out.
    """
    local_files = []
    missing_files = []

    for url in urls:
        filename = os.path.basename(url)
        local_path = os.path.join(gml_path, filename)
        lowered = filename.lower()

        if not os.path.isfile(local_path):
            missing_files.append(local_path)
            continue

        if lowered.endswith(".gml"):
            local_files.append(local_path)
            continue

        if not lowered.endswith(".zip"):
            log.warning("LoD2 unsupported file type: %s", local_path)
            continue

        try:
            with ZipFile(local_path, "r") as zf:
                gml_members = [
                    member
                    for member in zf.namelist()
                    if member.lower().endswith(".gml")
                ]

                if not gml_members:
                    log.warning("LoD2 ZIP contains no GML files: %s", local_path)
                    continue

                for member in gml_members:
                    # Only the base name is used, so archive-internal folders
                    # (or path tricks like "../") cannot escape gml_path.
                    extracted_filename = os.path.basename(member)
                    if not extracted_filename:
                        continue

                    extracted_path = os.path.join(gml_path, extracted_filename)

                    # Already extracted by an earlier run: reuse it.
                    if not os.path.isfile(extracted_path):
                        _extract_zip_member(zf, member, extracted_path)

                    local_files.append(extracted_path)

        except BadZipFile:
            # Members extracted before the failure stay in local_files.
            log.warning("LoD2 invalid ZIP file: %s", local_path)

    if missing_files:
        log.warning("LoD2: %d expected files are missing after download.", len(missing_files))
        # Cap the per-file output so a failed download does not flood the log.
        for path in missing_files[:20]:
            log.warning("Missing file: %s", path)
        if len(missing_files) > 20:
            log.warning("... and %d more missing files.", len(missing_files) - 20)

    return sorted(set(local_files))
81+
82+
def _chunk_list(items: list[str], chunk_size: int) -> list[list[str]]:
    """Partition ``items`` into consecutive slices of at most ``chunk_size`` entries.

    The last slice may be shorter. An empty input yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is zero or negative.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be > 0")

    chunks = []
    for start in range(0, len(items), chunk_size):
        stop = start + chunk_size
        chunks.append(items[start:stop])
    return chunks
89+
90+
def _import_lod2_batch(
    batch_files: list[str],
    tool_name: str,
    batch_index: int,
    total_batches: int,
) -> bool:
    """Import one batch of GML files directly with citydb.

    Builds its own ``InfDB`` instance from ``tool_name`` (presumably because
    this runs in a pool worker process -- confirm against the caller) and runs
    ``citydb import citygml`` on ``batch_files`` through ``utils.do_cmd``.

    Args:
        batch_files: Paths of the GML files to import in this batch.
        tool_name: Tool name used to construct the ``InfDB`` instance.
        batch_index: 1-based batch number, used for log messages only.
        total_batches: Total number of batches, used for log messages only.

    Returns:
        True if the batch was empty or citydb returned 0; False if citydb
        returned a non-zero code or any exception occurred. Exceptions are
        logged, never propagated.
    """
    import logging

    # Fallback logger so that failures before the worker logger exists
    # (e.g. InfDB construction errors) are still reported, not swallowed.
    log = logging.getLogger(__name__)

    try:
        infdb = InfDB(tool_name=tool_name, config_path="../configs/config-infdb-import.yml")
        log = infdb.get_worker_logger()

        if not batch_files:
            log.info("LoD2 batch %d/%d: empty batch, skipping.", batch_index, total_batches)
            return True

        source_cfg = [infdb.get_toolname(), "sources", "lod2"]
        params = infdb.get_db_parameters_dict()
        import_mode = infdb.get_config_value(source_cfg + ["import-mode"]) or "skip"

        log.info(
            "LoD2 batch %d/%d: importing %d files.",
            batch_index,
            total_batches,
            len(batch_files),
        )

        # NOTE(review): the password is passed as a command-line argument and
        # may be visible in the process list -- confirm this is acceptable.
        cmd_parts = [
            "citydb",
            "import",
            "citygml",
            "-H",
            params["host"],
            "-d",
            params["db"],
            "-u",
            params["user"],
            "-p",
            params["password"],
            "-P",
            str(params["exposed_port"]),
            f"--import-mode={import_mode}",
            *batch_files,
        ]

        return_code = utils.do_cmd(infdb, cmd_parts)

        if return_code != 0:
            log.error(
                "LoD2 batch %d/%d failed with return code %d.",
                batch_index,
                total_batches,
                return_code,
            )
            return False

        log.info("LoD2 batch %d/%d imported successfully.", batch_index, total_batches)
        return True

    except Exception:
        # Report the failure through the return value so the caller can
        # aggregate results across batches.
        log.exception("LoD2 batch %d/%d failed unexpectedly.", batch_index, total_batches)
        return False
153+
154+
def _import_lod2_files_in_parallel(
    infdb: InfDB,
    gml_files: list[str],
    batch_size: int = 200,
    processes: int | None = None,
) -> bool:
    """Import current-run GML files in parallel batches.

    The files are split into chunks of ``batch_size`` and each chunk is handed
    to ``_import_lod2_batch`` through a ``multiprocessing`` pool.

    Args:
        infdb: Provides the worker logger and the tool name passed to workers.
        gml_files: Paths of the GML files to import.
        batch_size: Maximum number of files per batch.
        processes: Requested pool size; when None it is taken from
            ``utils.get_number_processes(infdb)``. The value is clamped to the
            range 1..number of batches.

    Returns:
        True if there was nothing to import or every batch reported success,
        otherwise False.
    """
    log = infdb.get_worker_logger()

    if not gml_files:
        log.warning("LoD2: no GML files to import.")
        return True

    batches = _chunk_list(gml_files, batch_size)
    total_batches = len(batches)

    if processes is None:
        processes = utils.get_number_processes(infdb)

    # No point in starting more workers than there are batches; at least one.
    worker_count = min(processes, total_batches)
    processes = max(1, worker_count)

    log.info(
        "LoD2: importing %d files in %d batches with %d worker(s).",
        len(gml_files),
        total_batches,
        processes,
    )

    # One argument tuple per batch; batch numbers are 1-based for logging.
    tasks = []
    for number, batch in enumerate(batches, start=1):
        tasks.append((batch, infdb.get_toolname(), number, total_batches))

    with mp.Pool(processes=processes) as pool:
        outcomes = pool.starmap(_import_lod2_batch, tasks)

    return all(outcomes)
193+
194+
195+ def _iter_tile_origins_for_geom (
196+ geom ,
197+ tile_size_m : int ,
198+ offset_x_m : int = 0 ,
199+ offset_y_m : int = 0 ,
200+ ):
201+ """Yield lower-left tile origin coordinates in meters for all tiles
13202 intersecting the given geometry.
14203 """
15204 minx , miny , maxx , maxy = geom .bounds
16- start_x = int (minx // tile_size_m ) * tile_size_m
17- start_y = int (miny // tile_size_m ) * tile_size_m
18- end_x = int (maxx // tile_size_m ) * tile_size_m
19- end_y = int (maxy // tile_size_m ) * tile_size_m
205+
206+ start_x = int ((minx - offset_x_m ) // tile_size_m ) * tile_size_m + offset_x_m
207+ start_y = int ((miny - offset_y_m ) // tile_size_m ) * tile_size_m + offset_y_m
208+ end_x = int ((maxx - offset_x_m ) // tile_size_m ) * tile_size_m + offset_x_m
209+ end_y = int ((maxy - offset_y_m ) // tile_size_m ) * tile_size_m + offset_y_m
20210
21211 for x in range (start_x , end_x + tile_size_m , tile_size_m ):
22212 for y in range (start_y , end_y + tile_size_m , tile_size_m ):
@@ -32,21 +222,20 @@ def _build_urls_for_region(region_name: str, region_cfg: dict, infdb: InfDB, log
32222 tiled OpenData sources, for example:
33223 - LoD2 NRW
34224 - LoD2 Bavaria
35- - DGM1 Bavaria
225+ - LoD2 Baden-Württemberg
36226
37227 Expected config keys in region_cfg:
38228 - status: "active" / "not-active"
39229 - state_prefix: AGS prefix used to resolve the clip geometry
40230 - base_url: URL prefix of the tiled dataset
41- - tile_size_m: tile size in meters (e.g. 1000 or 2000)
231+ - tile_size_m: tile size in meters
42232 - filename_template: filename pattern using:
43233 {e_km} = easting in km
44234 {n_km} = northing in km
45235
46- Example filename_template values:
47- - LoD2 NRW: "LoD2_32_{e_km}_{n_km}_1_NW.gml"
48- - LoD2 Bavaria: "{e_km}_{n_km}.gml"
49- - DGM1 Bavaria: "{e_km}_{n_km}.tif"
236+ Optional config keys:
237+ - tile_origin_offset_x_m
238+ - tile_origin_offset_y_m
50239 """
51240 if region_cfg .get ("status" ) != "active" :
52241 log .info ("%s: inactive, skipping." , region_name )
@@ -55,26 +244,37 @@ def _build_urls_for_region(region_name: str, region_cfg: dict, infdb: InfDB, log
55244 state_prefix = region_cfg .get ("state_prefix" )
56245 base_url = str (region_cfg .get ("base_url" , "" )).rstrip ("/" ) + "/"
57246 tile_size_m = int (region_cfg .get ("tile_size_m" ) or 0 )
247+ offset_x_m = int (region_cfg .get ("tile_origin_offset_x_m" ) or 0 )
248+ offset_y_m = int (region_cfg .get ("tile_origin_offset_y_m" ) or 0 )
58249 template = region_cfg .get ("filename_template" )
59250
60251 if not state_prefix or not base_url or not tile_size_m or not template :
61252 log .warning ("%s: incomplete tiled dataset configuration, skipping." , region_name )
62253 return []
63254
64255 # Resolve the scoped geometry once for the configured state/region.
65- # We use EPSG:25832 because the tile grids for these Bavaria/NRW datasets
66- # are aligned in meter-based projected coordinates.
67- clip_wkt , _ , _ = utils .get_clip_geometry (target_crs = 25832 , infdb = infdb , state_prefix = state_prefix )
256+ # We use EPSG:25832 because these datasets use meter-based projected tiles.
257+ clip_wkt , _ , _ = utils .get_clip_geometry (
258+ target_crs = 25832 ,
259+ infdb = infdb ,
260+ state_prefix = state_prefix ,
261+ )
262+
68263 if not clip_wkt :
69264 log .info ("%s: no scope geometry resolved for state prefix %s, skipping." , region_name , state_prefix )
70265 return []
71266
72267 scope_geom = shapely_wkt .loads (clip_wkt )
73268
74269 urls = []
75- for x , y in _iter_tile_origins_for_geom (scope_geom , tile_size_m = tile_size_m ):
270+ for x , y in _iter_tile_origins_for_geom (
271+ scope_geom ,
272+ tile_size_m = tile_size_m ,
273+ offset_x_m = offset_x_m ,
274+ offset_y_m = offset_y_m ,
275+ ):
76276 # Convert tile origin coordinates from meters to kilometer indices,
77- # because Bavaria/NRW filenames are based on km grid references.
277+ # because Bavaria/NRW/BW filenames are based on km grid references.
78278 fname = template .format (
79279 e_km = x // 1000 ,
80280 n_km = y // 1000 ,
@@ -86,6 +286,7 @@ def _build_urls_for_region(region_name: str, region_cfg: dict, infdb: InfDB, log
86286 return urls
87287
88288
289+
89290def load (infdb : InfDB ) -> bool :
90291 """Download LoD2 CityGML tiles for all active configured regions, import them via citydb,
91292 then create the flat LoD2 building table.
@@ -95,6 +296,8 @@ def load(infdb: InfDB) -> bool:
95296 - Resolves scope geometry per region/state in EPSG:25832.
96297 - Computes intersecting tiles using regular grid logic.
97298 - Deduplicates URLs globally, so the same file is not downloaded twice.
299+ - Supports direct .gml files and .zip files containing .gml files.
300+ - Imports only the current-run files, not the whole folder.
98301 """
99302 log = infdb .get_worker_logger ()
100303
@@ -109,10 +312,12 @@ def load(infdb: InfDB) -> bool:
109312
110313 nrw_cfg = infdb .get_config_value (source_cfg + ["nrw" ]) or {}
111314 bavaria_cfg = infdb .get_config_value (source_cfg + ["bavaria" ]) or {}
315+ bw_cfg = infdb .get_config_value (source_cfg + ["baden_wuerttemberg" ]) or {}
112316
113317 urls = []
114318 urls .extend (_build_urls_for_region ("NRW" , nrw_cfg , infdb , log ))
115319 urls .extend (_build_urls_for_region ("Bavaria" , bavaria_cfg , infdb , log ))
320+ urls .extend (_build_urls_for_region ("Baden-Württemberg" , bw_cfg , infdb , log ))
116321
117322 urls = sorted (set (urls ))
118323 log .info ("LoD2: %d unique tiles to download across all active regions." , len (urls ))
@@ -121,32 +326,29 @@ def load(infdb: InfDB) -> bool:
121326 log .warning ("LoD2: no tiles resolved for any active region; skipping import." )
122327 return True
123328
124- # Download all unique tiles into one shared folder
329+ # Download all unique tiles into one shared folder.
330+ # NRW / Bavaria download .gml files.
331+ # Baden-Württemberg downloads .zip files.
125332 utils .download_aria2c_many (infdb , urls , output_dir = gml_path )
126333
127- # Import all downloaded CityGML files from the shared folder
128- params = infdb .get_db_parameters_dict ()
129- import_mode = infdb .get_config_value (source_cfg + ["import-mode" ]) or "skip"
334+ # Resolve only the files for the current run / current scope.
335+ # ZIP files are extracted into the same CityGML folder and their
336+ # extracted .gml files are returned.
337+ gml_files = _urls_to_local_citygml_paths (urls , gml_path , log )
130338
131- cmd_parts = [
132- "citydb" ,
133- "import" ,
134- "citygml" ,
135- "-H" ,
136- params ["host" ],
137- "-d" ,
138- params ["db" ],
139- "-u" ,
140- params ["user" ],
141- "-p" ,
142- params ["password" ],
143- "-P" ,
144- str (params ["exposed_port" ]),
145- f"--import-mode={ import_mode } " ,
146- # "--log-level=warn",
147- str (gml_path ),
148- ]
149- utils .do_cmd (infdb , cmd_parts )
339+ if not gml_files :
340+ log .warning ("LoD2: no downloaded/extracted GML files found for current scope; skipping import." )
341+ return True
342+
343+ success = _import_lod2_files_in_parallel (
344+ infdb = infdb ,
345+ gml_files = gml_files ,
346+ batch_size = 200 ,
347+ processes = utils .get_number_processes (infdb ),
348+ )
349+
350+ if not success :
351+ raise RuntimeError ("LoD2: one or more import batches failed" )
150352
151353 # Create flat building table
152354 object_id_prefix = infdb .get_config_value (source_cfg + ["object_id_prefix" ]) or "DE"
0 commit comments