Skip to content

Commit 82628cd

Browse files
committed
[SL] Use geojson from Geoportal
1 parent a8df519 commit 82628cd

1 file changed

Lines changed: 14 additions & 37 deletions

File tree

jedeschule/spiders/saarland.py

Lines changed: 14 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,59 +1,36 @@
1-
import xmltodict
21
from scrapy import Item
32

43
from jedeschule.items import School
54
from jedeschule.spiders.school_spider import SchoolSpider
5+
from jedeschule.wfs_basic_parsers import parse_geojson_features
66

77

88
class SaarlandSpider(SchoolSpider):
99
name = "saarland"
1010
start_urls = [
11-
"https://geoportal.saarland.de/arcgis/services/Internet/Staatliche_Dienste/MapServer/WFSServer?"
12-
"SERVICE=WFS&REQUEST=GetFeature&typeName=Staatliche%5FDienste:Schulen%5FSL&srsname=EPSG:4326"
11+
"https://geoportal.saarland.de/spatial-objects/257/collections/Staatliche_Dienste:Schulen_SL/items?f=json&limit=2500"
1312
]
1413

1514
def parse(self, response, **kwargs):
16-
data = xmltodict.parse(response.text)
17-
members = data.get("wfs:FeatureCollection", {}).get("wfs:member", [])
18-
19-
if not isinstance(members, list):
20-
members = [members]
21-
22-
for member in members:
23-
school = member.get("Staatliche_Dienste:Schulen_SL", {})
24-
data_elem = {}
25-
26-
for key, value in school.items():
27-
if key == "Staatliche_Dienste:SHAPE":
28-
pos = (value.get("gml:Point", {})
29-
.get("gml:pos", "")
30-
.strip())
31-
if pos:
32-
lat, lon = pos.split()
33-
data_elem["lat"] = float(lat)
34-
data_elem["lon"] = float(lon)
35-
36-
continue
37-
38-
clean_key = key.split(":")[-1]
39-
if clean_key == "PLZ":
40-
value = value.split(".")[0]
41-
42-
data_elem[clean_key] = value
43-
44-
yield data_elem
15+
yield from parse_geojson_features(response)
4516

4617
@staticmethod
4718
def normalize(item: Item) -> School:
48-
# The data also contains a field called `SCHULKENNZ` which implies that it might be an id
49-
# that could be used, but some schools share ids (especially `0` or `000000`) which makes for collisions
19+
# The data also contains a field called `Schulkennz` which implies that it might be an id
20+
# that could be used, but some schools share ids (especially `0` or `000000`) or
21+
# do not have any set at all, which makes for collisions
5022
school_id = item.get("OBJECTID")
5123

5224
return School(
53-
name=item.get("Bezeichnun"),
5425
address=item.get("Straße", "").strip(),
5526
city=item.get("Ort"),
56-
zip=item.get("PLZ"),
57-
school_type=item.get("Schulform"),
27+
fax=item.get("Fax"),
5828
id=f"SL-{school_id}",
29+
latitude=item.get("lat"),
30+
longitude=item.get("lon"),
31+
name=item.get("Bezeichnung"),
32+
phone=item.get("Telefon"),
33+
school_type=item.get("Schulform"),
34+
website=item.get("Homepage"),
35+
zip=item.get("PLZ"),
5936
)

0 commit comments

Comments
 (0)