|
1 | | -import xmltodict |
2 | 1 | from scrapy import Item |
3 | 2 |
|
4 | 3 | from jedeschule.items import School |
5 | 4 | from jedeschule.spiders.school_spider import SchoolSpider |
| 5 | +from jedeschule.wfs_basic_parsers import parse_geojson_features |
6 | 6 |
|
7 | 7 |
|
8 | 8 | class SaarlandSpider(SchoolSpider): |
9 | 9 | name = "saarland" |
10 | 10 | start_urls = [ |
11 | | - "https://geoportal.saarland.de/arcgis/services/Internet/Staatliche_Dienste/MapServer/WFSServer?" |
12 | | - "SERVICE=WFS&REQUEST=GetFeature&typeName=Staatliche%5FDienste:Schulen%5FSL&srsname=EPSG:4326" |
| 11 | + "https://geoportal.saarland.de/spatial-objects/257/collections/Staatliche_Dienste:Schulen_SL/items?f=json&limit=2500" |
13 | 12 | ] |
14 | 13 |
|
15 | 14 | def parse(self, response, **kwargs): |
16 | | - data = xmltodict.parse(response.text) |
17 | | - members = data.get("wfs:FeatureCollection", {}).get("wfs:member", []) |
18 | | - |
19 | | - if not isinstance(members, list): |
20 | | - members = [members] |
21 | | - |
22 | | - for member in members: |
23 | | - school = member.get("Staatliche_Dienste:Schulen_SL", {}) |
24 | | - data_elem = {} |
25 | | - |
26 | | - for key, value in school.items(): |
27 | | - if key == "Staatliche_Dienste:SHAPE": |
28 | | - pos = (value.get("gml:Point", {}) |
29 | | - .get("gml:pos", "") |
30 | | - .strip()) |
31 | | - if pos: |
32 | | - lat, lon = pos.split() |
33 | | - data_elem["lat"] = float(lat) |
34 | | - data_elem["lon"] = float(lon) |
35 | | - |
36 | | - continue |
37 | | - |
38 | | - clean_key = key.split(":")[-1] |
39 | | - if clean_key == "PLZ": |
40 | | - value = value.split(".")[0] |
41 | | - |
42 | | - data_elem[clean_key] = value |
43 | | - |
44 | | - yield data_elem |
| 15 | + yield from parse_geojson_features(response) |
45 | 16 |
|
46 | 17 | @staticmethod |
47 | 18 | def normalize(item: Item) -> School: |
48 | | - # The data also contains a field called `SCHULKENNZ` which implies that it might be an id |
49 | | - # that could be used, but some schools share ids (especially `0` or `000000`) which makes for collisions |
| 19 | + # The data also contains a field called `Schulkennz` which implies that it might be an id |
| 20 | + # that could be used, but some schools share ids (especially `0` or `000000`) or |
| 21 | + # have none set at all, which makes for collisions |
50 | 22 | school_id = item.get("OBJECTID") |
51 | 23 |
|
52 | 24 | return School( |
53 | | - name=item.get("Bezeichnun"), |
54 | 25 | address=item.get("Straße", "").strip(), |
55 | 26 | city=item.get("Ort"), |
56 | | - zip=item.get("PLZ"), |
57 | | - school_type=item.get("Schulform"), |
| 27 | + fax=item.get("Fax"), |
58 | 28 | id=f"SL-{school_id}", |
| 29 | + latitude=item.get("lat"), |
| 30 | + longitude=item.get("lon"), |
| 31 | + name=item.get("Bezeichnung"), |
| 32 | + phone=item.get("Telefon"), |
| 33 | + school_type=item.get("Schulform"), |
| 34 | + website=item.get("Homepage"), |
| 35 | + zip=item.get("PLZ"), |
59 | 36 | ) |
0 commit comments