Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 14 additions & 37 deletions jedeschule/spiders/saarland.py
Original file line number Diff line number Diff line change
@@ -1,59 +1,36 @@
import xmltodict
from scrapy import Item

from jedeschule.items import School
from jedeschule.spiders.school_spider import SchoolSpider
from jedeschule.wfs_basic_parsers import parse_geojson_features


class SaarlandSpider(SchoolSpider):
    """Spider for schools in Saarland.

    Reads the ``Staatliche_Dienste:Schulen_SL`` collection from the Saarland
    geoportal as JSON (``f=json``) and maps every feature onto a ``School``.
    """

    name = "saarland"
    # A single request with a large `limit` is issued; no pagination is
    # followed here.
    start_urls = [
        "https://geoportal.saarland.de/spatial-objects/257/collections/Staatliche_Dienste:Schulen_SL/items?f=json&limit=2500"
    ]

    def parse(self, response, **kwargs):
        """Yield one raw item per feature contained in ``response``.

        The actual work is delegated to ``parse_geojson_features``.
        NOTE(review): ``normalize`` reads ``lat``/``lon`` from the yielded
        items, so the helper presumably flattens the feature properties and
        adds those two keys from the point geometry -- confirm against
        ``jedeschule.wfs_basic_parsers``.
        """
        yield from parse_geojson_features(response)

    @staticmethod
    def normalize(item: Item) -> School:
        """Convert a raw feature item into a ``School``.

        Args:
            item: Mapping of the feature's properties as yielded by ``parse``.

        Returns:
            A ``School`` whose id is ``SL-<OBJECTID>``.
        """
        # The data also contains a field called `Schulkennz` which implies that it might be an id
        # that could be used, but some schools share ids (especially `0` or `000000`) or
        # do not have any set at all which makes for collisions
        school_id = item.get("OBJECTID")

        # JSON properties may be present but null; `or ""` keeps `.strip()`
        # from failing on None in that case.
        street = item.get("Straße") or ""

        return School(
            address=street.strip(),
            city=item.get("Ort"),
            fax=item.get("Fax"),
            id=f"SL-{school_id}",
            latitude=item.get("lat"),
            longitude=item.get("lon"),
            name=item.get("Bezeichnung"),
            phone=item.get("Telefon"),
            school_type=item.get("Schulform"),
            website=item.get("Homepage"),
            zip=item.get("PLZ"),
        )
149 changes: 100 additions & 49 deletions test/test_saarland.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,63 +7,114 @@

class TestSaarlandSpider(unittest.TestCase):
    """Checks that the Saarland spider parses and normalizes a GeoJSON response."""

    def test_parse(self):
        """Parse a one-feature response and verify every normalized field."""
        # Trimmed copy of a real response: collection metadata plus one feature.
        json_response = """
        {
          "serviceTitle": "Staatliche_Dienste",
          "collectionId": "1125",
          "collectionName": "Staatliche_Dienste:Schulen_SL",
          "collectionTitle": "Schulen_SL",
          "title": "Schulen_SL",
          "id": "Staatliche_Dienste:Schulen_SL",
          "description": "Schulen im Saarland",
          "extent": {
            "spatial": {
              "minx": "6.37990222",
              "miny": "49.10626268",
              "maxx": "7.37397862",
              "maxy": "49.61541418"
            },
            "temporal": []
          },
          "type": "FeatureCollection",
          "links": [
            {
              "rel": "self",
              "type": "application/geo+json",
              "title": "this document",
              "href": "https://geoportal.saarland.de/spatial-objects/257/collections/Staatliche_Dienste:Schulen_SL/items?f=json&limit=2500"
            },
            {
              "rel": "next",
              "type": "application/geo+json",
              "title": "next page",
              "href": "https://geoportal.saarland.de/spatial-objects/257/collections/Staatliche_Dienste:Schulen_SL/items?f=json&limit=2500&offset=2500"
            },
            {
              "rel": "last",
              "type": "application/geo+json",
              "title": "last page",
              "href": "https://geoportal.saarland.de/spatial-objects/257/collections/Staatliche_Dienste:Schulen_SL/items?f=json&limit=2500&offset=0"
            }
          ],
          "numberMatched": 368,
          "numberReturned": 368,
          "timeStamp": "2025-08-20T12:21:36.7200Z",
          "genTime": 0.7143468856811523,
          "features": [
            {
              "type": "Feature",
              "properties": {
                "gml_id": "Schulen_SL.1",
                "OBJECTID": 1,
                "Gemeindenr": 1100,
                "PLZ": 66123,
                "Ort": "Saarbrücken",
                "Straße": "Kohlweg 7",
                "Bezeichnung": "Deutsch-Französiche Hochschule, Université franco-allemande",
                "Telefon": "0681-93812100",
                "Fax": "0681-93812111",
                "Email": "info@dfh-ufa.org",
                "Schulform": "Hochschule",
                "Homepage": "https://www.dfh-ufa.org/",
                "Schulregion": "Saarbrücken",
                "KARTENERST": "Hochschule",
                "Ost": 355942.9763,
                "Nord": 5456095.936,
                "ERFASSUNG": "20.05.2025"
              },
              "bbox": [
                7.0208505,
                49.24067452,
                7.0208505,
                49.24067452
              ],
              "geometry": {
                "type": "Point",
                "coordinates": [
                  7.0208505,
                  49.24067452
                ]
              },
              "$schema": null,
              "$context": null
            }
          ]
        }
        """

        spider = SaarlandSpider()
        response = TextResponse(url="https://test.com", body=json_response, encoding="utf-8")
        schools = list(spider.parse(response))
        self.assertEqual(len(schools), 1)

        school = schools[0]
        parsed_school = spider.normalize(school)

        self.assertEqual(parsed_school["id"], "SL-1")
        self.assertEqual(parsed_school["name"], "Deutsch-Französiche Hochschule, Université franco-allemande")
        self.assertEqual(parsed_school["address"], "Kohlweg 7")
        self.assertEqual(parsed_school["city"], "Saarbrücken")
        self.assertEqual(parsed_school["fax"], "0681-93812111")
        self.assertEqual(parsed_school["phone"], "0681-93812100")
        self.assertEqual(parsed_school["school_type"], "Hochschule")
        self.assertEqual(parsed_school["website"], "https://www.dfh-ufa.org/")
        # PLZ arrives as a JSON number and is passed through unchanged.
        self.assertEqual(parsed_school["zip"], 66123)
        # The fixture's point coordinates are [7.0208505, 49.24067452];
        # the second value is expected as latitude, the first as longitude.
        self.assertEqual(parsed_school["latitude"], 49.24067452)
        self.assertEqual(parsed_school["longitude"], 7.0208505)


if __name__ == "__main__":
Expand Down