Skip to content

Commit 29f4ebb

Browse files
committed
Rename marimo notebook such that it doesn't conflict with Jules' CPT interpretation notebook
1 parent fab7601 commit 29f4ebb

File tree

1 file changed

+186
-117
lines changed

1 file changed

+186
-117
lines changed

examples/nl_amsterdam/nl_amsterdam_bro.py renamed to examples/nl_amsterdam/get_cpts_from_bro.py

Lines changed: 186 additions & 117 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,167 @@
1313
import marimo
1414

1515
__generated_with = "0.17.0"
16-
app = marimo.App(width="medium")
16+
app = marimo.App(width="columns")
17+
18+
19+
@app.cell(column=0, hide_code=True)
20+
def _(mo):
21+
mo.md(
22+
r"""
23+
How to access BRO data: [Handreiking Afname BRO Gegevens](https://www.bro-productomgeving.nl/bpo/latest/handreiking-afname-bro-gegevens)
24+
25+
1. [BROloket](https://www.broloket.nl/ondergrondgegevens)
26+
2. BRO APIs
27+
1. SOAP - impractical, because requires a digital "PKI" certificate.
28+
2. [REST](https://www.bro-productomgeving.nl/bpo/latest/url-s-publieke-rest-services)
29+
1. CPT: <https://publiek.broservices.nl/sr/cpt/v1>
30+
2. Geotechnical boreholes: <https://publiek.broservices.nl/sr/bhrgt/v2>
31+
3. [PDOK](https://app.pdok.nl/viewer)
32+
1. WMS - This is useful for quickly viewing the location of historic CPTs or geotechnical boreholes.
33+
2. ATOM feed - For downloading the whole dataset, i.e. to download all CPTs or all geotechnical boreholes in BRO.
34+
"""
35+
)
36+
return
37+
38+
39+
@app.cell(hide_code=True)
def _(Request, bounds, datetime, json, minidom, mo, pl, urlopen, xmltodict):
    # Ask the public BRO REST service for the characteristics of every CPT
    # registered since 2017-01-01 inside the bounding box `bounds`, turn the
    # XML answer into a typed polars DataFrame and show it next to the raw
    # XML / JSON representations.
    cpt_search_url = "https://publiek.broservices.nl/sr/cpt/v1/characteristics/searches"
    api_request_data = json.dumps(
        {
            "registrationPeriod": {
                "beginDate": "2017-01-01",
                "endDate": datetime.date.today().isoformat(),
            },
            "area": {
                "boundingBox": {
                    # `bounds` is read as [lon_min, lat_min, lon_max, lat_max].
                    "lowerCorner": {
                        "lon": bounds[0],
                        "lat": bounds[1],
                    },
                    "upperCorner": {
                        "lon": bounds[2],
                        "lat": bounds[3],
                    },
                }
            },
        }
    ).encode("utf-8")

    cpt_search_req = Request(
        cpt_search_url,
        data=api_request_data,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urlopen(cpt_search_req, timeout=30) as cpt_search_resp:
        xml = cpt_search_resp.read()

    cpt_search_dict = xmltodict.parse(
        xml,
        xml_attribs=False,
        process_namespaces=True,
        # Mapping a namespace to None strips its prefix from the keys.
        namespaces={
            "http://www.opengis.net/gml/3.2": None,
            "http://www.broservices.nl/xsd/dscpt/1.1": None,
            "http://www.broservices.nl/xsd/brocommon/3.0": None,
        },
        # Without this, a response that holds exactly one document is parsed
        # into a plain dict instead of a one-element list, and the DataFrame
        # construction below no longer sees one row per CPT.
        force_list=("dispatchDocument",),
    )

    def to_bool(col: str) -> pl.Expr:
        """Return an expression that is True where `col` holds a truthy word."""
        truthy = ["ja", "yes", "true", "1"]
        return pl.col(col).str.to_lowercase().is_in(truthy)

    def parse_pos(col: str) -> pl.Expr:
        """Return an expression turning the struct's "pos" text into 2 floats."""
        return (
            pl.col(col)
            .struct.field("pos")
            .str.split(" ")
            .cast(pl.Array(float, 2))
            .alias(col)
        )

    cpt_search_df = (
        pl.DataFrame(
            cpt_search_dict["dispatchCharacteristicsResponse"]["dispatchDocument"]
        )
        .unnest("CPT_C")
        .with_columns(
            # Casts
            pl.col("deliveryAccountableParty").cast(int),
            pl.col("objectRegistrationTime").cast(pl.Datetime),
            pl.col("offset").cast(float),
            pl.col("startTime").cast(pl.Date),
            pl.col("predrilledDepth").cast(float),
            pl.col("finalDepth").cast(float),
            # "ja" / "nee" -> boolean
            to_bool("deregistered"),
            to_bool("underReview"),
            to_bool("dissipationTestPerformed"),
            # {"pos": "a b"} -> pl.Array[float, 2] (coordinate order is kept)
            parse_pos("standardizedLocation"),
            parse_pos("deliveredLocation"),
            # {"date": "YYYY-MM-DD"} -> pl.Date
            pl.col("researchReportDate")
            .struct.field("date")
            .cast(pl.Date)
            .alias("researchReportDate"),
        )
    )
    cpt_search_table = mo.ui.table(cpt_search_df)

    tabs = mo.ui.tabs(
        {
            "DataFrame": cpt_search_table,
            "XML": mo.md(
                f"```xml\n{minidom.parseString(xml).toprettyxml(indent=' ')}\n```"
            ),
            # Unfiltered parse: keeps XML attributes and namespace prefixes.
            "JSON": xmltodict.parse(xml),
            "JSON, no XML attributes": cpt_search_dict,
        }
    )

    tabs
    return (cpt_search_df,)
17138

18139

19140
@app.cell
def _(BytesIO, Request, cpt_search_df, pl, read_cpt, urlopen):
    # Download the full BRO XML object of every CPT found by the search and
    # parse it with pygef's `read_cpt`. Two things are collected per CPT:
    #   * `cpt_data`: the measurement table, tagged with its BRO ID
    #   * `cpts`:     the remaining attributes of the parsed object
    cpts = []
    cpt_data = []
    for bro_id in cpt_search_df["broId"]:
        cpt_get_url = f"https://publiek.broservices.nl/sr/cpt/v1/objects/{bro_id}"
        cpt_get_req = Request(cpt_get_url, method="GET")
        with urlopen(cpt_get_req, timeout=30) as resp:
            cpt_xml = resp.read()
        cpt_obj = read_cpt(BytesIO(cpt_xml))
        cpt_data.append(
            cpt_obj.data.with_columns(pl.lit(cpt_obj.bro_id).alias("broId"))
        )
        # Shallow copy, so dropping / replacing entries leaves `cpt_obj` intact.
        cpt_dict = cpt_obj.__dict__.copy()
        del cpt_dict["data"]
        # Replace the location objects by plain [x, y] lists.
        cpt_dict["delivered_location"] = [
            cpt_dict["delivered_location"].x,
            cpt_dict["delivered_location"].y,
        ]
        cpt_dict["standardized_location"] = [
            cpt_dict["standardized_location"].x,
            cpt_dict["standardized_location"].y,
        ]
        # Store the datum by member name; `.name` is the public accessor for
        # what the private `_name_` attribute holds.
        cpt_dict["delivered_vertical_position_datum"] = cpt_dict[
            "delivered_vertical_position_datum"
        ].name
        cpts.append(cpt_dict)

    cpts_df = pl.DataFrame(cpts).cast(
        {
            "delivered_location": pl.Array(float, 2),
            "standardized_location": pl.Array(float, 2),
        }
    )
    # "diagonal_relaxed" lets CPTs with differing measurement columns be
    # stacked into one frame.
    cpt_data_df = pl.concat(cpt_data, how="diagonal_relaxed")
    return (cpt_data_df,)
174+
175+
176+
@app.cell(hide_code=True)
20177
def _():
21178
import datetime
22179
import json
@@ -27,7 +184,9 @@ def _():
27184
import folium
28185
import geopandas as gpd
29186
import marimo as mo
187+
import polars as pl
30188
import xmltodict
189+
31190
from folium.plugins import Draw
32191
from lxml import etree
33192
from pygef import read_cpt
@@ -39,40 +198,19 @@ def _():
39198
Draw,
40199
Request,
41200
datetime,
42-
etree,
43201
folium,
44202
gpd,
45203
json,
46204
minidom,
47205
mo,
48-
plot_cpt,
206+
pl,
49207
read_cpt,
50208
urlopen,
51209
xmltodict,
52210
)
53211

54212

55-
@app.cell
56-
def _(mo):
57-
mo.md(
58-
r"""
59-
How to access BRO data: [Handreiking Afname BRO Gegevens](https://www.bro-productomgeving.nl/bpo/latest/handreiking-afname-bro-gegevens)
60-
61-
1. [BROloket](https://www.broloket.nl/ondergrondgegevens)
62-
2. BRO APIs
63-
1. SOAP - impractical, because requires a digital "PKI" certificate.
64-
2. [REST](https://www.bro-productomgeving.nl/bpo/latest/url-s-publieke-rest-services)
65-
1. CPT: <https://publiek.broservices.nl/sr/cpt/v1>
66-
2. Geotechnical boreholes: <https://publiek.broservices.nl/sr/bhrgt/v2>
67-
3. [PDOK](https://app.pdok.nl/viewer)
68-
1. WMS - This is useful for quickly viewing the location of historic CPTs or geotechnical boreholes.
69-
2. ATOM feed - For downloading the whole dataset, i.e. to download all CPTs or all geotechnical boreholes in BRO.
70-
"""
71-
)
72-
return
73-
74-
75-
@app.cell(hide_code=True)
213+
@app.cell(column=1, hide_code=True)
76214
def _(Draw, buffer, folium, geojson_text_area, gpd, json, site_geojson):
77215
geojson = geojson_text_area.value if geojson_text_area.value else site_geojson
78216
# Create a folium interactive map (leaflet.js maps)
@@ -87,16 +225,18 @@ def _(Draw, buffer, folium, geojson_text_area, gpd, json, site_geojson):
87225
buffered_site = site.geometry.buffer(buffer.value)
88226
bounds = buffered_site.to_crs(4326).bounds.to_numpy()[0]
89227
folium_map = buffered_site.explore(
90-
tiles="https://server.arcgisonline.com/ArcGIS/rest/services/World_Street_Map/MapServer/tile/{z}/{y}/{x}",
228+
# tiles="https://server.arcgisonline.com/ArcGIS/rest/services/World_Street_Map/MapServer/tile/{z}/{y}/{x}",
229+
tiles="CartoDB positron",
91230
style_kwds={"fillOpacity": 0.1},
92231
attr=("Esri.WorldStreetMap"),
93232
)
94233
site.explore(m=folium_map, color="red", style_kwds={"fill": False})
95-
folium.Rectangle(
234+
bounds_rectangle = folium.Rectangle(
96235
bounds=[[bounds[1], bounds[0]], [bounds[3], bounds[2]]],
97236
color="black",
98237
weight=0.5,
99-
).add_to(folium_map)
238+
)
239+
bounds_rectangle.add_to(folium_map)
100240

101241
# Add PDOK's CPT WMS layer
102242
folium.WmsTileLayer(
@@ -132,10 +272,10 @@ def _(Draw, buffer, folium, geojson_text_area, gpd, json, site_geojson):
132272
).add_to(folium_map)
133273

134274
folium_map
135-
return (bounds,)
275+
return bounds, bounds_rectangle, site
136276

137277

138-
@app.cell
278+
@app.cell(hide_code=True)
139279
def _(mo):
140280
site_geojson = '{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[4.902889698063478,52.385435257999404],[4.902923632305726,52.385509906165986],[4.902901716440955,52.385619936470341],[4.903006347021203,52.38569026942325],[4.90389499998988,52.385854235381281],[4.904091535809526,52.385902562073795],[4.904285950739031,52.385988859607401],[4.904484607448818,52.38602683046868],[4.904532680958658,52.386042363993511],[4.904790722592364,52.386094573856099],[4.904799206152918,52.386073862513044],[4.904974533071164,52.386108812898868],[4.904999276789463,52.386100183176488],[4.905287541107636,52.385853480276289],[4.903576336246269,52.385074312916224],[4.903554420381488,52.385093082996256],[4.903498923756159,52.38509416173622],[4.903484784488561,52.385100202679702],[4.903473119592793,52.385110990076754],[4.903287895187223,52.385027927051986],[4.903256788798504,52.38504302943177],[4.903150390809821,52.38513472234078],[4.903119637902792,52.385127171167227],[4.903098782483085,52.38514421524286],[4.903084996697174,52.385139037296511],[4.90302349088312,52.385198152147602],[4.902889698063478,52.385435257999404]]]}}'
141281
geojson_text_area = mo.ui.text_area(
@@ -151,108 +291,37 @@ def _(mo):
151291
return buffer, geojson_text_area, site_geojson
152292

153293

154-
@app.cell
155-
def _(buffer):
156-
# EPSG:4258 is the European Terrestrial Reference System 1989 (ETR)
157-
# ETR89 coordinates are in [Longitude (x), Latitude (y)]
158-
lonlat_bounds = buffer.to_crs(4258).bounds
159-
lonlat_bounds
294+
@app.cell(hide_code=True)
295+
def _(mo):
296+
mo.md(r"""The bounding box around the site + buffer that we created above is now used to make a call to the BRO API to retrieve all the CPT characteristics of the CPTs in that bounding box.""")
160297
return
161298

162299

163-
@app.cell
164-
def _(Request, bounds, datetime, json, minidom, mo, urlopen, xmltodict):
165-
cpt_search_url = "https://publiek.broservices.nl/sr/cpt/v1/characteristics/searches"
166-
api_request_data = json.dumps(
167-
{
168-
"registrationPeriod": {
169-
"beginDate": "2017-01-01",
170-
"endDate": datetime.date.today().isoformat(),
171-
},
172-
"area": {
173-
"boundingBox": {
174-
"lowerCorner": {
175-
"lon": bounds[0],
176-
"lat": bounds[1],
177-
},
178-
"upperCorner": {
179-
"lon": bounds[2],
180-
"lat": bounds[3],
181-
},
182-
}
183-
},
184-
}
185-
).encode("utf-8")
186-
187-
cpt_search_req = Request(
188-
cpt_search_url,
189-
data=api_request_data,
190-
headers={"Content-Type": "application/json"},
191-
method="POST",
192-
)
193-
with urlopen(cpt_search_req, timeout=30) as cpt_search_resp:
194-
xml = cpt_search_resp.read()
195-
196-
cpt_search = xmltodict.parse(
197-
xml,
198-
xml_attribs=False,
199-
process_namespaces=True,
200-
namespaces={
201-
"http://www.broservices.nl/xsd/dscpt/1.1": None,
202-
"http://www.broservices.nl/xsd/brocommon/3.0": None,
203-
},
204-
)
205-
206-
tabs = mo.ui.tabs(
207-
{
208-
"XML": mo.md(
209-
f"```xml\n{minidom.parseString(xml).toprettyxml(indent=' ')}\n```"
210-
),
211-
"JSON": xmltodict.parse(xml),
212-
"JSON, no XML attributes": cpt_search,
213-
}
214-
)
215-
216-
tabs
300+
@app.cell(hide_code=True)
def _(bounds_rectangle, cpt_search_df, gpd, site):
    # Plot every CPT returned by the search on an interactive map, together
    # with the search rectangle and the site outline.
    # Underscore-prefixed names are used for the intermediates so they stay
    # private to this cell.
    _attributes = cpt_search_df.to_pandas()
    _delivered = cpt_search_df["deliveredLocation"]
    _points = gpd.points_from_xy(
        _delivered.arr.get(0),
        _delivered.arr.get(1),
    )
    cpt_search_gdf = gpd.GeoDataFrame(
        _attributes,
        geometry=_points,
        crs=28992,
    )

    cpt_map = cpt_search_gdf.explore(tiles="CartoDB positron")
    bounds_rectangle.add_to(cpt_map)
    site.explore(
        m=cpt_map,
        color="red",
        style_kwds={"fill": False},
    )
    cpt_map
    return
218308

219309

220-
app._unparsable_cell(
221-
r"""
222-
for cpt in cpt_search
223-
""",
224-
name="_",
225-
)
226-
227-
228310
@app.cell
229-
def _(Request, etree, urlopen):
230-
cpt_get_url = "https://publiek.broservices.nl/sr/cpt/v1/objects/CPT000000198164"
231-
cpt_get_req = Request(cpt_get_url, method="GET")
232-
with urlopen(cpt_get_req, timeout=30) as resp:
233-
cpt_str = resp.read()
234-
cpt_xml = etree.fromstring(cpt_str)
235-
236-
print(etree.tostring(cpt_xml, pretty_print=True, encoding="unicode"))
237-
return (cpt_str,)
238-
239-
240-
@app.cell
241-
def _(cpt_str):
242-
cpt_str
311+
def _(cpt_data_df):
    # Bare expression: the combined CPT measurement frame is the value this
    # cell puts on display.
    cpt_data_df
    return
244314

245315

246316
@app.cell
247-
def _(BytesIO, cpt_str, read_cpt):
248-
cpt_data = read_cpt(BytesIO(cpt_str))
249-
cpt_data.__dict__
250-
return (cpt_data,)
317+
def _(cpt_data_df, mo):
    # Wrap the combined CPT measurement frame in marimo's dataframe UI
    # element; the bare expression is the value this cell puts on display.
    mo.ui.dataframe(cpt_data_df)
    return
251320

252321

253322
@app.cell
254-
def _(cpt_data, plot_cpt):
255-
plot_cpt(cpt_data)
323+
def _(cpt_data_df):
    # Show only the measurement rows that belong to one CPT.
    # A polars Series offers substring matching through its `.str` namespace
    # (there is no `Series.contains`); the match is literal because a BRO ID
    # is plain text, not a regular expression.
    cpt_data_df.filter(
        cpt_data_df["broId"].str.contains("CPT000000198163", literal=True)
    )
    return
257326

258327

0 commit comments

Comments
 (0)