Skip to content

Commit ce4838e

Browse files
authored
FIX: onbedekt consolidation category (#206)
1 parent f9b9228 commit ce4838e

File tree

4 files changed

+30
-9
lines changed

4 files changed

+30
-9
lines changed

cropclassification/calc_cover.py

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ def run_cover(
4949
logger.warning("This is a POC for a cover marker, so not for operational use!")
5050
logger.info(f"Config used: \n{conf.pformat_config()}")
5151

52+
force = True
5253
markertype = conf.marker["markertype"]
5354
if not markertype.startswith(("COVER", "ONBEDEKT")):
5455
raise ValueError(f"Invalid markertype {markertype}, expected COVER_XXX")
@@ -88,7 +89,9 @@ def run_cover(
8889
OR "ALL_BEST" like '%TBG%'
8990
OR "ALL_BEST" like '%EBG%'
9091
"""
91-
gfo.copy_layer(input_parcel_path, input_parcel_filtered_path, where=where)
92+
gfo.copy_layer(
93+
input_parcel_path, input_parcel_filtered_path, where=where, force=force
94+
)
9295
input_parcel_path = input_parcel_filtered_path
9396

9497
elif markertype == "COVER_EEB_VOORJAAR":
@@ -100,7 +103,9 @@ def run_cover(
100103
where = (
101104
"ALL_BEST like '%EEB%' AND GWSCOD_V = '83' AND GWSCOD_H IN ('201', '202')"
102105
)
103-
gfo.copy_layer(input_parcel_path, input_parcel_filtered_path, where=where)
106+
gfo.copy_layer(
107+
input_parcel_path, input_parcel_filtered_path, where=where, force=force
108+
)
104109
input_parcel_path = input_parcel_filtered_path
105110

106111
elif markertype in ("COVER_TBG_BMG_VOORJAAR", "COVER_TBG_BMG_NAJAAR"):
@@ -110,7 +115,9 @@ def run_cover(
110115
input_parcel_filtered_path = input_preprocessed_dir / input_parcel_filename
111116

112117
where = "ALL_BEST like '%TBG%' OR ALL_BEST like '%BMG%'"
113-
gfo.copy_layer(input_parcel_path, input_parcel_filtered_path, where=where)
118+
gfo.copy_layer(
119+
input_parcel_path, input_parcel_filtered_path, where=where, force=force
120+
)
114121
input_parcel_path = input_parcel_filtered_path
115122

116123
else:
@@ -144,6 +151,7 @@ def run_cover(
144151
input_parcel_path=input_parcel_path,
145152
output_imagedata_parcel_input_path=imagedata_input_parcel_path,
146153
output_parcel_nogeo_path=input_parcel_nogeo_path,
154+
force=force,
147155
)
148156

149157
# STEP 2: Calculate the timeseries data needed
@@ -165,6 +173,7 @@ def run_cover(
165173
end_date=end_date,
166174
images_to_use=images_to_use,
167175
timeseries_periodic_dir=timeseries_periodic_dir,
176+
force=force,
168177
)
169178

170179
# STEP 3: Determine the cover for the parcels for all periods
@@ -175,7 +184,7 @@ def run_cover(
175184
)
176185
cover_dir = run_dir / input_parcel_nogeo_path.stem
177186
cover_dir.mkdir(parents=True, exist_ok=True)
178-
force = False
187+
179188
on_error = "warn"
180189
parcels_cover_paths = []
181190

@@ -238,10 +247,12 @@ def run_cover(
238247
cols_to_keep = [*list(input_info.columns), "pred1"]
239248
if "provincie" in parcels_selected.columns:
240249
cols_to_keep.append("provincie")
250+
241251
parcels_selected = (
242252
parcels_selected[[*cols_to_keep, "pred1_prob"]]
243-
.groupby(cols_to_keep, dropna=False, as_index=False)
244-
.max()
253+
.sort_values("pred1_prob", ascending=False)
254+
.groupby(conf.columns["id"], dropna=False, as_index=False)
255+
.first() # Take the highest pred1_prob per id
245256
)
246257

247258
# Add pred_consolidated based on max pred1_prob
@@ -268,7 +279,7 @@ def _categorize_pred(x):
268279
return "NODATA"
269280
try:
270281
x_num = float(x)
271-
if x_num >= 0.5:
282+
if x_num > 0.5:
272283
return "ONBEDEKT"
273284
elif x_num > 0.4:
274285
return "DOUBT"
@@ -295,7 +306,7 @@ def _calc_cover(
295306
if force:
296307
gfo.remove(output_path)
297308
elif output_geo_path is not None and not output_geo_path.exists():
298-
# Geo file is asked bu missing: we need to recalculate the output file as
309+
# Geo file is asked but missing: we need to recalculate the output file as
299310
# well because we need temporary files to create the geo file.
300311
gfo.remove(output_path)
301312
else:

cropclassification/preprocess/_timeseries_calc_openeo.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ def calculate_periodic_timeseries(
2727
timeseries_periodic_dir: Path,
2828
nb_parallel: int,
2929
on_missing_image: str,
30+
force: bool = False,
3031
):
3132
"""Calculate timeseries data for the input parcels.
3233
@@ -49,6 +50,8 @@ def calculate_periodic_timeseries(
4950
- ignore: ignore the image, don't try to download it
5051
- calculate_raise: calculate the image and raise an error if it fails
5152
- calculate_ignore: calculate the image and ignore the error if it fails
53+
force (bool = False): whether to force recalculation of existing data.
54+
(will not redownload images)
5255
"""
5356
info = gfo.get_layerinfo(input_parcel_path)
5457
if info.crs is not None and not info.crs.equals(roi_crs):
@@ -67,7 +70,7 @@ def calculate_periodic_timeseries(
6770
imageprofiles_to_get=imageprofiles_to_get,
6871
imageprofiles=imageprofiles,
6972
on_missing_image=on_missing_image,
70-
force=False,
73+
force=False, # don't redownload on force
7174
)
7275

7376
# Now calculate the timeseries
@@ -94,4 +97,5 @@ def calculate_periodic_timeseries(
9497
stats=["count", "mean", "median", "std", "min", "max"],
9598
engine="pyqgis",
9699
nb_parallel=nb_parallel,
100+
force=force,
97101
)

cropclassification/preprocess/_timeseries_helper.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,9 @@ def prepare_input(
8484
if output_parcel_nogeo_path is not None and (
8585
force is True or not output_parcel_nogeo_path.exists()
8686
):
87+
if output_parcel_nogeo_path.exists():
88+
os.remove(output_parcel_nogeo_path)
89+
8790
logger.info(f"Save non-geo data to {output_parcel_nogeo_path}")
8891
parceldata_nogeo_df = parceldata_gdf.drop(["geometry"], axis=1)
8992
pdh.to_file(parceldata_nogeo_df, output_parcel_nogeo_path)

cropclassification/preprocess/timeseries.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ def calc_timeseries_data(
2727
end_date: datetime,
2828
images_to_use: dict[str, conf.ImageConfig],
2929
timeseries_periodic_dir: Path,
30+
force: bool = False,
3031
):
3132
"""Calculate timeseries data for the input parcels.
3233
@@ -40,6 +41,7 @@ def calc_timeseries_data(
4041
images_to_use (List[str]): an array with data you want to be calculated:
4142
check out the constants starting with DATA_TO_GET... for the options.
4243
timeseries_periodic_dir (Path): Directory the timeseries will be written to.
44+
force (bool = False): whether to force recalculation of existing data.
4345
"""
4446
# Check some variables...
4547
if images_to_use is None:
@@ -65,6 +67,7 @@ def calc_timeseries_data(
6567
timeseries_periodic_dir=timeseries_periodic_dir,
6668
nb_parallel=conf.general.getint("nb_parallel", -1),
6769
on_missing_image=conf.images.get("on_missing_image", "calculate_raise"),
70+
force=force,
6871
)
6972

7073

0 commit comments

Comments
 (0)