Skip to content

Commit f333ca4

Browse files
committed
Fix: add fix when bad NetCDF
1 parent 435e66e commit f333ca4

File tree

1 file changed

+26
-0
lines changed

1 file changed

+26
-0
lines changed

aodn_cloud_optimised/lib/schema.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,8 @@ def _write_nullified_dataset(ds, output_path):
564564
ds_null = ds.copy()
565565
encoding = {}
566566

567+
ds_null.attrs = _clean_netcdf_attrs(ds_null.attrs)
568+
567569
for var in ds_null.data_vars:
568570
data = ds_null[var].data
569571
dtype = data.dtype
@@ -585,6 +587,30 @@ def _write_nullified_dataset(ds, output_path):
585587
ds_null.to_netcdf(output_path, encoding=encoding, engine="netcdf4")
586588

587589

590+
def _clean_netcdf_attrs(attrs: dict) -> dict:
591+
"""Remove or fix attributes that cannot be written by netCDF4."""
592+
cleaned = {}
593+
594+
for k, v in attrs.items():
595+
if v is None:
596+
continue
597+
598+
if isinstance(v, bytes):
599+
try:
600+
cleaned[k] = v.decode("utf-8", errors="replace")
601+
except Exception:
602+
continue
603+
604+
elif isinstance(v, str):
605+
# Remove UTF-16 surrogate code points explicitly
606+
cleaned[k] = v.encode("utf-8", errors="replace").decode("utf-8")
607+
608+
else:
609+
cleaned[k] = v
610+
611+
return cleaned
612+
613+
588614
def convert_pandas_csv_config_to_polars(pandas_config: dict) -> dict:
589615
"""
590616
Convert a pandas.read_csv configuration dictionary to a polars.read_csv equivalent.

0 commit comments

Comments
 (0)