Skip to content

Commit bf51577

Browse files
committed
update to the latest version of geokube and add a new driver supporting datasets with ancillary files
1 parent b7b0169 commit bf51577

File tree

4 files changed

+100
-5
lines changed

4 files changed

+100
-5
lines changed

.github/workflows/build_on_release.yaml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
name: Build geolake docker images for geolake components and push to the repository
22

33
on:
4-
push:
5-
tags:
6-
- 'v*'
4+
release:
5+
types: [published]
6+
workflow_dispatch:
7+
78
jobs:
89
build:
910
runs-on: ubuntu-latest

drivers/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
ARG REGISTRY=rg.fr-par.scw.cloud/geokube
22
#ARG TAG=2025.03.25.10.56
3-
ARG TAG=v0.2.7.10
3+
ARG TAG=2025.06
44
FROM $REGISTRY/geokube:$TAG
55

66
ADD . /opt/intake_geokube
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
"""geokube driver for intake."""
2+
import logging
3+
from typing import Mapping, Optional
4+
from .base import GeokubeSource
5+
from geokube import open_dataset, open_datacube
6+
from geokube.core.datacube import DataCube
7+
import pickle
8+
import os
9+
import xarray as xr
10+
import numpy as np
11+
import glob
12+
13+
_PROJECTION = {"grid_mapping_name": "latitude_longitude"}


class NetCDFAncillarySource(GeokubeSource):
    """Intake driver for NetCDF datasets accompanied by ancillary files.

    Opens the main data files and a separate set of ancillary files
    (grid/coordinate definitions), merges them into a single xarray
    Dataset, attaches a latitude/longitude CRS, normalises the time
    dimension, and exposes the result as a geokube ``DataCube``.
    """

    name = "geokube_netcdf_ancillary"

    def add_projection(self, dset: xr.Dataset, **kwargs) -> xr.Dataset:
        """Attach a lat/lon CRS coordinate and point every data variable's
        ``grid_mapping`` encoding at it.

        Parameters
        ----------
        dset : xr.Dataset
            Dataset to annotate (modified in place and returned).
        """
        # Scalar dummy variable carrying the CF grid-mapping attributes.
        dset.coords["crs"] = xr.DataArray(data=np.array(1), attrs=_PROJECTION)
        for var in dset.data_vars.values():
            var.encoding["grid_mapping"] = "crs"
        return dset

    def __init__(
        self,
        path: str,
        ancillary_path: str,
        pattern: Optional[str] = None,
        field_id: Optional[str] = None,
        delay_read_cubes: bool = False,
        metadata_caching: bool = False,
        metadata_cache_path: Optional[str] = None,
        storage_options: Optional[dict] = None,
        xarray_kwargs: Optional[dict] = None,
        metadata=None,
        mapping: Optional[Mapping[str, Mapping[str, str]]] = None,
        load_files_on_persistance: Optional[bool] = True,
        **kwargs,
    ):
        """Store driver configuration.

        Parameters
        ----------
        path : str
            Glob pattern for the main NetCDF files.
        ancillary_path : str
            Glob pattern for the ancillary (grid) NetCDF files.
        metadata_caching : bool
            If True, pickle the built DataCube to ``metadata_cache_path``
            and reuse it on subsequent opens.
        mapping : Mapping, optional
            geokube field/coordinate mapping forwarded to
            ``DataCube.from_xarray``.
        """
        self._kube = None
        self.path = path
        self.ancillary_path = ancillary_path
        self.pattern = pattern
        self.field_id = field_id
        self.delay_read_cubes = delay_read_cubes
        self.metadata_caching = metadata_caching
        self.metadata_cache_path = metadata_cache_path
        self.storage_options = storage_options
        self.mapping = mapping
        self.xarray_kwargs = {} if xarray_kwargs is None else xarray_kwargs
        self.load_files_on_persistance = load_files_on_persistance
        super().__init__(metadata=metadata, **kwargs)

    def _open_dataset(self):
        """Build (or load from cache) the merged DataCube.

        Returns
        -------
        DataCube
            The geokube datacube built from the merged data + ancillary
            files.
        """
        # BUG FIX: the original returned ``None`` when caching was enabled
        # but the cache file did not exist yet, so the cube was never built.
        # Now we only short-circuit on an actual cache hit and otherwise
        # fall through to build the cube (and write the cache afterwards).
        if (
            self.metadata_caching
            and self.metadata_cache_path
            and os.path.exists(self.metadata_cache_path)
        ):
            # NOTE: pickle is only safe here because the cache file is
            # produced by this driver itself; never point
            # ``metadata_cache_path`` at untrusted data.
            with open(self.metadata_cache_path, "rb") as f:
                self._kube = pickle.load(f)
            return self._kube

        afilepaths = glob.glob(self.ancillary_path)
        filepaths = glob.glob(self.path)
        # ``compat='override'`` lets overlapping ancillary variables win
        # without conflict errors.
        ancillary = xr.open_mfdataset(afilepaths, compat='override')
        ds = xr.open_mfdataset(filepaths, **self.xarray_kwargs)
        finalds = xr.merge([ancillary, ds])

        finalds.xgrid.attrs['standard_name'] = 'projection_grid_x_centers'
        finalds.ygrid.attrs['standard_name'] = 'projection_grid_y_centers'

        finalds = self.add_projection(finalds)
        # Re-index the time dimension: give ``tdim`` an integer index,
        # promote the ``time`` variable to the dimension coordinate, and
        # sort chronologically (multi-file opens need not be ordered).
        finalds = finalds.assign_coords(tdim=np.arange(finalds.sizes['tdim']))
        time = finalds.time.values
        finalds = (
            finalds.assign_coords(time=("tdim", time))
            .swap_dims({"tdim": "time"})
            .sortby("time")
        )

        # Drop per-variable ``grid_mapping`` attrs: the mapping lives in
        # each variable's *encoding* (set by ``add_projection``), and a
        # stale attr would conflict on write.
        for var in finalds.data_vars.values():
            if "grid_mapping" in var.attrs:
                del var.attrs["grid_mapping"]

        self._kube = DataCube.from_xarray(finalds, mapping=self.mapping)

        if self.metadata_caching and self.metadata_cache_path:
            with open(self.metadata_cache_path, "wb") as f:
                pickle.dump(self._kube, f)

        return self._kube

drivers/setup.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818
"intake.drivers": [
1919
"geokube_netcdf = intake_geokube.netcdf:NetCDFSource",
2020
"cmcc_wrf_geokube = intake_geokube.wrf:CMCCWRFSource",
21-
"cmcc_sentinel_geokube = intake_geokube.sentinel:CMCCSentinelSource"
21+
"cmcc_sentinel_geokube = intake_geokube.sentinel:CMCCSentinelSource",
22+
"geokube_netcdf_ancillary = intake_geokube.netcdf_with_ancillary:NetCDFAncillarySource",
2223
]
2324
},
2425
classifiers=[

0 commit comments

Comments
 (0)