Skip to content

Commit d868b2a

Browse files
committed
ready for review
1 parent ce9c91e commit d868b2a

File tree

7 files changed

+55
-78
lines changed

7 files changed

+55
-78
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,4 @@
1616
xcube Multi-Source Data Store: Seamlessly Integrating and Harmonizing Data from
1717
Multiple Sources.
1818

19-
Find out more in the [xcube Multi-Source Data Store Documentation](https://xcube-dev.github.io/xcube-multistore/).
19+
Find out more in the [xcube Multi-Source Data Store Documentation](https://xcube-dev.github.io/xcube-multistore/).

docs/index.md

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,13 +62,34 @@ For further examples please view the [examples folder](https://github.com/xcube-
6262
* support preload API for [xcube-clms](https://github.com/xcube-dev/xcube-clms) and
6363
[xcube-zenodo](https://github.com/xcube-dev/xcube-zenodo)
6464
* allow to write to netcdf and zarr
65+
* some auxiliary functionalities that help set up a configuration YAML file.
66+
* interpolate along the time axis
6567

66-
> The following features will be implemented in the future:
68+
### Configuration Generator GUI
6769

68-
* some auxiliary functionalities which shall help to setup a config YAML file.
69-
* interpolate along the time axis
70+
The **Configuration Generator GUI** provides an interactive interface for creating and
71+
editing the configuration YAML, making the setup process more intuitive and less
72+
error-prone.
7073

71-
### License
74+
**Key features (in development):**
75+
76+
- Display of all available fields for each configuration section
77+
- Dynamic fetching and updating of valid parameters and inputs
78+
- Dropdown menus that show only supported options
79+
- Autofill assistance for large option sets (e.g., thousands of data IDs)
80+
- Built-in configuration validator/checker
81+
- Geolocation visualization to help define bounding boxes
82+
83+
> **Note:** This feature is under active development, and only a minimal working
84+
> example is currently available.
85+
86+
To launch the GUI, run the following command from the package root:
87+
88+
```bash
89+
panel serve xcube_multistore/gui/app.py --dev
90+
```
91+
92+
## License
7293

7394
The package is open source and released under the
7495
[MIT license](https://opensource.org/license/mit). :heart:

examples/debug.py

Lines changed: 0 additions & 35 deletions
This file was deleted.

xcube_multistore/accessors/cds.py

Lines changed: 7 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,7 @@ def open_data(self, data_id: str, **open_params) -> xr.Dataset:
3838
open_params = self._convert_point_to_bbox(data_id, open_params)
3939
point = open_params.pop("point")
4040
time_range = open_params.pop("time_range")
41-
self.notify(
42-
GeneratorState(
43-
self.identifier,
44-
message=f"Open dataset {self.identifier!r} 0%.",
45-
)
46-
)
47-
ds, _ = self._open_with_split(data_id, time_range, open_params, time_range)
41+
ds = self._open_with_split(data_id, time_range, open_params, time_range)
4842

4943
if time_series:
5044
# noinspection PyUnboundLocalVariable
@@ -70,31 +64,24 @@ def _open_with_split(
7064
time_range: tuple[str, str],
7165
open_params: dict,
7266
original_time_range: tuple[str, str],
73-
progress: int | None = None,
74-
) -> tuple[xr.Dataset, int]:
67+
) -> xr.Dataset:
7568
"""
7669
Recursively fetch data by splitting time_range into smaller ranges
7770
until store.open_data() succeeds.
7871
"""
79-
if progress is None:
80-
progress = 0
8172
try:
8273
open_params["time_range"] = time_range
8374
ds = self.store.open_data(data_id, **open_params)
84-
progress += 1
85-
time_diff = get_timedelta(open_params["time_range"]).days
86-
time_diff_orig = get_timedelta(original_time_range).days
87-
nb_requests = time_diff_orig // time_diff
8875
self.notify(
8976
GeneratorState(
9077
self.identifier,
9178
message=(
9279
f"Open dataset {self.identifier!r} "
93-
f"{progress / nb_requests * 100:.0f}%."
80+
f"time range: {open_params['time_range']}"
9481
),
9582
)
9683
)
97-
return ds, progress
84+
return ds
9885

9986
except Exception:
10087
# Split the request into two halves
@@ -121,22 +108,20 @@ def _open_with_split(
121108
),
122109
datetime.datetime.strftime(end, "%Y-%m-%d"),
123110
)
124-
left, progress = self._open_with_split(
111+
left = self._open_with_split(
125112
data_id,
126113
time_range_left,
127114
open_params,
128115
original_time_range,
129-
progress=progress,
130116
)
131-
right, progress = self._open_with_split(
117+
right = self._open_with_split(
132118
data_id,
133119
time_range_right,
134120
open_params,
135121
original_time_range,
136-
progress=progress,
137122
)
138123

139-
return xr.concat((left, right), dim="time"), progress
124+
return xr.concat((left, right), dim="time")
140125

141126

142127
def get_timedelta(time_range: tuple[str, str]) -> datetime.timedelta:

xcube_multistore/accessors/stac.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
from xcube_multistore.visualization import GeneratorState
3030

3131
_NB_PIXELS = int(2e4 * 2e4) * 5 * 4
32+
_MAX_DAYS = 100
33+
_NUM_SEN2_BANDS = 13
3234

3335

3436
class StacAccessor(Accessor):
@@ -68,7 +70,7 @@ def _split_time_range(open_params: dict):
6870
if "asset_names" in open_params:
6971
nb_vars = len(open_params["asset_names"])
7072
else:
71-
nb_vars = 13
73+
nb_vars = _NUM_SEN2_BANDS
7274
start, end = open_params["time_range"]
7375
start = datetime.date.fromisoformat(start)
7476
end = datetime.date.fromisoformat(end)
@@ -93,9 +95,9 @@ def _split_time_range(open_params: dict):
9395
nb_splits = 1
9496

9597
base = total_days // nb_splits
96-
if base > 365:
97-
base = 365
98-
nb_splits = total_days // 365
98+
if base > _MAX_DAYS:
99+
base = _MAX_DAYS
100+
nb_splits = total_days // _MAX_DAYS
99101
remainder = total_days % nb_splits
100102
time_ranges = []
101103
current = start

xcube_multistore/multistore.py

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ class MultiSourceDataStore:
8181
def __init__(self, config: str | dict[str, Any]):
8282
config = MultiSourceConfig(config)
8383
self.config = config
84-
self.stores = None
84+
self.stores = DataStores.setup_data_stores(self.config)
8585
if config.grid_mappings:
8686
self._grid_mappings = GridMappings.setup_grid_mappings(config)
8787
else:
@@ -96,7 +96,6 @@ def __init__(self, config: str | dict[str, Any]):
9696
self._display = GeneratorDisplay.create(list(self._states.values()))
9797

9898
def generate(self):
99-
self.stores = DataStores.setup_data_stores(self.config)
10099
# preload data, which is not preloaded as default
101100
if self.config.preload_datasets is not None:
102101
self._preload_datasets()
@@ -125,7 +124,6 @@ def display_config(self):
125124
display.show()
126125

127126
def display_geolocations(self):
128-
self.stores = DataStores.setup_data_stores(self.config)
129127
records = []
130128

131129
for config_ds in self.config.datasets.values():
@@ -673,16 +671,21 @@ def _write_dataset(self, ds: xr.Dataset, config: dict) -> xr.Dataset | Exception
673671
format_id = config.get("format_id", "zarr")
674672
if format_id == "netcdf":
675673
ds = prepare_dataset_for_netcdf(ds)
674+
675+
# unify chunksize
676+
ds = ds.unify_chunks()
676677
chunksize = config.get("chunksize")
677-
if not chunksize:
678-
chunksize = ds.chunksizes
679-
if format_id in ["zarr", "levels"]:
680-
ds = chunk_dataset(ds, format_name="zarr", chunk_sizes=chunksize)
681-
else:
682-
ds = chunk_dataset(ds, format_name=format_id, chunk_sizes=chunksize)
683-
for data_var in ds.data_vars:
684-
if "chunks" in ds[data_var].encoding:
685-
del ds[data_var].encoding["chunks"]
678+
if chunksize is None:
679+
chunksize = {
680+
dim: sizes[0] for dim, sizes in getattr(ds, "chunksizes", {}).items()
681+
}
682+
if chunksize:
683+
# Select format name for chunking
684+
format_name = "zarr" if format_id in ["zarr", "levels"] else format_id
685+
ds = chunk_dataset(ds, format_name=format_name, chunk_sizes=chunksize)
686+
# Remove "chunks" from encoding to avoid serialization issues
687+
for var in ds.data_vars:
688+
ds[var].encoding.pop("chunks", None)
686689

687690
data_id = _get_data_id(config)
688691
ds = clean_dataset(ds)

xcube_multistore/utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ def clean_dataset(ds: xr.Dataset, gm: GridMapping | None = None) -> xr.Dataset:
127127
128128
Args:
129129
ds: The input xarray dataset to be cleaned.
130+
gm: The grid mapping associated with the input dataset, if any.
130131
131132
Returns:
132133
A cleaned version of the dataset with boundary variables removed and grid

0 commit comments

Comments
 (0)