Skip to content

Commit d868b2a

Browse files
committed
ready for review
1 parent ce9c91e commit d868b2a

File tree

7 files changed

+55
-78
lines changed

7 files changed

+55
-78
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,4 @@
1616
xcube Multi-Source Data Store: Seamlessly Integrating and Harmonizing Data from
1717
Multiple Sources.
1818

19-
Find out more in the [xcube Multi-Source Data Store Documentation](https://xcube-dev.github.io/xcube-multistore/).
19+
Find out more in the [xcube Multi-Source Data Store Documentation](https://xcube-dev.github.io/xcube-multistore/).

docs/index.md

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,13 +62,34 @@ For further examples please view the [examples folder](https://github.com/xcube-
6262
* support preload API for [xcube-clms](https://github.com/xcube-dev/xcube-clms) and
6363
[xcube-zenodo](https://github.com/xcube-dev/xcube-zenodo)
6464
* allow to write to netcdf and zarr
65+
* some auxiliary functionalities that help set up a configuration YAML file.
66+
* interpolate along the time axis
6567

66-
> The following features will be implemented in the future:
68+
### Configuration Generator GUI
6769

68-
* some auxiliary functionalities which shall help to setup a config YAML file.
69-
* interpolate along the time axis
70+
The **Configuration Generator GUI** provides an interactive interface for creating and
71+
editing the configuration YAML, making the setup process more intuitive and less
72+
error-prone.
7073

71-
### License
74+
**Key features (in development):**
75+
76+
- Display of all available fields for each configuration section
77+
- Dynamic fetching and updating of valid parameters and inputs
78+
- Dropdown menus that show only supported options
79+
- Autofill assistance for large option sets (e.g., thousands of data IDs)
80+
- Built-in configuration validator/checker
81+
- Geolocation visualization to help define bounding boxes
82+
83+
> **Note:** This feature is under active development, and only a minimal working
84+
> example is currently available.
85+
86+
To launch the GUI, run the following command from the package root:
87+
88+
```bash
89+
panel serve xcube_multistore/gui/app.py --dev
90+
```
91+
92+
## License
7293

7394
The package is open source and released under the
7495
[MIT license](https://opensource.org/license/mit). :heart:

examples/debug.py

Lines changed: 0 additions & 35 deletions
This file was deleted.

xcube_multistore/accessors/cds.py

Lines changed: 7 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,7 @@ def open_data(self, data_id: str, **open_params) -> xr.Dataset:
3838
open_params = self._convert_point_to_bbox(data_id, open_params)
3939
point = open_params.pop("point")
4040
time_range = open_params.pop("time_range")
41-
self.notify(
42-
GeneratorState(
43-
self.identifier,
44-
message=f"Open dataset {self.identifier!r} 0%.",
45-
)
46-
)
47-
ds, _ = self._open_with_split(data_id, time_range, open_params, time_range)
41+
ds = self._open_with_split(data_id, time_range, open_params, time_range)
4842

4943
if time_series:
5044
# noinspection PyUnboundLocalVariable
@@ -70,31 +64,24 @@ def _open_with_split(
7064
time_range: tuple[str, str],
7165
open_params: dict,
7266
original_time_range: tuple[str, str],
73-
progress: int | None = None,
74-
) -> tuple[xr.Dataset, int]:
67+
) -> xr.Dataset:
7568
"""
7669
Recursively fetch data by splitting time_range into smaller ranges
7770
until store.open_data() succeeds.
7871
"""
79-
if progress is None:
80-
progress = 0
8172
try:
8273
open_params["time_range"] = time_range
8374
ds = self.store.open_data(data_id, **open_params)
84-
progress += 1
85-
time_diff = get_timedelta(open_params["time_range"]).days
86-
time_diff_orig = get_timedelta(original_time_range).days
87-
nb_requests = time_diff_orig // time_diff
8875
self.notify(
8976
GeneratorState(
9077
self.identifier,
9178
message=(
9279
f"Open dataset {self.identifier!r} "
93-
f"{progress / nb_requests * 100:.0f}%."
80+
f"time range: {open_params['time_range']}"
9481
),
9582
)
9683
)
97-
return ds, progress
84+
return ds
9885

9986
except Exception:
10087
# Split the request into two halves
@@ -121,22 +108,20 @@ def _open_with_split(
121108
),
122109
datetime.datetime.strftime(end, "%Y-%m-%d"),
123110
)
124-
left, progress = self._open_with_split(
111+
left = self._open_with_split(
125112
data_id,
126113
time_range_left,
127114
open_params,
128115
original_time_range,
129-
progress=progress,
130116
)
131-
right, progress = self._open_with_split(
117+
right = self._open_with_split(
132118
data_id,
133119
time_range_right,
134120
open_params,
135121
original_time_range,
136-
progress=progress,
137122
)
138123

139-
return xr.concat((left, right), dim="time"), progress
124+
return xr.concat((left, right), dim="time")
140125

141126

142127
def get_timedelta(time_range: tuple[str, str]) -> datetime.timedelta:

xcube_multistore/accessors/stac.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
from xcube_multistore.visualization import GeneratorState
3030

3131
_NB_PIXELS = int(2e4 * 2e4) * 5 * 4
32+
_MAX_DAYS = 100
33+
_NUM_SEN2_BANDS = 13
3234

3335

3436
class StacAccessor(Accessor):
@@ -68,7 +70,7 @@ def _split_time_range(open_params: dict):
6870
if "asset_names" in open_params:
6971
nb_vars = len(open_params["asset_names"])
7072
else:
71-
nb_vars = 13
73+
nb_vars = _NUM_SEN2_BANDS
7274
start, end = open_params["time_range"]
7375
start = datetime.date.fromisoformat(start)
7476
end = datetime.date.fromisoformat(end)
@@ -93,9 +95,9 @@ def _split_time_range(open_params: dict):
9395
nb_splits = 1
9496

9597
base = total_days // nb_splits
96-
if base > 365:
97-
base = 365
98-
nb_splits = total_days // 365
98+
if base > _MAX_DAYS:
99+
base = _MAX_DAYS
100+
nb_splits = total_days // _MAX_DAYS
99101
remainder = total_days % nb_splits
100102
time_ranges = []
101103
current = start

xcube_multistore/multistore.py

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ class MultiSourceDataStore:
8181
def __init__(self, config: str | dict[str, Any]):
8282
config = MultiSourceConfig(config)
8383
self.config = config
84-
self.stores = None
84+
self.stores = DataStores.setup_data_stores(self.config)
8585
if config.grid_mappings:
8686
self._grid_mappings = GridMappings.setup_grid_mappings(config)
8787
else:
@@ -96,7 +96,6 @@ def __init__(self, config: str | dict[str, Any]):
9696
self._display = GeneratorDisplay.create(list(self._states.values()))
9797

9898
def generate(self):
99-
self.stores = DataStores.setup_data_stores(self.config)
10099
# preload data, which is not preloaded as default
101100
if self.config.preload_datasets is not None:
102101
self._preload_datasets()
@@ -125,7 +124,6 @@ def display_config(self):
125124
display.show()
126125

127126
def display_geolocations(self):
128-
self.stores = DataStores.setup_data_stores(self.config)
129127
records = []
130128

131129
for config_ds in self.config.datasets.values():
@@ -673,16 +671,21 @@ def _write_dataset(self, ds: xr.Dataset, config: dict) -> xr.Dataset | Exception
673671
format_id = config.get("format_id", "zarr")
674672
if format_id == "netcdf":
675673
ds = prepare_dataset_for_netcdf(ds)
674+
675+
# unify chunksize
676+
ds = ds.unify_chunks()
676677
chunksize = config.get("chunksize")
677-
if not chunksize:
678-
chunksize = ds.chunksizes
679-
if format_id in ["zarr", "levels"]:
680-
ds = chunk_dataset(ds, format_name="zarr", chunk_sizes=chunksize)
681-
else:
682-
ds = chunk_dataset(ds, format_name=format_id, chunk_sizes=chunksize)
683-
for data_var in ds.data_vars:
684-
if "chunks" in ds[data_var].encoding:
685-
del ds[data_var].encoding["chunks"]
678+
if chunksize is None:
679+
chunksize = {
680+
dim: sizes[0] for dim, sizes in getattr(ds, "chunksizes", {}).items()
681+
}
682+
if chunksize:
683+
# Select format name for chunking
684+
format_name = "zarr" if format_id in ["zarr", "levels"] else format_id
685+
ds = chunk_dataset(ds, format_name=format_name, chunk_sizes=chunksize)
686+
# Remove "chunks" from encoding to avoid serialization issues
687+
for var in ds.data_vars:
688+
ds[var].encoding.pop("chunks", None)
686689

687690
data_id = _get_data_id(config)
688691
ds = clean_dataset(ds)

xcube_multistore/utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ def clean_dataset(ds: xr.Dataset, gm: GridMapping | None = None) -> xr.Dataset:
127127
128128
Args:
129129
ds: The input xarray dataset to be cleaned.
130+
gm: The grid mapping associated with the input dataset, if any.
130131
131132
Returns:
132133
A cleaned version of the dataset with boundary variables removed and grid

0 commit comments

Comments
 (0)