Skip to content

Commit 5ac2eea

Browse files
authored
Add compute_fl_diagnostics_quantiles (OGGM#1746)
* adapt ranking of pixels for elevation bands * add compute_fl_diagnostics_quantiles with tests * pep8 * add whats-new
1 parent 8de5db4 commit 5ac2eea

File tree

5 files changed

+206
-12
lines changed

5 files changed

+206
-12
lines changed

docs/whats-new.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,18 @@ v1.6.x (not released)
99
Enhancements
1010
~~~~~~~~~~~~
1111

12+
- Added ``tasks.compute_fl_diagnostics_quantiles``, this task is designed to
13+
calculate quantiles from multiple fl_diagnostic files. It enables users to
14+
compute metrics such as the median flowline across various GCM projections
15+
(:pull:`1746`).
16+
By `Patrick Schmitt <https://github.com/pat-schmitt>`_
17+
- New ranking options for the ``distribute.assign_points_to_band`` task, which
18+
determines the order in which pixels "melt away" when redistributing flowline
19+
model runs into 2D for visualization purposes. It is now possible to combine
20+
different variables from ``gridded_data`` (e.g. ``slope``,
21+
``dis_from_border``, ...) to define this ranking, providing greater flexibility
22+
and control over how the melting sequence is visualized (:pull:`1746`).
23+
By `Patrick Schmitt <https://github.com/pat-schmitt>`_
1224

1325
Bug fixes
1426
~~~~~~~~~

oggm/core/flowline.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4592,3 +4592,80 @@ def clean_merged_flowlines(gdir, buffer=None):
45924592

45934593
# Finally write the flowlines
45944594
gdir.write_pickle(allfls, 'model_flowlines')
4595+
4596+
4597+
@entity_task(log)
def compute_fl_diagnostics_quantiles(gdir,
                                     input_filesuffixes,
                                     quantiles=0.5,
                                     output_filesuffix='_median',
                                     ):
    """Compute quantile fl_diagnostics (e.g. median flowline of projections)

    This function takes a number of fl_diagnostic files and computes
    quantiles out of them. This could be useful for example for calculating
    a median flowline for different gcm projections which use the same
    scenario (e.g. ssp126).

    Parameters
    ----------
    gdir : :py:class:`oggm.GlacierDirectory`
        the glacier directory to process
    input_filesuffixes : list
        a list of fl_diagnostic filesuffixes which should be considered for
        computing
    quantiles : float or list
        The quantiles to compute. Could be a float for a single quantile or a
        list of floats for several quantile calculations.
        Default is 0.5 (the median).
    output_filesuffix : str
        The output_filesuffix of the resulting fl_diagnostics.
        Default is '_median'.
    """

    # A single-element list is unpacked to a scalar so that the resulting
    # dataset does not carry an extra 'quantile' dimension.
    if isinstance(quantiles, list):
        if len(quantiles) == 1:
            quantiles = quantiles[0]

    # get the filepaths of all fl_diagnostic files to aggregate
    all_fl_diag_paths = [gdir.get_filepath('fl_diagnostics',
                                           filesuffix=filesuffix)
                         for filesuffix in input_filesuffixes]

    # assume all files have the same number of flowlines, extract the base
    # structure (root group) and the flowline ids from the first file
    with xr.open_dataset(all_fl_diag_paths[0]) as ds:
        ds_base = ds.load()
        fl_ids = ds.flowlines.data

    # helper to tag each dataset with its source filename so that
    # open_mfdataset can concatenate along a new 'filename' dimension
    def add_filename_coord(ds):
        filepath = ds.encoding["source"]
        filename = os.path.basename(filepath).split(".")[0]
        return ds.expand_dims({'filename': [filename]})

    # now loop through all flowlines and compute the quantiles across files
    fl_diag_dss = []
    for fl_id in fl_ids:
        with xr.open_mfdataset(all_fl_diag_paths,
                               preprocess=add_filename_coord,
                               group=f'fl_{fl_id}') as ds_fl:
            with warnings.catch_warnings():
                # ignore RuntimeWarnings caused by NaNs (e.g. all-NaN slices)
                warnings.simplefilter("ignore", category=RuntimeWarning)
                fl_diag_dss.append(ds_fl.load().quantile(quantiles,
                                                         dim='filename'))

    # write back using the same nested-group structure as the input files
    output_filepath = gdir.get_filepath('fl_diagnostics',
                                        filesuffix=output_filesuffix)
    encode = {v: {'zlib': True, 'complevel': 5} for v in fl_diag_dss[0]}

    ds_base.to_netcdf(output_filepath, 'w')
    for fl_id, ds in zip(fl_ids, fl_diag_dss):
        ds.to_netcdf(output_filepath, 'a', group=f'fl_{fl_id}',
                     encoding=encode)

oggm/sandbox/distribute_2d.py

Lines changed: 42 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,9 @@ def add_smoothed_glacier_topo(gdir, outline_offset=-40,
9999

100100
@entity_task(log, writes=['gridded_data'])
101101
def assign_points_to_band(gdir, topo_variable='glacier_topo_smoothed',
102-
elevation_weight=1.003):
102+
ranking_variables=None,
103+
ranking_variables_weights=None,
104+
):
103105
"""Assigns glacier grid points to flowline elevation bands and ranks them.
104106
105107
Creates two variables in gridded_data.nc:
@@ -123,18 +125,25 @@ def assign_points_to_band(gdir, topo_variable='glacier_topo_smoothed',
123125
topo_variable : str
124126
the topography to read from `gridded_data.nc` (could be smoothed, or
125127
smoothed differently).
126-
elevation_weight : float
127-
how much weight to give to the elevation of the grid point versus the
128-
thickness. Arbitrary number, might be tuned differently.
128+
ranking_variables : list
129+
A list of distributed variables used for ranking pixels of each
130+
elevation band. All variables inside of 'gridded_data' can be used (e.g.
131+
'topo_smoothed', 'slope', 'dis_from_border', 'distributed_thickness',
132+
'aspect'). Default is 'distributed_thickness'.
133+
ranking_variables_weights : list
134+
A list of weights, corresponding to the ranking_variables. Larger values
135+
assign more weight to the corresponding ranking_variable. Must be in the
136+
same order as ranking_variables! Default is [1].
129137
"""
130138
# We need quite a few data from the gridded dataset
131139
with xr.open_dataset(gdir.get_filepath('gridded_data')) as ds:
140+
ds_grid = ds.load()
132141
topo_data = ds[topo_variable].data.copy()
133142
glacier_mask = ds.glacier_mask.data == 1
134143
topo_data_flat = topo_data[glacier_mask]
135144
band_index = topo_data * np.nan # container
136145
per_band_rank = topo_data * np.nan # container
137-
distrib_thick = ds.distributed_thickness.data
146+
weighting_per_pixel = topo_data * 0. # container
138147

139148
# For the flowline we need the model flowlines only
140149
fls = gdir.read_pickle('model_flowlines')
@@ -173,15 +182,36 @@ def assign_points_to_band(gdir, topo_variable='glacier_topo_smoothed',
173182
# assert np.nanmin(band_index) == 0
174183
assert np.all(np.isfinite(band_index[glacier_mask]))
175184

176-
# Ok now assign within band using ice thickness weighted by elevation
177-
# We rank the pixels within one band by elevation, but also add
178-
# a penalty is added to higher elevation grid points
179-
min_alt = np.nanmin(topo_data)
180-
weighted_thick = ((topo_data - min_alt + 1) * elevation_weight) * distrib_thick
185+
# Ok now we rank the pixels within each band, which variables are considered
186+
# can be defined by the user. All variables are normalized (between 0 and 1)
187+
# and multiplied by a weight and summed up. If the weight is negative the
188+
# normalized variable will be inverted (0 becomes 1 and vice versa).
189+
if ranking_variables is None:
190+
# the default variables to use
191+
ranking_variables = ['distributed_thickness']
192+
if ranking_variables_weights is None:
193+
# the default weights to use
194+
ranking_variables_weights = [1]
195+
assert len(ranking_variables) == len(ranking_variables_weights)
196+
197+
# normalize all values to the range of 0 to 1
198+
def min_max_normalization(data):
199+
data = np.where(glacier_mask, data, np.nan)
200+
return (data - np.nanmin(data)) / (np.nanmax(data) - np.nanmin(data))
201+
202+
for var, var_weight in zip(ranking_variables,
203+
ranking_variables_weights):
204+
normalized_var = min_max_normalization(ds_grid[var].data)
205+
# for negative weights we invert the normalized variable
206+
# (smallest becomes largest and vice versa)
207+
if var_weight < 0:
208+
normalized_var = 1 - normalized_var
209+
weighting_per_pixel += normalized_var * np.abs(var_weight)
210+
211+
# now rank the pixel of each band individually
181212
for band_id in np.unique(np.sort(band_index[glacier_mask])):
182-
# We work per band here
183213
is_band = band_index == band_id
184-
per_band_rank[is_band] = mstats.rankdata(weighted_thick[is_band])
214+
per_band_rank[is_band] = mstats.rankdata(weighting_per_pixel[is_band])
185215

186216
with ncDataset(gdir.get_filepath('gridded_data'), 'a') as nc:
187217
vn = 'band_index'

oggm/tasks.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
from oggm.core.flowline import run_from_climate_data
6363
from oggm.core.flowline import run_constant_climate
6464
from oggm.core.flowline import run_with_hydro
65+
from oggm.core.flowline import compute_fl_diagnostics_quantiles
6566
from oggm.core.dynamic_spinup import run_dynamic_spinup
6667
from oggm.core.dynamic_spinup import run_dynamic_melt_f_calibration
6768
from oggm.utils import copy_to_basedir

oggm/tests/test_models.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3198,6 +3198,80 @@ def test_elevation_feedback(self, hef_gdir):
31983198
plt.legend()
31993199
plt.show()
32003200

3201+
@pytest.mark.slow
def test_fl_diag_quantiles(self, hef_gdir):
    """Integration test for tasks.compute_fl_diagnostics_quantiles.

    Runs three short random-climate simulations, computes the median and
    the 5th/95th quantile fl_diagnostics across them, and checks the
    resulting files per flowline and per variable.
    """
    # store flowline diagnostics so the runs below produce the input files
    cfg.PARAMS['store_fl_diagnostics'] = True

    # conduct three runs from which to calculate the quantiles
    # (different y0 and seed so the three trajectories differ)
    output_suffixes = ['_run1', '_run2', '_run3']
    for i, output_suffix in enumerate(output_suffixes):
        run_random_climate(hef_gdir, y0=1985-i*5, nyears=10,
                           output_filesuffix=output_suffix, seed=i)

    # only calculate the median
    workflow.execute_entity_task(tasks.compute_fl_diagnostics_quantiles,
                                 hef_gdir,
                                 input_filesuffixes=output_suffixes,
                                 quantiles=0.5,
                                 output_filesuffix='_median'
                                 )

    # calculate 5th and 95th quantiles together
    workflow.execute_entity_task(tasks.compute_fl_diagnostics_quantiles,
                                 hef_gdir,
                                 input_filesuffixes=output_suffixes,
                                 quantiles=[0.05, 0.95],
                                 output_filesuffix='_iqr'
                                 )

    # flowline ids are read from the root group of the first run's file
    ft = 'fl_diagnostics'
    with xr.open_dataset(
            hef_gdir.get_filepath(ft, filesuffix=output_suffixes[0])) as ds:
        fl_ids = ds.flowlines.data

    for fl_id in fl_ids:
        # open data of current flowline (one nested group per flowline)
        ds_runs = []
        for output_suffix in output_suffixes:
            fp = hef_gdir.get_filepath(ft, filesuffix=output_suffix)
            with xr.open_dataset(fp, group=f'fl_{fl_id}') as ds:
                ds_runs.append(ds.load())
        fp = hef_gdir.get_filepath(ft, filesuffix='_median')
        with xr.open_dataset(fp, group=f'fl_{fl_id}') as ds:
            ds_median = ds.load()
        fp = hef_gdir.get_filepath(ft, filesuffix='_iqr')
        with xr.open_dataset(fp, group=f'fl_{fl_id}') as ds:
            ds_iqr = ds.load()

        # the median flowline should never be the smallest or largest
        # value, compared to the values of the runs (as we have three runs)
        # NOTE(review): the comment above contradicts the assertions below,
        # which check that the median DOES equal the min/max somewhere
        # (`.any()`). This may be intentional (all runs share the same
        # initial state, so values coincide at t=0) or a transcription
        # error (a dropped `not`) — verify against the upstream test.
        variables_to_check = ['volume_m3', 'area_m2', 'thickness_m']
        for var in variables_to_check:
            # stack the per-run DataArrays along a new 'runs' dimension
            var_das = []
            for ds_run in ds_runs:
                var_das.append(ds_run[var])
            var_stack = xr.concat(var_das, dim='runs')

            var_min = var_stack.min(dim='runs')
            var_max = var_stack.max(dim='runs')

            var_median = ds_median[var]
            is_median_equal_to_min = (var_median == var_min).any()
            is_median_equal_to_max = (var_median == var_max).any()

            assert is_median_equal_to_min
            assert is_median_equal_to_max

            # median should be larger/smaller than 5th/95th quantile
            var_5th = ds_iqr.loc[{'quantile': 0.05}][var]
            var_95th = ds_iqr.loc[{'quantile': 0.95}][var]

            is_median_larger_than_5th_q = (var_median >= var_5th).all()
            is_median_smaller_than_95th_q = (var_median <= var_95th).all()

            assert is_median_larger_than_5th_q
            assert is_median_smaller_than_95th_q
3274+
32013275

32023276
@pytest.mark.usefixtures('with_class_wd')
32033277
class TestDynamicSpinup:

0 commit comments

Comments
 (0)