Merge remote-tracking branch 'origin/develop' into feature/get_datase…

…t_file # Conflicts: # CHANGELOG.md
CLIMADA-project · Dec 13, 2023 · 6881938 · 6881938
2 parents 5bdca65 + e81249f
commit 6881938
Show file tree

Hide file tree

Showing 7 changed files with 143 additions and 164 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -13,6 +13,7 @@ Code freeze date: YYYY-MM-DD
 ### Added
 
 - Convenience method `api_client.Client.get_dataset_file`, combining `get_dataset_info` and `download_dataset`, returning a single file object. [#821](https://github.com/CLIMADA-project/climada_python/pull/821)
+- Read and write methods to and from csv files for the `DiscRates` class. [#818](https://github.com/CLIMADA-project/climada_python/pull/818)
 
 ### Changed
 
@@ -25,6 +26,7 @@ Code freeze date: YYYY-MM-DD
 
 - `Hazard.from_xarray_raster` now stores strings as default values for `Hazard.event_name` [#795](https://github.com/CLIMADA-project/climada_python/pull/795)
 - Fix the dist_approx util function when used with method="geosphere" and log=True and points that are very close. [#792](https://github.com/CLIMADA-project/climada_python/pull/792)
+- `climada.util.yearsets.sample_from_poisson`: fix a bug ([#819](https://github.com/CLIMADA-project/climada_python/issues/819)) and inconsistency that occurs when lambda events per year (`lam`) are set to 1. [[#823](https://github.com/CLIMADA-project/climada_python/pull/823)]
 
 ### Deprecated
 

diff --git a/climada/entity/disc_rates/base.py b/climada/entity/disc_rates/base.py
@@ -259,9 +259,11 @@ def from_mat(cls, file_name, var_names=None):
         return cls(years=years, rates=rates)
 
     def read_mat(self, *args, **kwargs):
-        """This function is deprecated, use DiscRates.from_mats instead."""
-        LOGGER.warning("The use of DiscRates.read_mats is deprecated."
-                       "Use DiscRates.from_mats instead.")
+        """This function is deprecated, use ``DiscRates.from_mat`` instead."""
+        LOGGER.warning(
+            "The use of DiscRates.read_mat is deprecated."
+            "Use DiscRates.from_mat instead."
+        )
         self.__dict__ = DiscRates.from_mat(*args, **kwargs).__dict__
 
     @classmethod
@@ -307,8 +309,7 @@ def read_excel(self, *args, **kwargs):
         """This function is deprecated, use DiscRates.from_excel instead."""
         LOGGER.warning("The use of DiscRates.read_excel is deprecated."
                        "Use DiscRates.from_excel instead.")
-        self.__dict__ = DiscRates.from_mat(*args, **kwargs).__dict__
-
+        self.__dict__ = DiscRates.from_excel(*args, **kwargs).__dict__
 
     def write_excel(self, file_name, var_names=None):
         """
@@ -341,3 +342,68 @@ def write_excel(self, file_name, var_names=None):
             disc_ws.write(i_yr, 0, disc_yr)
             disc_ws.write(i_yr, 1, disc_rt)
         disc_wb.close()
+
+    @classmethod
+    def from_csv(
+        cls, file_name, year_column="year", disc_column="discount_rate", **kwargs
+    ):
+        """
+        Read DiscRate from a csv file following template and store variables.
+
+        Parameters
+        ----------
+        file_name: str
+            filename including path and extension
+        year_column: str, optional
+            name of the column that contains the years,
+            Default: "year"
+        disc_column: str, optional
+            name of the column that contains the discount rates,
+            Default: "discount_rate"
+        **kwargs:
+            any additional arguments, e.g., `sep`, `delimiter`, `header`,
+            are forwarded to ``pandas.read_csv``
+
+        Returns
+        -------
+        climada.entity.DiscRates :
+            The disc rates from the csv file
+        """
+        dfr = pd.read_csv(file_name, **kwargs)
+        try:
+            years = dfr[year_column].values.astype(int, copy=False)
+            rates = dfr[disc_column].values
+        except KeyError as err:
+            raise ValueError(
+                f"missing column in csv file ({year_column} or {disc_column})"
+            ) from err
+
+        return cls(years=years, rates=rates)
+
+    def write_csv(
+        self, file_name, year_column="year", disc_column="discount_rate", **kwargs
+    ):
+        """
+        Write the years and discount rates of this DiscRates object to a csv file.
+
+        Parameters
+        ----------
+        file_name: str
+            filename including path and extension
+        year_column: str, optional
+            name of the column that contains the years,
+            Default: "year"
+        disc_column: str, optional
+            name of the column that contains the discount rates,
+            Default: "discount_rate"
+        **kwargs:
+            any additional arguments, e.g., `sep`, `header`, `index`,
+            are forwarded to ``pandas.DataFrame.to_csv``
+        """
+        dfr = pd.DataFrame(
+            {
+                year_column: self.years,
+                disc_column: self.rates,
+            }
+        )
+        dfr.to_csv(file_name, **kwargs)
diff --git a/climada/entity/disc_rates/test/test_base.py b/climada/entity/disc_rates/test/test_base.py
@@ -21,6 +21,8 @@
 import unittest
 import numpy as np
 import copy
+from pathlib import Path
+from tempfile import TemporaryDirectory
 
 from climada import CONFIG
 from climada.entity.disc_rates.base import DiscRates
@@ -216,23 +218,46 @@ def test_demo_file_pass(self):
         self.assertEqual(disc_rate.rates.max(), 0.02)
 
 
-class TestWriter(unittest.TestCase):
-    """Test excel reader for discount rates"""
+class TestWriteRead(unittest.TestCase):
+    """Test file write read cycle for discount rates"""
+
+    @classmethod
+    def setUpClass(cls):
+        cls._td = TemporaryDirectory()
+        cls.tempdir = Path(cls._td.name)
+
+    @classmethod
+    def tearDownClass(cls):
+        cls._td.cleanup()
 
-    def test_write_read_pass(self):
+    def test_write_read_excel_pass(self):
         """Write and read excel file."""
         years = np.arange(1950, 2150)
         rates = np.ones(years.size) * 0.03
         disc_rate = DiscRates(years=years, rates=rates)
 
-        file_name = CONFIG.disc_rates.test_data.dir().joinpath('test_disc.xlsx')
+        file_name = self.tempdir.joinpath('test_disc.xlsx')
         disc_rate.write_excel(file_name)
 
         disc_read = DiscRates.from_excel(file_name)
 
         self.assertTrue(np.array_equal(disc_read.years, disc_rate.years))
         self.assertTrue(np.array_equal(disc_read.rates, disc_rate.rates))
 
+    def test_write_read_csv_pass(self):
+        """Write and read csv file."""
+        years = np.arange(1950, 2150)
+        rates = np.ones(years.size) * 0.03
+        disc_rate = DiscRates(years=years, rates=rates)
+
+        file_name = self.tempdir.joinpath('test_disc.csv')
+        disc_rate.write_csv(file_name)
+
+        disc_read = DiscRates.from_csv(file_name)
+
+        self.assertTrue(np.array_equal(disc_read.years, disc_rate.years))
+        self.assertTrue(np.array_equal(disc_read.rates, disc_rate.rates))
+
 
 # Execute Tests
 if __name__ == "__main__":
@@ -243,5 +268,5 @@ def test_write_read_pass(self):
     TESTS.addTests(unittest.TestLoader().loadTestsFromTestCase(TestNetPresValue))
     TESTS.addTests(unittest.TestLoader().loadTestsFromTestCase(TestReaderExcel))
     TESTS.addTests(unittest.TestLoader().loadTestsFromTestCase(TestReaderMat))
-    TESTS.addTests(unittest.TestLoader().loadTestsFromTestCase(TestWriter))
+    TESTS.addTests(unittest.TestLoader().loadTestsFromTestCase(TestWriteRead))
     unittest.TextTestRunner(verbosity=2).run(TESTS)
diff --git a/climada/hazard/test/test_base_xarray.py b/climada/hazard/test/test_base_xarray.py
@@ -359,19 +359,14 @@ def test_missing_dims(self):
 
             # Now drop variable altogether, should raise an error
             ds = ds.drop_vars("time")
-            with self.assertRaises(RuntimeError) as cm:
+            with self.assertRaisesRegex(RuntimeError, "time"):
                 Hazard.from_xarray_raster(ds, "", "")
-            self.assertIn(
-                "Dataset is missing dimension/coordinate: time", str(cm.exception)
-            )
 
             # Expand time again
             ds = ds.expand_dims(time=[np.datetime64("2022-01-01")])
             hazard = Hazard.from_xarray_raster(ds, "", "")
             self._assert_default_types(hazard)
-            np.testing.assert_array_equal(
-                hazard.event_name, ["2022-01-01"]
-            )
+            np.testing.assert_array_equal(hazard.event_name, ["2022-01-01"])
             np.testing.assert_array_equal(
                 hazard.date, [dt.datetime(2022, 1, 1).toordinal()]
             )
@@ -568,17 +563,13 @@ def test_errors(self):
         self.assertIn("Unknown coordinates passed: '['bar']'.", str(cm.exception))
 
         # Correctly specified, but the custom dimension does not exist
-        with self.assertRaises(RuntimeError) as cm:
+        with self.assertRaisesRegex(RuntimeError, "lalalatitude"):
             Hazard.from_xarray_raster_file(
                 self.netcdf_path,
                 "",
                 "",
                 coordinate_vars=dict(latitude="lalalatitude"),
             )
-        self.assertIn(
-            "Dataset is missing dimension/coordinate: lalalatitude.", str(cm.exception)
-        )
-
 
 # Execute Tests
 if __name__ == "__main__":

diff --git a/climada/util/test/test_yearsets.py b/climada/util/test/test_yearsets.py
@@ -60,11 +60,14 @@ def test_impact_yearset_sampling_vect(self):
     def test_sample_from_poisson(self):
         """Test sampling amount of events per year."""
         n_sample_years = 1000
-        lam = np.sum(IMP.frequency)
-        events_per_year = yearsets.sample_from_poisson(n_sample_years, lam)
-
-        self.assertEqual(events_per_year.size, n_sample_years)
-        self.assertAlmostEqual(np.round(np.mean(events_per_year)), 2)
+        for lam in [0, 1, 2.5]:
+            events_per_year = yearsets.sample_from_poisson(n_sample_years, lam, seed=1)
+
+            self.assertEqual(events_per_year.size, n_sample_years)
+            self.assertAlmostEqual(np.mean(events_per_year), lam, places=1)
+
+        self.assertRaises(TypeError, yearsets.sample_from_poisson, n_sample_years, None)
+        self.assertRaises(ValueError, yearsets.sample_from_poisson, n_sample_years, -1)
 
     def test_sample_events(self):
         """Test the sampling of 34 events out of a pool of 20 events."""

diff --git a/climada/util/yearsets.py b/climada/util/yearsets.py
@@ -152,7 +152,7 @@ def sample_from_poisson(n_sampled_years, lam, seed=None):
     -----------
         n_sampled_years : int
             The target number of years the impact yearset shall contain.
-        lam: int
+        lam: float
             the applied Poisson distribution is centered around lambda events per year
         seed : int, optional
             seed for numpy.random, will be set if not None
@@ -165,14 +165,8 @@ def sample_from_poisson(n_sampled_years, lam, seed=None):
     """
     if seed is not None:
         np.random.seed(seed)
-    if lam != 1:
-        events_per_year = np.round(np.random.poisson(lam=lam,
-                                                     size=n_sampled_years)).astype('int')
-    else:
-        events_per_year = np.ones(len(n_sampled_years))
-
+    return np.round(np.random.poisson(lam=lam, size=n_sampled_years)).astype('int')
 
-    return events_per_year
 
 def sample_events(events_per_year, freqs_orig, seed=None):
     """Sample events uniformely from an array (indices_orig) without replacement