Skip to content

Commit 710120d

Browse files
authored
Merge pull request #8 from ihmeuw/checkpoint
Fix linting and type checking
2 parents d622802 + a63f091 commit 710120d

File tree

13 files changed

+163
-708
lines changed

13 files changed

+163
-708
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ Instructions using conda:
3838

3939
```sh
4040
pip install poetry
41-
cd climate-data
41+
cd climate-data
4242
poetry install
4343
```
4444

docs/index.md

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,62 @@
1-
--8<-- "README.md"
1+
# Climate Data
2+
3+
This package contains pipelines and utilities to systematically extract, format, and downscale
4+
data from ERA5 climate models and CMIP6 climate forecasts.
5+
6+
## Developer Installation
7+
8+
Instructions using [`conda`](https://docs.anaconda.com/miniconda/):
9+
10+
1. Clone this repository.
11+
12+
Over ssh:
13+
```sh
14+
git clone git@github.com:ihmeuw/climate-data.git
15+
```
16+
17+
Over https:
18+
```sh
19+
git clone https://github.com/ihmeuw/climate-data.git
20+
```
21+
22+
2. Create a new conda environment.
23+
24+
```sh
25+
conda create -n climate-data python=3.12
26+
conda activate climate-data
27+
```
28+
29+
3. Install `poetry` and the project dependencies.
30+
31+
```sh
32+
pip install poetry
33+
cd climate-data
34+
poetry install
35+
```
36+
37+
### Pre-commit
38+
39+
[`pre-commit`](https://pre-commit.com/) hooks run all the auto-formatting (`ruff format`),
40+
linters (e.g. `ruff` and `mypy`), and other quality checks to make sure the changeset is
41+
in good shape before a commit/push happens.
42+
43+
You can install the hooks with (runs for each commit):
44+
45+
```sh
46+
pre-commit install
47+
```
48+
49+
Or if you want them to run only for each push:
50+
51+
```sh
52+
pre-commit install -t pre-push
53+
```
54+
55+
Or if you e.g. want to run all checks manually for all files:
56+
57+
```sh
58+
poetry run pre-commit run --all-files
59+
```
60+
61+
`pre-commit` is configured in the `.pre-commit-config.yaml` file in the repository root.
62+
All auto-formatting, linting, and other tooling is configured in the `pyproject.toml` file.

poetry.lock

Lines changed: 27 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ gcsfs = "^2024.6.0"
5050
zarr = "^2.18.2"
5151
types-pyyaml = "^6.0.12.20240311"
5252
dask = "^2024.5.2"
53+
pandas-stubs = "^2.2.3.241009"
5354

5455
[tool.poetry.group.dev.dependencies]
5556
mkdocstrings = {version = ">=0.23", extras = ["python"]}
@@ -103,6 +104,7 @@ ignore = [
103104
"PD010", # I like stack and unstack
104105
"FBT001", # Boolean positional args are super common in clis
105106
"FBT002", # Boolean positional args are super common in clis
107+
"PD901", # Generic df names are fine
106108
]
107109

108110
[tool.ruff.lint.per-file-ignores]

src/climate_data/data.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,10 @@ def extracted_era5(self) -> Path:
3737
def extracted_era5_path(
3838
self, dataset: str, variable: str, year: int | str, month: str
3939
) -> Path:
40-
return self.extracted_era5 / f"reanalysis-era5-{dataset}_{variable}_{year}_{month}.nc"
40+
return (
41+
self.extracted_era5
42+
/ f"reanalysis-era5-{dataset}_{variable}_{year}_{month}.nc"
43+
)
4144

4245
@property
4346
def extracted_cmip6(self) -> Path:

src/climate_data/extract/cmip6.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def extract_cmip6_main(
3636
cmip6_source: str,
3737
cmip6_experiment: str,
3838
cmip6_variable: str,
39-
overwrite: bool, # noqa: FBT001
39+
overwrite: bool,
4040
) -> None:
4141
print(f"Checking metadata for {cmip6_source} {cmip6_experiment} {cmip6_variable}")
4242
cd_data = ClimateDownscaleData(output_dir)
@@ -101,7 +101,7 @@ def extract_cmip6_task(
101101
cmip6_source: str,
102102
cmip6_experiment: str,
103103
cmip6_variable: str,
104-
overwrite: bool, # noqa: FBT001
104+
overwrite: bool,
105105
) -> None:
106106
extract_cmip6_main(
107107
output_dir, cmip6_source, cmip6_experiment, cmip6_variable, overwrite
@@ -121,7 +121,7 @@ def extract_cmip6(
121121
cmip6_experiment: str,
122122
cmip6_variable: str,
123123
queue: str,
124-
overwrite: bool, # noqa: FBT001
124+
overwrite: bool,
125125
) -> None:
126126
sources = (
127127
clio.VALID_CMIP6_SOURCES if cmip6_source == clio.RUN_ALL else [cmip6_source]

src/climate_data/generate/scenario_annual.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,16 @@
5252
)
5353
for lower, upper in BETWEEN_TEMP_THRESHOLDS
5454
},
55+
**{
56+
f"{disease}_suitability": utils.Transform(
57+
source_variables=["mean_temperature"],
58+
transform_funcs=[
59+
utils.map_suitability(disease),
60+
utils.annual_sum,
61+
],
62+
)
63+
for disease in ["malaria", "dengue"]
64+
},
5565
"mean_heat_index": utils.Transform(
5666
source_variables=["heat_index"],
5767
transform_funcs=[utils.annual_mean],

src/climate_data/generate/scenario_daily.py

Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
from collections import defaultdict
21
import itertools
2+
from collections import defaultdict
33
from pathlib import Path
44

55
import click
@@ -90,10 +90,12 @@ def get_source_paths(
9090
source_paths = defaultdict(list)
9191
for source, variant in inclusion_meta.index.tolist():
9292
source_paths[source].append(
93-
[cd_data.extracted_cmip6_path(v, cmip6_experiment, source, variant)
94-
for v in source_variables]
95-
)
96-
93+
[
94+
cd_data.extracted_cmip6_path(v, cmip6_experiment, source, variant)
95+
for v in source_variables
96+
]
97+
)
98+
9799
return source_paths
98100

99101

@@ -152,7 +154,7 @@ def compute_anomaly(
152154
return anomaly
153155

154156

155-
def generate_scenario_daily_main( # noqa: PLR0912
157+
def generate_scenario_daily_main( # noqa: PLR0912, PLR0915, C901
156158
output_dir: str | Path,
157159
year: str | int,
158160
target_variable: str,
@@ -177,7 +179,7 @@ def generate_scenario_daily_main( # noqa: PLR0912
177179
sid = f"Source {i+1}/{len(source_paths)}: {source}"
178180

179181
source_anomalies: dict[str, tuple[int, xr.Dataset]] = {}
180-
for j, vps in enumerate(variant_paths):
182+
for j, vps in enumerate(variant_paths):
181183
vid = f"{sid}, Variant {j+1}/{len(variant_paths)}: {vps[0].stem.split('_')[-1]}"
182184
try:
183185
print(f"{vid}: Loading reference")
@@ -187,20 +189,20 @@ def generate_scenario_daily_main( # noqa: PLR0912
187189
except KeyError:
188190
print(f"{vid}: Bad formatting, skipping...")
189191
continue
190-
192+
191193
print(f"{vid}: computing anomaly")
192194
v_anomaly = compute_anomaly(sref, target, anomaly_type)
193-
195+
194196
key = f"{len(v_anomaly.latitude)}_{len(v_anomaly.longitude)}"
195197

196198
if key in source_anomalies:
197199
old_count, old_anomaly = source_anomalies[key]
198-
200+
199201
for coord in ["latitude", "longitude"]:
200202
old_c = old_anomaly[coord].to_numpy()
201203
new_c = v_anomaly[coord].to_numpy()
202204
tol = 1e-5
203-
205+
204206
if np.abs(old_c - new_c).max() < tol:
205207
v_anomaly = v_anomaly.assign({coord: old_c})
206208
else:
@@ -212,7 +214,7 @@ def generate_scenario_daily_main( # noqa: PLR0912
212214
if source_anomalies:
213215
anomalies[source] = source_anomalies
214216

215-
ensemble_anomaly = xr.Dataset()
217+
ensemble_anomaly = xr.Dataset()
216218
for i, (source, source_anomalies) in enumerate(anomalies.items()):
217219
sid = f"Source {i+1}/{len(source_paths)}: {source}"
218220
print(f"Downscaling {i+1}/{len(anomalies)}: {source}")
@@ -222,19 +224,23 @@ def generate_scenario_daily_main( # noqa: PLR0912
222224
for j, (res, (count, v_anomaly)) in enumerate(source_anomalies.items()):
223225
res_id = f"{sid}, Resolution {j} / {len(source_anomalies)}: {res}"
224226
print(f"Downscaling {res_id}")
225-
227+
226228
if source_ensemble_anomaly.nbytes:
227-
source_ensemble_anomaly += utils.interpolate_to_target_latlon(v_anomaly, method="linear")
229+
source_ensemble_anomaly += utils.interpolate_to_target_latlon(
230+
v_anomaly, method="linear"
231+
)
228232
else:
229-
source_ensemble_anomaly = utils.interpolate_to_target_latlon(v_anomaly, method="linear")
233+
source_ensemble_anomaly = utils.interpolate_to_target_latlon(
234+
v_anomaly, method="linear"
235+
)
230236
total_count += count
231237
source_ensemble_anomaly /= total_count
232238

233239
if ensemble_anomaly.nbytes:
234240
ensemble_anomaly += source_ensemble_anomaly
235241
else:
236242
ensemble_anomaly = source_ensemble_anomaly
237-
243+
238244
ensemble_anomaly /= len(anomalies)
239245

240246
print("Computing scenario data")
@@ -277,7 +283,7 @@ def generate_scenario_daily(
277283
target_variable: str,
278284
cmip6_experiment: str,
279285
queue: str,
280-
overwrite: bool, # noqa: FBT001
286+
overwrite: bool,
281287
) -> None:
282288
cd_data = ClimateDownscaleData(output_dir)
283289

src/climate_data/generate/scenario_inclusion.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,9 @@ def generate_scenario_inclusion_main(
5757
inclusion_df["include"] = inclusion_df.valid_scenarios == 5 # noqa: PLR2004
5858
inclusion_df = (
5959
inclusion_df.loc[inclusion_df.include]
60-
.set_index(['source', 'variant', 'variable']).include
61-
.unstack()
62-
.fillna(False)
60+
.set_index(["source", "variant", "variable"])
61+
.include.unstack()
62+
.fillna(value=False)
6363
)
6464

6565
cd_data.save_scenario_metadata(meta_df)
Binary file not shown.

0 commit comments

Comments
 (0)