Skip to content

Commit 710120d

Browse files
authored
Merge pull request #8 from ihmeuw/checkpoint
Fix linting and type checking
2 parents d622802 + a63f091 commit 710120d

File tree

13 files changed

+163
-708
lines changed

13 files changed

+163
-708
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ Instructions using conda:
3838

3939
```sh
4040
pip install poetry
41-
cd climate-data
41+
cd climate-data
4242
poetry install
4343
```
4444

docs/index.md

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,62 @@
1-
--8<-- "README.md"
1+
# Climate Data
2+
3+
This package contains pipelines and utilities to systematically extract, format, and downscale
4+
data from ERA5 climate models and CMIP6 climate forecasts.
5+
6+
## Developer Installation
7+
8+
Instructions using [`conda`](https://docs.anaconda.com/miniconda/):
9+
10+
1. Clone this repository.
11+
12+
Over ssh:
13+
```sh
14+
git clone git@github.com:ihmeuw/climate-data.git
15+
```
16+
17+
Over https:
18+
```sh
19+
git clone https://github.com/ihmeuw/climate-data.git
20+
```
21+
22+
2. Create a new conda environment.
23+
24+
```sh
25+
conda create -n climate-data python=3.12
26+
conda activate climate-data
27+
```
28+
29+
3. Install `poetry` and the project dependencies.
30+
31+
```sh
32+
pip install poetry
33+
cd climate-data
34+
poetry install
35+
```
36+
37+
### Pre-commit
38+
39+
[`pre-commit`](https://pre-commit.com/) hooks run all the auto-formatting (`ruff format`),
40+
linters (e.g. `ruff` and `mypy`), and other quality checks to make sure the changeset is
41+
in good shape before a commit/push happens.
42+
43+
You can install the hooks with (runs for each commit):
44+
45+
```sh
46+
pre-commit install
47+
```
48+
49+
Or if you want them to run only for each push:
50+
51+
```sh
52+
pre-commit install -t pre-push
53+
```
54+
55+
Or if you e.g. want to run all checks manually for all files:
56+
57+
```sh
58+
poetry run pre-commit run --all-files
59+
```
60+
61+
`pre-commit` is configured in the `.pre-commit-config.yaml` file in the repository root.
62+
All auto-formatting, linting, and other tooling is configured in the `pyproject.toml` file.

poetry.lock

Lines changed: 27 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ gcsfs = "^2024.6.0"
5050
zarr = "^2.18.2"
5151
types-pyyaml = "^6.0.12.20240311"
5252
dask = "^2024.5.2"
53+
pandas-stubs = "^2.2.3.241009"
5354

5455
[tool.poetry.group.dev.dependencies]
5556
mkdocstrings = {version = ">=0.23", extras = ["python"]}
@@ -103,6 +104,7 @@ ignore = [
103104
"PD010", # I like stack and unstack
104105
"FBT001", # Boolean positional args are super common in clis
105106
"FBT002", # Boolean positional args are super common in clis
107+
"PD901", # Generic df names are fine
106108
]
107109

108110
[tool.ruff.lint.per-file-ignores]

src/climate_data/data.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,10 @@ def extracted_era5(self) -> Path:
3737
def extracted_era5_path(
3838
self, dataset: str, variable: str, year: int | str, month: str
3939
) -> Path:
40-
return self.extracted_era5 / f"reanalysis-era5-{dataset}_{variable}_{year}_{month}.nc"
40+
return (
41+
self.extracted_era5
42+
/ f"reanalysis-era5-{dataset}_{variable}_{year}_{month}.nc"
43+
)
4144

4245
@property
4346
def extracted_cmip6(self) -> Path:

src/climate_data/extract/cmip6.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def extract_cmip6_main(
3636
cmip6_source: str,
3737
cmip6_experiment: str,
3838
cmip6_variable: str,
39-
overwrite: bool, # noqa: FBT001
39+
overwrite: bool,
4040
) -> None:
4141
print(f"Checking metadata for {cmip6_source} {cmip6_experiment} {cmip6_variable}")
4242
cd_data = ClimateDownscaleData(output_dir)
@@ -101,7 +101,7 @@ def extract_cmip6_task(
101101
cmip6_source: str,
102102
cmip6_experiment: str,
103103
cmip6_variable: str,
104-
overwrite: bool, # noqa: FBT001
104+
overwrite: bool,
105105
) -> None:
106106
extract_cmip6_main(
107107
output_dir, cmip6_source, cmip6_experiment, cmip6_variable, overwrite
@@ -121,7 +121,7 @@ def extract_cmip6(
121121
cmip6_experiment: str,
122122
cmip6_variable: str,
123123
queue: str,
124-
overwrite: bool, # noqa: FBT001
124+
overwrite: bool,
125125
) -> None:
126126
sources = (
127127
clio.VALID_CMIP6_SOURCES if cmip6_source == clio.RUN_ALL else [cmip6_source]

src/climate_data/generate/scenario_annual.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,16 @@
5252
)
5353
for lower, upper in BETWEEN_TEMP_THRESHOLDS
5454
},
55+
**{
56+
f"{disease}_suitability": utils.Transform(
57+
source_variables=["mean_temperature"],
58+
transform_funcs=[
59+
utils.map_suitability(disease),
60+
utils.annual_sum,
61+
],
62+
)
63+
for disease in ["malaria", "dengue"]
64+
},
5565
"mean_heat_index": utils.Transform(
5666
source_variables=["heat_index"],
5767
transform_funcs=[utils.annual_mean],

src/climate_data/generate/scenario_daily.py

Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
from collections import defaultdict
21
import itertools
2+
from collections import defaultdict
33
from pathlib import Path
44

55
import click
@@ -90,10 +90,12 @@ def get_source_paths(
9090
source_paths = defaultdict(list)
9191
for source, variant in inclusion_meta.index.tolist():
9292
source_paths[source].append(
93-
[cd_data.extracted_cmip6_path(v, cmip6_experiment, source, variant)
94-
for v in source_variables]
95-
)
96-
93+
[
94+
cd_data.extracted_cmip6_path(v, cmip6_experiment, source, variant)
95+
for v in source_variables
96+
]
97+
)
98+
9799
return source_paths
98100

99101

@@ -152,7 +154,7 @@ def compute_anomaly(
152154
return anomaly
153155

154156

155-
def generate_scenario_daily_main( # noqa: PLR0912
157+
def generate_scenario_daily_main( # noqa: PLR0912, PLR0915, C901
156158
output_dir: str | Path,
157159
year: str | int,
158160
target_variable: str,
@@ -177,7 +179,7 @@ def generate_scenario_daily_main( # noqa: PLR0912
177179
sid = f"Source {i+1}/{len(source_paths)}: {source}"
178180

179181
source_anomalies: dict[str, tuple[int, xr.Dataset]] = {}
180-
for j, vps in enumerate(variant_paths):
182+
for j, vps in enumerate(variant_paths):
181183
vid = f"{sid}, Variant {j+1}/{len(variant_paths)}: {vps[0].stem.split('_')[-1]}"
182184
try:
183185
print(f"{vid}: Loading reference")
@@ -187,20 +189,20 @@ def generate_scenario_daily_main( # noqa: PLR0912
187189
except KeyError:
188190
print(f"{vid}: Bad formatting, skipping...")
189191
continue
190-
192+
191193
print(f"{vid}: computing anomaly")
192194
v_anomaly = compute_anomaly(sref, target, anomaly_type)
193-
195+
194196
key = f"{len(v_anomaly.latitude)}_{len(v_anomaly.longitude)}"
195197

196198
if key in source_anomalies:
197199
old_count, old_anomaly = source_anomalies[key]
198-
200+
199201
for coord in ["latitude", "longitude"]:
200202
old_c = old_anomaly[coord].to_numpy()
201203
new_c = v_anomaly[coord].to_numpy()
202204
tol = 1e-5
203-
205+
204206
if np.abs(old_c - new_c).max() < tol:
205207
v_anomaly = v_anomaly.assign({coord: old_c})
206208
else:
@@ -212,7 +214,7 @@ def generate_scenario_daily_main( # noqa: PLR0912
212214
if source_anomalies:
213215
anomalies[source] = source_anomalies
214216

215-
ensemble_anomaly = xr.Dataset()
217+
ensemble_anomaly = xr.Dataset()
216218
for i, (source, source_anomalies) in enumerate(anomalies.items()):
217219
sid = f"Source {i+1}/{len(source_paths)}: {source}"
218220
print(f"Downscaling {i+1}/{len(anomalies)}: {source}")
@@ -222,19 +224,23 @@ def generate_scenario_daily_main( # noqa: PLR0912
222224
for j, (res, (count, v_anomaly)) in enumerate(source_anomalies.items()):
223225
res_id = f"{sid}, Resolution {j} / {len(source_anomalies)}: {res}"
224226
print(f"Downscaling {res_id}")
225-
227+
226228
if source_ensemble_anomaly.nbytes:
227-
source_ensemble_anomaly += utils.interpolate_to_target_latlon(v_anomaly, method="linear")
229+
source_ensemble_anomaly += utils.interpolate_to_target_latlon(
230+
v_anomaly, method="linear"
231+
)
228232
else:
229-
source_ensemble_anomaly = utils.interpolate_to_target_latlon(v_anomaly, method="linear")
233+
source_ensemble_anomaly = utils.interpolate_to_target_latlon(
234+
v_anomaly, method="linear"
235+
)
230236
total_count += count
231237
source_ensemble_anomaly /= total_count
232238

233239
if ensemble_anomaly.nbytes:
234240
ensemble_anomaly += source_ensemble_anomaly
235241
else:
236242
ensemble_anomaly = source_ensemble_anomaly
237-
243+
238244
ensemble_anomaly /= len(anomalies)
239245

240246
print("Computing scenario data")
@@ -277,7 +283,7 @@ def generate_scenario_daily(
277283
target_variable: str,
278284
cmip6_experiment: str,
279285
queue: str,
280-
overwrite: bool, # noqa: FBT001
286+
overwrite: bool,
281287
) -> None:
282288
cd_data = ClimateDownscaleData(output_dir)
283289

src/climate_data/generate/scenario_inclusion.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,9 @@ def generate_scenario_inclusion_main(
5757
inclusion_df["include"] = inclusion_df.valid_scenarios == 5 # noqa: PLR2004
5858
inclusion_df = (
5959
inclusion_df.loc[inclusion_df.include]
60-
.set_index(['source', 'variant', 'variable']).include
61-
.unstack()
62-
.fillna(False)
60+
.set_index(["source", "variant", "variable"])
61+
.include.unstack()
62+
.fillna(value=False)
6363
)
6464

6565
cd_data.save_scenario_metadata(meta_df)
Binary file not shown.

0 commit comments

Comments
 (0)