Skip to content

Commit f1de4e4

Browse files
author
Marcon Louie Fikingas
committed
added flood data class and root
1 parent 821704f commit f1de4e4

File tree

2 files changed

+270
-1
lines changed

2 files changed

+270
-1
lines changed

src/climate_data/constants.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@
1616
MODEL_ROOT = Path("/mnt/share/erf/climate_downscale/")
1717
# Aggregation working directory
1818
AGGREGATE_ROOT = RRA_ROOT / "climate-aggregates"
19-
19+
# Flooding directory
20+
FLOOD_ROOT = Path("/mnt/team/rapidresponse/pub/flooding")
2021

2122
######################
2223
# Pipeline variables #

src/climate_data/data.py

Lines changed: 268 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -982,6 +982,274 @@ def load_results(
982982
return pd.read_parquet(path)
983983

984984

985+
class FloodingData:
    """Class for managing the flooding data used in the project.

    All paths are derived from a single root directory. Unless the instance
    is created with ``read_only=True``, the directory skeleton under the root
    is created on construction and the ``save_*``/``link_*`` methods are
    allowed to write; in read-only mode those methods raise ``ValueError``.
    """

    def __init__(
        self,
        root: str | Path = cdc.FLOOD_ROOT,
        *,
        read_only: bool = False,
    ) -> None:
        """Set up the path layout and, unless read-only, create directories.

        Parameters
        ----------
        root
            The root directory of the flooding data.
        read_only
            If True, no directories are created and every writing method
            raises ``ValueError``.
        """
        self._root = Path(root)
        self._credentials_root = self._root / "credentials"
        self._read_only = read_only
        if not read_only:
            self._create_flooding_root()

    def _create_flooding_root(self) -> None:
        """Create the directory skeleton below the root.

        Only directories that this class actually defines are created.
        ``mkdir`` is called without ``parents=True``, so the list is ordered
        with every parent ahead of its children.
        """
        directories = [
            self.root,
            self.credentials_root,
            self.results,
            self.results_metadata,
            self.daily_results,
            self.raw_daily_results,
            self.annual_results,
            self.raw_annual_results,
        ]
        for directory in directories:
            mkdir(directory, exist_ok=True)

    def _check_writable(self, action: str) -> None:
        """Raise ``ValueError`` if this instance was opened read-only.

        Parameters
        ----------
        action
            Short description of the attempted write (e.g. "save daily
            results"); it is interpolated into the error message.
        """
        if self._read_only:
            msg = f"Cannot {action} to read-only data"
            raise ValueError(msg)

    @property
    def root(self) -> Path:
        """The root directory of the flooding data."""
        return self._root

    @property
    def credentials_root(self) -> Path:
        """The ``credentials`` directory directly under the root."""
        return self._credentials_root

    ###########
    # Results #
    ###########

    @property
    def results(self) -> Path:
        """The ``results`` directory directly under the root."""
        return self.root / "results"

    @property
    def results_metadata(self) -> Path:
        """The ``metadata`` directory under ``results``."""
        return self.results / "metadata"

    def save_scenario_metadata(self, df: pd.DataFrame) -> None:
        """Write ``scenario_metadata.parquet`` into the results metadata directory.

        Raises
        ------
        ValueError
            If this instance is read-only.
        """
        self._check_writable("save scenario metadata")
        path = self.results_metadata / "scenario_metadata.parquet"
        save_parquet(df, path)

    def load_scenario_metadata(self) -> pd.DataFrame:
        """Read ``scenario_metadata.parquet`` from the results metadata directory."""
        path = self.results_metadata / "scenario_metadata.parquet"
        return pd.read_parquet(path)

    def save_scenario_inclusion_metadata(self, df: pd.DataFrame) -> None:
        """Write ``scenario_inclusion_metadata.parquet`` to two locations.

        The frame is saved both to the results metadata directory and to the
        ``scripts`` directory three levels above this source file.

        Raises
        ------
        ValueError
            If this instance is read-only.
        """
        self._check_writable("save scenario inclusion metadata")
        # Need to save to our scripts directory for doc building
        scripts_root = Path(__file__).parent.parent.parent / "scripts"
        for root_dir in [self.results_metadata, scripts_root]:
            path = root_dir / "scenario_inclusion_metadata.parquet"
            save_parquet(df, path)

    def load_scenario_inclusion_metadata(self) -> pd.DataFrame:
        """Read ``scenario_inclusion_metadata.parquet`` from the results metadata directory."""
        path = self.results_metadata / "scenario_inclusion_metadata.parquet"
        return pd.read_parquet(path)

    @property
    def daily_results(self) -> Path:
        """The ``daily`` directory under ``results``."""
        return self.results / "daily"

    @property
    def raw_daily_results(self) -> Path:
        """The ``raw`` directory under ``daily_results``."""
        return self.daily_results / "raw"

    def raw_daily_results_path(
        self,
        scenario: str,
        variable: str,
        year: int | str,
        gcm_member: str,
    ) -> Path:
        """Return ``<raw_daily_results>/<scenario>/<variable>/<year>_<gcm_member>.nc``."""
        return self.raw_daily_results / scenario / variable / f"{year}_{gcm_member}.nc"

    def save_raw_daily_results(
        self,
        results_ds: xr.Dataset,
        scenario: str,
        variable: str,
        year: int | str,
        gcm_member: str,
        encoding_kwargs: dict[str, Any],
    ) -> None:
        """Save a dataset at the raw daily results path, creating parent directories.

        Raises
        ------
        ValueError
            If this instance is read-only.
        """
        self._check_writable("save raw daily results")
        path = self.raw_daily_results_path(scenario, variable, year, gcm_member)
        mkdir(path.parent, exist_ok=True, parents=True)
        save_xarray(results_ds, path, encoding_kwargs)

    def daily_results_path(
        self,
        scenario: str,
        variable: str,
        year: int | str,
    ) -> Path:
        """Return ``<daily_results>/<scenario>/<variable>/<year>.nc``."""
        return self.daily_results / scenario / variable / f"{year}.nc"

    def save_daily_results(
        self,
        results_ds: xr.Dataset,
        scenario: str,
        variable: str,
        year: int | str,
        encoding_kwargs: dict[str, Any],
    ) -> None:
        """Save a dataset at the daily results path, creating parent directories.

        Raises
        ------
        ValueError
            If this instance is read-only.
        """
        self._check_writable("save daily results")
        path = self.daily_results_path(scenario, variable, year)
        mkdir(path.parent, exist_ok=True, parents=True)
        save_xarray(results_ds, path, encoding_kwargs)

    def load_daily_results(
        self,
        scenario: str,
        variable: str,
        year: int | str,
    ) -> xr.Dataset:
        """Open the dataset stored at the daily results path."""
        results_path = self.daily_results_path(scenario, variable, year)
        return xr.open_dataset(results_path)

    @property
    def annual_results(self) -> Path:
        """The ``annual`` directory under ``results``."""
        return self.results / "annual"

    @property
    def raw_annual_results(self) -> Path:
        """The ``raw`` directory under ``annual_results``."""
        return self.annual_results / "raw"

    def raw_annual_results_path(
        self,
        scenario: str,
        variable: str,
        year: int | str,
        gcm_member: str,
    ) -> Path:
        """Return ``<raw_annual_results>/<scenario>/<variable>/<year>_<gcm_member>.nc``."""
        return self.raw_annual_results / scenario / variable / f"{year}_{gcm_member}.nc"

    def save_raw_annual_results(
        self,
        results_ds: xr.Dataset,
        scenario: str,
        variable: str,
        year: int | str,
        gcm_member: str,
        encoding_kwargs: dict[str, Any],
    ) -> None:
        """Save a dataset at the raw annual results path, creating parent directories.

        Raises
        ------
        ValueError
            If this instance is read-only.
        """
        self._check_writable("save raw annual results")
        path = self.raw_annual_results_path(scenario, variable, year, gcm_member)
        mkdir(path.parent, exist_ok=True, parents=True)
        save_xarray(results_ds, path, encoding_kwargs)

    @property
    def compiled_annual_results(self) -> Path:
        """The ``compiled`` directory under ``raw_annual_results``."""
        return self.raw_annual_results / "compiled"

    def compiled_annual_results_path(
        self,
        scenario: str,
        variable: str,
        gcm_member: str,
    ) -> Path:
        """Return ``<compiled_annual_results>/<scenario>/<variable>/<gcm_member>.nc``."""
        return self.compiled_annual_results / scenario / variable / f"{gcm_member}.nc"

    def save_compiled_annual_results(
        self,
        results_ds: xr.Dataset,
        scenario: str,
        variable: str,
        gcm_member: str,
    ) -> None:
        """Write a dataset to the compiled annual results path as netCDF.

        Parent directories are created as needed and the target file is
        touched with ``clobber=True`` before the dataset is written.

        Raises
        ------
        ValueError
            If this instance is read-only.
        """
        self._check_writable("save compiled annual results")
        path = self.compiled_annual_results_path(scenario, variable, gcm_member)
        mkdir(path.parent, exist_ok=True, parents=True)
        touch(path, clobber=True)
        results_ds.to_netcdf(path)

    def annual_results_path(
        self,
        scenario: str,
        variable: str,
        draw: int | str,
    ) -> Path:
        """Return ``<annual_results>/<scenario>/<variable>/<draw>.nc``.

        The draw is right-aligned and zero-padded to at least three
        characters (e.g. ``5`` -> ``005.nc``).
        """
        return self.annual_results / scenario / variable / f"{draw:0>3}.nc"

    def link_annual_draw(
        self,
        draw: int | str,
        scenario: str,
        variable: str,
        gcm_member: str,
    ) -> None:
        """Symlink an annual draw file to a compiled annual results file.

        The link is created at ``annual_results_path(scenario, variable, draw)``
        and points at ``compiled_annual_results_path(scenario, variable,
        gcm_member)``. Anything already at the link location is replaced.

        Raises
        ------
        ValueError
            If this instance is read-only.
        """
        self._check_writable("link annual draw")
        source_path = self.compiled_annual_results_path(scenario, variable, gcm_member)
        dest_path = self.annual_results_path(scenario, variable, draw)
        mkdir(dest_path.parent, exist_ok=True, parents=True)
        # Path.exists() follows symlinks and reports False for a dangling
        # link, which would leave the stale link in place and make
        # symlink_to() fail. unlink(missing_ok=True) removes the entry
        # itself whether or not its target still exists.
        dest_path.unlink(missing_ok=True)
        dest_path.symlink_to(source_path)

    def draw_results_path(self, scenario: str, measure: str, draw: str) -> Path:
        """Get the path to annual results for a specific scenario, measure, and draw.

        Unlike ``annual_results_path``, the draw is used verbatim in the file
        name (no zero-padding is applied).

        Parameters
        ----------
        scenario
            The scenario (e.g. "ssp126")
        measure
            The measure name, used as a directory name
        draw
            The draw to build the path for (e.g. "000")

        Returns
        -------
        Path
            The path to the results file
        """
        return self.annual_results / scenario / measure / f"{draw}.nc"

    def load_draw_results(self, scenario: str, measure: str, draw: str) -> xr.Dataset:
        """Load annual results for a specific scenario, measure, and draw.

        The file is located with ``annual_results_path`` (so the draw is
        zero-padded to three characters) and the returned dataset has its
        CRS written as EPSG:4326.

        Parameters
        ----------
        scenario
            The scenario (e.g. "ssp126")
        measure
            The measure name, used as a directory name
        draw
            The draw to load (e.g. "000")

        Returns
        -------
        xr.Dataset
            The results in xarray format
        """
        path = self.annual_results_path(scenario, measure, draw)
        ds = xr.open_dataset(path, decode_coords="all")
        ds = ds.rio.write_crs("EPSG:4326")
        return ds
1252+
9851253
def save_xarray(
9861254
ds: xr.Dataset,
9871255
output_path: str | Path,

0 commit comments

Comments
 (0)