Skip to content

Commit 438d882

Browse files
authored
Merge pull request #89 from jitendra-kumar/master
This commit updates the processing of the CALM dataset
2 parents d3703c1 + 6b3d5b4 commit 438d882

File tree

1 file changed

+34
-54
lines changed

1 file changed

+34
-54
lines changed

active_layer_thickness/CALM/convert.py

Lines changed: 34 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -28,40 +28,40 @@
2828
108,
2929
147,
3030
156,
31-
179,
32-
196,
33-
225,
34-
234,
35-
241,
36-
247,
37-
251,
38-
258,
39-
265,
40-
270,
41-
286,
42-
336,
43-
350,
31+
182,
32+
199,
33+
228,
34+
237,
35+
244,
36+
250,
37+
254,
38+
261,
39+
268,
40+
273,
41+
289,
42+
339,
43+
353,
4444
],
4545
[
4646
72,
4747
99,
4848
104,
4949
142,
5050
152,
51-
175,
52-
192,
53-
221,
54-
230,
55-
236,
56-
243,
57-
247,
58-
254,
59-
261,
60-
266,
61-
281,
62-
332,
63-
346,
64-
352,
51+
178,
52+
195,
53+
224,
54+
233,
55+
239,
56+
246,
57+
250,
58+
257,
59+
264,
60+
269,
61+
284,
62+
335,
63+
349,
64+
355,
6565
],
6666
):
6767
dfs.append(pd.read_excel(local_source, skiprows=first - 2, nrows=last - first + 1))
@@ -88,10 +88,6 @@
8888
if query["LAT"].iloc[0] < 60:
8989
df.loc[df["Site Name"] == "Andryushkino", "LAT"] += 60.0
9090

91-
# Cleanup the string columns
92-
for col in ["Site Code", "Site Name", "Method"]:
93-
df[col] = df[col].astype(str).str.strip()
94-
9591
# Cleanup the data columns
9692
years = [c for c in df.columns if isinstance(c, int)]
9793
for year in years:
@@ -103,28 +99,8 @@
10399
col = col.replace("", np.nan)
104100
df[year] = col.astype(float)
105101

106-
# Setup target grid and only keep cells where there is some data
107-
GRID_RES = 1.0
108-
lat = np.linspace(-90, 90, int(round(180.0 / GRID_RES)) + 1)
109-
lon = np.linspace(-180, 180, int(round(360.0 / GRID_RES)) + 1)
110-
df.rename(columns={y: str(y) for y in years}).to_parquet("df.parquet")
111-
df = (
112-
df.groupby(
113-
[
114-
pd.cut(df["LAT"], lat),
115-
pd.cut(df["LONG"], lon),
116-
]
117-
)
118-
.median(numeric_only=True)
119-
.drop(columns=["LAT", "LONG"])
120-
.reset_index()
121-
)
122102
df = df[~df[years].isna().all(axis=1)].reset_index(drop=True)
123103

124-
# The pandas cuts leaves these dimensions as intervals, we want the midpoint.
125-
for col in ["LAT", "LONG"]:
126-
df[col] = df[col].apply(lambda x: x.mid)
127-
128104
tb = np.array(
129105
[
130106
[cf.DatetimeNoLeap(y, 1, 1) for y in years],
@@ -136,12 +112,16 @@
136112
ds = xr.DataArray(
137113
df[years].to_numpy().T,
138114
coords={"time": t},
139-
dims=("time", "data"),
115+
dims=("time", "sites"),
140116
attrs={"long_name": "Average thaw depth at end-of-season", "units": "cm"},
141117
).to_dataset(name="alt")
118+
ds["site_code"] = xr.DataArray(df["Site Code"].to_numpy(), dims=("sites"))
119+
ds["site_code"].attrs = {"long_name":"CALM site code"}
120+
ds["site_name"] = xr.DataArray(df["Site Name"].to_numpy(), dims=("sites"))
121+
ds["site_name"].attrs = {"long_name":"CALM site name"}
142122
ds["time_bnds"] = xr.DataArray(tb, dims=("time", "nb"))
143-
ds["lat"] = xr.DataArray(df["LAT"].to_numpy(), dims=("data"))
144-
ds["lon"] = xr.DataArray(df["LONG"].to_numpy(), dims=("data"))
123+
ds["lat"] = xr.DataArray(df["LAT"].to_numpy(), dims=("sites"))
124+
ds["lon"] = xr.DataArray(df["LONG"].to_numpy(), dims=("sites"))
145125
ds.attrs = {
146126
"title": "CALM: Circumpolar Active Layer Monitoring Network",
147127
"versions": "2022",

0 commit comments

Comments
 (0)