|
28 | 28 | 108, |
29 | 29 | 147, |
30 | 30 | 156, |
31 | | - 179, |
32 | | - 196, |
33 | | - 225, |
34 | | - 234, |
35 | | - 241, |
36 | | - 247, |
37 | | - 251, |
38 | | - 258, |
39 | | - 265, |
40 | | - 270, |
41 | | - 286, |
42 | | - 336, |
43 | | - 350, |
| 31 | + 182, |
| 32 | + 199, |
| 33 | + 228, |
| 34 | + 237, |
| 35 | + 244, |
| 36 | + 250, |
| 37 | + 254, |
| 38 | + 261, |
| 39 | + 268, |
| 40 | + 273, |
| 41 | + 289, |
| 42 | + 339, |
| 43 | + 353, |
44 | 44 | ], |
45 | 45 | [ |
46 | 46 | 72, |
47 | 47 | 99, |
48 | 48 | 104, |
49 | 49 | 142, |
50 | 50 | 152, |
51 | | - 175, |
52 | | - 192, |
53 | | - 221, |
54 | | - 230, |
55 | | - 236, |
56 | | - 243, |
57 | | - 247, |
58 | | - 254, |
59 | | - 261, |
60 | | - 266, |
61 | | - 281, |
62 | | - 332, |
63 | | - 346, |
64 | | - 352, |
| 51 | + 178, |
| 52 | + 195, |
| 53 | + 224, |
| 54 | + 233, |
| 55 | + 239, |
| 56 | + 246, |
| 57 | + 250, |
| 58 | + 257, |
| 59 | + 264, |
| 60 | + 269, |
| 61 | + 284, |
| 62 | + 335, |
| 63 | + 349, |
| 64 | + 355, |
65 | 65 | ], |
66 | 66 | ): |
67 | 67 | dfs.append(pd.read_excel(local_source, skiprows=first - 2, nrows=last - first + 1)) |
|
88 | 88 | if query["LAT"].iloc[0] < 60: |
89 | 89 | df.loc[df["Site Name"] == "Andryushkino", "LAT"] += 60.0 |
90 | 90 |
|
91 | | -# Cleanup the string columns |
92 | | -for col in ["Site Code", "Site Name", "Method"]: |
93 | | - df[col] = df[col].astype(str).str.strip() |
94 | | - |
95 | 91 | # Cleanup the data columns |
96 | 92 | years = [c for c in df.columns if isinstance(c, int)] |
97 | 93 | for year in years: |
|
103 | 99 | col = col.replace("", np.nan) |
104 | 100 | df[year] = col.astype(float) |
105 | 101 |
|
106 | | -# Setup target grid and only keep cells where there is some data |
107 | | -GRID_RES = 1.0 |
108 | | -lat = np.linspace(-90, 90, int(round(180.0 / GRID_RES)) + 1) |
109 | | -lon = np.linspace(-180, 180, int(round(360.0 / GRID_RES)) + 1) |
110 | | -df.rename(columns={y: str(y) for y in years}).to_parquet("df.parquet") |
111 | | -df = ( |
112 | | - df.groupby( |
113 | | - [ |
114 | | - pd.cut(df["LAT"], lat), |
115 | | - pd.cut(df["LONG"], lon), |
116 | | - ] |
117 | | - ) |
118 | | - .median(numeric_only=True) |
119 | | - .drop(columns=["LAT", "LONG"]) |
120 | | - .reset_index() |
121 | | -) |
122 | 102 | df = df[~df[years].isna().all(axis=1)].reset_index(drop=True) |
123 | 103 |
|
124 | | -# The pandas cuts leaves these dimensions as intervals, we want the midpoint. |
125 | | -for col in ["LAT", "LONG"]: |
126 | | - df[col] = df[col].apply(lambda x: x.mid) |
127 | | - |
128 | 104 | tb = np.array( |
129 | 105 | [ |
130 | 106 | [cf.DatetimeNoLeap(y, 1, 1) for y in years], |
|
136 | 112 | ds = xr.DataArray( |
137 | 113 | df[years].to_numpy().T, |
138 | 114 | coords={"time": t}, |
139 | | - dims=("time", "data"), |
| 115 | + dims=("time", "sites"), |
140 | 116 | attrs={"long_name": "Average thaw depth at end-of-season", "units": "cm"}, |
141 | 117 | ).to_dataset(name="alt") |
| 118 | +ds["site_code"] = xr.DataArray(df["Site Code"].to_numpy(), dims=("sites")) |
| 119 | +ds["site_code"].attrs = {"long_name":"CALM site code"} |
| 120 | +ds["site_name"] = xr.DataArray(df["Site Name"].to_numpy(), dims=("sites")) |
| 121 | +ds["site_name"].attrs = {"long_name":"CALM site name"} |
142 | 122 | ds["time_bnds"] = xr.DataArray(tb, dims=("time", "nb")) |
143 | | -ds["lat"] = xr.DataArray(df["LAT"].to_numpy(), dims=("data")) |
144 | | -ds["lon"] = xr.DataArray(df["LONG"].to_numpy(), dims=("data")) |
| 123 | +ds["lat"] = xr.DataArray(df["LAT"].to_numpy(), dims=("sites")) |
| 124 | +ds["lon"] = xr.DataArray(df["LONG"].to_numpy(), dims=("sites")) |
145 | 125 | ds.attrs = { |
146 | 126 | "title": "CALM: Circumpolar Active Layer Monitoring Network", |
147 | 127 | "versions": "2022", |
|
0 commit comments