Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
671cf92
NUK_P tx format added (#310)
PennyHow Oct 24, 2024
8610065
update rh_*_cor to rh_'_wrt_ice_or_water
BaptisteVandecrux Nov 8, 2024
ede895d
fixing tests
BaptisteVandecrux Nov 8, 2024
3bce759
fixing adjustHumidity
BaptisteVandecrux Nov 8, 2024
c39844b
fixed resample
BaptisteVandecrux Nov 8, 2024
de45a9c
fixed unintended removal of rh_*_ice_or_water from the daily and mont…
BaptisteVandecrux Nov 12, 2024
5d312ac
Fixed format test
BaptisteVandecrux Nov 11, 2024
a77b725
ORO added to gps_lon station exemption (#314)
PennyHow Nov 12, 2024
125da67
adjusting altitude filter
BaptisteVandecrux Nov 12, 2024
087e485
version number update
BaptisteVandecrux Nov 19, 2024
e87a363
Update conf.py
BaptisteVandecrux Nov 19, 2024
0a9fb56
adding compression
BaptisteVandecrux Sep 11, 2024
13b7e51
increasing the time constrain on the reading of dataset
BaptisteVandecrux Sep 11, 2024
55e5cba
Added NetCDF zip compr to joined l3 products
ladsmund Nov 21, 2024
8a22757
Removed unused write methods from the AWS class
ladsmund Nov 21, 2024
5365355
fixup join_l3
ladsmund Nov 21, 2024
595c149
Reverted time constrain from test
ladsmund Nov 21, 2024
89665a5
Updated e2e test to teste zlib compression
ladsmund Nov 21, 2024
0668c97
Merge branch 'develop' of https://github.com/GEUS-Glaciology-and-Clim…
BaptisteVandecrux Nov 24, 2024
d28748d
Update variable_aliases_GC-Net.csv
BaptisteVandecrux Jan 31, 2025
646c874
upload-artefact version bumped
PennyHow Jan 31, 2025
2a1cd13
update L3 data merging + z_ice_surf postprocessing
BaptisteVandecrux Feb 7, 2025
1b0b619
typo fix
BaptisteVandecrux Feb 7, 2025
3d4004b
Update join_l3.py
BaptisteVandecrux Feb 10, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/process_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ jobs:
python3 $GITHUB_WORKSPACE/main/src/pypromice/process/get_l2tol3.py -c $GITHUB_WORKSPACE/aws-l0/metadata/station_configurations/ -i $GITHUB_WORKSPACE/out/L0toL2/${i}/${i}_hour.nc -o $GITHUB_WORKSPACE/out/L2toL3/ --data_issues_path $GITHUB_WORKSPACE/data_issues
done
- name: Upload test output
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: result
path: out
retention-days: 21
118 changes: 62 additions & 56 deletions src/pypromice/process/L2toL3.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,63 +194,11 @@ def process_surface_height(ds, data_adjustments_dir, station_config={}):


if ds.attrs['site_type'] == 'ablation':
# Calculate rolling minimum for ice surface height and snow height
ts_interpolated = np.minimum(
xr.where(ds.z_ice_surf.notnull(),
ds.z_ice_surf,ds.z_surf_combined),
ds.z_surf_combined).to_series().resample('h').interpolate(limit=72)

if len(ts_interpolated)>24*7:
# Apply the rolling window with median calculation
z_ice_surf = (ts_interpolated
.rolling('14D', center=True, min_periods=1)
.median())
# Overprint the first and last 7 days with interpolated values
# because of edge effect of rolling windows
z_ice_surf.iloc[:24*7] = (ts_interpolated.iloc[:24*7]
.rolling('1D', center=True, min_periods=1)
.median().values)
z_ice_surf.iloc[-24*7:] = (ts_interpolated.iloc[-24*7:]
.rolling('1D', center=True, min_periods=1)
.median().values)
else:
z_ice_surf = (ts_interpolated
.rolling('1D', center=True, min_periods=1)
.median())

z_ice_surf = z_ice_surf.loc[ds.time]
# here we make sure that the periods where both z_stake and z_pt are
# missing are also missing in z_ice_surf
msk = ds['z_ice_surf'].notnull() | ds['z_surf_2_adj'].notnull()
z_ice_surf = z_ice_surf.where(msk)

# taking running minimum to get ice
z_ice_surf = z_ice_surf.cummin()

# filling gaps only if they are less than a year long and if values on both
# sides are less than 0.01 m appart

# Forward and backward fill to identify bounds of gaps
df_filled = z_ice_surf.fillna(method='ffill').fillna(method='bfill')

# Identify gaps and their start and end dates
gaps = pd.DataFrame(index=z_ice_surf[z_ice_surf.isna()].index)
gaps['prev_value'] = df_filled.shift(1)
gaps['next_value'] = df_filled.shift(-1)
gaps['gap_start'] = gaps.index.to_series().shift(1)
gaps['gap_end'] = gaps.index.to_series().shift(-1)
gaps['gap_duration'] = (gaps['gap_end'] - gaps['gap_start']).dt.days
gaps['value_diff'] = (gaps['next_value'] - gaps['prev_value']).abs()

# Determine which gaps to fill
mask = (gaps['gap_duration'] < 365) & (gaps['value_diff'] < 0.01)
gaps_to_fill = gaps[mask].index

# Fill gaps in the original Series
z_ice_surf.loc[gaps_to_fill] = df_filled.loc[gaps_to_fill]

# bringing the variable into the dataset
ds['z_ice_surf'] = z_ice_surf
ds['z_ice_surf'] = post_processing_z_ice_surf(ds.z_ice_surf,
ds.z_surf_combined,
ds.z_surf_2_adj)

ds['z_surf_combined'] = np.maximum(ds['z_surf_combined'], ds['z_ice_surf'])
ds['snow_height'] = np.maximum(0, ds['z_surf_combined'] - ds['z_ice_surf'])
Expand All @@ -274,9 +222,67 @@ def process_surface_height(ds, data_adjustments_dir, station_config={}):
station_config)
for var in df_out.columns:
ds[var] = ('time', df_out[var].values)

return ds

def post_processing_z_ice_surf(z_ice_surf, z_surf_combined, z_surf_2_adj):
    """Derive a smoothed, monotonically non-increasing ice surface height.

    Builds an hourly, gap-limited interpolation of the lower envelope of
    `z_ice_surf` and `z_surf_combined`, smooths it with a rolling median,
    enforces that ice surface height can only decrease over time (ablation),
    masks periods where neither input sensor reported, and finally fills
    short, near-flat gaps.

    Parameters
    ----------
    z_ice_surf : xr.DataArray
        Ice surface height from the pressure transducer, indexed by 'time'.
    z_surf_combined : xr.DataArray
        Combined surface height from all sensors, indexed by 'time'.
        Its 'time' coordinate defines the output index.
    z_surf_2_adj : xr.DataArray
        Adjusted surface height from the second sonic ranger; used only to
        decide where observations exist at all.

    Returns
    -------
    pd.Series
        Post-processed ice surface height on `z_surf_combined.time`.

    Notes
    -----
    NOTE(review): assumes the time coordinate is a datetime index that can be
    resampled to hourly resolution — the 24*7 constants below equate one week
    with 168 hourly samples; confirm callers always pass hourly (or denser)
    data.
    """
    # here we make sure that the periods where both z_stake and z_pt are
    # missing are also missing in z_ice_surf
    msk = z_ice_surf.notnull() | z_surf_2_adj.notnull()

    # Calculate rolling minimum for ice surface height and snow height.
    # Where z_ice_surf is missing, fall back to z_surf_combined, then take
    # the elementwise minimum so the series never exceeds the combined
    # surface. Resample hourly and interpolate gaps up to 72 h (3 days).
    ts_interpolated = np.minimum(
        xr.where(z_ice_surf.notnull(), z_ice_surf, z_surf_combined),
        z_surf_combined).to_series().resample('h').interpolate(limit=72)

    # 24*7 hourly samples = one week; only use the wide window when the
    # series is longer than that.
    if len(ts_interpolated)>24*7:
        # Apply the rolling window with median calculation
        z_ice_surf = (ts_interpolated
                      .rolling('14D', center=True, min_periods=1)
                      .median())
        # Overprint the first and last 7 days with interpolated values
        # because of edge effect of rolling windows
        z_ice_surf.iloc[:24*7] = (ts_interpolated.iloc[:24*7]
                                  .rolling('1D', center=True, min_periods=1)
                                  .median().values)
        z_ice_surf.iloc[-24*7:] = (ts_interpolated.iloc[-24*7:]
                                   .rolling('1D', center=True, min_periods=1)
                                   .median().values)
    else:
        # Short record: a 14-day window would be dominated by edge effects,
        # so use a 1-day median throughout.
        z_ice_surf = (ts_interpolated
                      .rolling('1D', center=True, min_periods=1)
                      .median())

    # Subsample the hourly smoothed series back onto the dataset's own
    # timestamps.
    z_ice_surf = z_ice_surf.loc[z_surf_combined.time]
    # here we make sure that the periods where both z_stake and z_pt are
    # missing are also missing in z_ice_surf
    z_ice_surf = z_ice_surf.where(msk)

    # taking running minimum to get ice: the ice surface can only ablate,
    # never rise, so clamp to the running minimum
    z_ice_surf = z_ice_surf.cummin()

    # filling gaps only if they are less than a year long and if values on both
    # sides are less than 0.01 m apart

    # Forward and backward fill to identify bounds of gaps
    df_filled = z_ice_surf.fillna(method='ffill').fillna(method='bfill')

    # Identify gaps and their start and end dates.
    # prev_value/next_value are the last/first valid heights around each gap;
    # gap_start/gap_end bracket the missing timestamps.
    gaps = pd.DataFrame(index=z_ice_surf[z_ice_surf.isna()].index)
    gaps['prev_value'] = df_filled.shift(1)
    gaps['next_value'] = df_filled.shift(-1)
    gaps['gap_start'] = gaps.index.to_series().shift(1)
    gaps['gap_end'] = gaps.index.to_series().shift(-1)
    gaps['gap_duration'] = (gaps['gap_end'] - gaps['gap_start']).dt.days
    gaps['value_diff'] = (gaps['next_value'] - gaps['prev_value']).abs()

    # Determine which gaps to fill: shorter than a year and with near-equal
    # heights (< 1 cm difference) on both sides
    mask = (gaps['gap_duration'] < 365) & (gaps['value_diff'] < 0.01)
    gaps_to_fill = gaps[mask].index

    # Fill gaps in the original Series
    z_ice_surf.loc[gaps_to_fill] = df_filled.loc[gaps_to_fill]
    return z_ice_surf

def combine_surface_height(df, site_type, threshold_ablation = -0.0002):
'''Combines the data from three sensor: the two sonic rangers and the
pressure transducer, to recreate the surface height, the ice surface height
Expand Down
120 changes: 93 additions & 27 deletions src/pypromice/process/join_l3.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import pypromice.resources
from pypromice.process.write import prepare_and_write
from pypromice.process.L2toL3 import post_processing_z_ice_surf
import numpy as np
import pandas as pd
import xarray as xr
Expand Down Expand Up @@ -308,7 +309,7 @@ def align_surface_heights(data_series_new, data_series_old):
return data_series_new


def build_station_list(config_folder: str, target_station_site: str) -> list:
def build_station_list(config_folder: str, target_station_site: str, folder_l3: str) -> list:
"""
Get a list of unique station information dictionaries for a given station site.

Expand All @@ -322,8 +323,10 @@ def build_station_list(config_folder: str, target_station_site: str) -> list:
Returns
-------
list
A list of dictionaries containing station information that have the specified station site.
A list of dictionaries containing station information that have the specified station site,
structured to handle overlapping data periods.
"""

station_info_list = [] # Initialize an empty list to store station information

found_as_station = False
Expand Down Expand Up @@ -363,12 +366,70 @@ def build_station_list(config_folder: str, target_station_site: str) -> list:
% target_station_site
)

return station_info_list
# first listing of the AWS data blocks
merged_blocks = []
for station_info in station_info_list:
stid = station_info["stid"]
filepath = os.path.join(folder_l3, f"{stid}/{stid}_hour.nc")
if not os.path.isfile(filepath):
continue

with xr.open_dataset(filepath) as ds:
ds.load()
first_valid_tu = ds["t_u"].dropna("time").time.min().values
last_valid_tu = ds["t_u"].dropna("time").time.max().values
first_valid_dsr = ds["dsr"].dropna("time").time.min().values
last_valid_dsr = ds["dsr"].dropna("time").time.max().values

first_valid = min(first_valid_tu, first_valid_dsr)
last_valid = max(last_valid_tu, last_valid_dsr)

merged_blocks.append({
"stid": stid,
"start_time": first_valid,
"end_time": last_valid,
**station_info # Unpack all keys/values from station_info
})


merged_blocks = sorted(merged_blocks, key=lambda x: x["start_time"])

    # now going through the AWS data blocks again, splitting an older
    # station's data if necessary so that it can be appended both before and
    # after a temporary newer station.
final_blocks = []
prev_block = None

for block in merged_blocks:
        # if a more recent block ends before the previous block stopped
if prev_block and block["end_time"] <= prev_block["end_time"]:
print('updating', block['stid'],block["start_time"],block["end_time"])

final_blocks.append({
"stid": block["stid"],
"start_time": block["start_time"],
"end_time": min(block["end_time"], prev_block["end_time"]),
**{k: v for k, v in block.items() if k not in ["start_time", "end_time"]}
})
            # then we make a new block out of the previous block with the most recent data
prev_block = {
"stid": prev_block["stid"],
"start_time": block["end_time"],
"end_time": prev_block["end_time"],
**{k: v for k, v in prev_block.items() if k not in ["start_time", "end_time"]}
}
final_blocks.append(prev_block)
else:
print(block['stid'],block["start_time"],block["end_time"])
final_blocks.append(block)
prev_block = block

return final_blocks


def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, metadata):
# Get the list of station information dictionaries associated with the given site
list_station_info = build_station_list(config_folder, site)
list_station_info = build_station_list(config_folder, site, folder_l3)

# Read the datasets and store them into a list along with their latest timestamp and station info
list_station_data = []
Expand All @@ -381,15 +442,7 @@ def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, me
filepath = os.path.join(folder_gcnet, stid + ".csv")
isNead = True
if not os.path.isfile(filepath):
logger.error(
"\n***\n"
+ stid
+ " was listed as station but could not be found in "
+ folder_l3
+ " nor "
+ folder_gcnet
+ "\n***"
)
logger.error(f"\n***\n{stid} was listed as station but could not be found in {folder_l3} nor {folder_gcnet}\n***")
continue

l3, _ = loadArr(filepath, isNead)
Expand All @@ -400,6 +453,7 @@ def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, me
logger.info("Skipping %s from %s" % (specific_vars_to_drop, stid))
l3 = l3.drop_vars([var for var in specific_vars_to_drop if var in l3])

l3 = l3.sel(time=slice(station_info['start_time'], station_info['end_time']))
list_station_data.append((l3, station_info))

# Sort the list in reverse chronological order so that we start with the latest data
Expand Down Expand Up @@ -428,6 +482,7 @@ def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, me
.item()
)

logger.debug('adding',stid,st_attrs[stid]["first_timestamp"] , st_attrs[stid]["last_timestamp"] )
# then stripping attributes
attrs_list = list(l3.attrs.keys())
for k in attrs_list:
Expand Down Expand Up @@ -482,44 +537,55 @@ def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, me
l3.z_surf_combined.to_series(),
),
)
if "z_ice_surf" in l3_merged.keys() and "z_ice_surf" in l3.keys():
if (
l3_merged.z_ice_surf.notnull().any()
and l3.z_ice_surf.notnull().any()
):
l3_merged["z_ice_surf"] = (
"time",
align_surface_heights(
l3_merged.z_ice_surf.to_series(), l3.z_ice_surf.to_series()
),
)

if st_attrs[stid]['site_type'] == 'accumulation':
if "z_ice_surf" in l3_merged.keys() and "z_ice_surf" in l3.keys():
if (
l3_merged.z_ice_surf.notnull().any()
and l3.z_ice_surf.notnull().any()
):
l3_merged["z_ice_surf"] = (
"time",
align_surface_heights(
l3_merged.z_ice_surf.to_series(), l3.z_ice_surf.to_series()
),
)

# saves attributes
attrs = l3_merged.attrs
# merging by time block
l3_merged = xr.concat(
(
l3.sel(
time=slice(l3.time.isel(time=0), l3_merged.time.isel(time=0))
time=slice(l3.time.isel(time=0),
l3_merged.time.isel(time=0)- pd.Timedelta(minutes=5),
# subtracting 5 minutes to make sure timestamps are not added once from l3 and once from l3_merge
)
),
l3_merged,
),
dim="time",
)

    # restoring attributes
l3_merged.attrs = attrs

# Assign site id
if not l3_merged:
logger.error("No level 3 station data file found for " + site)
return None, sorted_list_station_data

l3_merged.attrs["site_id"] = site
l3_merged.attrs["stations"] = " ".join(sorted_stids)
l3_merged.attrs["level"] = "L3"
first_station_name = list(l3_merged.attrs["stations_attributes"].keys())[0]

l3_merged.attrs["project"] = sorted_list_station_data[0][1]["project"]
l3_merged.attrs["location_type"] = sorted_list_station_data[0][1]["location_type"]

if l3_merged.attrs["stations_attributes"][first_station_name]['site_type'] == 'ablation':
l3_merged["z_ice_surf"] = post_processing_z_ice_surf(l3_merged["z_ice_surf"],
l3_merged["z_surf_combined"],
l3_merged["z_ice_surf"])
site_source = dict(
site_config_source_hash=get_commit_hash_and_check_dirty(config_folder),
gcnet_source_hash=get_commit_hash_and_check_dirty(folder_gcnet),
Expand Down
8 changes: 4 additions & 4 deletions src/pypromice/resources/variable_aliases_GC-Net.csv
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ wspd_u,VW2
wspd_l,VW1
wdir_u,DW2
wdir_l,DW1
dsr,
dsr_cor,ISWR
usr,
usr_cor,OSWR
dsr,ISWR
dsr_cor,
usr,OSWR
usr_cor,
albedo,Alb
dlr,
ulr,
Expand Down