@@ -46,8 +46,12 @@ def annotate_granule(
4646 config_file = varinfo_config_file ,
4747 )
4848
49- if len (granule_varinfo .cf_config .metadata_overrides ):
50- # There are metadata overrides applicable to the granule's collection:
49+ if (
50+ len (granule_varinfo .cf_config .metadata_overrides )
51+ or granule_varinfo .cf_config .excluded_science_variables
52+ ):
53+ # There are metadata overrides or excluded variables
54+ # applicable to the granule's collection:
5155 amend_in_file_metadata (input_file_name , output_file_name , granule_varinfo )
5256 else :
5357 # There are no updates required, so copy the input file as-is:
@@ -60,10 +64,10 @@ def amend_in_file_metadata(
6064 """Update metadata attributes according to known rules.
6165
6266 First, identify the variables or groups needing to be updated, or variables
63- that need to be created. Next create any missing, attribute only, variables.
64- Update the metadata attributes of all variables listed in overrides, or
65- removing any attributes with an overriding value of None. Lastly, update
66- the `history` global attribute.
67+ that need to be created. Then, delete any variables that are configured to be
68+ excluded. Next create any missing, attribute only, variables. Update the metadata
69+ attributes of all variables listed in overrides, or removing any attributes with an
70+ overriding value of None. Lastly, update the `history` global attribute.
6771
6872 When opening the file as a DataTree, attempts to decode times, coordinates
6973 and other CF-Convention metadata are disabled, to allow updates to be made
@@ -74,7 +78,7 @@ def amend_in_file_metadata(
7478 items_to_update , variables_to_create = get_matching_groups_and_variables (
7579 granule_varinfo ,
7680 )
77-
81+ variables_to_delete = get_variables_to_delete ( granule_varinfo )
7882 with xr .open_datatree (
7983 input_file_name ,
8084 decode_times = False ,
@@ -83,7 +87,15 @@ def amend_in_file_metadata(
8387 concat_characters = True ,
8488 use_cftime = False ,
8589 mask_and_scale = False ,
90+ engine = 'h5netcdf' ,
8691 ) as datatree :
92+ # Delete the excluded variables from the datatree and remove them from
93+ # the set of items to update
94+ for variable in variables_to_delete :
95+ if variable in items_to_update :
96+ items_to_update .remove (variable )
97+ delete_variable (datatree , variable )
98+
8799 # Update all pre-existing variables or groups with metadata overrides including
88100 # dimension renaming where applicable.
89101 update_group_and_variable_attributes (datatree , items_to_update , granule_varinfo )
@@ -117,7 +129,7 @@ def amend_in_file_metadata(
117129 # whole `xarray.DataTree` in one operation. Making this write variables
118130 # and group separately reduces the memory usage, but makes the
119131 # operation slower. (See Harmony SMAP L2 Gridder implementation)
120- datatree .to_netcdf (output_file_name )
132+ datatree .to_netcdf (output_file_name , engine = 'h5netcdf' )
121133
122134
123135def get_matching_groups_and_variables (
@@ -510,3 +522,26 @@ def get_referenced_variables(
510522 )
511523
512524 return referenced_variables
525+
526+
def get_variables_to_delete(
    var_info: 'VarInfoFromNetCDF4',
) -> list[str]:
    """Return the variables that the VarInfo configuration marks for deletion.

    Every variable reported by `var_info.get_all_variables()` is tested with
    `is_excluded_science_variable`, and those reported as excluded are
    returned.

    Args:
        var_info: Granule VarInfo object. Its
            `cf_config.excluded_science_variables` supplies the exclusion
            rules and `get_all_variables()` supplies the candidate paths.

    Returns:
        The full paths of the variables to delete. The list is empty when
        the configuration defines no excluded science variables.

    """
    # With no exclusion rules there is nothing to delete. Returning early
    # also avoids ever building a regular expression from an empty list of
    # rules: that would be the empty pattern, which matches any string.
    if not var_info.cf_config.excluded_science_variables:
        return []

    return [
        variable
        for variable in var_info.get_all_variables()
        if is_excluded_science_variable(var_info, variable)
    ]
533+
534+
def is_excluded_science_variable(var_info: 'VarInfoFromNetCDF4', var) -> bool:
    """Return True if a variable is explicitly excluded by VarInfo configuration.

    The entries of `var_info.cf_config.excluded_science_variables` are joined
    with `|` into a single regular expression, which is handed to
    `var_info.variable_is_excluded` together with the variable path.

    Args:
        var_info: Granule VarInfo object providing the exclusion rules and
            the `variable_is_excluded` check.
        var: Full path of the variable to test.

    Returns:
        The result of `var_info.variable_is_excluded` for the combined
        pattern, or False when no exclusion rules are configured.

    """
    exclusions = var_info.cf_config.excluded_science_variables

    # Joining an empty collection produces the empty pattern, which matches
    # any string; without this guard a configuration with no exclusions could
    # report every variable as excluded.
    if not exclusions:
        return False

    exclusions_pattern = re.compile('|'.join(exclusions))
    return var_info.variable_is_excluded(var, exclusions_pattern)
541+
542+
def delete_variable(datatree, full_variable_path: str) -> None:
    """Delete a variable from the DataTree.

    The path is split at its last `/`: everything before it addresses the
    parent group and the remainder is the variable name. An empty parent
    (for example `/variable`, or a bare name containing no `/`) means the
    variable is deleted directly from `datatree` itself.

    Args:
        datatree: Tree (or tree node) supporting item lookup by group path
            and item deletion by variable name.
        full_variable_path: Path of the variable to remove, for example
            `/group/variable`.

    """
    # `rpartition` always yields three parts, so a name without any `/` is
    # handled as a root-level variable instead of failing to unpack.
    parent_group, _, variable_name = full_variable_path.rpartition('/')
    node = datatree[parent_group] if parent_group else datatree
    del node[variable_name]
0 commit comments