Skip to content

Commit 1d9084f

Browse files
authored
DAS-2373: Adds support to drop unprocessable variables. (#38)
1 parent 58bd8c7 commit 1d9084f

9 files changed

Lines changed: 326 additions & 16 deletions

File tree

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,14 @@ The Harmony Regridding Service follows semantic versioning. All notable changes
44
to this project will be documented in this file. The format is based on [Keep a
55
Changelog](http://keepachangelog.com/en/1.0.0/).
66

7+
## [v1.6.0] - 2025-09-12
8+
9+
### Changed
10+
11+
- Unprocessable variables are excluded from output. These include any string
12+
variable or any variable configured for exclusion via earthdata-varinfo's
13+
ExcludedScienceVariables.
14+
715
## [v1.5.0] - 2025-08-11
816

917
### Changed
@@ -136,6 +144,8 @@ include updated documentation and files outlined by the
136144
For more information on internal releases prior to NASA open-source approval,
137145
see legacy-CHANGELOG.md.
138146

147+
[v1.6.0]: https://github.com/nasa/harmony-regridding-service/releases/tag/1.6.0
148+
[v1.5.0]: https://github.com/nasa/harmony-regridding-service/releases/tag/1.5.0
139149
[v1.4.0]: https://github.com/nasa/harmony-regridding-service/releases/tag/1.4.0
140150
[v1.3.0]: https://github.com/nasa/harmony-regridding-service/releases/tag/1.3.0
141151
[v1.2.0]: https://github.com/nasa/harmony-regridding-service/releases/tag/1.2.0

docker/service_version.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1.5.0
1+
1.6.0

harmony_regridding_service/exceptions.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,11 @@ class InvalidCRSResampling(RegridderException):
6464

6565
def __init__(self, message: str):
6666
super().__init__(message)
67+
68+
69+
class InvalidVariableRequest(RegridderException):
    """Signal that a request names variables the regridder cannot process.

    The contents of ``bad_vars`` are embedded in the message passed on to
    ``RegridderException``.

    """

    def __init__(self, bad_vars: set[str]):
        super().__init__(f'Request for unprocessable variable(s): {bad_vars}.')

harmony_regridding_service/regridding_service.py

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
)
1919
from harmony_regridding_service.exceptions import (
2020
InvalidCRSResampling,
21+
InvalidVariableRequest,
2122
)
2223
from harmony_regridding_service.file_io import (
2324
clone_variables,
@@ -33,12 +34,14 @@
3334
transfer_resampled_dimensions,
3435
unresampled_variables,
3536
)
37+
from harmony_regridding_service.var_utilitities import get_unprocessable_variables
3638

3739
logger = getLogger(__name__)
3840

39-
HRS_VARINFO_CONFIG_FILENAME = str(
40-
Path(Path(__file__).parent, 'config', 'HRS_varinfo_config.json')
41-
)
41+
42+
def varinfo_config_filename() -> str:
    """Return the path of the varinfo configuration file as a string.

    The file is ``HRS_varinfo_config.json`` inside the ``config`` directory
    that sits next to this source file.

    """
    config_path = Path(__file__).parent / 'config' / 'HRS_varinfo_config.json'
    return str(config_path)
4245

4346

4447
def regrid(
@@ -53,10 +56,12 @@ def regrid(
5356
logger.info(f'Format:\n {message.format}')
5457
logger.info(f'Source:\n {source}')
5558

59+
user_requested_variables = {f'/{v.name.lstrip("/")}' for v in source.variables}
60+
5661
var_info = VarInfoFromNetCDF4(
5762
input_filepath,
5863
short_name=source.shortName, # pyright: ignore[reportAttributeAccessIssue]
59-
config_file=HRS_VARINFO_CONFIG_FILENAME,
64+
config_file=varinfo_config_filename(),
6065
)
6166

6267
try:
@@ -81,12 +86,14 @@ def regrid(
8186

8287
vars_to_process = var_info.get_all_variables()
8388
unresampled_vars = unresampled_variables(var_info)
89+
8490
grid_mapping_variable_names = filter_grid_mappings_to_variables(
8591
input_grid_mappings(source_ds, vars_to_process)
8692
)
8793

88-
logger.info(f'dropping grid_mappings: {grid_mapping_variable_names}')
89-
vars_to_process -= grid_mapping_variable_names
94+
if grid_mapping_variable_names:
95+
logger.info(f'dropping grid_mappings: {grid_mapping_variable_names}')
96+
vars_to_process -= grid_mapping_variable_names
9097

9198
cloned_vars = clone_variables(
9299
source_ds, target_ds, unresampled_vars - grid_mapping_variable_names
@@ -100,8 +107,17 @@ def regrid(
100107
logger.info(f'processed dimension variables: {dimension_vars}')
101108
vars_to_process -= dimension_vars
102109

110+
unprocessable_variables = get_unprocessable_variables(var_info, vars_to_process)
111+
if unprocessable_variables:
112+
if unprocessable_variables.intersection(user_requested_variables):
113+
raise InvalidVariableRequest(
114+
unprocessable_variables.intersection(user_requested_variables)
115+
)
116+
logger.info(f'Dropping unprocessable variables: {unprocessable_variables}')
117+
vars_to_process -= unprocessable_variables
118+
103119
resampled_vars = resample_n_dimensional_variables(
104-
source_ds, target_ds, var_info, resampler_cache, set(vars_to_process)
120+
source_ds, target_ds, var_info, resampler_cache, vars_to_process
105121
)
106122
vars_to_process -= resampled_vars
107123
logger.info(f'resampled variables: {resampled_vars}')
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
"""Variable helper functions.
2+
3+
A collection of functions for getting information about variables, mostly with
4+
varinfo.
5+
6+
"""
7+
8+
import re
9+
10+
from varinfo import VarInfoFromNetCDF4
11+
12+
13+
def get_unprocessable_variables(
    var_info: VarInfoFromNetCDF4, var_list: set[str]
) -> set[str]:
    """Collect the variables in ``var_list`` that cannot be regridded.

    A variable is unprocessable when either of the following holds:

    * it is a string variable (pyresample cannot handle them), or
    * it is a science variable explicitly excluded by the varinfo
      configuration.

    Returns a new set; ``var_list`` itself is not modified.

    """
    unprocessable = set()

    # Run each check over the whole list in turn: string variables first,
    # then configured exclusions.
    for is_unprocessable in (is_string_variable, is_excluded_science_variable):
        unprocessable.update(
            name for name in var_list if is_unprocessable(var_info, name)
        )

    return unprocessable
29+
30+
31+
def is_excluded_science_variable(var_info: VarInfoFromNetCDF4, var) -> bool:
    """Returns True if variable is explicitly excluded by VarInfo configuration.

    The patterns in ``var_info.cf_config.excluded_science_variables`` are
    joined into a single alternation, compiled, and passed together with
    ``var`` to ``var_info.variable_is_excluded``.

    When no exclusion patterns are configured the variable is reported as not
    excluded without consulting varinfo: joining zero patterns produces an
    empty regular expression, and an empty regular expression matches every
    string.

    """
    exclusion_patterns = var_info.cf_config.excluded_science_variables
    if not exclusion_patterns:
        # NOTE(review): variable_is_excluded presumably applies the compiled
        # pattern with a regex match, in which case an empty pattern would
        # flag every variable as excluded - confirm against earthdata-varinfo.
        return False

    exclusions_pattern = re.compile('|'.join(exclusion_patterns))
    return var_info.variable_is_excluded(var, exclusions_pattern)
37+
38+
39+
def is_string_variable(var_info: VarInfoFromNetCDF4, var_name: str) -> bool:
    """Tell whether ``var_name`` holds string data.

    A variable counts as a string variable when the ``data_type`` reported for
    it by ``var_info`` is ``'str'`` or ``'bytes8'``.

    """
    variable = var_info.get_variable(var_name)
    return variable.data_type in ('str', 'bytes8')

pip_requirements.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
# Open source packages available from PyPI
22
dask~=2025.2.0
3-
earthdata-varinfo~=3.1.0
4-
harmony-service-lib~=2.6.0
3+
earthdata-varinfo~=3.3.0
4+
harmony-service-lib~=2.7.1
55
netCDF4~=1.7.2
66
numpy~=2.2.4
77
pyproj~=3.7.1
88
pyresample~=1.34.0
9-
xarray~=2025.3.1
9+
xarray~=2025.9.0
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
{
2+
"Identification": "harmony_regridding_service_test_varinfo_config",
3+
"Version": 1,
4+
"CollectionShortNamePath": [ ],
5+
"Mission": {
6+
"SPL[34].+": "SMAP"
7+
},
8+
"ExcludedScienceVariables": [
9+
{
10+
"Applicability": {
11+
"Mission": "SMAP"
12+
},
13+
"VariablePattern": [
14+
"/.*altitude_dem.*"
15+
]
16+
}
17+
],
18+
"MetadataOverrides": []
19+
}

tests/unit/test_regridding_service.py

Lines changed: 150 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""Tests the regridding service module."""
22

3+
import logging
34
from pathlib import Path
45
from unittest.mock import MagicMock, patch
56

@@ -8,6 +9,7 @@
89
from harmony_service_lib.message import Message as HarmonyMessage
910
from harmony_service_lib.message import Source as HarmonySource
1011

12+
from harmony_regridding_service.exceptions import InvalidVariableRequest
1113
from harmony_regridding_service.regridding_service import regrid
1214

1315
test_scale_extent = {
@@ -36,7 +38,7 @@ def test_regrid_projected_data_end_to_end(
3638
"""Test the full regrid process for projected input data."""
3739
input_filename = str(smap_projected_netcdf_file)
3840
output_filename = str(tmp_path / 'regridded_output.nc')
39-
logger_mock = MagicMock()
41+
logger = logging.getLogger()
4042

4143
# Define a target CRS [and optionally grid parameters]
4244
params = {
@@ -57,7 +59,7 @@ def test_regrid_projected_data_end_to_end(
5759
'harmony_regridding_service.regridding_service.generate_output_filename',
5860
return_value=output_filename,
5961
):
60-
result_filename = regrid(message, input_filename, source, logger_mock)
62+
result_filename = regrid(message, input_filename, source, logger)
6163

6264
assert result_filename == output_filename
6365
assert Path(output_filename).exists()
@@ -82,14 +84,14 @@ def test_regrid_projected_data_end_to_end(
8284
), description
8385

8486

85-
def test_regrid_smap_data_end_to_end(
87+
def test_regrid_smap_file(
8688
test_spl3ftp_ncfile,
8789
tmp_path,
8890
):
8991
"""Test the full regrid process for projected input data."""
9092
input_filename = str(test_spl3ftp_ncfile)
9193
output_filename = str(tmp_path / 'regridded_output.nc')
92-
logger_mock = MagicMock()
94+
logger = logging.getLogger()
9395

9496
# Define a target CRS [and optionally grid parameters]
9597
params = {
@@ -107,7 +109,7 @@ def test_regrid_smap_data_end_to_end(
107109
'harmony_regridding_service.regridding_service.generate_output_filename',
108110
return_value=output_filename,
109111
):
110-
result_filename = regrid(message, input_filename, source, logger_mock)
112+
result_filename = regrid(message, input_filename, source, logger)
111113

112114
assert result_filename == output_filename
113115
assert Path(output_filename).exists()
@@ -139,3 +141,146 @@ def test_regrid_smap_data_end_to_end(
139141
assert 'longitude' in dt[group], f'failed: {group}'
140142
assert 'latitude' in dt[group], f'failed: {group}'
141143
assert 'altitude_dem' in dt[group], f'failed: {group}'
144+
145+
146+
def test_regrid_smap_excluded_variable_file(
    test_spl3ftp_ncfile,
    tmp_path,
):
    """Test the full regrid process with excluded variables.

    A test varinfo config is patched in that excludes science variables
    matching "/.*altitude_dem.*".

    It is the same as the previous test, except that the last assertion
    checks that the variable is not in the output.

    """
    input_filename = str(test_spl3ftp_ncfile)
    output_filename = str(tmp_path / 'regridded_output.nc')
    logger = logging.getLogger()
    test_config = str(Path(__file__).parent / 'fixtures/test_HRS_varinfo_config.json')

    # Define a target CRS [and optionally grid parameters]
    message = HarmonyMessage(
        {
            'format': {
                'mime': 'application/x-netcdf',
                'crs': 'EPSG:4326',
            },
            'sources': [{'collection': 'C123-test', 'shortName': 'SPL3FTP'}],
        }
    )
    source = HarmonySource({'collection': 'C123-TEST', 'shortName': 'SPL3FTP'})

    # Control the output path and swap in the test varinfo configuration.
    output_name_patch = patch(
        'harmony_regridding_service.regridding_service.generate_output_filename',
        return_value=output_filename,
    )
    config_patch = patch(
        'harmony_regridding_service.regridding_service.varinfo_config_filename',
        return_value=test_config,
    )
    with output_name_patch, config_patch:
        result_filename = regrid(message, input_filename, source, logger)

    assert result_filename == output_filename
    assert Path(output_filename).exists()

    expected = {
        '/Freeze_Thaw_Retrieval_Data_Polar': {'width': 263, 'height': 122},
        '/Freeze_Thaw_Retrieval_Data_Global': {'width': 186, 'height': 73},
    }

    with xr.open_datatree(output_filename) as dt:
        for group, expects in expected.items():
            node = dt[group]

            assert 'crs' in node, f'failed: {group}'

            assert node.dims['y'] == expects['height'], f'failed: {group}'
            assert node.dims['x'] == expects['width'], f'failed: {group}'

            assert 'longitude' in node, f'failed: {group}'
            assert 'latitude' in node, f'failed: {group}'

            # The difference from the previous test: altitude_dem is
            # configured to be an excluded science variable.
            assert 'altitude_dem' not in node, f'failed: {group}'
222+
223+
224+
def test_regrid_smap_bad_user_requested_variable_data_end_to_end(
    test_spl3ftp_ncfile,
    tmp_path,
):
    """Test a request that specifies an explicitly excluded variable.

    The request is like the one in the previous test, but the user asks for a
    variable that the test varinfo config explicitly excludes. We expect this
    request to fail with InvalidVariableRequest.

    """
    input_filename = str(test_spl3ftp_ncfile)
    output_filename = str(tmp_path / 'regridded_output.nc')
    logger = MagicMock()
    test_config = str(Path(__file__).parent / 'fixtures/test_HRS_varinfo_config.json')

    # Define a user selected variable
    user_var = {
        'id': 'V12345789-EEDTEST',
        'name': 'Freeze_Thaw_Retrieval_Data_Global/altitude_dem',
    }

    # Define a target CRS [and optionally grid parameters]
    message = HarmonyMessage(
        {
            'format': {
                'mime': 'application/x-netcdf',
                'crs': 'EPSG:4326',
            },
            'sources': [
                {
                    'collection': 'C123-test',
                    'shortName': 'SPL3FTP',
                    'variables': [user_var],
                }
            ],
        }
    )
    source = HarmonySource(message['sources'][0])

    expected_message = (
        'Request for unprocessable variable(s): '
        "{'/Freeze_Thaw_Retrieval_Data_Global/altitude_dem'}."
    )

    # Control the output path and swap in the test varinfo configuration.
    output_name_patch = patch(
        'harmony_regridding_service.regridding_service.generate_output_filename',
        return_value=output_filename,
    )
    config_patch = patch(
        'harmony_regridding_service.regridding_service.varinfo_config_filename',
        return_value=test_config,
    )

    # TODO: when harmony-service-lib is updated, replace the manual message
    # comparison below with
    # pytest.raises(InvalidVariableRequest, match=re.escape(expected_message)).
    with output_name_patch, config_patch:
        with pytest.raises(InvalidVariableRequest) as exception_info:
            regrid(message, input_filename, source, logger)

    assert exception_info.value.message == expected_message

0 commit comments

Comments
 (0)