@@ -18,17 +18,35 @@ def compare_results_to_reference_file(
18
18
reference_file_name : str ,
19
19
identical : bool = True ,
20
20
coordinates_to_fix : list [str ] | None = None ,
21
+ subset_selector : dict | None = None ,
21
22
) -> None :
22
- """Use `DataTree` functionality to compare data values, variables,
23
- coordinates, metadata, and all their corresponding attributes of
24
- downloaded results to a reference file.
23
+ """Compare two files as DataTrees
24
+
25
+ Args:
26
+ results_file_name: Path to the results file to validate
27
+
28
+ reference_file_name: Path to the reference file to compare against
29
+
30
+ identical: If True, use strict comparison including attributes; if
31
+ False, compare only values
32
+
33
+ coordinates_to_fix: List of coordinate names to be renamed in the case
34
+ that the input has "unalignable" names.
35
+
36
+ subset_selector: Dict of top level group names to selection
37
+ dictionaries used to subset the input DataTree to the
38
+ previously subsetted reference data. (see
39
+ `subset_datatree`'s doc string for more information)
40
+
41
+ Raises:
42
+ AssertionError: when files don't match according to comparison criteria.
25
43
26
44
"""
27
45
if coordinates_to_fix is None :
28
46
coordinates_to_fix = []
29
47
30
- reference_groups = open_groups (reference_file_name )
31
- results_groups = open_groups (results_file_name )
48
+ reference_groups = open_groups (reference_file_name , decode_times = False )
49
+ results_groups = open_groups (results_file_name , decode_times = False )
32
50
33
51
# Fix unalignable coordinates
34
52
for coord in coordinates_to_fix :
@@ -38,6 +56,10 @@ def compare_results_to_reference_file(
38
56
reference_data = DataTree .from_dict (reference_groups )
39
57
results_data = DataTree .from_dict (results_groups )
40
58
59
+ # Limit comparison of data
60
+ if subset_selector is not None :
61
+ results_data = subset_datatree (results_data , subset_selector )
62
+
41
63
if identical :
42
64
assert results_data .identical (
43
65
reference_data
@@ -51,6 +73,37 @@ def compare_results_to_reference_file(
51
73
results_data = None
52
74
53
75
76
def subset_datatree(dt: DataTree, selectors: dict[str, dict]) -> DataTree:
    """Return a copy of a DataTree with selected groups index-subsetted.

    Each key of `selectors` names a group in the tree. That group is
    replaced, in a copy of `dt`, by the result of calling `isel` on it with
    the matching indexer dictionary. Groups that are not named in
    `selectors` are carried over from the copy unchanged.

    Args:
        dt: Input DataTree to subset.
        selectors: Dictionary mapping top level group names to dictionaries,
            where each inner dictionary maps dimension names to the indices
            to keep (given as slices in the example below).

            A sample selector dictionary:
                selectors = {
                    "Soil_Moisture_Retrieval_Data_3km": {
                        "y-dim": slice(1000, 2000),
                        "x-dim": slice(8800, 9800),
                    },
                    "Soil_Moisture_Retrieval_Data": {
                        "y-dim": slice(0, 1000),
                        "x-dim": slice(2500, 3500),
                    },
                }

            This is used to subset the SPL2SMAP granule: the data groups are
            the keys and the dimensions are subset into 1000x1000 grids that
            include valid output data.

    Returns:
        The copied DataTree with every group named in `selectors` replaced
        by its index-selected subset.
    """
    subset_tree = dt.copy()
    for group in selectors:
        indexers = selectors[group]
        group_node = subset_tree[group]
        # Swap the group in the copy for its positionally-indexed subset.
        subset_tree[group] = group_node.isel(indexers)
    return subset_tree
105
+
106
+
54
107
def unalign_groups (
55
108
dict_of_datasets : dict [str , Dataset ], coordinate : str
56
109
) -> dict [str , Dataset ]:
0 commit comments