Skip to content

Commit 7ccb191

Browse files
committed
Add SignChecker
1 parent 5fe7abd commit 7ccb191

File tree

4 files changed

+83
-4
lines changed

4 files changed

+83
-4
lines changed

docs/src/ensemble_builder.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,10 @@ of the metadata. For example, the short names are checked between the
157157
- [`SequentialIndicesChecker`](@ref ClimaCalibrate.Checker.SequentialIndicesChecker):
158158
Check that the indices of the dates of the simulation data corresponding to the
159159
dates of the metadata are sequential.
160+
- [`SignChecker`](@ref ClimaCalibrate.Checker.SignChecker):
161+
Check that the proportions of positive values in the simulation data and the
162+
observational data are approximately equal (within a default threshold of
163+
0.05).
160164

161165
By default, `GEnsembleBuilder` uses the first five checkers to validate
162166
compatibility between the simulation data and the metadata. You can also provide additional checkers using the `checkers`

ext/checkers.jl

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@ import ClimaCalibrate.Checker:
55
DimUnitsChecker,
66
UnitsChecker,
77
DimValuesChecker,
8-
SequentialIndicesChecker
8+
SequentialIndicesChecker,
9+
SignChecker
910
import ClimaCalibrate.Checker
1011

1112
"""
@@ -237,3 +238,37 @@ function Checker.check(
237238
end
238239
return true
239240
end
241+
242+
"""
    Checker.check(
        checker::SignChecker,
        var::OutputVar,
        metadata::Metadata;
        data,
        verbose = false,
    )

Return `true` if the absolute difference between the proportion of positive
values in `var.data` and the proportion of positive values in `data` is less than
`checker.threshold`, and `false` otherwise.

Each proportion is computed over the non-`NaN` values only, so `NaN` entries in
either array are not counted as non-positive values. If an array has no non-`NaN`
values, its proportion is `NaN` and the check returns `false`.

`metadata` is not used by this check. If `verbose` is `true` and the check fails,
both proportions are logged with `@info`.
"""
function Checker.check(
    checker::SignChecker,
    var::OutputVar,
    metadata::Metadata;
    data,
    verbose = false,
)
    # `NaN > 0` is `false`, so only the denominator has to exclude NaNs. A
    # NaN-aware mean of `x .> 0` cannot do this, because the comparison has
    # already turned every NaN into `false` before the mean is taken.
    positive_proportion(x) = count(>(0), x) / count(!isnan, x)

    obs_pos_proportion = positive_proportion(data)

    # This is inaccurate, because not all the values in var.data will end up in
    # the G ensemble matrix. See _match_dates for one case. However, the mean
    # should not change that much with additional times.
    sim_pos_proportion = positive_proportion(var.data)

    same_sign = abs(obs_pos_proportion - sim_pos_proportion) < checker.threshold
    if verbose && !same_sign
        @info "Proportion of positive values in the simulation data ($sim_pos_proportion) is not the same as the proportion of positive values in the observational data ($obs_pos_proportion)"
    end
    return same_sign
end

src/checkers.jl

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,13 +77,25 @@ corresponding to the dates of the metadata is sequential.
7777
struct SequentialIndicesChecker <: AbstractChecker end
7878

7979
"""
80+
struct SignChecker{FT <: AbstractFloat} <: AbstractChecker
81+
8082
A struct that checks that the proportion of positive values in the simulation
8183
data and observational data is roughly the same.
82-
"""
83-
struct SignChecker <: AbstractChecker end
8484
85+
To change the default threshold of 0.05, you can pass a float to `SignChecker`.
86+
```julia
87+
import ClimaCalibrate
88+
sign_checker = ClimaCalibrate.Checker.SignChecker(0.01)
89+
```
8590
"""
86-
check(checker::AbstractChecker; verbose = false)
91+
@kwdef struct SignChecker{FT <: AbstractFloat} <: AbstractChecker
    # Threshold for the sign check; the constructors require it to lie in
    # [0, 1]. Defaults to 0.05 in the keyword constructors generated by `@kwdef`.
    threshold::FT = 0.05

    # Parametric constructor. `@kwdef` also generates `SignChecker{FT}(; threshold)`,
    # which forwards here, so this method must exist for that form to work.
    # Throws an `ErrorException` if `threshold` is outside [0, 1] (NaN included,
    # since every comparison with NaN is false).
    function SignChecker{FT}(threshold) where {FT <: AbstractFloat}
        zero(threshold) <= threshold <= one(threshold) ||
            error("Threshold ($threshold) should be between zero and one")
        return new{FT}(threshold)
    end

    # Convenience constructor that infers `FT` from the argument. Non-float reals
    # (e.g. `0` or `1 // 20`) are promoted with `float` so they satisfy
    # `FT <: AbstractFloat`; floating-point thresholds are passed through as is.
    function SignChecker(threshold)
        float_threshold = float(threshold)
        return SignChecker{typeof(float_threshold)}(float_threshold)
    end
end
8799

88100
"""
89101
check(checker::AbstractChecker,

test/ensemble_builder.jl

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ import ClimaAnalysis.Template:
3535

3636
@testset "Is compatible with metadata" begin
3737
make_metadata(var) = ClimaAnalysis.flatten(var).metadata
38+
make_flat_data(var) = ClimaAnalysis.flatten(var).data
3839
lat = [-90.0, 0.0, 90.0]
3940
var =
4041
TemplateVar() |>
@@ -185,6 +186,33 @@ import ClimaAnalysis.Template:
185186
make_metadata(date_var4),
186187
verbose = true,
187188
)
189+
190+
# Check sign of data
191+
sign_checker = Checker.SignChecker(0.05)
192+
neg_var = ClimaAnalysis.remake(var, data = -var.data)
193+
194+
@test Checker.check(
195+
sign_checker,
196+
var,
197+
make_metadata(var),
198+
data = make_flat_data(var),
199+
)
200+
@test !Checker.check(
201+
sign_checker,
202+
var,
203+
make_metadata(neg_var),
204+
data = make_flat_data(neg_var),
205+
)
206+
207+
@test_logs (:info, r"Proportion of positive values in the simulation data ") Checker.check(
208+
sign_checker,
209+
var,
210+
make_metadata(neg_var),
211+
data = make_flat_data(neg_var),
212+
verbose = true,
213+
)
214+
215+
@test_throws ErrorException Checker.SignChecker(-0.05)
188216
end
189217

190218
@testset "Match dates" begin

0 commit comments

Comments
 (0)