Skip to content

Commit 101d3f6

Browse files
authored
Generalize Random feature defaults for greater stability (#375)
* new defaults, neaten covariance estimation switches new verbose flags and default data minmax-like data scaling update regression example update ishigami example update g-function example update Lorenz spatial dep example missing verbose flag typo vectorising scalings format Cloudy example add required pkgs Updated LR factorization to LR + D reduce RF nonsep removed sigma learning, adjust prior ranges tweak examples for new priors fix plots bug tweak setups for RF-scalar and nonsep add edge-case protection for 1D->1D format * bugfix dimen error * remove extra features * format... * increase test coverage * try inc. RF separable flexibility * more samples * updates * rm info * remove all tikhonov - unused anywhere and fails tests (besides we have tikhonov priors already * try diferent combo * add necessary deepcopy to protect the test * format
1 parent 14e03ab commit 101d3f6

19 files changed

+969
-564
lines changed

docs/Manifest.toml

Lines changed: 273 additions & 221 deletions
Large diffs are not rendered by default.

examples/Cloudy/Cloudy_calibrate.jl

Lines changed: 62 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,8 @@ prior_N0 = constrained_gaussian(param_names[1], 400, 300, 0.4 * N0_true, Inf)
9494
prior_θ = constrained_gaussian(param_names[2], 1.0, 5.0, 1e-1, Inf)
9595
prior_k = constrained_gaussian(param_names[3], 0.2, 1.0, 1e-4, Inf)
9696
priors = combine_distributions([prior_N0, prior_θ, prior_k])
97-
# Plot the priors
98-
p = plot(priors, constrained = false)
97+
# Plots.Plot the priors
98+
p = Plots.plot(priors, constrained = false)
9999
savefig(p, output_directory * "cloudy_priors.png")
100100

101101
###
@@ -160,7 +160,7 @@ dummy = ones(n_params)
160160
dist_type = ParticleDistributions.GammaPrimitiveParticleDistribution(dummy...)
161161
model_settings = DynamicalModel.ModelSettings(kernel, dist_type, moments, tspan)
162162
# EKI iterations
163-
n_iter = N_iter
163+
n_iter = [N_iter]
164164
for n in 1:N_iter
165165
# Return transformed parameters in physical/constrained space
166166
ϕ_n = get_ϕ_final(priors, ekiobj)
@@ -169,12 +169,12 @@ for n in 1:N_iter
169169
G_ens = hcat(G_n...) # reformat
170170
terminate = EnsembleKalmanProcesses.update_ensemble!(ekiobj, G_ens)
171171
if !isnothing(terminate)
172-
n_iter = n - 1
172+
n_iter[1] = n - 1
173173
break
174174
end
175175

176176
end
177-
177+
n_iter = n_iter[1]
178178

179179
# EKI results: Has the ensemble collapsed toward the truth?
180180
θ_true = transform_constrained_to_unconstrained(priors, ϕ_true)
@@ -211,8 +211,14 @@ u_init = get_u_prior(ekiobj)
211211
anim_eki_unconst_cloudy = @animate for i in 1:(n_iter - 1)
212212
u_i = get_u(ekiobj, i)
213213

214-
p1 = plot(u_i[1, :], u_i[2, :], seriestype = :scatter, xlims = extrema(u_init[1, :]), ylims = extrema(u_init[2, :]))
215-
plot!(
214+
p1 = Plots.plot(
215+
u_i[1, :],
216+
u_i[2, :],
217+
seriestype = :scatter,
218+
xlims = extrema(u_init[1, :]),
219+
ylims = extrema(u_init[2, :]),
220+
)
221+
Plots.plot!(
216222
p1,
217223
[θ_true[1]],
218224
xaxis = "u1",
@@ -224,10 +230,16 @@ anim_eki_unconst_cloudy = @animate for i in 1:(n_iter - 1)
224230
margin = 5mm,
225231
title = "EKI iteration = " * string(i),
226232
)
227-
plot!(p1, [θ_true[2]], seriestype = "hline", linestyle = :dash, linecolor = :red, label = "optimum")
228-
229-
p2 = plot(u_i[2, :], u_i[3, :], seriestype = :scatter, xlims = extrema(u_init[2, :]), ylims = extrema(u_init[3, :]))
230-
plot!(
233+
Plots.plot!(p1, [θ_true[2]], seriestype = "hline", linestyle = :dash, linecolor = :red, label = "optimum")
234+
235+
p2 = Plots.plot(
236+
u_i[2, :],
237+
u_i[3, :],
238+
seriestype = :scatter,
239+
xlims = extrema(u_init[2, :]),
240+
ylims = extrema(u_init[3, :]),
241+
)
242+
Plots.plot!(
231243
p2,
232244
[θ_true[2]],
233245
xaxis = "u2",
@@ -240,10 +252,16 @@ anim_eki_unconst_cloudy = @animate for i in 1:(n_iter - 1)
240252
title = "EKI iteration = " * string(i),
241253
)
242254

243-
plot!(p2, [θ_true[3]], seriestype = "hline", linestyle = :dash, linecolor = :red, label = "optimum")
255+
Plots.plot!(p2, [θ_true[3]], seriestype = "hline", linestyle = :dash, linecolor = :red, label = "optimum")
244256

245-
p3 = plot(u_i[3, :], u_i[1, :], seriestype = :scatter, xlims = extrema(u_init[3, :]), ylims = extrema(u_init[1, :]))
246-
plot!(
257+
p3 = Plots.plot(
258+
u_i[3, :],
259+
u_i[1, :],
260+
seriestype = :scatter,
261+
xlims = extrema(u_init[3, :]),
262+
ylims = extrema(u_init[1, :]),
263+
)
264+
Plots.plot!(
247265
p3,
248266
[θ_true[3]],
249267
xaxis = "u3",
@@ -256,9 +274,9 @@ anim_eki_unconst_cloudy = @animate for i in 1:(n_iter - 1)
256274
title = "EKI iteration = " * string(i),
257275
)
258276

259-
plot!(p3, [θ_true[1]], seriestype = "hline", linestyle = :dash, linecolor = :red, label = "optimum")
277+
Plots.plot!(p3, [θ_true[1]], seriestype = "hline", linestyle = :dash, linecolor = :red, label = "optimum")
260278

261-
p = plot(p1, p2, p3, layout = (1, 3))
279+
p = Plots.plot(p1, p2, p3, layout = (1, 3))
262280
end
263281

264282
gif(anim_eki_unconst_cloudy, joinpath(output_directory, "cloudy_eki_unconstr.gif"), fps = 1) # hide
@@ -268,8 +286,14 @@ gif(anim_eki_unconst_cloudy, joinpath(output_directory, "cloudy_eki_unconstr.gif
268286
anim_eki_cloudy = @animate for i in 1:(n_iter - 1)
269287
ϕ_i = get_ϕ(priors, ekiobj, i)
270288

271-
p1 = plot(ϕ_i[1, :], ϕ_i[2, :], seriestype = :scatter, xlims = extrema(ϕ_init[1, :]), ylims = extrema(ϕ_init[2, :]))
272-
plot!(
289+
p1 = Plots.plot(
290+
ϕ_i[1, :],
291+
ϕ_i[2, :],
292+
seriestype = :scatter,
293+
xlims = extrema(ϕ_init[1, :]),
294+
ylims = extrema(ϕ_init[2, :]),
295+
)
296+
Plots.plot!(
273297
p1,
274298
[ϕ_true[1]],
275299
xaxis = "ϕ1",
@@ -282,11 +306,17 @@ anim_eki_cloudy = @animate for i in 1:(n_iter - 1)
282306
title = "EKI iteration = " * string(i),
283307
)
284308

285-
plot!(p1, [ϕ_true[2]], seriestype = "hline", linestyle = :dash, linecolor = :red, label = "optimum")
309+
Plots.plot!(p1, [ϕ_true[2]], seriestype = "hline", linestyle = :dash, linecolor = :red, label = "optimum")
286310

287-
p2 = plot(ϕ_i[2, :], ϕ_i[3, :], seriestype = :scatter, xlims = extrema(ϕ_init[2, :]), ylims = extrema(ϕ_init[3, :]))
311+
p2 = Plots.plot(
312+
ϕ_i[2, :],
313+
ϕ_i[3, :],
314+
seriestype = :scatter,
315+
xlims = extrema(ϕ_init[2, :]),
316+
ylims = extrema(ϕ_init[3, :]),
317+
)
288318

289-
plot!(
319+
Plots.plot!(
290320
p2,
291321
[ϕ_true[2]],
292322
xaxis = "ϕ2",
@@ -299,10 +329,16 @@ anim_eki_cloudy = @animate for i in 1:(n_iter - 1)
299329
title = "EKI iteration = " * string(i),
300330
)
301331

302-
plot!(p2, [ϕ_true[3]], seriestype = "hline", linestyle = :dash, linecolor = :red, label = "optimum")
332+
Plots.plot!(p2, [ϕ_true[3]], seriestype = "hline", linestyle = :dash, linecolor = :red, label = "optimum")
303333

304-
p3 = plot(ϕ_i[3, :], ϕ_i[1, :], seriestype = :scatter, xlims = extrema(ϕ_init[3, :]), ylims = extrema(ϕ_init[1, :]))
305-
plot!(
334+
p3 = Plots.plot(
335+
ϕ_i[3, :],
336+
ϕ_i[1, :],
337+
seriestype = :scatter,
338+
xlims = extrema(ϕ_init[3, :]),
339+
ylims = extrema(ϕ_init[1, :]),
340+
)
341+
Plots.plot!(
306342
p3,
307343
[ϕ_true[3]],
308344
xaxis = "ϕ3",
@@ -314,9 +350,9 @@ anim_eki_cloudy = @animate for i in 1:(n_iter - 1)
314350
label = false,
315351
title = "EKI iteration = " * string(i),
316352
)
317-
plot!(p3, [ϕ_true[1]], seriestype = "hline", linestyle = :dash, linecolor = :red, label = "optimum")
353+
Plots.plot!(p3, [ϕ_true[1]], seriestype = "hline", linestyle = :dash, linecolor = :red, label = "optimum")
318354

319-
p = plot(p1, p2, p3, layout = (1, 3))
355+
p = Plots.plot(p1, p2, p3, layout = (1, 3))
320356

321357
end
322358

examples/Cloudy/Cloudy_emulate_sample.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ using CalibrateEmulateSample.Emulators
1717
using CalibrateEmulateSample.MarkovChainMonteCarlo
1818
using CalibrateEmulateSample.Utilities
1919
using CalibrateEmulateSample.EnsembleKalmanProcesses
20+
using CalibrateEmulateSample.ParameterDistributions
21+
using CalibrateEmulateSample.DataContainers
2022

2123
################################################################################
2224
# #
@@ -90,8 +92,8 @@ function main()
9092

9193
cases = [
9294
"rf-scalar",
93-
"gp-gpjl", # Veeeery slow predictions
9495
"rf-nonsep",
96+
"gp-gpjl", # Veeeery slow predictions
9597
]
9698

9799
# Specify cases to run (e.g., case_mask = [2] only runs the second case)

examples/Emulator/G-function/emulate.jl

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ function main()
4040

4141
rng = MersenneTwister(seed)
4242

43-
n_repeats = 5# repeat exp with same data.
44-
n_dimensions = 10
43+
n_repeats = 10# repeat exp with same data.
44+
n_dimensions = 20
4545
# To create the sampling
4646
n_data_gen = 800
4747

@@ -81,7 +81,8 @@ function main()
8181
input[:, i] = samples[ind[i], :]
8282
output[i] = y[ind[i]] + noise[i]
8383
end
84-
encoder_schedule = (decorrelate_structure_mat(), "out")
84+
# encoder_schedule = (decorrelate_structure_mat(), "out")
85+
encoder_schedule = (minmax_scale(), "in_and_out")
8586
iopairs = PairedDataContainer(input, output)
8687

8788
# analytic sobol indices taken from
@@ -99,12 +100,11 @@ function main()
99100
overrides = Dict(
100101
"verbose" => true,
101102
"scheduler" => DataMisfitController(terminate_at = 1e2),
102-
"n_features_opt" => 120,
103+
"n_features_opt" => 100,#120,
103104
"n_iteration" => 10,
104-
"cov_sample_multiplier" => 3.0,
105-
#"localization" => SEC(0.1),#,Doesnt help much tbh
105+
"cov_sample_multiplier" => 0.1,
106106
"n_ensemble" => 100, #40*n_dimensions,
107-
"n_cross_val_sets" => 4,
107+
"n_cross_val_sets" => 2,
108108
)
109109
if case == "Prior"
110110
# don't do anything
@@ -126,7 +126,8 @@ function main()
126126

127127
elseif case ["RF-scalar", "Prior"]
128128
rank = n_dimensions #<= 10 ? n_dimensions : 10
129-
kernel_structure = SeparableKernel(LowRankFactor(rank, nugget), OneDimFactor())
129+
# kernel_structure = SeparableKernel(LowRankFactor(rank, nugget), OneDimFactor())
130+
kernel_structure = SeparableKernel(DiagonalFactor(nugget), OneDimFactor())
130131
n_features = n_dimensions <= 10 ? n_dimensions * 100 : 1000
131132
if (n_features / n_train_pts > 0.9) && (n_features / n_train_pts < 1.1)
132133
@warn "The number of features similar to the number of training points, poor performance expected, change one or other of these"

examples/Emulator/G-function/plot_result.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ function main()
77
output_directory = "output"
88
cases = ["Prior", "GP", "RF-scalar"]
99
case = cases[3]
10-
n_dimensions = 3
11-
filename = joinpath(output_directory, "Gfunction_$(case)_$(n_dimensions).jld2")
10+
n_dimensions = 20
11+
filename = joinpath(output_directory, "GFunction_$(case)_$(n_dimensions).jld2")
1212
legend = true
1313

1414
(

examples/Emulator/Ishigami/emulate.jl

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ function main()
3737

3838
rng = MersenneTwister(seed)
3939

40-
n_repeats = 30 # repeat exp with same data.
40+
n_repeats = 10 # repeat exp with same data.
4141

4242
# To create the sampling
4343
n_data_gen = 2000
@@ -82,14 +82,15 @@ function main()
8282
end
8383
iopairs = PairedDataContainer(input, output)
8484
cases = ["Prior", "GP", "RF-scalar"]
85-
case = cases[2]
85+
case = cases[3]
8686
encoder_schedule = (decorrelate_structure_mat(), "out")
87-
nugget = Float64(1e-4)
87+
nugget = Float64(1e-8)
8888
overrides = Dict(
89+
"verbose" => true,
8990
"scheduler" => DataMisfitController(terminate_at = 1e4),
90-
"n_features_opt" => 150,
91+
"n_features_opt" => 100,
9192
"n_ensemble" => 50,
92-
"n_iteration" => 20,
93+
"n_iteration" => 10,
9394
)
9495

9596
if case == "Prior"

examples/Emulator/Ishigami/plot_result.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ function main()
99

1010
output_directory = "output"
1111
cases = ["Prior", "GP", "RF-scalar"]
12-
case = cases[2]
12+
case = cases[3]
1313
filename = joinpath(output_directory, "results_$case.jld2")
1414

1515
(sobol_pts, train_idx, mlt_pred_y, mlt_sobol, analytic_sobol, true_y, noise_sample, noise_cov, estimated_sobol) =

examples/Emulator/L63/emulate.jl

Lines changed: 12 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ function main()
2626
# rng
2727
rng = MersenneTwister(1232435)
2828

29-
n_repeats = 1 # repeat exp with same data.
29+
n_repeats = 5 # repeat exp with same data.
3030
println("run experiment $n_repeats times")
3131

3232
#for later plots
@@ -96,20 +96,11 @@ function main()
9696

9797

9898
# Emulate
99-
cases = [
100-
"GP",
101-
"RF-prior",
102-
"RF-scalar",
103-
"RF-scalar-diagin",
104-
"RF-svd-nonsep",
105-
"RF-nosvd-nonsep",
106-
"RF-nosvd-sep",
107-
"RF-svd-sep",
108-
]
109-
110-
case = cases[3] # 7
111-
112-
nugget = Float64(1e-8)
99+
cases = ["GP", "RF-prior", "RF-lr-scalar", "rf-diag-scalar", "RF-lr-lr", "RF-nonsep"]
100+
101+
case = cases[4] # 7
102+
103+
nugget = Float64(1e-4)
113104
u_test = []
114105
u_hist = []
115106
train_err = []
@@ -148,10 +139,10 @@ function main()
148139
optimizer_options = rf_optimizer_overrides,
149140
)
150141

151-
elseif case ["RF-scalar", "RF-scalar-diagin"]
142+
elseif case ["RF-lr-scalar", "rf-diag-scalar"]
152143
n_features = 10 * Int(floor(sqrt(3 * n_tp)))
153144
kernel_structure =
154-
case == "RF-scalar-diagin" ? SeparableKernel(DiagonalFactor(nugget), OneDimFactor()) :
145+
case == "rf-diag-scalar" ? SeparableKernel(DiagonalFactor(nugget), OneDimFactor()) :
155146
SeparableKernel(LowRankFactor(3, nugget), OneDimFactor())
156147
mlt = ScalarRandomFeatureInterface(
157148
n_features,
@@ -160,7 +151,7 @@ function main()
160151
kernel_structure = kernel_structure,
161152
optimizer_options = rf_optimizer_overrides,
162153
)
163-
elseif case ["RF-svd-nonsep"]
154+
elseif case ["RF-nonsep"]
164155
kernel_structure = NonseparableKernel(LowRankFactor(4, nugget))
165156
n_features = 500
166157

@@ -172,18 +163,7 @@ function main()
172163
kernel_structure = kernel_structure,
173164
optimizer_options = rf_optimizer_overrides,
174165
)
175-
elseif case ["RF-nosvd-nonsep"]
176-
kernel_structure = NonseparableKernel(LowRankFactor(4, nugget))
177-
n_features = 500
178-
mlt = VectorRandomFeatureInterface(
179-
n_features,
180-
3,
181-
3,
182-
rng = rng,
183-
kernel_structure = kernel_structure,
184-
optimizer_options = rf_optimizer_overrides,
185-
)
186-
elseif case ["RF-nosvd-sep", "RF-svd-sep"]
166+
elseif case ["RF-lr-lr"]
187167
kernel_structure = SeparableKernel(LowRankFactor(3, nugget), LowRankFactor(1, nugget))
188168
n_features = 500
189169
mlt = VectorRandomFeatureInterface(
@@ -210,7 +190,7 @@ function main()
210190
end
211191

212192
# Emulate
213-
if case ["RF-nosvd-nonsep", "RF-nosvd-sep"]
193+
if case ["RF-nonsep"]
214194
encoder_schedule = [] # no encoding
215195
else
216196
encoder_schedule = (decorrelate_structure_mat(), "out")
@@ -224,7 +204,7 @@ function main()
224204
optimize_hyperparameters!(emulator)
225205

226206
# diagnostics
227-
if case ["RF-svd-nonsep", "RF-nosvd-nonsep", "RF-svd-sep"]
207+
if case ["RF-nonsep" "RF-lr-lr"]
228208
push!(opt_diagnostics, get_optimizer(mlt)[1]) #length-1 vec of vec -> vec
229209
end
230210

examples/Emulator/Regression_2d_2d/Project.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
[deps]
22
CalibrateEmulateSample = "95e48a1f-0bec-4818-9538-3db4340308e3"
3+
ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4"
34
Conda = "8f4d0f93-b110-5947-807f-2305c1781a2d"
45
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
56
FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41"
7+
JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
68
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
79
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
810
PyPlot = "d330b81b-6aea-500a-939a-2ce795aea3ee"

0 commit comments

Comments
 (0)