Skip to content

Commit 3ff4d42

Browse files
Fix stats.nf tracking and GPU compatibility in CVHin
- Fix stats.nf over-count: auto_dt_reset! adds 2 (for H-W's f₀+f₁), but CVHin shares only f₀. Compensate with stats.nf -= 1 at CVHin entry; iteration f calls are tracked individually inside the loop.
- Fix GPU scalar indexing: replace the internalnorm.()/ifelse.() broadcasts with abs.()/max.() in the hub_inv computation, avoiding AbstractFloat type-inference issues on JLArrays.
- Track each CVHin iteration f call with stats.nf += 1.

Co-Authored-By: Chris Rackauckas <accounts@chrisrackauckas.com>
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent ee9d748 commit 3ff4d42

File tree

1 file changed

+19
-14
lines changed

1 file changed

+19
-14
lines changed

lib/OrdinaryDiffEqCore/src/initdt.jl

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,11 @@
264264
# and order-dependent refinement: h ~ (2/yddnrm)^(1/(p+1))
265265
# =================================================================
266266

267+
# auto_dt_reset! adds 2 to stats.nf (for H-W's f₀+f₁), but CVHin
268+
# only shares f₀ with that assumption. Compensate by subtracting 1;
269+
# iteration f calls are tracked individually inside the loop.
270+
integrator.stats.nf -= 1
271+
267272
# NaN check via d₁ = norm(f₀/sk)
268273
if u0 isa Array
269274
@inbounds @simd ivdep for i in eachindex(u0)
@@ -302,13 +307,10 @@
302307
end
303308
end
304309
else
305-
u0_norms = internalnorm.(u0, t)
306-
f₀_norms = internalnorm.(f₀, t)
307-
tols = @.. broadcast = false reltol * u0_norms + abstol
308-
denoms = @.. broadcast = false convert(_tType, 0.1) * u0_norms + tols
309-
numers = @.. broadcast = false f₀_norms * oneunit_tType
310-
hub_inv_vals = ifelse.(denoms .> 0, numers ./ denoms, zero(_tType))
311-
hub_inv = maximum(hub_inv_vals)
310+
# GPU-compatible: use abs/max broadcasts instead of scalar indexing
311+
denoms = @.. broadcast = false convert(_tType, 0.1) * abs(u0) + sk
312+
numers = @.. broadcast = false abs(f₀) * oneunit_tType
313+
hub_inv = maximum(numers ./ max.(denoms, eps(eltype(denoms))))
312314
end
313315

314316
hub = convert(_tType, 0.1) * tdist
@@ -342,6 +344,7 @@
342344
@.. broadcast = false u₁ = u0 + hgs * f₀
343345
end
344346
f(f₁, u₁, p, t + convert(_tType, hgs))
347+
integrator.stats.nf += 1
345348

346349
if prob.f.mass_matrix != I && ftmp !== nothing && (
347350
!(prob.f isa DynamicalODEFunction) ||
@@ -562,6 +565,11 @@ end
562565
# Based on SUNDIALS CVODE's CVHin algorithm (Hindmarsh et al., 2005)
563566
# =================================================================
564567

568+
# auto_dt_reset! adds 2 to stats.nf (for H-W's f₀+f₁), but CVHin
569+
# only shares f₀ with that assumption. Compensate by subtracting 1;
570+
# iteration f calls are tracked individually inside the loop.
571+
integrator.stats.nf -= 1
572+
565573
# NaN check via d₁ = norm(f₀/sk)
566574
d₁ = internalnorm(f₀ ./ sk .* oneunit_tType, t)
567575
if isnan(d₁)
@@ -579,13 +587,9 @@ end
579587
hlb = convert(_tType, 100 * eps_tType * oneunit_tType)
580588

581589
# Upper bound: most restrictive component of |f₀| / (0.1*|u0| + tol)
582-
u0_norms = internalnorm.(u0, t)
583-
f₀_norms = internalnorm.(f₀, t)
584-
tols = @.. broadcast = false reltol * u0_norms + abstol
585-
denoms = @.. broadcast = false convert(_tType, 0.1) * u0_norms + tols
586-
numers = @.. broadcast = false f₀_norms * oneunit_tType
587-
hub_inv_vals = ifelse.(denoms .> 0, numers ./ denoms, zero(_tType))
588-
hub_inv = maximum(hub_inv_vals)
590+
denoms = @.. broadcast = false convert(_tType, 0.1) * abs(u0) + sk
591+
numers = @.. broadcast = false abs(f₀) * oneunit_tType
592+
hub_inv = maximum(numers ./ max.(denoms, eps(eltype(denoms))))
589593

590594
hub = convert(_tType, 0.1) * tdist
591595
if hub * hub_inv > 1
@@ -610,6 +614,7 @@ end
610614
hgs = hg * tdir
611615
u₁ = @.. broadcast = false u0 + hgs * f₀
612616
f₁ = f(u₁, p, t + convert(_tType, hgs))
617+
integrator.stats.nf += 1
613618

614619
if !any(x -> any(!isfinite, x), f₁)
615620
hg_ok = true

0 commit comments

Comments
 (0)