|
264 | 264 | # and order-dependent refinement: h ~ (2/yddnrm)^(1/(p+1)) |
265 | 265 | # ================================================================= |
266 | 266 |
|
| 267 | + # auto_dt_reset! adds 2 to stats.nf (for H-W's f₀+f₁), but CVHin |
| 268 | + # only shares f₀ with that assumption. Compensate by subtracting 1; |
| 269 | + # iteration f calls are tracked individually inside the loop. |
| 270 | + integrator.stats.nf -= 1 |
| 271 | + |
267 | 272 | # NaN check via d₁ = norm(f₀/sk) |
268 | 273 | if u0 isa Array |
269 | 274 | @inbounds @simd ivdep for i in eachindex(u0) |
|
302 | 307 | end |
303 | 308 | end |
304 | 309 | else |
305 | | - u0_norms = internalnorm.(u0, t) |
306 | | - f₀_norms = internalnorm.(f₀, t) |
307 | | - tols = @.. broadcast = false reltol * u0_norms + abstol |
308 | | - denoms = @.. broadcast = false convert(_tType, 0.1) * u0_norms + tols |
309 | | - numers = @.. broadcast = false f₀_norms * oneunit_tType |
310 | | - hub_inv_vals = ifelse.(denoms .> 0, numers ./ denoms, zero(_tType)) |
311 | | - hub_inv = maximum(hub_inv_vals) |
| 310 | + # GPU-compatible: use abs/max broadcasts instead of scalar indexing |
| 311 | + denoms = @.. broadcast = false convert(_tType, 0.1) * abs(u0) + sk |
| 312 | + numers = @.. broadcast = false abs(f₀) * oneunit_tType |
| 313 | + hub_inv = maximum(numers ./ max.(denoms, eps(eltype(denoms)))) |
312 | 314 | end |
313 | 315 |
|
314 | 316 | hub = convert(_tType, 0.1) * tdist |
|
342 | 344 | @.. broadcast = false u₁ = u0 + hgs * f₀ |
343 | 345 | end |
344 | 346 | f(f₁, u₁, p, t + convert(_tType, hgs)) |
| 347 | + integrator.stats.nf += 1 |
345 | 348 |
|
346 | 349 | if prob.f.mass_matrix != I && ftmp !== nothing && ( |
347 | 350 | !(prob.f isa DynamicalODEFunction) || |
|
562 | 565 | # Based on SUNDIALS CVODE's CVHin algorithm (Hindmarsh et al., 2005) |
563 | 566 | # ================================================================= |
564 | 567 |
|
| 568 | + # auto_dt_reset! adds 2 to stats.nf (for H-W's f₀+f₁), but CVHin |
| 569 | + # only shares f₀ with that assumption. Compensate by subtracting 1; |
| 570 | + # iteration f calls are tracked individually inside the loop. |
| 571 | + integrator.stats.nf -= 1 |
| 572 | + |
565 | 573 | # NaN check via d₁ = norm(f₀/sk) |
566 | 574 | d₁ = internalnorm(f₀ ./ sk .* oneunit_tType, t) |
567 | 575 | if isnan(d₁) |
|
579 | 587 | hlb = convert(_tType, 100 * eps_tType * oneunit_tType) |
580 | 588 |
|
581 | 589 | # Upper bound: most restrictive component of |f₀| / (0.1*|u0| + tol) |
582 | | - u0_norms = internalnorm.(u0, t) |
583 | | - f₀_norms = internalnorm.(f₀, t) |
584 | | - tols = @.. broadcast = false reltol * u0_norms + abstol |
585 | | - denoms = @.. broadcast = false convert(_tType, 0.1) * u0_norms + tols |
586 | | - numers = @.. broadcast = false f₀_norms * oneunit_tType |
587 | | - hub_inv_vals = ifelse.(denoms .> 0, numers ./ denoms, zero(_tType)) |
588 | | - hub_inv = maximum(hub_inv_vals) |
| 590 | + denoms = @.. broadcast = false convert(_tType, 0.1) * abs(u0) + sk |
| 591 | + numers = @.. broadcast = false abs(f₀) * oneunit_tType |
| 592 | + hub_inv = maximum(numers ./ max.(denoms, eps(eltype(denoms)))) |
589 | 593 |
|
590 | 594 | hub = convert(_tType, 0.1) * tdist |
591 | 595 | if hub * hub_inv > 1 |
|
610 | 614 | hgs = hg * tdir |
611 | 615 | u₁ = @.. broadcast = false u0 + hgs * f₀ |
612 | 616 | f₁ = f(u₁, p, t + convert(_tType, hgs)) |
| 617 | + integrator.stats.nf += 1 |
613 | 618 |
|
614 | 619 | if !any(x -> any(!isfinite, x), f₁) |
615 | 620 | hg_ok = true |
|
0 commit comments