I have been testing the `vtkWriter` because @ddeboerfluid ran into some issues with very large GPU simulations.
using WaterLily,StaticArrays,CUDA,BiotSavartBCs,WriteVTK
using TimerOutputs
# Build a `BiotSimulation` on an (N,N,N) grid around an `AutoBody` plate.
#   N   : number of cells per direction
#   R   : half-width of the plate, also passed as the simulation length scale
#   a   : acceleration used for the ramp of the first velocity component
#   U   : velocity at which the ramp is capped, also passed as the velocity scale
#   Re  : sets the viscosity through ν = U*R/Re
#   mem : array type forwarded to the simulation (e.g. `Array`, `CUDA.CuArray`)
#   T   : element type forwarded to the simulation
function make_sim_acc(; N=128, R=32, a=0.5, U=1, Re=1e3, mem=Array, T=Float32)
    # distance to the square |ξ[2]|,|ξ[3]| ≤ R in the plane ξ[1]=0, thickened by 1.5
    function plate_sdf(x, t)
        ξ = x - SA[-R,0,0] .- N/2                        # shifted coordinates
        nearest = SA[0, clamp(ξ[2],-R,R), clamp(ξ[3],-R,R)] # closest point on the square
        offset = ξ .- nearest
        return √sum(abs2, offset) - 1.5
    end
    # velocity BC: component 1 ramps as a*t/R until it reaches U, the others are zero
    function inflow(i, t)
        speed = i == 1 ? min(a*t/R, U) : 0
        return convert(typeof(t), speed)
    end
    return BiotSimulation((N,N,N), inflow, R; U, ν=U*R/Re, body=AutoBody(plate_sdf), T, mem)
end
# make a writer with some attributes, need to output to CPU array to save file (|> Array)
import WaterLily: @loop,ω,λ₂
# Vorticity attribute for the writer: fill `a.flow.f` with ω computed from `a.flow.u`
# over the inside of `a.flow.p`, then return it as a CPU `Array`.
# Works on the simulation `a` passed as argument, so the result does not depend on
# whichever simulation happens to be bound to a global variable.
function vtk_vort(a)
    @loop a.flow.f[I,:] .= ω(I,a.flow.u) over I in inside(a.flow.p)
    return a.flow.f |> Array
end
# Body attribute for the writer: measure the signed distance of `a.body` into
# `a.flow.σ` at the current simulation time and return it as a CPU `Array`.
function vtk_body(a)
    measure_sdf!(a.flow.σ, a.body, WaterLily.time(a))
    return Array(a.flow.σ)
end
# λ₂ attribute for the writer: store λ₂ of the velocity field `a.flow.u` in
# `a.flow.σ` (overwriting its previous content) and return it as a CPU `Array`.
function vtk_lamda(a)
    @inside a.flow.σ[I] = λ₂(I, a.flow.u)
    return Array(a.flow.σ)
end
# map each attribute label to the function producing its CPU array
# (presumably the labels become the field names in the VTK output; confirm in WaterLily docs)
custom_attrib = Dict(
    "ω"  => vtk_vort,
    "b"  => vtk_body,
    "λ₂" => vtk_lamda,
)
# make the writer
writer = vtkWriter("Disk_high_Re_3"; dir="vtk_data", attrib=custom_attrib)
# dimensions
N = 2^6
R = N/4
# build the simulation on the GPU
sim = make_sim_acc(; N, R, Re=125_000, mem=CUDA.CuArray)
const to = TimerOutput()
# step to each target time and write one snapshot per step, timing both calls separately
for tᵢ in 0:0.02:4
    @show tᵢ
    @timeit to "sim_step!(sim, tᵢ)" sim_step!(sim, tᵢ; remeasure=false)
    @timeit to "write!(writer, sim)" write!(writer, sim)
end
close(writer)
# print the accumulated timings
show(to; compact=true)
For the above test case (an N^3 grid with N=64), the timings are very bad.
────────────────────────────────────────────────────────────────────────────────
                                      Time                   Allocations
                             ───────────────────────   ────────────────────────
     Tot / % measured:             392s / 21.4%            8.25GiB / 97.7%
Section               ncalls     time    %tot     avg     alloc    %tot      avg
────────────────────────────────────────────────────────────────────────────────
write!(writer, sim)      201    66.4s   79.1%   331ms   5.17GiB   64.2%  26.4MiB
sim_step!(sim, tᵢ)       201    17.5s   20.9%  87.1ms   2.89GiB   35.8%  14.7MiB
────────────────────────────────────────────────────────────────────────────────
I have been testing the `vtkWriter` since @ddeboerfluid had some issues with very large GPU simulations.
For the above test case (an N^3 grid with N=64), the timings are very bad.
────────────────────────────────────────────────────────────────────────────────
                                      Time                   Allocations
                             ───────────────────────   ────────────────────────
     Tot / % measured:             392s / 21.4%            8.25GiB / 97.7%
Section               ncalls     time    %tot     avg     alloc    %tot      avg
────────────────────────────────────────────────────────────────────────────────
write!(writer, sim)      201    66.4s   79.1%   331ms   5.17GiB   64.2%  26.4MiB
sim_step!(sim, tᵢ)       201    17.5s   20.9%  87.1ms   2.89GiB   35.8%  14.7MiB
────────────────────────────────────────────────────────────────────────────────