I also notice speedup but there is some instability in code performance. Notice too much variation in allocation.
full code
using HDF5, GLMakie, Interpolations, Statistics, Profile, Base.Threads, BenchmarkTools
@time begin
function readhdf(h5file::String)
h5 = h5open(h5file, "r")
B = h5["B"]
@views Br::Array{Float32, 4}, Bθ::Array{Float32, 4}, Bϕ::Array{Float32, 4} = B[:,:,:,:,1], B[:,:,:,:,2], B[:,:,:,:,3]
Bp = @. hypot(Br, Bθ) / Bϕ
x_all, y_all, z_all = h5["x1v"], h5["x2v"], h5["x3v"]
xe_all, ye_all, ze_all = h5["x1f"], h5["x2f"], h5["x3f"]
MeshBlockSize::Vector{Int32} = read((HDF5.attributes(h5))["MeshBlockSize"])
blocks::Int32 = read(HDF5.attributes(h5),"NumMeshBlocks")
cx_all::Matrix{Float32} = reshape(collect(x_all), size(x_all))
cy_all::Matrix{Float32} = reshape(collect(y_all), size(y_all))
cz_all::Matrix{Float32} = reshape(collect(z_all), size(z_all))
cxe_all::Matrix{Float32} = reshape(collect(xe_all), size(xe_all))
cye_all::Matrix{Float32} = reshape(collect(ye_all), size(ye_all))
cze_all::Matrix{Float32} = reshape(collect(ze_all), size(ze_all))
return Bp, cx_all, cy_all, cz_all, cxe_all, cye_all, cze_all, MeshBlockSize, blocks
end
Bp, cx_all, cy_all, cz_all, cxe_all, cye_all, cze_all, MeshBlockSize, blocks = readhdf("sane00.prim.01800.athdf")
function bound(cxe_all, cye_all, cze_all, MeshBlockSize, blocks)
XX,YY,ZZ = ntuple(_ -> zeros(Float32, blocks* prod(MeshBlockSize.+1)), 3)
icx=1
for b in 1:blocks
@views r::Vector{Float32}, θ::Vector{Float32}, ϕ::Vector{Float32} = cxe_all[:, b], cye_all[:, b], cze_all[:, b]
for rr in r, th in θ, ph in ϕ,
XX[icx] = rr * sin(th) * cos(ph)
YY[icx] = rr * sin(th) * sin(ph)
ZZ[icx] = rr * cos(th)
icx += 1
end
end
xmin,xmax = extrema(XX); ymin,ymax = extrema(YY); zmin,zmax = extrema(ZZ)
println(" x ∈ [$xmin, $xmax], y ∈ [$ymin, $ymax], z ∈ [$zmin, $zmax]")
return xmin, xmax, ymin, ymax, zmin, zmax
end
xmin, xmax, ymin, ymax, zmin, zmax = bound(cxe_all, cye_all, cze_all, MeshBlockSize, blocks)
function clp(X,Y,Z,C,xmin,ymin,zmin,sx,sy,sz,nx,ny,nz,field)
xi = trunc(Int, (X-xmin) * sx) + 1
yi = trunc(Int, (Y-ymin) * sy) + 1
zi = trunc(Int, (Z-zmin) * sz) + 1
ix = clamp(xi, 1, nx)
iy = clamp(yi, 1, ny)
iz = clamp(zi, 1, nz)
field[ix, iy, iz] = C
end
function cart(iex,ii,jj,kk,r_eval,θ_eval,ϕ_eval,ext_itp,X,Y,Z,C)
rr, th, ph = r_eval[ii], θ_eval[jj], ϕ_eval[kk]
xy, z = rr .* sincos(th)
y, x = xy .* sincos(ph)
X[iex] = x
Y[iex] = y
Z[iex] = z
C[iex] = ext_itp(log(rr), th, ph)
end
function loaddata(Bp, cx_all, cy_all, cz_all, cxe_all, cye_all, cze_all, MeshBlockSize, blocks, xmin, xmax, ymin, ymax, zmin, zmax)
nx,ny,nz = 10(MeshBlockSize.+1)
field = fill(NaN32, nx,ny,nz)
sx::Float32 = (nx-1)/(xmax-xmin); sy::Float32 = (ny-1)/(ymax-ymin); sz::Float32 = (nz-1)/(zmax-zmin)
X, Y, Z, C = ntuple(_ -> zeros(Float32, nx*ny*nz), 4)
@threads for b in 1:blocks
@views r, θ, ϕ = cx_all[:, b], cy_all[:, b], cz_all[:, b]
@views re, θe, ϕe = cxe_all[:, b], cye_all[:, b], cze_all[:, b]
@views Bp_block = Bp[:, :, :, b]
@views size(Bp_block)
ext_itp = extrapolate(interpolate((log.(r), θ, ϕ), Bp_block, Gridded(Linear())), Interpolations.Line())
r_eval = exp.(LinRange(log(re[1]), log(re[end]), nx))
θ_eval = LinRange(θe[1], θe[end], ny)
ϕ_eval = LinRange(ϕe[1], ϕe[end], nz)
iex::Int32 = 1
@inbounds for ii in 1:nx, jj in 1:ny, kk in 1:nz
cart(iex,ii,jj,kk,r_eval,θ_eval,ϕ_eval,ext_itp,X,Y,Z,C)
clp(X[iex],Y[iex],Z[iex],C[iex],xmin,ymin,zmin,sx,sy,sz,nx,ny,nz,field)
iex += 1
end
end
return field
end
field = loaddata(Bp, cx_all, cy_all, cz_all, cxe_all, cye_all, cze_all, MeshBlockSize, blocks, xmin, xmax, ymin, ymax, zmin, zmax)
function tdplot(xmin, xmax, ymin, ymax, zmin, zmax, field)
finite_vals = filter(!isnan, vec(field))
q_low, q_high = quantile(finite_vals, (0.05, 0.95))
fig = Figure()
ax::LScene = LScene(fig[1,1], show_axis=false)
volume!(ax, xmin..xmax, ymin..ymax, zmin..zmax, field, transparency=true; colorrange=(q_low, q_high))
display(fig)
end
tdplot(xmin, xmax, ymin, ymax, zmin, zmax, field)
end