Improve performance of this index creation and accessing code

I also notice speedup but there is some instability in code performance. Notice too much variation in allocation.

julia> include("Modular2.jl")
 64.548171 seconds (7.06 G allocations: 274.009 GiB, 20.45% gc time, 18.34% compilation time: 3% of which was recompilation)
GLMakie.Screen(...)

julia> include("Modular2.jl")
 38.827293 seconds (1.66 M allocations: 163.253 MiB, 0.10% gc time, 4.24% compilation time)
GLMakie.Screen(...)

julia> include("Modular2.jl")
 59.855314 seconds (7.04 G allocations: 272.950 GiB, 23.98% gc time, 5.20% compilation time)
GLMakie.Screen(...)

See the code

full code
using HDF5, GLMakie, Interpolations, Statistics, Profile, Base.Threads, BenchmarkTools
@time begin
function readhdf(h5file::String)
	h5 = h5open(h5file, "r")
	B = h5["B"]
	@views Br::Array{Float32, 4}, Bθ::Array{Float32, 4}, Bϕ::Array{Float32, 4} = B[:,:,:,:,1], B[:,:,:,:,2], B[:,:,:,:,3]
	Bp = @. hypot(Br, Bθ) / Bϕ
	x_all, y_all, z_all = h5["x1v"], h5["x2v"], h5["x3v"]
	xe_all, ye_all, ze_all = h5["x1f"], h5["x2f"], h5["x3f"]
	MeshBlockSize::Vector{Int32} = read((HDF5.attributes(h5))["MeshBlockSize"])
	blocks::Int32 = read(HDF5.attributes(h5),"NumMeshBlocks") 
	cx_all::Matrix{Float32} = reshape(collect(x_all), size(x_all))
	cy_all::Matrix{Float32} = reshape(collect(y_all), size(y_all))
	cz_all::Matrix{Float32} = reshape(collect(z_all), size(z_all))
	cxe_all::Matrix{Float32} = reshape(collect(xe_all), size(xe_all))
	cye_all::Matrix{Float32} = reshape(collect(ye_all), size(ye_all))
	cze_all::Matrix{Float32} = reshape(collect(ze_all), size(ze_all))
	return Bp, cx_all, cy_all, cz_all, cxe_all, cye_all, cze_all, MeshBlockSize, blocks
end

Bp, cx_all, cy_all, cz_all, cxe_all, cye_all, cze_all, MeshBlockSize, blocks = readhdf("sane00.prim.01800.athdf")

function bound(cxe_all, cye_all, cze_all, MeshBlockSize, blocks)
	XX,YY,ZZ = ntuple(_ -> zeros(Float32, blocks* prod(MeshBlockSize.+1)), 3)
	icx=1
	for b in 1:blocks
		@views r::Vector{Float32}, θ::Vector{Float32}, ϕ::Vector{Float32} = cxe_all[:, b], cye_all[:, b], cze_all[:, b]
		for rr in r, th in θ, ph in ϕ,  
			XX[icx] = rr * sin(th) * cos(ph)
			YY[icx] = rr * sin(th) * sin(ph)
			ZZ[icx] = rr * cos(th)
			icx += 1
		end
	end
	xmin,xmax = extrema(XX); ymin,ymax = extrema(YY); zmin,zmax = extrema(ZZ)
	println("  x ∈ [$xmin, $xmax], y ∈ [$ymin, $ymax], z ∈ [$zmin, $zmax]")
	return xmin, xmax, ymin, ymax, zmin, zmax
end
	
xmin, xmax, ymin, ymax, zmin, zmax = bound(cxe_all, cye_all, cze_all, MeshBlockSize, blocks)
	

function clp(X,Y,Z,C,xmin,ymin,zmin,sx,sy,sz,nx,ny,nz,field)	
	xi = trunc(Int, (X-xmin) * sx) + 1
	yi = trunc(Int, (Y-ymin) * sy) + 1
	zi = trunc(Int, (Z-zmin) * sz) + 1
	ix = clamp(xi, 1, nx)
	iy = clamp(yi, 1, ny)
	iz = clamp(zi, 1, nz)
	field[ix, iy, iz] = C
end
	
function cart(iex,ii,jj,kk,r_eval,θ_eval,ϕ_eval,ext_itp,X,Y,Z,C)
	rr, th, ph = r_eval[ii], θ_eval[jj], ϕ_eval[kk]
	xy, z = rr .* sincos(th)
    y, x = xy .* sincos(ph)
    X[iex] = x
    Y[iex] = y
    Z[iex] = z
	C[iex] = ext_itp(log(rr), th, ph)
end

function loaddata(Bp, cx_all, cy_all, cz_all, cxe_all, cye_all, cze_all, MeshBlockSize, blocks, xmin, xmax, ymin, ymax, zmin, zmax)
	nx,ny,nz = 10(MeshBlockSize.+1)
	field = fill(NaN32, nx,ny,nz)

	sx::Float32 = (nx-1)/(xmax-xmin); sy::Float32 = (ny-1)/(ymax-ymin); sz::Float32 = (nz-1)/(zmax-zmin)
	X, Y, Z, C = ntuple(_ -> zeros(Float32, nx*ny*nz), 4)

	@threads for b in 1:blocks
		@views r, θ, ϕ = cx_all[:, b], cy_all[:, b], cz_all[:, b]
		@views re, θe, ϕe = cxe_all[:, b], cye_all[:, b], cze_all[:, b]
		@views Bp_block = Bp[:, :, :, b]
		@views size(Bp_block)
		ext_itp = extrapolate(interpolate((log.(r), θ, ϕ), Bp_block, Gridded(Linear())), Interpolations.Line())
		r_eval = exp.(LinRange(log(re[1]), log(re[end]), nx))
		θ_eval = LinRange(θe[1], θe[end], ny)
		ϕ_eval = LinRange(ϕe[1], ϕe[end], nz)
		iex::Int32 = 1
		@inbounds for ii in 1:nx, jj in 1:ny, kk in 1:nz
			cart(iex,ii,jj,kk,r_eval,θ_eval,ϕ_eval,ext_itp,X,Y,Z,C)
			clp(X[iex],Y[iex],Z[iex],C[iex],xmin,ymin,zmin,sx,sy,sz,nx,ny,nz,field)
			iex += 1
		end
	end
	return field
end

field = loaddata(Bp, cx_all, cy_all, cz_all, cxe_all, cye_all, cze_all, MeshBlockSize, blocks, xmin, xmax, ymin, ymax, zmin, zmax)

function tdplot(xmin, xmax, ymin, ymax, zmin, zmax, field)
	finite_vals = filter(!isnan, vec(field))
    q_low, q_high = quantile(finite_vals, (0.05, 0.95))
    fig = Figure()
    ax::LScene = LScene(fig[1,1], show_axis=false)
	volume!(ax, xmin..xmax, ymin..ymax, zmin..zmax, field, transparency=true; colorrange=(q_low, q_high))
	display(fig)
end

tdplot(xmin, xmax, ymin, ymax, zmin, zmax, field)
end