I have written massive ‘for loop’ code as follows:
Meta_start=390; Meta_end=400; Meta_div=trunc(Int,((Meta_end-Meta_start)*100)); #temporary
Meta_cal=LinRange(Meta_start,Meta_end,Meta_div); k=1:1:Meta_div #::Vector{Int64}
I_sharp=Array{Float64,2}(undef,150,27);
I_broad=Array{Float64,1}(undef,Meta_div);
Re=rand(5,11); FCF=rand(5,11); IB=0.1;
Meta=rand(150,27,55); Meta_shift=zeros(55);
for v_C=1:1:5
for v_B=1:1:11
numb_v=(v_B+11*(v_C-1))
for bra=1:1:27
for J1=1:1:150 # 150
I_sharp[J1,bra]=Re[v_C,v_B]^2+*(1/(Meta[J1,bra,numb_v].-Meta_shift[numb_v])^4)*FCF[v_C,v_B]
I_broad[k]+=I_sharp[J1,bra].*exp.(-2*(Meta_cal[k].-(Meta[J1,bra,numb_v].-Meta_shift[numb_v])).^2/IB)
end
end
end
end
It took 3.448 seconds with 8691813 allocations and 10.30 GiB on my computer.
I tried to use @inbounds @simd and mul!(C,A,B), but it didn’t work, or I guess I used it the wrong way.
How can I improve the speed of this code?