OpenBLAS vs. "for loops" demo

It’ll go just a little faster if you add @inbounds, or if you use broadcasting, which does that for you:

julia> b_axpy!(a,b,c) = (c .= a .* b .+ c; nothing)  # in-place: writes a .* b .+ c elementwise into c, then returns nothing
b_axpy! (generic function with 1 method)

julia> @btime b_axpy!(a,b,c);
  92.255 ms (0 allocations: 0 bytes)

julia> function inbounds_axpy!(a,b,c)
               # Explicit-loop version: overwrite each c[i] with a*b[i] + c[i].
               for i = 1:length(c)
                       # NOTE(review): @inbounds disables bounds checking on this line;
                       # presumably b is at least as long as c and both are 1-indexed —
                       # confirm for the benchmarked inputs.
                       @inbounds c[i] = a*b[i]+c[i]
               end
               # Return nothing so the call has no value besides the mutation of c.
               return nothing
       end
inbounds_axpy! (generic function with 1 method)

julia> @btime inbounds_axpy!(a,b,c);
  92.229 ms (0 allocations: 0 bytes)

julia> @btime LinearAlgebra.BLAS.axpy!(a,b,c);
  92.531 ms (0 allocations: 0 bytes)

julia> @btime custom_non_threaded_axpy!(a,b,c);
  99.350 ms (0 allocations: 0 bytes)
3 Likes