If `pow` is the limiting factor, you can look at using VML or Vectorize to get a faster `pow` function. But even adding `@inbounds` (together with folding the reciprocal into a negated exponent) or switching to a broadcasted calculation is a bit faster than what’s shown. Toy example:
"""
    pow_loop(a, b, N)

Baseline scalar loop. Set `a[1]` to zero and, for every index `k` in `2:N`, store
`(1 / b[k])^(12 / (k - 1)) - 1` in `a[k]`. Mutates `a` in place and returns it.

No `@inbounds` is applied here, so `a` must be non-empty and an `N` that runs past the
end of `a` or `b` raises a `BoundsError`.
"""
function pow_loop(a, b, N)
    a[1] = zero(eltype(a))
    for k in 2:N
        # Kept as an explicit reciprocal followed by a positive exponent so the
        # floating-point result matches this baseline formulation exactly.
        reciprocal = 1 / b[k]
        exponent = 12 / (k - 1)
        a[k] = reciprocal^exponent - 1
    end
    return a
end
"""
    pow_loop_inv_inb(a, b, N)

Scalar loop with the reciprocal folded into the exponent. Set `a[1]` to zero and, for
every `T` in `2:N`, store `b[T]^(-12 / (T - 1)) - 1` in `a[T]`. Mutates `a` in place and
returns it.

The loop runs under `@inbounds`, so the index range `2:N` is validated against both `a`
and `b` once before any element is written.

# Throws
- `BoundsError` if `a` is empty, or if `2:N` is not a valid index range for `a` or `b`.
"""
function pow_loop_inv_inb(a, b, N)
    # `@inbounds` below removes the per-element checks, so an oversized `N` would read
    # and write past the arrays. Validate the whole range up front instead; an empty
    # range (N < 2) always passes.
    checkbounds(a, 2:N)
    checkbounds(b, 2:N)
    a[1] = zero(eltype(a))
    @inbounds for T in 2:N
        a[T] = b[T]^(-12 / (T - 1)) - 1
    end
    return a
end
"""
    pow_loop_vector(a, b, N)

Broadcast formulation. Set `a[1]` to zero, then fill `a[2:N]` in a single fused
broadcast with `b[k]^(-12 / (k - 1)) - 1` for each index `k` in `2:N`. Mutates `a` in
place and returns it.
"""
function pow_loop_vector(a, b, N)
    a[1] = zero(eltype(a))
    idx = 2:N
    # Views avoid copying the slices; the dotted expression writes straight into `a`.
    dest = view(a, idx)
    src = view(b, idx)
    dest .= src .^ (-12 ./ (idx .- 1)) .- 1
    return a
end
using Vectorize # this will only work with Intel's VML library
"""
    pow_loop_vml(a, b, N)

Library-backed formulation: build the exponent vector `-12 / (k - 1)` for `k` in `1:N`,
pass it with `b` to `Vectorize.pow!` (output argument `a`), subtract one from every
element of `a`, then set `a[1]` to zero. Returns `a`.
"""
function pow_loop_vml(a, b, N)
    # Slot 1 of the exponent vector is -12 / 0 == -Inf; whatever lands in `a[1]` from it
    # is discarded by the explicit reset below.
    exponents = -12 ./ ((1:N) .- 1)
    # NOTE(review): presumably stores `b .^ exponents` elementwise into `a` — confirm
    # against the Vectorize.jl documentation.
    Vectorize.pow!(a, b, exponents)
    a .= a .- 1
    a[1] = zero(eltype(a))
    return a
end
Results:
# Benchmark driver: time each variant on the same pair of 125-element random vectors.
# NOTE(review): `@btime` is presumably the BenchmarkTools macro; no `using BenchmarkTools`
# is visible in this chunk — confirm it is loaded before running.
# Every variant writes into `a` in place, so the contents of `a` are overwritten by each
# call; `b` is only read.
# The comment line under each call appears to be the output recorded for that call.
a = rand(125); b = rand(125);
# Baseline: explicit reciprocal, bounds-checked loop.
@btime pow_loop($a, $b, 125);
# 4.403 μs (0 allocations: 0 bytes)
# Negated exponent plus `@inbounds`.
@btime pow_loop_inv_inb($a, $b, 125);
# 2.508 μs (0 allocations: 0 bytes)
# Fused broadcast over views.
@btime pow_loop_vector($a, $b, 125);
# 2.703 μs (2 allocations: 96 bytes)
# `Vectorize.pow!` on a freshly built exponent vector.
@btime pow_loop_vml($a, $b, 125);
# 1.826 μs (1 allocation: 1.06 KiB)