Or is there something wrong with my code?
Below is a MVE of my code. func1
and func2
do the same thing. Why is func2
faster than func1
?
Why can’t the compiler figure it out? I think func1
more clearly expresses my intent of performing a vectorized operation. Is there something in func1
that prevents the compiler from generating good code?
I am using Julia Version 1.3.1
, Commit 2d57411 (2019-12-30 21:36 UTC)
.
function func1(a, b, c)
for jdx = 2:size(a, 2)
a[:, jdx] .= a[:, jdx - 1] .+ b[:, jdx] .* c[jdx]
end
return a
end
function func2(a, b, c)
for jdx = 2:size(a, 2)
for idx in axes(a, 1)
a[idx, jdx] = a[idx, jdx - 1] + b[idx, jdx] * c[jdx]
end
end
return a
end
With
a = zeros(3, 100)
b = randn(3, 100)
c = randn(100)
The results are:
@benchmark func1(a, b, c)
BenchmarkTools.Trial:
allocs estimate: 198
--------------
median time: 6.413 μs (0.00% GC)
--------------
and
@benchmark func2(a, b, c)
BenchmarkTools.Trial:
allocs estimate: 0
--------------
median time: 407.980 ns (0.00% GC)
--------------
For a comprehensive comparison—and an example of what a newbie tries—, see below: (PS: interpolation as in func1($a, $b, $c)
does not seem to matter, contrary to what we are told).
Time in seconds
Function | No interp. | Interpolation | Interp. vs. no-interp |
---|---|---|---|
func1 | 6.41E-06 | 6.20E-06 | 3.50% |
func2 | 4.08E-07 | 3.97E-07 | 2.79% |
func1b | 6.42E-06 | 6.19E-06 | 3.60% |
func2b | 4.08E-07 | 3.97E-07 | 2.85% |
func1c | 6.28E-06 | 6.15E-06 | 2.06% |
func2c | 3.99E-07 | 3.82E-07 | 4.53% |
func2d | 3.83E-07 | 3.72E-07 | 2.75% |
func1e | 4.84E-06 | 4.80E-06 | 0.77% |
func2e | 4.01E-07 | 3.89E-07 | 3.04% |
function func1(a, b, c)
for jdx = 2:size(a, 2)
a[:, jdx] .= a[:, jdx - 1] .+ b[:, jdx] .* c[jdx]
end
return a
end
function func2(a, b, c)
for jdx = 2:size(a, 2)
for idx in axes(a, 1)
a[idx, jdx] = a[idx, jdx - 1] + b[idx, jdx] * c[jdx]
end
end
return a
end
function func1b(a::Matrix, b::Matrix, c::Vector)
for jdx = 2:size(a, 2)
a[:, jdx] .= a[:, jdx - 1] .+ b[:, jdx] .* c[jdx]
end
return a
end
function func2b(a::Matrix, b::Matrix, c::Vector)
for jdx = 2:size(a, 2)
for idx in axes(a, 1)
a[idx, jdx] = a[idx, jdx - 1] + b[idx, jdx] * c[jdx]
end
end
return a
end
function func1c(a, b, c)
@inbounds for jdx = 2:size(a, 2)
a[:, jdx] .= a[:, jdx - 1] .+ b[:, jdx] .* c[jdx]
end
return a
end
function func2c(a, b, c)
@inbounds for jdx = 2:size(a, 2)
for idx in axes(a, 1)
a[idx, jdx] = a[idx, jdx - 1] + b[idx, jdx] * c[jdx]
end
end
return a
end
function func2d(a, b, c)
@inbounds for jdx = 2:size(a, 2)
@simd for idx in axes(a, 1)
a[idx, jdx] = a[idx, jdx - 1] + b[idx, jdx] * c[jdx]
end
end
return a
end
function func1e(a, b, c)
@inbounds @views for jdx = 2:size(a, 2)
a[:, jdx] .= a[:, jdx - 1] .+ b[:, jdx] .* c[jdx]
end
return a
end
function func2e(a, b, c)
@inbounds for jdx = 2:size(a, 2)
for idx in axes(a, 1)
a[idx, jdx] = a[idx, jdx - 1] + b[idx, jdx] * c[jdx]
end
end
return a
end