The docs suggest preallocating arrays for performance in the following way:
"""
    xinc!(ret::AbstractVector{T}, x::T) where T

Store `x`, `x + 1` and `x + 2` in `ret[1]`, `ret[2]` and `ret[3]`, in that order.
Return `nothing`; the result is communicated only through the mutated `ret`.
"""
function xinc!(ret::AbstractVector{T}, x::T) where T
    ret[1] = x
    for offset in 1:2
        # Slot `offset + 1` receives `x + offset`: ret[2] = x + 1, ret[3] = x + 2.
        ret[offset + 1] = x + offset
    end
    return nothing
end
"""
    loopinc_prealloc(; ret = Vector{Int}(undef, 3), y = 0)

Call `xinc!(ret, i)` for every `i` in `1:10^7`, add `ret[2]` to `y` after each call,
and return the accumulated `y`.

# Keywords
- `ret`: buffer handed to `xinc!` on every iteration. The default allocates one
  uninitialized 3-element `Vector{Int}` per call of this function.
- `y`: starting value of the accumulator.
"""
function loopinc_prealloc(; ret = Vector{Int}(undef, 3), y = 0)
    # The old `Array{Int}(3)` spelling was deprecated in Julia 0.7 and removed in 1.0.
    # The explicit `undef` constructor is the supported form (on 0.6 it needs Compat).
    for i in 1:10^7
        xinc!(ret, i)
        y += ret[2]
    end
    return y
end;
"""
    loopinc_prealloc2()

Positional-only variant of the preallocation benchmark: allocate one 3-element
`Vector{Int}`, call `xinc!(ret, i)` for every `i` in `1:10^7`, sum `ret[2]` after each
call, and return the total.
"""
function loopinc_prealloc2()
    # The old `Array{Int}(3)` spelling was deprecated in Julia 0.7 and removed in 1.0.
    # The explicit `undef` constructor is the supported form (on 0.6 it needs Compat).
    ret = Vector{Int}(undef, 3)
    y = 0
    for i in 1:10^7
        xinc!(ret, i)
        y += ret[2]
    end
    return y
end;
If I pass the arguments as keyword (named) arguments in v0.6, I see significantly slower performance and significantly more memory allocations.
using Compat
# Keyword-argument counterpart of `xinc!`: stores `x`, `x + 1` and `x + 2` in `ret[1]`,
# `ret[2]` and `ret[3]` (in that order) and returns `nothing`.
# Both `ret` and `x` are keywords without defaults, so callers must supply them.
@compat function xinc_named!(; ret::AbstractVector{T}, x::T) where T
    ret[1] = x
    for offset in 1:2
        ret[offset + 1] = x + offset
    end
    return nothing
end;
# Runs 10^7 keyword-argument calls of `xinc_named!` against the single buffer `ret`,
# adding `ret[2]` to `y` after each call; the final `y` is returned.
function loopinc_prealloc_named(; ret=Vector{Int}(undef, 3), y=0)
    for k in 1:10^7
        xinc_named!(; ret=ret, x=k)
        y = y + ret[2]
    end
    return y
end;
@time loopinc_prealloc_named()
Comparison:
@time loopinc_prealloc()
0.039436 seconds (7 allocations: 304 bytes)
50000015000000
@time loopinc_prealloc2()
0.037106 seconds (6 allocations: 288 bytes)
50000015000000
@time loopinc_prealloc_named()
2.895215 seconds (20.00 M allocations: 1.192 GiB, 2.42% gc time)
50000015000000
- What’s wrong with `loopinc_prealloc_named` and `xinc_named!`?
- Why is there an extra allocation for `loopinc_prealloc2`?
Is there any way around these issues in v0.6? By the way, `loopinc_prealloc_named` works fine, just like `loopinc_prealloc2`, in v1.0.
Keyword arguments are slow in v0.6; use v1.0 instead.
Putting type assertions on the keyword args can help.
2 Likes
As a follow-up, I’m calling the same function 10 times and want to reuse the data structures for each call. A small example is:
"""
    xinc!(ret::AbstractVector{T}, x::T) where T

Write `x` to `ret[1]`, then `x + 1` to `ret[2]` and `x + 2` to `ret[3]`.
Always return `nothing`; callers read the values back out of `ret`.
"""
function xinc!(ret::AbstractVector{T}, x::T) where T
    ret[1] = x
    for (slot, bump) in zip(2:3, 1:2)
        # Pairs (2, 1) and (3, 2): ret[2] = x + 1, then ret[3] = x + 2.
        ret[slot] = x + bump
    end
    return nothing
end
# Benchmark kernel: for each `n` in 1:10^7, refill `ret` through `xinc!(ret, n)` and add
# `ret[2]` to `y`. `ret` defaults to a fresh uninitialized 3-element Int array and `y`
# to 0; the accumulated `y` is returned.
function loopinc_prealloc(; ret=Array{Int}(undef,3), y=0)
    for n in 1:10^7
        xinc!(ret, n)
        y = y + ret[2]
    end
    return y
end;
# Allocates one 3-element Int buffer, then times ten calls of `loopinc_prealloc` that
# all reuse it, with an outer `@time` around the whole loop. Every call starts from
# `y = 0` and its result is discarded; only the `@time` reports are of interest.
function loopinc_prealloc_10()
    buf = Vector{Int}(undef, 3)
    acc = 0
    @time for _ in 1:10
        @time loopinc_prealloc(; ret=buf, y=acc)
    end
end
## single
@time loopinc_prealloc(; ret=ret, y=y)
0.011260 seconds (12 allocations: 464 bytes)
50000015000000
## function wrap
@time loopinc_prealloc_10()
0.010497 seconds
0.009011 seconds
0.009536 seconds
0.008685 seconds
0.008868 seconds
0.011771 seconds
0.008947 seconds
0.009126 seconds
0.010338 seconds
0.008621 seconds
0.096465 seconds (158 allocations: 3.969 KiB)
0.096574 seconds (194 allocations: 5.313 KiB)
## for loop
@time for i = 1:10
@time loopinc_prealloc(;ret=ret,y=y)
end
0.009276 seconds (4 allocations: 96 bytes)
0.008699 seconds (4 allocations: 96 bytes)
0.008602 seconds (4 allocations: 96 bytes)
0.009644 seconds (4 allocations: 96 bytes)
0.008659 seconds (4 allocations: 96 bytes)
0.008709 seconds (4 allocations: 96 bytes)
0.009004 seconds (4 allocations: 96 bytes)
0.010692 seconds (4 allocations: 96 bytes)
0.009643 seconds (4 allocations: 96 bytes)
0.010994 seconds (4 allocations: 96 bytes)
0.095504 seconds (363 allocations: 10.797 KiB)
Why are the allocations in `loopinc_prealloc_10` better on the whole (but worse for each call to `loopinc_prealloc(; ret=ret, y=y)`) versus the `for`-loop? Is it right that the `loopinc_prealloc_10` function consumes less memory than the `for`-loop, or how should I best write functions that call `loopinc_prealloc` multiple times?