v0.6 vs v1.0: keyword argument memory allocation example

question

#1

The docs suggest preallocating arrays for performance in the following way:

"""
    xinc!(ret::AbstractVector{T}, x::T) where T

Overwrite the first three entries of `ret` with `x`, `x + 1` and `x + 2`.

`ret` is modified in place; the function returns `nothing`.
"""
function xinc!(ret::AbstractVector{T}, x::T) where T
    ret[1] = x
    # Slots 2 and 3 hold `x` shifted by 1 and 2 respectively.
    for offset in 1:2
        ret[offset + 1] = x + offset
    end
    return nothing
end

"""
    loopinc_prealloc(; ret = Vector{Int}(undef, 3), y = 0)

Call `xinc!(ret, i)` for every `i` in `1:10^7`, add `ret[2]` to `y` after each
call, and return the final value of `y`.

# Keywords
- `ret`: buffer that `xinc!` overwrites on every iteration; the same buffer is
  reused for all iterations.
- `y`: starting value of the accumulator.
"""
function loopinc_prealloc(; ret = Vector{Int}(undef, 3), y = 0)
    # `Array{Int}(3)` is only accepted by Julia 0.6. The `undef` constructor is the
    # form required on 1.0; on 0.6 it is supplied by Compat, which this example
    # already loads before the function is called.
    for i in 1:10^7
        xinc!(ret, i)
        y += ret[2]
    end
    return y
end;

"""
    loopinc_prealloc2()

Allocate a 3-element `Int` buffer once, call `xinc!(ret, i)` for every `i` in
`1:10^7`, add `ret[2]` to a running total after each call, and return the total.

This is the positional-local counterpart of the keyword-based variant: the buffer
and the accumulator are ordinary local variables.
"""
function loopinc_prealloc2()
    # `Array{Int}(3)` is only accepted by Julia 0.6. The `undef` constructor is the
    # form required on 1.0; on 0.6 it is supplied by Compat, which this example
    # already loads before the function is called.
    ret = Vector{Int}(undef, 3)
    y = 0
    for i in 1:10^7
        xinc!(ret, i)
        y += ret[2]
    end
    return y
end;

If I pass named parameters in v0.6, I see significantly slower performance and significantly more memory allocations.

using Compat
# Keyword-argument variant of the in-place increment: overwrite the first three
# entries of `ret` with `x`, `x + 1` and `x + 2`, then return `nothing`.
# Both keywords are required (they have no defaults).
@compat function xinc_named!(; ret::AbstractVector{T}, x::T) where T
    ret[1] = x
    # Slots 2 and 3 hold `x` shifted by 1 and 2 respectively.
    for offset in 1:2
        ret[offset + 1] = x + offset
    end
    return nothing
end;
"""
    loopinc_prealloc_named(; ret=Vector{Int}(undef, 3), y=0)

Call `xinc_named!` with keywords `ret` and `x = i` for every `i` in `1:10^7`,
add `ret[2]` to `y` after each call, and return the final value of `y`.

# Keywords
- `ret`: buffer that `xinc_named!` overwrites on every iteration.
- `y`: starting value of the accumulator.
"""
function loopinc_prealloc_named(; ret=Vector{Int}(undef, 3), y=0)
    for i in 1:10^7
        xinc_named!(ret=ret, x=i)
        y = y + ret[2]
    end
    return y
end;
# Time one call that uses the default keyword arguments.
# NOTE(review): if this is the first call in the session, the reported time and
# allocations presumably include compilation as well; run it twice to compare.
@time loopinc_prealloc_named()

Comparison:

@time loopinc_prealloc()
  0.039436 seconds (7 allocations: 304 bytes)
50000015000000

@time loopinc_prealloc2()
  0.037106 seconds (6 allocations: 288 bytes)
50000015000000

@time loopinc_prealloc_named()
  2.895215 seconds (20.00 M allocations: 1.192 GiB, 2.42% gc time)
50000015000000

  1. What’s wrong with loopinc_prealloc_named and xinc_named!?
  2. Why is there an extra allocation for loopinc_prealloc compared with loopinc_prealloc2?

Is there any way around these issues in v0.6? Btw, loopinc_prealloc_named works fine like loopinc_prealloc2 in v1.0.


#2

Keyword arguments are slow in 0.6. Use 1.0 :slight_smile:

Putting type assertions on the keyword args can help.


#3

Gotcha, thanks!


#4

As a follow up, I’m calling the same function 10 times and want to reuse the data structures for each call. A small example is:

"""
    xinc!(ret::AbstractVector{T}, x::T) where T

Store `x`, `x + 1` and `x + 2` in `ret[1]`, `ret[2]` and `ret[3]`.

`ret` is modified in place; the function returns `nothing`.
"""
function xinc!(ret::AbstractVector{T}, x::T) where T
    ret[1] = x
    # Entry `k` receives `x` plus its distance from the first slot.
    for k in 2:3
        ret[k] = x + (k - 1)
    end
    return nothing
end
"""
    loopinc_prealloc(; ret=Array{Int}(undef, 3), y=0)

Call `xinc!(ret, i)` for every `i` in `1:10^7`, add `ret[2]` to `y` after each
call, and return the final value of `y`.

# Keywords
- `ret`: buffer that `xinc!` overwrites on every iteration; pass an existing
  array to reuse it across calls.
- `y`: starting value of the accumulator.
"""
function loopinc_prealloc(; ret=Array{Int}(undef, 3), y=0)
    for i in 1:10^7
        xinc!(ret, i)
        y = y + ret[2]
    end
    return y
end;
"""
    loopinc_prealloc_10()

Allocate one 3-element `Int` buffer, then call `loopinc_prealloc` ten times with
that same buffer and `y = 0`, timing every call and the loop as a whole with
`@time`.

The result of each `loopinc_prealloc` call is discarded.
"""
function loopinc_prealloc_10()
    buffer = Array{Int}(undef, 3)
    start = 0
    # Outer `@time` covers the whole loop; inner `@time` reports each call.
    @time for _ in 1:10
        @time loopinc_prealloc(; ret=buffer, y=start)
    end
end

## single
@time loopinc_prealloc(; ret=ret, y=y)
  0.011260 seconds (12 allocations: 464 bytes)
50000015000000

## function wrap
@time loopinc_prealloc_10()
  0.010497 seconds
  0.009011 seconds
  0.009536 seconds
  0.008685 seconds
  0.008868 seconds
  0.011771 seconds
  0.008947 seconds
  0.009126 seconds
  0.010338 seconds
  0.008621 seconds
  0.096465 seconds (158 allocations: 3.969 KiB)
  0.096574 seconds (194 allocations: 5.313 KiB)

## for loop
@time for i = 1:10
           @time loopinc_prealloc(;ret=ret,y=y)
       end
  0.009276 seconds (4 allocations: 96 bytes)
  0.008699 seconds (4 allocations: 96 bytes)
  0.008602 seconds (4 allocations: 96 bytes)
  0.009644 seconds (4 allocations: 96 bytes)
  0.008659 seconds (4 allocations: 96 bytes)
  0.008709 seconds (4 allocations: 96 bytes)
  0.009004 seconds (4 allocations: 96 bytes)
  0.010692 seconds (4 allocations: 96 bytes)
  0.009643 seconds (4 allocations: 96 bytes)
  0.010994 seconds (4 allocations: 96 bytes)
  0.095504 seconds (363 allocations: 10.797 KiB)

Why are the allocations in loopinc_prealloc_10 better on the whole (but worse for each call to loopinc_prealloc(; ret=ret, y=y)) versus the for-loop? Is it right that the loopinc_prealloc_10 function consumes less memory than the for-loop, or how should I best write functions that call loopinc_prealloc multiple times?