Generator slow when passed as argument

(Julia 1.9.0-alpha1)

Why is this?

julia> @btime let x=(a=1,b=2,c=3,d=4,e=5,f=6,g=7,h=8)
           ns=propertynames(x)
           NamedTuple{ns}(map(n->getproperty(x,n), ns))
       end
  1.000 ns (0 allocations: 0 bytes)
(a = 1, b = 2, c = 3, d = 4, e = 5, f = 6, g = 7, h = 8)

julia> @btime let x=(a=1,b=2,c=3,d=4,e=5,f=6,g=7,h=8)
           ns=propertynames(x)
           NamedTuple{ns}(getproperty(x,n) for n ∈ ns)
       end
  579.670 ns (11 allocations: 928 bytes)
(a = 1, b = 2, c = 3, d = 4, e = 5, f = 6, g = 7, h = 8)

julia> @btime let x=(a=1,b=2,c=3,d=4,e=5,f=6,g=7,h=8)
           ns=propertynames(x)
           NamedTuple{ns}(((getproperty(x,n) for n ∈ ns)...,))
       end
  1.000 ns (0 allocations: 0 bytes)
(a = 1, b = 2, c = 3, d = 4, e = 5, f = 6, g = 7, h = 8)
1 Like

1ns means constant-folding, you’re not benchmarking anything

1 Like

Very well, I shall do things to make it not read 1ns:

Trial 1 : Assigning rand() values and wrapping in a function:

julia> @btime (()->begin x=(a=rand(),b=rand(),c=rand(),d=rand(),e=rand(),f=rand(),g=rand(),h=rand())
           ns=propertynames(x)
           NamedTuple{ns}(map(n->getproperty(x,n), ns))
       end)()
  24.121 ns (0 allocations: 0 bytes)
(a = 0.5218322278054724, b = 0.6490145188159838, c = 0.4498112605320129, d = 0.2697056519272063, e = 0.6922591013353949, f = 0.5024083965138986, g = 0.45278375949414684, h = 0.24272686573005509)

julia> @btime (()->begin x=(a=rand(),b=rand(),c=rand(),d=rand(),e=rand(),f=rand(),g=rand(),h=rand())
           ns=propertynames(x)
           NamedTuple{ns}(getproperty(x,n) for n ∈ ns)
       end)()
  1.570 μs (35 allocations: 1.28 KiB)
(a = 0.6107745757701861, b = 0.5396769758183816, c = 0.8256041605409281, d = 0.23232080960554702, e = 0.8874158525617497, f = 0.4739403505021156, g = 0.02362650987609838, h = 0.7094814751186344)

julia> @btime (()->begin x=(a=rand(),b=rand(),c=rand(),d=rand(),e=rand(),f=rand(),g=rand(),h=rand())
           ns=propertynames(x)
           NamedTuple{ns}(((getproperty(x,n) for n ∈ ns)...,))
       end)()
  24.197 ns (0 allocations: 0 bytes)
(a = 0.03819138796796251, b = 0.6413247235460257, c = 0.261317462471773, d = 0.5650563667359987, e = 0.7155291639741849, f = 0.6288825770558325, g = 0.2565432038568122, h = 0.4965357876488232)

Trial 2 : using @time instead of @btime:

julia> @time for _=1:1_000_000 (()->begin x=(a=rand(),b=rand(),c=rand(),d=rand(),e=rand(),f=rand(),g=rand(),h=rand())
           ns=propertynames(x)
           NamedTuple{ns}(map(n->getproperty(x,n), ns))
       end)() end
  0.230494 seconds (4.04 M allocations: 139.845 MiB, 13.44% gc time, 15.24% compilation time)

julia> @time for _=1:1_000_000 (()->begin x=(a=rand(),b=rand(),c=rand(),d=rand(),e=rand(),f=rand(),g=rand(),h=rand())
           ns=propertynames(x)
           NamedTuple{ns}(getproperty(x,n) for n ∈ ns)
       end)() end
  1.537471 seconds (38.07 M allocations: 1.286 GiB, 7.40% gc time, 1.83% compilation time)

julia> @time for _=1:1_000_000 (()->begin x=(a=rand(),b=rand(),c=rand(),d=rand(),e=rand(),f=rand(),g=rand(),h=rand())
           ns=propertynames(x)
           NamedTuple{ns}(((getproperty(x,n) for n ∈ ns)...,))
       end)() end
  0.210565 seconds (4.05 M allocations: 140.694 MiB, 13.43% gc time, 16.36% compilation time)

It appears that order-of-magnitude differences persist.

Edit:

Trial 3: Using an array comprehension to populate an array with these:

julia> @time [begin x=(a=rand(),b=rand(),c=rand(),d=rand(),e=rand(),f=rand(),g=rand(),h=rand())
           ns=propertynames(x)
           NamedTuple{ns}(map(n->getproperty(x,n), ns))
       end for _=1:1_000_000];
  0.050902 seconds (64.67 k allocations: 65.224 MiB, 6.23% gc time, 58.25% compilation time)

julia> @time [begin x=(a=rand(),b=rand(),c=rand(),d=rand(),e=rand(),f=rand(),g=rand(),h=rand())
           ns=propertynames(x)
           NamedTuple{ns}(getproperty(x,n) for n ∈ ns)
       end for _=1:1_000_000];
  0.929006 seconds (35.10 M allocations: 1.288 GiB, 7.40% gc time, 6.24% compilation time)

julia> @time [begin x=(a=rand(),b=rand(),c=rand(),d=rand(),e=rand(),f=rand(),g=rand(),h=rand())
           ns=propertynames(x)
           NamedTuple{ns}(((getproperty(x,n) for n ∈ ns)...,))
       end for _=1:1_000_000];
  0.053575 seconds (78.28 k allocations: 66.077 MiB, 8.75% gc time, 57.46% compilation time)

ok yeah I think this is real, and the reason is probably that a generator's length (and hence the resulting tuple type) cannot be inferred as well as the array's

But I am indeed curious why splatting a generator does so well — maybe there's special handling in inference for splatting short iterators of known length

1 Like

@code_warntype does indeed show us a type-instability when passing the generator as an argument. It loses its ability to infer length.

Interestingly, this behavior extends to Tuple:

julia> @btime Tuple(map(identity, (1,2,3)))
  2.100 ns (0 allocations: 0 bytes)
(1, 2, 3)

julia> @btime Tuple(x for x ∈ (1,2,3))
  195.726 ns (2 allocations: 112 bytes)
(1, 2, 3)

julia> @btime Tuple(((x for x ∈ (1,2,3))...,))
  2.100 ns (0 allocations: 0 bytes)
(1, 2, 3)

but not collect:

julia> @btime collect(map(identity, (1,2,3)))
  37.298 ns (1 allocation: 80 bytes)
3-element Vector{Int64}:
 1
 2
 3

julia> @btime collect(x for x ∈ (1,2,3))
  38.182 ns (1 allocation: 80 bytes)
3-element Vector{Int64}:
 1
 2
 3

julia> @btime collect(((x for x ∈ (1,2,3))...,))
  37.298 ns (1 allocation: 80 bytes)
3-element Vector{Int64}:
 1
 2
 3

For larger Tuples, though (length>31), the penalty of type inference loss from passing a generator goes away (and the penalty shifts toward the splatted generator):

code
julia> using BenchmarkTools, Plots

julia> begin
           tests = BenchmarkGroup()
           tests[:map]       = BenchmarkGroup()
           tests[:generator] = BenchmarkGroup()
           tests[:splatted]  = BenchmarkGroup()
           for i=1:100 # this takes like ten minutes or so
               tests[:map][i]       = @benchmarkable Tuple(map(identity, xs))    setup=(xs=((1:$i)...,))
               tests[:generator][i] = @benchmarkable Tuple(x for x ∈ xs)         setup=(xs=((1:$i)...,))
               tests[:splatted][i]  = @benchmarkable Tuple(((x for x ∈ xs)...,)) setup=(xs=((1:$i)...,))
           end
       end

julia> begin
           tune!(tests)
           res = run(tests)
       end;

julia> begin
           plot(xlabel="tuple size", ylabel="min time [ns]")
           plot!(sort([(i, minimum(res[:map])[i].time) for i=eachindex(res[:map])]), label=":map")
           plot!(sort([(i, minimum(res[:generator])[i].time) for i=eachindex(res[:generator])]), label=":generator")
           plot!(sort([(i, minimum(res[:splatted])[i].time) for i=eachindex(res[:splatted])]), label=":splatted")
       end

Memory allocated:
julia> begin
           plot(xlabel="tuple size", ylabel="memory [bytes]")
           plot!(sort([(i, minimum(res[:map])[i].memory) for i=eachindex(res[:map])]), label=":map")
           plot!(sort([(i, minimum(res[:generator])[i].memory) for i=eachindex(res[:generator])]), label=":generator")
           plot!(sort([(i, minimum(res[:splatted])[i].memory) for i=eachindex(res[:splatted])]), label=":splatted")
       end

1 Like

A simple and noisy benchmark using the @time macro:

julia> let times = Matrix{Float64}(undef, 100, 3)
           for i=1:100
               local xs = ((1:i)...,)
               local arlen = 10_000_000 Γ· i
               times[i,1] = @timed([Tuple(map(identity, xs)) for j=1:arlen]).time/arlen
               times[i,2] = @timed([Tuple(x for x ∈ xs) for j=1:arlen]).time/arlen
               times[i,3] = @timed([Tuple(((x for x ∈ xs)...,)) for j=1:arlen]).time/arlen
           end
           plot(times, xlabel="tuple size", ylabel="time per", label=["map" "generator" "splatted"])
       end

1 Like

to address the op…

Is this simply a bug/lack of optimization, or is it a fundamental limitation of generators that we just have to watch out for?

And why does splatting the generator into a tuple before passing it to Tuple cause even more performance degradation?

It makes me hesitant to use generators.