I was a little surprised to find that broadcasting over slices allocates, even when using view
. Here’s a MWE.
u = randn(10, 100)
ids = reshape(1:100, 10, 10)
out = similar(u[ids])
function foo1!(out, u, ids)
@. out = u[ids]
# FastBroadcast.@.. does slightly better, but
# still allocates as much as `view(u, ids)`
end
function foo2!(out, u, ids)
out .= view(u, ids)
end
function foo3!(out, u, ids)
for (i, id) in enumerate(ids)
out[i] = u[id]
end
end
# run once for compilation
foo1!(out, u, ids); foo2!(out, u, ids); foo3!(out, u, ids);
# timings
@time foo1!(out, u, ids);
@time foo2!(out, u, ids);
@time foo3!(out, u, ids);
I see this for both 1.9.2 and 1.10-beta on Mac ARM.
Is there a way to avoid allocations without resorting to for loops?