I was a little surprised to find that broadcasting over an indexed slice allocates, even when using `view`. Here is an MWE:
# Source data: a 10×100 matrix of standard-normal samples.
u = randn(10, 100)
# Linear indices 1:100 arranged as a 10×10 array; `u[ids]` gathers the first
# 100 entries of `u` (in column-major order) into a 10×10 result.
ids = reshape(1:100, 10, 10)
# Preallocated destination with the element type of `u` and the shape of `ids`.
# Built directly from the shape so that no temporary `u[ids]` copy is made
# just to size the buffer.
out = similar(u, eltype(u), size(ids))
"""
    foo1!(out, u, ids)

Write `u[ids]` into `out` with an in-place broadcast assignment and return `out`.

The right-hand side `u[ids]` is an ordinary indexing expression, so it is
evaluated to a new array before being broadcast into `out`.
"""
function foo1!(out, u, ids)
    # Plain indexing copies: this temporary is the allocation being studied.
    gathered = u[ids]
    out .= gathered
    # FastBroadcast.@.. does slightly better here, but
    # still allocates as much as `view(u, ids)`.
    return out
end
"""
    foo2!(out, u, ids)

Write the elements of `u` selected by `ids` into `out`, broadcasting from a
view of `u` rather than from an indexed copy, and return `out`.
"""
function foo2!(out, u, ids)
    # `@view u[ids]` is `view(u, ids)`: a lazy wrapper over `u`, not a copy.
    picked = @view u[ids]
    out .= picked
    return out
end
"""
    foo3!(out, u, ids)

Gather `u[id]` for every `id` in `ids` into `out` with an explicit loop and
return `out`.

The k-th element of `ids` (in iteration order) is stored at the k-th index
produced by `eachindex(out)`, so `out` only needs the same number of elements
as `ids`, not the same shape.

# Throws
- `DimensionMismatch` if `out` and `ids` have different numbers of elements.
"""
function foo3!(out, u, ids)
    # Reject size mismatches up front instead of partially filling `out` or
    # failing midway through the loop.
    length(out) == length(ids) ||
        throw(DimensionMismatch("out and ids must have the same number of elements"))
    # Take destination indices from `out` itself rather than assuming that its
    # linear indices start at 1.
    for (i, id) in zip(eachindex(out), ids)
        out[i] = u[id]
    end
    return out
end
# Warm-up: call every variant once so that compilation
# does not show up in the measurements below.
foo1!(out, u, ids);
foo2!(out, u, ids);
foo3!(out, u, ids);
# Timed runs (each `@time` line reports elapsed time and allocations).
@time foo1!(out, u, ids);
@time foo2!(out, u, ids);
@time foo3!(out, u, ids);
I see this on both Julia 1.9.2 and 1.10-beta on an ARM Mac.
Is there a way to avoid these allocations without resorting to explicit `for` loops?