Wow! It’s naturally 5 times faster! Thanks!
using Tullio
"""
    comp_tullio(a, c)

Return a `Float32` vector with one entry per column of `a`. Entry `i` holds the sum
over `j` of `(c[j] - a[j, i])^2`, evaluated by `@tullio`.
"""
function comp_tullio(a, c)
    ncols = size(a, 2)
    out = zeros(Float32, ncols)
    # `col` appears on the left-hand side and is kept; `row` appears only on the
    # right-hand side, so Tullio reduces (sums) over it.
    @tullio out[col] = (c[row] - a[row, col])^2
    return out
end
# Quick wall-clock check; if this is the first call, the figure includes compilation.
@time comp_tullio(a, c)
# Interpolate the globals with `$` so the benchmark measures the call itself rather
# than the overhead of reading untyped global variables.
@benchmark comp_tullio($a, $c)
# Element-wise approximate agreement with `comp` (defined elsewhere).
all(comp_tullio(a, c) .≈ comp(a, c))