With the following code:
function thread_test(x::Vector{Float64}, y::Vector{Float64}, z::Vector{Float64}, n::Int64)
for j = 1:10000
a::Vector{Int64} = rand(1:n, 128)
b::Vector{Int64} = rand(1:n, 128)
c::Vector{Int64} = rand(1:n, 128)
Threads.@threads for i::Int64 = 1:128
y[a[i]] += x[b[i]]*z[c[i]]
end
end
end
function no_thread_test(x::Vector{Float64}, y::Vector{Float64}, z::Vector{Float64}, n::Int64)
for j = 1:10000
a::Vector{Int64} = rand(1:n, 128)
b::Vector{Int64} = rand(1:n, 128)
c::Vector{Int64} = rand(1:n, 128)
for i::Int64 = 1:128
y[a[i]] += x[b[i]]*z[c[i]]
end
end
end
function test()
n = 1000
x = rand(n)
y = rand(n)
z = rand(n)
thread_test(x,y,z,n)
no_thread_test(x,y,z,n)
@time thread_test(x,y,z,n)
@time no_thread_test(x,y,z,n)
end
test()
I get the following output
0.177082 seconds (1.02 M allocations: 65.100 MiB, 23.14% gc time)
0.059489 seconds (30.00 k allocations: 33.417 MiB, 1.12% gc time)
Where are all these allocations coming from for the threaded case?
Additionally, I noticed that if I didn’t explicitly type i
, allocations increased further:
0.344228 seconds (3.45 M allocations: 94.762 MiB)
Why is this?