Hi,
One mistake is to call `@btime multiplyMatrices_oneThreadLoop(A, B, N)` instead of `@btime multiplyMatrices_oneThreadLoop($A, $B, $N)`. Interpolating the arguments with `$` avoids measuring the overhead of accessing non-constant global variables.
Further, your loop assigns with `=` instead of accumulating with `+=`, so your code did not actually calculate a matmul.
This triple loop is the obvious way to implement a matmul, but unfortunately it is quite slow.
See also this comment:
This version of the code gives me the desired results; repeated runs are neither faster nor slower than the first. I reduced `N` to get more reasonable runtimes.
using Base.Threads
using BenchmarkTools
"""
    multiplyMatrices_oneThreadLoop(A, B, N)

Return a new `N`×`N` matrix `C` with `C[i, j] = sum(A[i, k] * B[k, j] for k in 1:N)`,
computed with a hand-written triple loop whose outermost loop is distributed with
`Threads.@threads`.

# Arguments
- `A`, `B`: input matrices; only the entries with both indices in `1:N` are read.
- `N`: number of rows and columns of the result.

# Throws
- `BoundsError` if `A` or `B` has fewer than `N` rows or columns.
"""
function multiplyMatrices_oneThreadLoop(A::Matrix{Float64}, B::Matrix{Float64}, N::Int64)
    C = zeros(N, N)
    # Parallelise over columns: each iteration writes only to column `j` of `C`,
    # so iterations are independent and never touch the same element.
    Threads.@threads for j in 1:N
        for k in 1:N
            b = B[k, j]
            # Innermost loop runs over the first index, which is the contiguous
            # one for Julia's column-major arrays. Every C[i, j] still receives
            # its terms in ascending k, so the result is unchanged.
            for i in 1:N
                C[i, j] += A[i, k] * b
            end
        end
    end
    return C
end
"""
    multiplyMatrices_spawnExample(A, B, N)

Return a new `N`×`N` matrix `C` with `C[i, j] = sum(A[i, k] * B[k, j] for k in 1:N)`,
computed with a hand-written triple loop whose columns are split into chunks, each
chunk being processed by its own `Threads.@spawn` task.

# Arguments
- `A`, `B`: input matrices; only the entries with both indices in `1:N` are read.
- `N`: number of rows and columns of the result.

# Throws
- `BoundsError` if `A` or `B` has fewer than `N` rows or columns (surfaced through
  the failing task when `@sync` waits on it).
"""
function multiplyMatrices_spawnExample(A::Matrix{Float64}, B::Matrix{Float64}, N::Int64)
    C = zeros(N, N)
    # Spawning a single task around the whole loop nest would run everything
    # sequentially inside that one task. Create one task per chunk of columns
    # instead; `max(1, …)` keeps the chunk length valid when N == 0.
    chunklen = max(1, cld(N, Threads.nthreads()))
    @sync for cols in Iterators.partition(1:N, chunklen)
        # Each task writes only to its own columns of `C`, so tasks are independent.
        Threads.@spawn for j in cols
            for k in 1:N
                b = B[k, j]
                # Innermost loop over the first index: contiguous for
                # column-major arrays; accumulation order per element is
                # still ascending k.
                for i in 1:N
                    C[i, j] += A[i, k] * b
                end
            end
        end
    end
    return C
end
"""
    multiplyMatrices_default(A, B, N)

Return a new `N`×`N` matrix `C` with `C[i, j] = sum(A[i, k] * B[k, j] for k in 1:N)`,
computed with a plain single-threaded triple loop.

# Arguments
- `A`, `B`: input matrices; only the entries with both indices in `1:N` are read.
- `N`: number of rows and columns of the result.

# Throws
- `BoundsError` if `A` or `B` has fewer than `N` rows or columns.
"""
function multiplyMatrices_default(A::Matrix{Float64}, B::Matrix{Float64}, N::Int64)
    C = zeros(N, N)
    for j in 1:N
        for k in 1:N
            b = B[k, j]
            # Innermost loop runs over the first index, which is the contiguous
            # one for Julia's column-major arrays. Every C[i, j] still receives
            # its terms in ascending k, so the result is unchanged.
            for i in 1:N
                C[i, j] += A[i, k] * b
            end
        end
    end
    return C
end
# Benchmark driver: times each implementation three times on the same random
# N×N inputs so that repeated runs can be compared.
N = 100
A = rand(N, N);
B = rand(N, N);

# Every argument is interpolated with `$` so that `@btime` does not measure
# access to the non-constant globals `A`, `B` and `N`.
println("multi-threaded loop 1st run")
@btime multiplyMatrices_oneThreadLoop($A, $B, $N)
println("using sync spawn 1st run")
@btime multiplyMatrices_spawnExample($A, $B, $N)
println("default multiplication 1st run")
@btime multiplyMatrices_default($A, $B, $N)

println("multi-threaded loop 2nd run")
@btime multiplyMatrices_oneThreadLoop($A, $B, $N)
println("using sync spawn 2nd run")
@btime multiplyMatrices_spawnExample($A, $B, $N)
println("default multiplication 2nd run")
@btime multiplyMatrices_default($A, $B, $N)

println("multi-threaded loop 3rd run")
@btime multiplyMatrices_oneThreadLoop($A, $B, $N)
println("using sync spawn 3rd run")
@btime multiplyMatrices_spawnExample($A, $B, $N)
println("default multiplication 3rd run")
@btime multiplyMatrices_default($A, $B, $N)
~
``