Threads parallelization on different nested loops

calvar · June 21, 2021, 1:40pm

Hi,

I am learning Julia and I was testing how to parallelize a nested loop with the @threads macro, to compute the product between matrices. This is the code I am trying to parallelize:

function dot_prod!(A, B, C, i, j)
   c = 0 #local object
   for k = 1:size(A, 2)
       c += A[i,k] * B[k,j]
   end
   C[i,j] = c
end
   
function mat_prod!(A, B, C)
   for i = 1:size(A, 1)
       for j = 1:size(B, 2)
           dot_prod!(A, B, C, i, j)
       end
   end
end

I have used the @threads macro at three different levels in the code. This is the first version:

function dot_prod!(A, B, C, i, j)
    c = zeros(Threads.nthreads())
    for k = 1:size(A, 2)
        c[Threads.threadid()] += A[i,k] * B[k,j]
    end
    C[i,j] = sum(c)
end

function mat_prod!(A, B, C)
    Threads.@threads for i = 1:size(A, 1)
        for j = 1:size(B, 2)
            dot_prod!(A, B, C, i, j)
        end
    end
end

The second version:

function dot_prod!(A, B, C, i, j)
    c = zeros(Threads.nthreads())
    for k = 1:size(A, 2)
        c[Threads.threadid()] += A[i,k] * B[k,j]
    end
    C[i,j] = sum(c)
end

function mat_prod!(A, B, C)
    for i = 1:size(A, 1)
        Threads.@threads for j = 1:size(B, 2)
            dot_prod!(A, B, C, i, j)
        end
    end
end

Ant the third version:

function dot_prod!(A, B, C, i, j)
    c = zeros(Threads.nthreads())
    Threads.@threads for k = 1:size(A, 2)
        c[Threads.threadid()] += A[i,k] * B[k,j]
    end
    C[i,j] = sum(c)
end

function mat_prod!(A, B, C)
    for i = 1:size(A, 1)
        for j = 1:size(B, 2)
            dot_prod!(A, B, C, i, j)
        end
    end
end

Now, versions 1 and 3 have similar performance, while version 2, the one in which the middle cycle is parallelized, performs much better (half the time of the other two). I would be grateful if someone could explain to me the reason for that behavior.

Thank you!

Topic		Replies	Views
Multithreading for nested for loops General Usage parallel , multithreading , threads	13	1755	August 16, 2023
Multithreading for nested loops Performance multithreading	42	12502	January 20, 2022
Parallelize nested loop in v1.72 General Usage performance , parallel	5	728	August 11, 2022
Innefficient paralellization? Need some help optimizing a simple dot product Performance question , parallel	32	4795	March 28, 2018
Multi-threading on a 2 CPU system New to Julia multithreading	15	1089	February 2, 2023

Threads parallelization on different nested loops

Related topics