Hi, I am recently trying to leverage multiple threading to speed my code. However, even though I likely realized multiple threading, my code run slower. Could somebody proficient in this have a skim on this? My code is posted below. I am attempting to separately compute the four portions of the Jacobian matrix of my model system. The results from my code is right since the combined output is a right Jacobian. But the performance is not improved. The results from @btime shows the maximum time consumes by one of the four branches is 396 ms, while the total time for parallelization is 876 ms. This is beyond my expectation if they are really working simultaneously.
function constitutive_elast1(nodalforce1, noddis)
for jii = 1:length1_1
nodalforce1[jii] = 0.0
end
for kjh in element_num_inter1
epsiNk = BNjk2[kjh] * noddis[(6*elements41[kjh][2]-5):(6*elements41[kjh][2])] - BNik2[kjh] * noddis[(6*elements41[kjh][1]-5):(6*elements41[kjh][1])]
epsiMk = BMjk2[kjh] * noddis[(6*elements41[kjh][2]-5):(6*elements41[kjh][2])] - BMik2[kjh] * noddis[(6*elements41[kjh][1]-5):(6*elements41[kjh][1])]
epsiLk = BLjk2[kjh] * noddis[(6*elements41[kjh][2]-5):(6*elements41[kjh][2])] - BLik2[kjh] * noddis[(6*elements41[kjh][1]-5):(6*elements41[kjh][1])]
#epsiTk = sqrt(epsiMk^2 + epsiLk^2)
#zeroFloa = 0.0
sigmNk = epsiNk * EN
sigmMk = epsiMk * EN * alpha
sigmLk = epsiLk * EN * alpha
#println([sigmNk,sigmMk,sigmLk])
if elements41[kjh][1] >= 1 && elements41[kjh][1] <= portion_1
nodalforce1[(elements41[kjh][1]*6-5):(elements41[kjh][1]*6)] += (-le2[kjh] * Ak2[kjh] * (sigmNk * BNik2[kjh] + sigmMk * BMik2[kjh] + sigmLk * BLik2[kjh]))'
end
if elements41[kjh][2] >= 1 && elements41[kjh][2] <= portion_1
nodalforce1[(elements41[kjh][2]*6-5):(elements41[kjh][2]*6)] += (le2[kjh] * Ak2[kjh] * (sigmNk * BNjk2[kjh] + sigmMk * BMjk2[kjh] + sigmLk * BLjk2[kjh]))'
end
end
for kjh in element_num_bound1
epsiNk = BNjk2[kjh] * noddis[(6*elements41[kjh][2]-5):(6*elements41[kjh][2])] - BNik2[kjh] * noddis[(6*elements41[kjh][1]-5):(6*elements41[kjh][1])]
epsiMk = BMjk2[kjh] * noddis[(6*elements41[kjh][2]-5):(6*elements41[kjh][2])] - BMik2[kjh] * noddis[(6*elements41[kjh][1]-5):(6*elements41[kjh][1])]
epsiLk = BLjk2[kjh] * noddis[(6*elements41[kjh][2]-5):(6*elements41[kjh][2])] - BLik2[kjh] * noddis[(6*elements41[kjh][1]-5):(6*elements41[kjh][1])]
sigmNk = epsiNk * EN * 50
sigmMk = epsiMk * EN * alpha * 50
sigmLk = epsiLk * EN * alpha * 50
if elements41[kjh][1] >= 1 && elements41[kjh][1] <= portion_1
nodalforce1[(elements41[kjh][1]*6-5):(elements41[kjh][1]*6)] += (-le2[kjh] * Ak2[kjh] * (sigmNk * BNik2[kjh] + sigmMk * BMik2[kjh] + sigmLk * BLik2[kjh]))'
end
if elements41[kjh][2] >= 1 && elements41[kjh][2] <= portion_1
nodalforce1[(elements41[kjh][2]*6-5):(elements41[kjh][2]*6)] += (le2[kjh] * Ak2[kjh] * (sigmNk * BNjk2[kjh] + sigmMk * BMjk2[kjh] + sigmLk * BLjk2[kjh]))'
end
end
nothing
end
function constitutive_elast2(nodalforce1, noddis)
for jii = 1:length1_2
nodalforce1[jii] = 0.0
end
for kjh in element_num_inter2
epsiNk = BNjk2[kjh] * noddis[(6*(elements41[kjh][2]-portion2[1]+1)-5):(6*(elements41[kjh][2]-portion2[1]+1))] - BNik2[kjh] * noddis[(6*(elements41[kjh][1]-portion2[1]+1)-5):(6*(elements41[kjh][1]-portion2[1]+1))]
epsiMk = BMjk2[kjh] * noddis[(6*(elements41[kjh][2]-portion2[1]+1)-5):(6*(elements41[kjh][2]-portion2[1]+1))] - BMik2[kjh] * noddis[(6*(elements41[kjh][1]-portion2[1]+1)-5):(6*(elements41[kjh][1]-portion2[1]+1))]
epsiLk = BLjk2[kjh] * noddis[(6*(elements41[kjh][2]-portion2[1]+1)-5):(6*(elements41[kjh][2]-portion2[1]+1))] - BLik2[kjh] * noddis[(6*(elements41[kjh][1]-portion2[1]+1)-5):(6*(elements41[kjh][1]-portion2[1]+1))]
#epsiTk = sqrt(epsiMk^2 + epsiLk^2)
#zeroFloa = 0.0
sigmNk = epsiNk * EN
sigmMk = epsiMk * EN * alpha
sigmLk = epsiLk * EN * alpha
#println([sigmNk,sigmMk,sigmLk])
if elements41[kjh][1] > portion_1 && elements41[kjh][1] <= portion_2
nodalforce1[((elements41[kjh][1]-portion_1)*6-5):((elements41[kjh][1]-portion_1)*6)] += (-le2[kjh] * Ak2[kjh] * (sigmNk * BNik2[kjh] + sigmMk * BMik2[kjh] + sigmLk * BLik2[kjh]))'
end
if elements41[kjh][2] > portion_1 && elements41[kjh][2] <= portion_2
nodalforce1[((elements41[kjh][2]-portion_1)*6-5):((elements41[kjh][2]-portion_1)*6)] += (le2[kjh] * Ak2[kjh] * (sigmNk * BNjk2[kjh] + sigmMk * BMjk2[kjh] + sigmLk * BLjk2[kjh]))'
end
end
for kjh in element_num_bound2
epsiNk = BNjk2[kjh] * noddis[(6*(elements41[kjh][2]-portion2[1]+1)-5):(6*(elements41[kjh][2]-portion2[1]+1))] - BNik2[kjh] * noddis[(6*(elements41[kjh][1]-portion2[1]+1)-5):(6*(elements41[kjh][1]-portion2[1]+1))]
epsiMk = BMjk2[kjh] * noddis[(6*(elements41[kjh][2]-portion2[1]+1)-5):(6*(elements41[kjh][2]-portion2[1]+1))] - BMik2[kjh] * noddis[(6*(elements41[kjh][1]-portion2[1]+1)-5):(6*(elements41[kjh][1]-portion2[1]+1))]
epsiLk = BLjk2[kjh] * noddis[(6*(elements41[kjh][2]-portion2[1]+1)-5):(6*(elements41[kjh][2]-portion2[1]+1))] - BLik2[kjh] * noddis[(6*(elements41[kjh][1]-portion2[1]+1)-5):(6*(elements41[kjh][1]-portion2[1]+1))]
sigmNk = epsiNk * EN * 50
sigmMk = epsiMk * EN * alpha * 50
sigmLk = epsiLk * EN * alpha * 50
if elements41[kjh][1] > portion_1 && elements41[kjh][1] <= portion_2
nodalforce1[((elements41[kjh][1]-portion_1)*6-5):((elements41[kjh][1]-portion_1)*6)] += (-le2[kjh] * Ak2[kjh] * (sigmNk * BNik2[kjh] + sigmMk * BMik2[kjh] + sigmLk * BLik2[kjh]))'
end
if elements41[kjh][2] > portion_1 && elements41[kjh][2] <= portion_2
nodalforce1[((elements41[kjh][2]-portion_1)*6-5):((elements41[kjh][2]-portion_1)*6)] += (le2[kjh] * Ak2[kjh] * (sigmNk * BNjk2[kjh] + sigmMk * BMjk2[kjh] + sigmLk * BLjk2[kjh]))'
end
end
nothing
end
function constitutive_elast3(nodalforce1, noddis)
for jii = 1:length1_3
nodalforce1[jii] = 0.0
end
for kjh in element_num_inter3
epsiNk = BNjk2[kjh] * noddis[(6*(elements41[kjh][2]-portion3[1]+1)-5):(6*(elements41[kjh][2]-portion3[1]+1))] - BNik2[kjh] * noddis[(6*(elements41[kjh][1]-portion3[1]+1)-5):(6*(elements41[kjh][1]-portion3[1]+1))]
epsiMk = BMjk2[kjh] * noddis[(6*(elements41[kjh][2]-portion3[1]+1)-5):(6*(elements41[kjh][2]-portion3[1]+1))] - BMik2[kjh] * noddis[(6*(elements41[kjh][1]-portion3[1]+1)-5):(6*(elements41[kjh][1]-portion3[1]+1))]
epsiLk = BLjk2[kjh] * noddis[(6*(elements41[kjh][2]-portion3[1]+1)-5):(6*(elements41[kjh][2]-portion3[1]+1))] - BLik2[kjh] * noddis[(6*(elements41[kjh][1]-portion3[1]+1)-5):(6*(elements41[kjh][1]-portion3[1]+1))]
#epsiTk = sqrt(epsiMk^2 + epsiLk^2)
#zeroFloa = 0.0
sigmNk = epsiNk * EN
sigmMk = epsiMk * EN * alpha
sigmLk = epsiLk * EN * alpha
#println([sigmNk,sigmMk,sigmLk])
if elements41[kjh][1] > portion_2 && elements41[kjh][1] <= portion_3
nodalforce1[((elements41[kjh][1]-portion_2)*6-5):((elements41[kjh][1]-portion_2)*6)] += (-le2[kjh] * Ak2[kjh] * (sigmNk * BNik2[kjh] + sigmMk * BMik2[kjh] + sigmLk * BLik2[kjh]))'
end
if elements41[kjh][2] > portion_2 && elements41[kjh][2] <= portion_3
nodalforce1[((elements41[kjh][2]-portion_2)*6-5):((elements41[kjh][2]-portion_2)*6)] += (le2[kjh] * Ak2[kjh] * (sigmNk * BNjk2[kjh] + sigmMk * BMjk2[kjh] + sigmLk * BLjk2[kjh]))'
end
end
for kjh in element_num_bound3
epsiNk = BNjk2[kjh] * noddis[(6*(elements41[kjh][2]-portion3[1]+1)-5):(6*(elements41[kjh][2]-portion3[1]+1))] - BNik2[kjh] * noddis[(6*(elements41[kjh][1]-portion3[1]+1)-5):(6*(elements41[kjh][1]-portion3[1]+1))]
epsiMk = BMjk2[kjh] * noddis[(6*(elements41[kjh][2]-portion3[1]+1)-5):(6*(elements41[kjh][2]-portion3[1]+1))] - BMik2[kjh] * noddis[(6*(elements41[kjh][1]-portion3[1]+1)-5):(6*(elements41[kjh][1]-portion3[1]+1))]
epsiLk = BLjk2[kjh] * noddis[(6*(elements41[kjh][2]-portion3[1]+1)-5):(6*(elements41[kjh][2]-portion3[1]+1))] - BLik2[kjh] * noddis[(6*(elements41[kjh][1]-portion3[1]+1)-5):(6*(elements41[kjh][1]-portion3[1]+1))]
sigmNk = epsiNk * EN * 50
sigmMk = epsiMk * EN * alpha * 50
sigmLk = epsiLk * EN * alpha * 50
if elements41[kjh][1] > portion_2 && elements41[kjh][1] <= portion_3
nodalforce1[((elements41[kjh][1]-portion_2)*6-5):((elements41[kjh][1]-portion_2)*6)] += (-le2[kjh] * Ak2[kjh] * (sigmNk * BNik2[kjh] + sigmMk * BMik2[kjh] + sigmLk * BLik2[kjh]))'
end
if elements41[kjh][2] > portion_2 && elements41[kjh][2] <= portion_3
nodalforce1[((elements41[kjh][2]-portion_2)*6-5):((elements41[kjh][2]-portion_2)*6)] += (le2[kjh] * Ak2[kjh] * (sigmNk * BNjk2[kjh] + sigmMk * BMjk2[kjh] + sigmLk * BLjk2[kjh]))'
end
end
nothing
end
function constitutive_elast4(nodalforce1, noddis)
for jii = 1:length1_4
nodalforce1[jii] = 0.0
end
for kjh in element_num_inter4
epsiNk = BNjk2[kjh] * noddis[(6*(elements41[kjh][2]-portion4[1]+1)-5):(6*(elements41[kjh][2]-portion4[1]+1))] - BNik2[kjh] * noddis[(6*(elements41[kjh][1]-portion4[1]+1)-5):(6*(elements41[kjh][1]-portion4[1]+1))]
epsiMk = BMjk2[kjh] * noddis[(6*(elements41[kjh][2]-portion4[1]+1)-5):(6*(elements41[kjh][2]-portion4[1]+1))] - BMik2[kjh] * noddis[(6*(elements41[kjh][1]-portion4[1]+1)-5):(6*(elements41[kjh][1]-portion4[1]+1))]
epsiLk = BLjk2[kjh] * noddis[(6*(elements41[kjh][2]-portion4[1]+1)-5):(6*(elements41[kjh][2]-portion4[1]+1))] - BLik2[kjh] * noddis[(6*(elements41[kjh][1]-portion4[1]+1)-5):(6*(elements41[kjh][1]-portion4[1]+1))]
#epsiTk = sqrt(epsiMk^2 + epsiLk^2)
#zeroFloa = 0.0
sigmNk = epsiNk * EN
sigmMk = epsiMk * EN * alpha
sigmLk = epsiLk * EN * alpha
#println([sigmNk,sigmMk,sigmLk])
if elements41[kjh][1] > portion_3 && elements41[kjh][1] <= portion_4
nodalforce1[((elements41[kjh][1]-portion_3)*6-5):((elements41[kjh][1]-portion_3)*6)] += (-le2[kjh] * Ak2[kjh] * (sigmNk * BNik2[kjh] + sigmMk * BMik2[kjh] + sigmLk * BLik2[kjh]))'
end
if elements41[kjh][2] > portion_3 && elements41[kjh][2] <= portion_4
nodalforce1[((elements41[kjh][2]-portion_3)*6-5):((elements41[kjh][2]-portion_3)*6)] += (le2[kjh] * Ak2[kjh] * (sigmNk * BNjk2[kjh] + sigmMk * BMjk2[kjh] + sigmLk * BLjk2[kjh]))'
end
end
for kjh in element_num_bound4
epsiNk = BNjk2[kjh] * noddis[(6*(elements41[kjh][2]-portion4[1]+1)-5):(6*(elements41[kjh][2]-portion4[1]+1))] - BNik2[kjh] * noddis[(6*(elements41[kjh][1]-portion4[1]+1)-5):(6*(elements41[kjh][1]-portion4[1]+1))]
epsiMk = BMjk2[kjh] * noddis[(6*(elements41[kjh][2]-portion4[1]+1)-5):(6*(elements41[kjh][2]-portion4[1]+1))] - BMik2[kjh] * noddis[(6*(elements41[kjh][1]-portion4[1]+1)-5):(6*(elements41[kjh][1]-portion4[1]+1))]
epsiLk = BLjk2[kjh] * noddis[(6*(elements41[kjh][2]-portion4[1]+1)-5):(6*(elements41[kjh][2]-portion4[1]+1))] - BLik2[kjh] * noddis[(6*(elements41[kjh][1]-portion4[1]+1)-5):(6*(elements41[kjh][1]-portion4[1]+1))]
sigmNk = epsiNk * EN * 50
sigmMk = epsiMk * EN * alpha * 50
sigmLk = epsiLk * EN * alpha * 50
if elements41[kjh][1] > portion_3 && elements41[kjh][1] <= portion_4
nodalforce1[((elements41[kjh][1]-portion_3)*6-5):((elements41[kjh][1]-portion_3)*6)] += (-le2[kjh] * Ak2[kjh] * (sigmNk * BNik2[kjh] + sigmMk * BMik2[kjh] + sigmLk * BLik2[kjh]))'
end
if elements41[kjh][2] > portion_3 && elements41[kjh][2] <= portion_4
nodalforce1[((elements41[kjh][2]-portion_3)*6-5):((elements41[kjh][2]-portion_3)*6)] += (le2[kjh] * Ak2[kjh] * (sigmNk * BNjk2[kjh] + sigmMk * BMjk2[kjh] + sigmLk * BLjk2[kjh]))'
end
end
nothing
end
@btime begin
task15 = Threads.@spawn forwarddiff_color_jacobian!(jac1, constitutive_elast1, input[portion1[1]*6-5:portion1[end]*6], colorvec=colors1_1)
task16 = Threads.@spawn forwarddiff_color_jacobian!(jac2, constitutive_elast2, input[portion2[1]*6-5:portion2[end]*6], colorvec=colors1_2)
task17 = Threads.@spawn forwarddiff_color_jacobian!(jac3, constitutive_elast3, input[portion3[1]*6-5:portion3[end]*6], colorvec=colors1_3)
task18 = Threads.@spawn forwarddiff_color_jacobian!(jac4, constitutive_elast4, input[portion4[1]*6-5:portion4[end]*6], colorvec=colors1_4)
tangent_matrix1[1:(portion_1*6), portion1[1]*6-5:portion1[end]*6] = fetch(task15)
tangent_matrix1[(portion_1*6+1):(portion_2*6), portion2[1]*6-5:portion2[end]*6] = fetch(task16)
tangent_matrix1[(portion_2*6+1):(portion_3*6), portion3[1]*6-5:portion3[end]*6] = fetch(task17)
tangent_matrix1[(portion_3*6+1):(portion_4*6), portion4[1]*6-5:portion4[end]*6] = fetch(task18)
end