I am running the code below on a computer with 6 physical cores:

```
using SparseArrays;
using LinearAlgebra;
using .Threads
using BenchmarkTools
# Time grid: 0:0.001:5 -> 5001 steps.
dt = 0.001;
tmin = 0;
tmax = 5;
timeSim = tmin:dt:tmax;
# 3x2 per-step scratch buffer, and a record with one 3x2 block per time step.
NodeVoltage = zeros(3,2);
NodeVoltageRecord = zeros(3*size(timeSim,1),2);
# Build a 9x9 sparse identity by per-entry assignment.
# NOTE(review): sparse(I, 9, 9) (or sparse(1.0I, 9, 9)) constructs this directly.
A_Mat = spzeros(9,9);
for i in 1:9
A_Mat[i,i] = 1;
end
I_Mat = ones(9,3);
# Benchmark the whole serial time loop.
# NOTE(review): everything here lives in non-const global scope, so each
# iteration pays dynamic dispatch and heap allocations (see the 153k allocs
# reported below); wrapping this loop in a function would remove most of that.
@btime begin
global iter = 0;
for t in timeSim
global iter += 1;
# Solve the 9x9 system each step; V_Mat is 9x3. A_Mat is the identity here,
# so the solve's numeric work is trivial — the cost is dispatch/allocation.
global V_Mat = A_Mat \ I_Mat;
# Two iterations (i = 1, 4): pack three staggered entries of V_Mat into
# column j of NodeVoltage. The [a; b; c] literal allocates a temporary
# vector on every pass.
for i in 1:3:6
j = Int((i-1)/3)+1;
global NodeVoltage[:,j]= [V_Mat[i,1]; V_Mat[i+1,2]; V_Mat[i+2,3]];#V_Mat[i:i+2]
end #for i in 1:3:6
# Append this step's 3x2 block into rows (3*iter-2):3*iter of the record.
global NodeVoltageRecord[(3*iter-2):3*iter,:] = NodeVoltage;
end #for t in timeSim
end #@btime begin
7.382 ms (153499 allocations: 8.45 MiB)
```

But when I use `@threads`, the execution time becomes worse than without it. Any idea why?

```
using SparseArrays;
using LinearAlgebra;
using .Threads
using BenchmarkTools
# Same setup as the serial version: 5001-step time grid.
dt = 0.001;
tmin = 0;
tmax = 5;
timeSim = tmin:dt:tmax;
# 3x2 per-step scratch buffer, and a record with one 3x2 block per time step.
NodeVoltage = zeros(3,2);
NodeVoltageRecord = zeros(3*size(timeSim,1),2);
# 9x9 sparse identity built entry by entry (see serial version).
A_Mat = spzeros(9,9);
for i in 1:9
A_Mat[i,i] = 1;
end
I_Mat = ones(9,3);
@btime begin
global iter = 0;
for t in timeSim
global iter += 1;
global V_Mat = A_Mat \ I_Mat;
# NOTE(review): this @threads parallelizes a loop with only TWO iterations
# (i = 1 and i = 4), each doing ~3 scalar copies, and it is re-entered on
# every one of the 5001 time steps. Spawning and synchronizing tasks costs
# far more than the work itself, which matches the slowdown (23.5 ms vs
# 7.4 ms) and the roughly doubled allocation count reported below. The two
# iterations also write to the shared global NodeVoltage; here they touch
# disjoint columns, but globals inside @threads add further overhead.
@threads for i in 1:3:6
j = Int((i-1)/3)+1;
global NodeVoltage[:,j]= [V_Mat[i,1]; V_Mat[i+1,2]; V_Mat[i+2,3]];#V_Mat[i:i+2]
end #for i in 1:3:6
global NodeVoltageRecord[(3*iter-2):3*iter,:] = NodeVoltage;
end #for t in timeSim
end #@btime begin
julia> Threads.nthreads()
6
23.504 ms (308554 allocations: 23.71 MiB)
```