Ok. Here is a simple MWE to test this out.
Create a file “task1.jl” with the following code:
# Ask BLAS to use a single thread.
# NOTE(review): presumably so that each Julia process stays single-threaded when several are run side by side — confirm.
Base.BLAS.set_num_threads(1)
# Thin, inlined wrapper around the BLAS matrix-vector kernel `gemv!`.
# Forwards its arguments unchanged to the no-transpose ('N') variant, which updates
# the vector `C` in place as α*A*B + β*C, and returns whatever `gemv!` returns.
# All scalars and array elements must share the same element type `T`.
@inline function blas_A_mul_B!(α::T, A::Matrix{T}, B::Vector{T}, β::T, C::Vector{T}) where {T<:Number}
    return Base.BLAS.gemv!('N', α, A, B, β, C)
end
# Seed the global random number generator so that every run draws the same numbers.
srand(1234)
# Side length of the square test matrix.
const D = 720
# D×D matrix of random values; it is the matrix handed to `test_matvec` further down.
const A = rand(D,D)
# Time `N` consecutive matrix-vector products computed through `blas_A_mul_B!`.
#
# Arguments:
#   IM – Float64 matrix used as the left operand of every product.
#   N  – number of products to perform (any `Integer`).
#
# Returns the elapsed wall-clock time in seconds, as measured with `time()`.
function test_matvec(IM::Matrix{Float64}, N::Integer)
    # The no-transpose product needs length(x) == size(IM, 2) and length(y) == size(IM, 1).
    # Sizing x by the column count keeps non-square matrices from raising DimensionMismatch;
    # for a square matrix the sizes (and the random draws) are the same as before.
    x = rand(size(IM, 2))
    y = rand(size(IM, 1))
    # Loop-invariant scalars: with β = 0 each call stores 1 * IM * x into y.
    α = one(Float64)
    β = zero(Float64)
    t1 = time()
    for i in 1:N
        blas_A_mul_B!(α, IM, x, β, y)
    end
    return time() - t1
end
# Run once with N = 1 first so that compilation on the first call does not count
# toward the measured time below.
test_matvec(A, 1)
# Timed run: 500000 matrix-vector products.
t = test_matvec(A, 500000)
# Write the elapsed seconds to the output file. The `do` form closes the file
# even if the write throws, which the manual open/close pair did not guarantee.
open("out1.txt", "w") do io
    print(io, "$t\n")
end
After that, run the following in bash:
# Derive task2.jl ... task4.jl from task1.jl, pointing each copy at its own output file.
# Writing through a redirect avoids `sed -i`, whose argument syntax differs between GNU and BSD sed.
for i in $(seq 2 4); do sed "s/out1/out$i/g" task1.jl > "task$i.jl"; done
You can replace 4 with the number of processes you want.
After that, I can start a single process with `julia task1.jl`.
Or I can start all 4 processes in parallel with:
# Start task1.jl ... task4.jl as background jobs, one julia process per file.
for i in $(seq 1 4); do julia "task$i.jl" & done
In the former case, the file “out1.txt” shows an execution time of approximately 36 seconds on my machine.
In the latter case, each of the out*.txt files reports approximately 250 seconds.