A simple example of multi-threading where the data items have no dependencies between them:
# Simple example of using Threads
# 1. create vector of vectors
# 2. write function that processes one vector
# 3. function that processes all vectors in serial
# 4. create threads and run function on each vector in parallel
# 5. compare results of serial and parallel processing
m = 4 # number of elements in each vector
n = Threads.nthreads()  # create one vector per available thread
println("Number of threads: $n")
# function that processes vector elements in place
"""
    hardwork(vec)

Exponentiate every element of `vec` in place. The short `sleep` after
each element stands in for an expensive per-element computation.
Returns `nothing`.
"""
function hardwork(vec)
    foreach(eachindex(vec)) do idx
        vec[idx] = exp(vec[idx])
        sleep(0.1)  # emulate heavy work; makes the parallel speed-up visible
    end
    return nothing
end
vecvec = [rand(m) for i in 1:n]  # one random vector per thread
"""
    process_serial(vecvec)

Run `hardwork` on every vector in `vecvec`, one after another.
"""
function process_serial(vecvec)
    foreach(hardwork, vecvec)
end
"""
    process_parallel(vecvec)

Run `hardwork` on the vectors in `vecvec` in parallel using
`Threads.@threads`; the iterations are independent, so no
synchronization is needed.
"""
function process_parallel(vecvec)
    Threads.@threads for v in vecvec
        hardwork(v)
    end
end
# Warm-up run so that compilation time is excluded from the timing below.
work = deepcopy(vecvec)
process_serial(work)
# Timed serial run on a fresh copy of the data.
work = deepcopy(vecvec)
@time process_serial(work)
result1=deepcopy(work)
# Warm-up run for the parallel version.
work = deepcopy(vecvec)
process_parallel(work)
# Timed parallel run on a fresh copy of the data.
work = deepcopy(vecvec)
@time process_parallel(work)
result2=deepcopy(work)
# assert that results are the same (both runs apply the identical
# element-wise operation to identical data, so exact floating-point
# equality is expected here)
@assert all(result1 .== result2)
nothing
Output:
julia> include("simple.jl")
Number of threads: 8
3.242664 seconds (161 allocations: 4.797 KiB)
0.406468 seconds (211 allocations: 8.938 KiB)
Example of parallel calculations that write to a non-thread-safe container, using a lock:
# use a sparse matrix
# calculate new columns and write them per slice in parallel
using SparseArrays
n = Threads.nthreads()  # number of slices == number of threads
println("Number of threads: $n")
# heavy calculation; multithreading does not help if the calculation is too short
"""
    calc_triple(m, n, slice)

Produce one random matrix entry `(i, j, v)` for an `m`×`m` matrix, with
the column `j` restricted to slice number `slice` (1..`n`). The short
`sleep` emulates a heavy calculation; multithreading does not help if
the calculation is too short.

NOTE(review): the column-slice width is `n`, not `m ÷ n`, so for
`m > n^2` some columns are never generated — confirm this is intended.
"""
function calc_triple(m, n, slice)
    row = rand(1:m)
    col = rand((1 + n * (slice - 1)):(n * slice))
    val = rand()
    sleep(0.002)  # stand-in for an expensive computation
    return (row, col, val)
end
# fill a slice of the array
# arraysize: m x m
# total number of slices: n
# slice: index of the slice to fill (1..n)
"""
    fill_slice!(A, m, n, slice)

Fill column slice `slice` (1..`n`) of the `m`×`m` array `A` with about
`10m/n` random entries produced by `calc_triple`. Writes to `A` without
synchronization, so this is NOT safe to call from several threads on
the same `A`. Returns `A`.
"""
function fill_slice!(A, m, n, slice)
    # Use integer division: the original `1:10m/n` built a Float64 range
    # with a float loop variable. `div` keeps the same iteration count
    # (⌊10m/n⌋ for positive operands) with an integer-typed loop.
    for _ in 1:div(10m, n)
        i, j, v = calc_triple(m, n, slice)
        A[i, j] = v
    end
    return A
end
# fill a slice of the array using the provided lock when writing to the array
"""
    fill_slice_lock!(A, m, n, slice, lk)

Like `fill_slice!`, but every write to `A` is guarded by the lock `lk`,
so several tasks may fill different slices of the same `A`
concurrently. Returns `A`.
"""
function fill_slice_lock!(A, m, n, slice, lk)
    # Integer division instead of the float range `1:10m/n`; the
    # iteration count (⌊10m/n⌋) is unchanged.
    for _ in 1:div(10m, n)
        i, j, v = calc_triple(m, n, slice)
        # setindex! on a sparse matrix is not thread-safe; serialize the
        # writes through the shared lock.
        lock(lk) do
            A[i, j] = v
        end
    end
    return A
end
# create and fill an array slice by slice in serial
"""
    calc_serial(m, n)

Create an `m`×`m` sparse matrix and fill its `n` column slices one
after another. Returns the filled matrix.
"""
function calc_serial(m, n)
    result = spzeros(m, m)
    foreach(s -> fill_slice!(result, m, n, s), 1:n)
    return result
end
# create and fill an array slice by slice in parallel
"""
    calc_parallel(m, n)

Create an `m`×`m` sparse matrix and fill its `n` column slices in
parallel, serializing all writes through a shared `ReentrantLock`.
Returns the filled matrix.
"""
function calc_parallel(m, n)
    result = spzeros(m, m)
    guard = ReentrantLock()
    Threads.@threads for s in 1:n
        fill_slice_lock!(result, m, n, s, guard)
    end
    return result
end
# main program
m = 96  # matrix size (m x m)
# call once to compile the code (so @time below excludes compilation)
calc_serial(m, n)
calc_parallel(m, n)
# measure the serial and parallel execution time
# NOTE: the two runs draw different random entries, so the resulting
# matrices are not compared here (unlike the first example).
@time A = calc_serial(m, n)
@time A = calc_parallel(m, n)
nothing
You asked:
Using Threads.@spawn
or Threads.@threads
create several threads to run on a single cpu core, right?
No, threads are executed in parallel on all CPU cores that Julia is allowed to use. The differences between threads and processes are:
- All threads share the same memory, which can make them faster
- All threads share the same garbage collector, which can make them slower
- Communication between threads is faster than between processes, but also easier to get wrong