I’m trying to do a map-reduce style coding but I’m not getting exactly the same output with serial and parallel code and suspect that there is thread collision on data. I am trying to collate results indexing a data structure on each thread something like [zeros(T, size(x[1])[1]) for i in 1:nthreads()]
to collate the data then reduce further by summing then down to a vector serially. The full code is below. There really shouldn’t be any difference in the output between the serial and parallel algos but there is a small discrepancy each time.
The issue is certainly with the piece of code that sits inside the @threads for
loop but I’m not sure which side of the equal sign (both?) is the culprit.
The technique I am using was described in another query: Data structures for threaded computing - #16 by foobar_lv2 multi-threading by writing to copies of an intermediate result which is later reduced.
Using Julia Version 1.3.0 (2019-11-26)
Thank you
function generateRandomBlockMatrix(::Type{T}, ni::Int64, p::Int64, nBlocks::Int64) where {T <: Float64}
x = Array{Array{T, 2}, 1}(undef, nBlocks)
for i in 1:nBlocks
x[i] = rand(T, (ni, p))
end
return x
end
function serialAlgo(x::Array{Array{T, 2}, 1}, coef::Array{T, 1}) where {T <: AbstractFloat}
vect = zeros(T, size(x[1])[1])
for i in 1:length(x)
vect .+= sum(x[i] * coef)
end
return vect
end
function parallelAlgo(x::Array{Array{T, 2}, 1}, coef::Array{T, 1}) where {T <: AbstractFloat}
nBlocks = length(x)
vecStore = [zeros(T, size(x[1])[1]) for i in 1:nthreads()]
coefStore = [copy(coef) for i in 1:nthreads()]
@threads for i in 1:nBlocks
vecStore[threadid()] .+= sum(x[i] * coefStore[threadid()])
end
vecTotal = zeros(T, size(x[1])[1])
for i in 1:nthreads()
vecTotal .+= vecStore[i]
end
return vecTotal
end
x = generateRandomBlockMatrix(Float64, 10, 1000, 1000);
b = rand(Float64, 1000);
sum(abs.(serialAlgo(x, b) .- parallelAlgo(x, b)))