Data structures for threaded computing

Apart from the performance issues, this looks like a big data race to me. I think you meant to write:

"""
    threadedGram2(x::Array{Array{Float64, 2}})

Return the sum of the Gram matrices `x[i]' * x[i]` over every matrix in `x`,
computed with multiple tasks.

All matrices in `x` are expected to have the same number of columns `p`; the
result is a `p × p` `Matrix{Float64}`. The input matrices are not modified.

# Throws
- `ArgumentError` if `x` is empty (the output dimension cannot be determined).
"""
function threadedGram2(x::Array{Array{Float64, 2}})
  isempty(x) && throw(ArgumentError("x must contain at least one matrix"))
  p = size(first(x), 2)
  # Split the work into at most `nthreads()` contiguous chunks.
  chunksize = cld(length(x), Threads.nthreads())
  # Each task owns a private accumulator. Indexing shared buffers by
  # `Threads.threadid()` is not safe: a task may migrate between threads, so
  # two tasks could end up updating the same buffer concurrently.
  tasks = map(Iterators.partition(eachindex(x), chunksize)) do idxs
    Threads.@spawn begin
      acc = zeros(Float64, p, p)
      for i in idxs
        # In-place acc = x[i]' * x[i] * 1 + acc * 1, no temporary product.
        LinearAlgebra.mul!(acc, x[i]', x[i], 1, 1)
      end
      acc
    end
  end
  # Reduce the partial sums serially, reusing the first one as the result.
  r = fetch(tasks[1])
  for t in Iterators.drop(tasks, 1)
    r .+= fetch(t)
  end
  return r
end
1 Like