When doing gaussian elimination in parallel, I noticed incorrect results when I pass tuples of several matrices:

```
using LinearAlgebra
function add_col!(X::Matrix{tw}, w::tw, j1::Int, j2::Int) where {tw}
for i=1:size(X,1) X[i,j2] += w*X[i,j1] end end
function add_row!(X::Matrix{tw}, w::tw, i1::Int, i2::Int) where {tw}
for j=1:size(X,2) X[i2,j] += w*X[i1,j] end end
function add_colrow!(X::Tuple{Vararg{Matrix{tw}}}, Y::Tuple{Vararg{Matrix{tw}}}, w::tw, i1::Int, i2::Int) where {tw}
#= Given many composable matrices R^l <--X-- R^m <--Y-- R^n (where l,n vary but m is fixed),
we replace the basis element e_i1 ∈ R^m with the linear combination e_i1-w*e_i2. =#
for t=1:length(Y) add_row!(Y[t], w, i1, i2) end
for t=1:length(X) add_col!(X[t], -w, i2, i1) end end
function mod_col!(D::Matrix{tw}, U::Matrix{tw}, k::Int; prl::Int=1) where {tw}
if prl==0 for i=k+1:size(D,1) add_colrow!((U,), (D,), -div(D[i,k],D[k,k]), k, i) end
elseif prl==1 Threads.@threads for i=k+1:size(D,1) add_colrow!((U,), (D,), -div(D[i,k],D[k,k]), k, i) end
else error("prl=$prl is not supported.") end end
begin m=n=300;
A1=rand(Int.(-9:9),m,n); A1[1,1]=3; A2=copy(A1);
U1,U2 = (Matrix{Int}(I,d,d) for d ∈ (m,m));
mod_col!(A1, U1, 1, prl=0); # unparallelized execution
mod_col!(A2, U2, 1, prl=1); # parallel execution gives different result
println(U1==U2)
end
false
```

I don’t see how this could create any data race, since distinct threads edit different cols/rows. Any ideas why this occurs? Is this perhaps a bug in Base.Threads?

Note that if in `mod_col!`

I instead used `add_colrow!((), (D,U), ...)`

then I would get correct results. The buggy output appears only when I edit cols of D and rows of U in parallel.