The following is a simplified version of a problem I’m working on. I get different answers depending on whether or not I use threads. This has got to be some sort of race condition, but I"m not sure how to straighten it out.
x0_vals = [[-1.],[1.],[2.],[5.]];
n_runs = 10;
function f!(x)
    for k in 1:10
        @. x += -0.5 * x
    end
    x
end
function class(x)
   return Int(x[1]>0) 
end
nx = length(x0_vals);
svals = zeros(Int,nx);
x = similar(x0_vals[1]);
without threads:
for l in 1:nx
    for k in 1:n_runs
        x .= deepcopy(x0_vals[l]);
        @. x +=k;
        f!(x);
        svals[l] += class(x);
    end
end
svals
4-element Vector{Int64}:
  9
 10
 10
 10
with threads:
@. svals = 0;
for l in 1:nx
    Threads.@threads for k in 1:n_runs
        x .= deepcopy(x0_vals[l]);
        @. x +=k;
        f!(x);
        svals[l] += class(x);
    end
end
svals 
4-element Vector{Int64}:
  6
  8
 10
  8
As should be clear, this is an entirely deterministic problem.