I think the reason is to be found in the ordering of operations. Julia may reorder operations to get optimal performance, and if you sum floating-point numbers in different orders you may get different results.

Here is a small experiment illustrating this:

```
using Random
using LinearAlgebra

Random.seed!(125)                      # fixed seed so the run is reproducible
A = hcat(ones(200), randn(200, 100));  # 200×101 matrix: intercept column + noise
B = randn(200);
ApB = A'B;                             # the "all at once" product we will unroll
idx = [1, 2];

# -- same computation, partially unrolled: one dot product per column of A
ApB = [dot(view(A, :, c), B) for c in 1:101]
A2 = [dot(view(A, :, c), B) for c in idx]
ApB[idx] == A2 # true as expected

# -- repeat, but accumulate each dot product over a permuted element order;
# -- the two permutations differ, so the summation orders differ
rp1 = randperm(200)
rp2 = randperm(200)
Bp1 = B[rp1]
Bp2 = B[rp2]
ApB = [dot(view(A, rp1, c), Bp1) for c in 1:101]
A2 = [dot(view(A, rp2, c), Bp2) for c in idx]
ApB[idx] == A2 # false !
ApB[idx] ≈ A2 # still true
```