This is the last routine which does the sum:
@noinline function mapreduce_impl(f, op, A::AbstractArrayOrBroadcasted,
ifirst::Integer, ilast::Integer, blksize::Int)
if ifirst == ilast
@inbounds a1 = A[ifirst]
return mapreduce_first(f, op, a1)
elseif ifirst + blksize > ilast
# sequential portion
@inbounds a1 = A[ifirst]
@inbounds a2 = A[ifirst+1]
v = op(f(a1), f(a2))
@simd for i = ifirst + 2 : ilast
@inbounds ai = A[i]
v = op(v, f(ai))
end
return v
else
# pairwise portion
imid = (ifirst + ilast) >> 1
v1 = mapreduce_impl(f, op, A, ifirst, imid, blksize)
v2 = mapreduce_impl(f, op, A, imid+1, ilast, blksize)
return op(v1, v2)
end
end
In case of SparseArrays the above function is called with a view over only the non-zero values, which is SparseArrays.nzvalview(sp_mat)
.
You can see, that there is no difference now, by calling it directly:
julia> using SparseArrays, Statistics
julia> n = 5000
5000
julia> sp_mat = sprand(Float32,n,n,0.99)./20;
julia> sum(sp_mat)
618840.25f0
julia> inds = LinearIndices(SparseArrays.nzvalview(sp_mat));
julia> Base.mapreduce_impl(identity, +, SparseArrays.nzvalview(sp_mat), first(inds), last(inds), Base.pairwise_blocksize(identity, +))
618840.25f0
The sum algorithms sums up blockwise, with blocksize of 1024. (better short description below)
Summing up floating point numbers in different orders gives different results because of floating point precision.
You can find out which function is doing the actual calculation by using @which
and @less
with the next function you see in @less
, e.g.:
julia> @less sum(sp_mat)
@inline ($fname)(a::AbstractArray; dims=:, kw...) = ($_fname)(a, dims; kw...)
...
julia> @less Base._sum(sp_mat,:)
($_fname)(a, ::Colon; kw...) = ($_fname)(identity, a, :; kw...)
...
julia> @less Base._sum(identity,sp_mat,:)
($_fname)(f, a, ::Colon; kw...) = mapreduce(f, $op, a; kw...)
...
julia> @which Base._sum(identity,sp_mat,:)
#because we need to see how $op is defined, which is :add_sum
_sum(f, a, ::Colon; kw...) in Base at reducedim.jl:878
julia> @less mapreduce(identity,Base.add_sum,sp_mat,:)
mapreduce(f, op, itrs...; kw...) = reduce(op, Generator(f, itrs...); kw...)
...
And so on until you reach the worker function which does the actual work.
I like to do this sometimes, despite it can be tedious, because there is always so much to learn.