If you want performance, you will probably need to write a custom function. This should be close to optimal:
julia> function rev_cumsum!(c, x)
           # Reverse cumulative sum: on return, c[i] holds x[i] + x[i+1] + ... + x[end]
           # for every linear index i of x. Writes into c in place and returns c.
           idx = firstindex(x):lastindex(x)
           # The loop below runs under @inbounds, which disables bounds checking, so
           # validate once up front that every index of x is also a valid index of c.
           # Throws a BoundsError instead of silently writing out of bounds when c is
           # too small; a larger c is still accepted and only the indices of x are
           # overwritten.
           checkbounds(c, idx)
           # Running total; deliberately not named `cumsum` to avoid shadowing
           # Base.cumsum inside the function.
           acc = zero(eltype(x))
           @inbounds for i in reverse(idx)
               acc += x[i]
               c[i] = acc
           end
           return c
       end
rev_cumsum! (generic function with 1 method)
julia> function rev_cumsum(x)
           # Allocating variant: create an output array with the same element type
           # and shape as x, fill it with the reverse cumulative sum of x, and
           # return it. x itself is not modified.
           out = similar(x)
           rev_cumsum!(out, x)
           return out
       end
rev_cumsum (generic function with 1 method)
julia> x = rand(1:10,1000);
julia> @btime reverse(cumsum(reverse($x)));
2.516 μs (3 allocations: 23.81 KiB)
julia> @btime rev_cumsum($x);
570.379 ns (1 allocation: 7.94 KiB)
julia> @btime rev_cumsum!(c,$x) setup=(c=similar(x));
364.524 ns (0 allocations: 0 bytes)
(Edit: added @inbounds, which significantly improves performance.)