Optimizing Multidimensional Discrete Trapezoidal Integral

and now we beat the heck out of numpy

Best Version

using BenchmarkTools
@inline function bringlast(T::Tuple,el) 
    ifelse(el==T[1],
        (bringlast(Base.tail(T),el)...,T[1]) ,
        (T[1],bringlast(Base.tail(T),el)...)
    )
end
@inline function bringlast(T::Tuple{},el); T; end

function trapz(x::T1, y::T2) where {N,fT,T1<:AbstractVector{fT},T2<:AbstractArray{fT,N}}
    n = length(x)
    s = size(y)
    @assert s[end]==n
    @inbounds begin
        r =  zeros(fT,Base.reverse(Base.tail(Base.reverse(s))))
        id(i) = (ntuple(k->:,N-1)...,i)
        if n == 1; return r; end
        for i in 2:n-1
           @fastmath r .+= (x[i+1] - x[i-1]) .* view(y,id(i)...)
        end
        r .+= (x[end]-x[end-1]) .* view(y,id(n)...) + (x[2] - x[1]).* view(y,id(1)...)
        return r./2
    end
end

function trapz(x::T1, y::T2, axis::T3) where {N,fT,T1<:AbstractVector{fT},T2<:AbstractArray{fT,N},T3<:Integer}
    @assert 1<=axis<=N
    trapz(x,PermutedDimsArray(y,bringlast(ntuple(identity,N),axis)))
end

trapz (generic function with 3 methods)

Example Usage:

vx=range(0,1,length=100)
vy=range(0,2,length=200)
vz=range(0,3,length=300)
M=[x^2+y^2+z^2 for x=vx,y=vy,z=vz]
@show trapz(vx,trapz(vy,trapz(vz,M)));

M2=ones(100,3)
@show trapz(vx,M2,1);

trapz(vx, trapz(vy, trapz(vz, M))) = 28.00030370797026
trapz(vx, M2, 1) = [1.0, 1.0, 1.0]

Benchmarks

@benchmark trapz($vy,$M,$(2))
BenchmarkTools.Trial: 
  memory estimate:  1.16 MiB
  allocs estimate:  220
  --------------
  minimum time:     8.477 ms (0.00% GC)
  median time:      8.595 ms (0.00% GC)
  mean time:        8.709 ms (0.66% GC)
  maximum time:     10.798 ms (10.34% GC)
  --------------
  samples:          574
  evals/sample:     1
@benchmark trapz($vx,trapz($vy,trapz($vz,$M,3),2),1)
BenchmarkTools.Trial: 
  memory estimate:  819.89 KiB
  allocs estimate:  635
  --------------
  minimum time:     6.700 ms (0.00% GC)
  median time:      6.819 ms (0.00% GC)
  mean time:        6.904 ms (0.63% GC)
  maximum time:     9.999 ms (0.00% GC)
  --------------
  samples:          724
  evals/sample:     1
@benchmark trapz($vx,trapz($vy,trapz($vz,$M)))
BenchmarkTools.Trial: 
  memory estimate:  818.83 KiB
  allocs estimate:  614
  --------------
  minimum time:     6.348 ms (0.00% GC)
  median time:      6.501 ms (0.00% GC)
  mean time:        6.666 ms (0.67% GC)
  maximum time:     9.819 ms (12.43% GC)
  --------------
  samples:          749
  evals/sample:     1

Benchmark, when used inefficiently:

This code is optimized in order to perform the integral the fastest over the last dimension first, here instead we are performing integral in opposite order e.g. first x, then y, at last over z

@benchmark trapz($vz,trapz($vy,trapz($vx,$M,1),1),1)
BenchmarkTools.Trial: 
  memory estimate:  2.33 MiB
  allocs estimate:  635
  --------------
  minimum time:     59.702 ms (0.00% GC)
  median time:      60.982 ms (0.00% GC)
  mean time:        61.582 ms (0.20% GC)
  maximum time:     69.332 ms (0.00% GC)
  --------------
  samples:          82
  evals/sample:     1

sorry for the many replies, this last one finishes my crusade :smiley:

one more point to clarify, numpy version was already used in the most efficient manner
indeed to fastest numpy time is

%%timeit
np.trapz(np.trapz(np.trapz(M,x,axis=0),y,axis=0),z,axis=0)
59.3 ms ± 1.45 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)

while the slowest is

%%timeit
np.trapz(np.trapz(np.trapz(M,z,axis=2),y,axis=1),x,axis=0)
74.7 ms ± 1.5 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)

Update:

Turned this bit of code in a small self contained package

7 Likes