A lot of memory allocations which I do not understand

I try to mimic the DifferentialEquations.jl solver where, as an example, I consider out of place second order Runge-Kutta method. The code below is the content of the .mem file obtained by running Julia with “–track-allocation=user” option.

        - using BenchmarkTools
        - using Profile
        - using PyPlot
        - import StaticArrays
        - 
        - function _tableau(T::Type)
        0     as = StaticArrays.SVector{1, T}(2/3)
        0     bs = StaticArrays.SVector{2, T}(1/4, 3/4)
        0     cs = StaticArrays.SVector{1, T}(2/3)
        0     return as, bs, cs
        - end
        - 
        - struct Problem{P}
        0     func :: Function
        -     u0 :: P
        -     tspan :: Tuple
        -     p :: Tuple
        - end
        - 
        - mutable struct Integrator{P, T}
        0     prob :: Problem{P}
        -     as :: StaticArrays.SVector{1, T}
        -     bs :: StaticArrays.SVector{2, T}
        -     cs :: StaticArrays.SVector{1, T}
        -     utmp :: P
        -     t :: T   # current time
        - end
        - 
        - function Integrator(prob::Problem{P}) where P
        0     u0 = prob.u0
        0     t0 = prob.tspan[1]
        0     T = typeof(t0)
        0     as, bs, cs = _tableau(T)
        -     utmp = zero(u0)
        0     return Integrator{P, T}(prob, as, bs, cs, utmp, t0)
        - end
        - 
        - function step!(integ::Integrator{P, T}, u::P, dt::T) where {P, T}
        0     func = integ.prob.func
        0     p = integ.prob.p
        - 
        0     a21, = integ.as
        0     b1, b2 = integ.bs
        0     c2, = integ.cs
        0     t = integ.t
        - 
124048000     k1 = func(u, p, t)
        - 
496192000     utmp = u + dt * a21 * k1
        0     ttmp = t + c2 * dt
124048000     k2 = func(utmp, p, ttmp)
        - 
1116432000     utmp = u + dt * (b1 * k1 + b2 * k2)
        - 
        0     integ.t = integ.t + dt
        - 
        0     return utmp
        - end
        - 
        - function solve!(integ, u, dt)
  2480960     integ.t = integ.prob.tspan[1]   # reinit!
        0     u[1] = integ.prob.u0
  4961920     for i=2:Nt
369663040         u[i] = step!(integ, u[i-1], dt)
        -     end
        0     return nothing
        - end
        - 
        - 
        - 
        - function func(u, p, t)
372144000     return p[1] * u
        - end
        - 
        - tmin, tmax, Nt = 0.0, 10.0, 101
        - t = range(tmin, tmax, length=Nt)
        - dt = t[2] - t[1]
        - 
        - u0 = 1.0
        - tspan = (0.0, 10.0)
        - p = (0.5, )
        - prob = Problem(func, u0, tspan, p)
        - integ = Integrator(prob)
        - 
        - u = zeros(Nt)
        - @btime solve!($integ, $u, $dt)
        - Profile.clear_malloc_data()
        - @btime solve!($integ, $u, $dt)
        - 
        - plot(t, u, "--")
        - show()
        - 

At the same time, the @btime macro shows

64.797 μs (2001 allocations: 32.84 KiB)

I am very curious why there are so many allocations in this code. In the lines marked by the profiler there are no array operations at all, only scalar variables. Can you, please, explain me what is going on and how I can avoid these allocations. Thank you.

P.S. Just in case, the original code without the profiler marks:

using BenchmarkTools
using Profile
using PyPlot
import StaticArrays

function _tableau(T::Type)
    as = StaticArrays.SVector{1, T}(2/3)
    bs = StaticArrays.SVector{2, T}(1/4, 3/4)
    cs = StaticArrays.SVector{1, T}(2/3)
    return as, bs, cs
end

struct Problem{P}
    func :: Function
    u0 :: P
    tspan :: Tuple
    p :: Tuple
end

mutable struct Integrator{P, T}
    prob :: Problem{P}
    as :: StaticArrays.SVector{1, T}
    bs :: StaticArrays.SVector{2, T}
    cs :: StaticArrays.SVector{1, T}
    utmp :: P
    t :: T   # current time
end

function Integrator(prob::Problem{P}) where P
    u0 = prob.u0
    t0 = prob.tspan[1]
    T = typeof(t0)
    as, bs, cs = _tableau(T)
    utmp = zero(u0)
    return Integrator{P, T}(prob, as, bs, cs, utmp, t0)
end

function step!(integ::Integrator{P, T}, u::P, dt::T) where {P, T}
    func = integ.prob.func
    p = integ.prob.p

    a21, = integ.as
    b1, b2 = integ.bs
    c2, = integ.cs
    t = integ.t

    k1 = func(u, p, t)

    utmp = u + dt * a21 * k1
    ttmp = t + c2 * dt
    k2 = func(utmp, p, ttmp)

    utmp = u + dt * (b1 * k1 + b2 * k2)

    integ.t = integ.t + dt

    return utmp
end

function solve!(integ, u, dt)
    integ.t = integ.prob.tspan[1]   # reinit!
    u[1] = integ.prob.u0
    for i=2:Nt
        u[i] = step!(integ, u[i-1], dt)
    end
    return nothing
end



function func(u, p, t)
    return p[1] * u
end

tmin, tmax, Nt = 0.0, 10.0, 101
t = range(tmin, tmax, length=Nt)
dt = t[2] - t[1]

u0 = 1.0
tspan = (0.0, 10.0)
p = (0.5, )
prob = Problem(func, u0, tspan, p)
integ = Integrator(prob)

u = zeros(Nt)
@btime solve!($integ, $u, $dt)
Profile.clear_malloc_data()
@btime solve!($integ, $u, $dt)

plot(t, u, "--")
show()

@code_warntype is your first friend. Here, it indicates that:

  • tspan and p in Problem are not concretely type (indeed, they are just Tuples).
  • A global non-const NT is used.
  • The Function in Problem is not parameterized on.

The fixed code is

here
using BenchmarkTools
using Profile
import StaticArrays

function _tableau(T::Type)
    as = StaticArrays.SVector{1, T}(2/3)
    bs = StaticArrays.SVector{2, T}(1/4, 3/4)
    cs = StaticArrays.SVector{1, T}(2/3)
    return as, bs, cs
end

struct Problem{P,F}
    func :: F
    u0 :: P
    tspan :: Tuple{Float64, Float64}
    p :: Tuple{Float64}
end

mutable struct Integrator{P, T, F}
    prob :: Problem{P, F}
    as :: StaticArrays.SVector{1, T}
    bs :: StaticArrays.SVector{2, T}
    cs :: StaticArrays.SVector{1, T}
    utmp :: P
    t :: T   # current time
end

function Integrator(prob::Problem)
    u0 = prob.u0
    t0 = prob.tspan[1]
    T = typeof(t0)
    as, bs, cs = _tableau(T)
    utmp = zero(u0)
    return Integrator(prob, as, bs, cs, utmp, t0)
end

function step!(integ::Integrator{P, T}, u::P, dt::T) where {P, T}
    func = integ.prob.func
    p = integ.prob.p

    a21, = integ.as
    b1, b2 = integ.bs
    c2, = integ.cs
    t = integ.t

    k1 = func(u, p, t)

    utmp = u + dt * a21 * k1
    ttmp = t + c2 * dt
    k2 = func(utmp, p, ttmp)

    utmp = u + dt * (b1 * k1 + b2 * k2)

    integ.t = integ.t + dt

    return utmp
end

function solve!(integ, u, dt)
    integ.t = integ.prob.tspan[1]   # reinit!
    u[1] = integ.prob.u0
    for i=2:Nt
        u[i] = step!(integ, u[i-1], dt)
    end
    return nothing
end



function func(u, p, t)
    return p[1] * u
end

tmin, tmax = 0.0, 10.0
const Nt = 101
t = range(tmin, tmax, length=Nt)
dt = t[2] - t[1]

u0 = 1.0
tspan = (0.0, 10.0)
p = (0.5, )
prob = Problem(func, u0, tspan, p)
integ = Integrator(prob)

u = zeros(Nt)
@btime solve!($integ, $u, $dt)

with the benchmark

@btime solve!($integ, $u, $dt)
  714.732 ns (0 allocations: 0 bytes)
5 Likes

Thank you very much. I still continue to enjoy the goodwill of the Julia community. Thank you.