A lot of memory allocations which I do not understand

fedoroff · March 23, 2020, 8:31am

I try to mimic the DifferentialEquations.jl solver where, as an example, I consider out of place second order Runge-Kutta method. The code below is the content of the .mem file obtained by running Julia with “–track-allocation=user” option.

        - using BenchmarkTools
        - using Profile
        - using PyPlot
        - import StaticArrays
        - 
        - function _tableau(T::Type)
        0     as = StaticArrays.SVector{1, T}(2/3)
        0     bs = StaticArrays.SVector{2, T}(1/4, 3/4)
        0     cs = StaticArrays.SVector{1, T}(2/3)
        0     return as, bs, cs
        - end
        - 
        - struct Problem{P}
        0     func :: Function
        -     u0 :: P
        -     tspan :: Tuple
        -     p :: Tuple
        - end
        - 
        - mutable struct Integrator{P, T}
        0     prob :: Problem{P}
        -     as :: StaticArrays.SVector{1, T}
        -     bs :: StaticArrays.SVector{2, T}
        -     cs :: StaticArrays.SVector{1, T}
        -     utmp :: P
        -     t :: T   # current time
        - end
        - 
        - function Integrator(prob::Problem{P}) where P
        0     u0 = prob.u0
        0     t0 = prob.tspan[1]
        0     T = typeof(t0)
        0     as, bs, cs = _tableau(T)
        -     utmp = zero(u0)
        0     return Integrator{P, T}(prob, as, bs, cs, utmp, t0)
        - end
        - 
        - function step!(integ::Integrator{P, T}, u::P, dt::T) where {P, T}
        0     func = integ.prob.func
        0     p = integ.prob.p
        - 
        0     a21, = integ.as
        0     b1, b2 = integ.bs
        0     c2, = integ.cs
        0     t = integ.t
        - 
124048000     k1 = func(u, p, t)
        - 
496192000     utmp = u + dt * a21 * k1
        0     ttmp = t + c2 * dt
124048000     k2 = func(utmp, p, ttmp)
        - 
1116432000     utmp = u + dt * (b1 * k1 + b2 * k2)
        - 
        0     integ.t = integ.t + dt
        - 
        0     return utmp
        - end
        - 
        - function solve!(integ, u, dt)
  2480960     integ.t = integ.prob.tspan[1]   # reinit!
        0     u[1] = integ.prob.u0
  4961920     for i=2:Nt
369663040         u[i] = step!(integ, u[i-1], dt)
        -     end
        0     return nothing
        - end
        - 
        - 
        - 
        - function func(u, p, t)
372144000     return p[1] * u
        - end
        - 
        - tmin, tmax, Nt = 0.0, 10.0, 101
        - t = range(tmin, tmax, length=Nt)
        - dt = t[2] - t[1]
        - 
        - u0 = 1.0
        - tspan = (0.0, 10.0)
        - p = (0.5, )
        - prob = Problem(func, u0, tspan, p)
        - integ = Integrator(prob)
        - 
        - u = zeros(Nt)
        - @btime solve!($integ, $u, $dt)
        - Profile.clear_malloc_data()
        - @btime solve!($integ, $u, $dt)
        - 
        - plot(t, u, "--")
        - show()
        -

At the same time, the @btime macro shows

64.797 μs (2001 allocations: 32.84 KiB)

I am very curious why there are so many allocations in this code. In the lines marked by the profiler there are no array operations at all, only scalar variables. Can you, please, explain me what is going on and how I can avoid these allocations. Thank you.

P.S. Just in case, the original code without the profiler marks:

using BenchmarkTools
using Profile
using PyPlot
import StaticArrays

function _tableau(T::Type)
    as = StaticArrays.SVector{1, T}(2/3)
    bs = StaticArrays.SVector{2, T}(1/4, 3/4)
    cs = StaticArrays.SVector{1, T}(2/3)
    return as, bs, cs
end

struct Problem{P}
    func :: Function
    u0 :: P
    tspan :: Tuple
    p :: Tuple
end

mutable struct Integrator{P, T}
    prob :: Problem{P}
    as :: StaticArrays.SVector{1, T}
    bs :: StaticArrays.SVector{2, T}
    cs :: StaticArrays.SVector{1, T}
    utmp :: P
    t :: T   # current time
end

function Integrator(prob::Problem{P}) where P
    u0 = prob.u0
    t0 = prob.tspan[1]
    T = typeof(t0)
    as, bs, cs = _tableau(T)
    utmp = zero(u0)
    return Integrator{P, T}(prob, as, bs, cs, utmp, t0)
end

function step!(integ::Integrator{P, T}, u::P, dt::T) where {P, T}
    func = integ.prob.func
    p = integ.prob.p

    a21, = integ.as
    b1, b2 = integ.bs
    c2, = integ.cs
    t = integ.t

    k1 = func(u, p, t)

    utmp = u + dt * a21 * k1
    ttmp = t + c2 * dt
    k2 = func(utmp, p, ttmp)

    utmp = u + dt * (b1 * k1 + b2 * k2)

    integ.t = integ.t + dt

    return utmp
end

function solve!(integ, u, dt)
    integ.t = integ.prob.tspan[1]   # reinit!
    u[1] = integ.prob.u0
    for i=2:Nt
        u[i] = step!(integ, u[i-1], dt)
    end
    return nothing
end



function func(u, p, t)
    return p[1] * u
end

tmin, tmax, Nt = 0.0, 10.0, 101
t = range(tmin, tmax, length=Nt)
dt = t[2] - t[1]

u0 = 1.0
tspan = (0.0, 10.0)
p = (0.5, )
prob = Problem(func, u0, tspan, p)
integ = Integrator(prob)

u = zeros(Nt)
@btime solve!($integ, $u, $dt)
Profile.clear_malloc_data()
@btime solve!($integ, $u, $dt)

plot(t, u, "--")
show()

kristoffer.carlsson · March 23, 2020, 9:21am

@code_warntype is your first friend. Here, it indicates that:

tspan and p in Problem are not concretely type (indeed, they are just Tuples).
A global non-const NT is used.
The Function in Problem is not parameterized on.

The fixed code is

here

using BenchmarkTools
using Profile
import StaticArrays

function _tableau(T::Type)
    as = StaticArrays.SVector{1, T}(2/3)
    bs = StaticArrays.SVector{2, T}(1/4, 3/4)
    cs = StaticArrays.SVector{1, T}(2/3)
    return as, bs, cs
end

struct Problem{P,F}
    func :: F
    u0 :: P
    tspan :: Tuple{Float64, Float64}
    p :: Tuple{Float64}
end

mutable struct Integrator{P, T, F}
    prob :: Problem{P, F}
    as :: StaticArrays.SVector{1, T}
    bs :: StaticArrays.SVector{2, T}
    cs :: StaticArrays.SVector{1, T}
    utmp :: P
    t :: T   # current time
end

function Integrator(prob::Problem)
    u0 = prob.u0
    t0 = prob.tspan[1]
    T = typeof(t0)
    as, bs, cs = _tableau(T)
    utmp = zero(u0)
    return Integrator(prob, as, bs, cs, utmp, t0)
end

function step!(integ::Integrator{P, T}, u::P, dt::T) where {P, T}
    func = integ.prob.func
    p = integ.prob.p

    a21, = integ.as
    b1, b2 = integ.bs
    c2, = integ.cs
    t = integ.t

    k1 = func(u, p, t)

    utmp = u + dt * a21 * k1
    ttmp = t + c2 * dt
    k2 = func(utmp, p, ttmp)

    utmp = u + dt * (b1 * k1 + b2 * k2)

    integ.t = integ.t + dt

    return utmp
end

function solve!(integ, u, dt)
    integ.t = integ.prob.tspan[1]   # reinit!
    u[1] = integ.prob.u0
    for i=2:Nt
        u[i] = step!(integ, u[i-1], dt)
    end
    return nothing
end



function func(u, p, t)
    return p[1] * u
end

tmin, tmax = 0.0, 10.0
const Nt = 101
t = range(tmin, tmax, length=Nt)
dt = t[2] - t[1]

u0 = 1.0
tspan = (0.0, 10.0)
p = (0.5, )
prob = Problem(func, u0, tspan, p)
integ = Integrator(prob)

u = zeros(Nt)
@btime solve!($integ, $u, $dt)

with the benchmark

@btime solve!($integ, $u, $dt)
  714.732 ns (0 allocations: 0 bytes)

fedoroff · March 23, 2020, 9:39am

Thank you very much. I still continue to enjoy the goodwill of the Julia community. Thank you.

Topic		Replies	Views
Reducing allocations to optimize (to match Java speed) Performance	22	1339	December 10, 2018
Confusing memory allocations when using the integrator of DifferentialEquations.jl Performance question , diffeq , memory-allocation	10	448	January 12, 2022
How to view intermediate steps in ODESolver during runtime without excess allocations? New to Julia differentialequation	16	263	September 3, 2024
Strange allocation in loop when calling function Performance	2	206	October 5, 2023
Memory allocation and performance Performance memory-allocation	7	641	June 26, 2020

A lot of memory allocations which I do not understand

Related topics