Memory allocation when solving ODE with Vern7 and ContinuousCallback

Hello,

I went through a memory allocation problem in a ODE system, when using Vern7() algorithm and ContinuousCallback. The minimal working example is the following:

using LinearAlgebra
using SparseArrays
using OrdinaryDiffEq
using DiffEqCallbacks
using BenchmarkTools

A = Diagonal(Array{ComplexF64}(1:20))
A += spdiagm(1=>sqrt.(0.01 .* Array(1:19)))
A -= 0.01im * Diagonal(Array{ComplexF64}(1:20))
u0 = rand(ComplexF64, 20)
prm = (U = A, rand_n = Ref(rand()))

my_condition_discrete(u, t, integrator) = real(dot(u, u)) < integrator.p.rand_n[]
my_condition_continuous(u, t, integrator) = integrator.p.random_n[] - real(dot(u, u))
my_affect!(integrator) = u_modified!(integrator, false)
cb1 = DiscreteCallback(my_condition_discrete, my_affect!, save_positions=(false, false))
cb2 = ContinuousCallback(my_condition_continuous, my_affect!, nothing, save_positions=(false, false))

prob1 = ODEProblem((du,u,p,t)->mul!(du, p.U, u), u0, (0.0, 100.0), prm, callback=cb1)
prob2 = ODEProblem((du,u,p,t)->mul!(du, p.U, u), u0, (0.0, 100.0), prm, callback=cb2)



@benchmark sol1 = solve($prob1, $Vern7(), saveat=$[100.0])

BenchmarkTools.Trial: 9021 samples with 1 evaluation.
 Range (min … max):  383.100 ΞΌs …   5.543 ms  β”Š GC (min … max): 0.00% … 0.00%
 Time  (median):     475.300 ΞΌs               β”Š GC (median):    0.00%
 Time  (mean Β± Οƒ):   546.050 ΞΌs Β± 198.079 ΞΌs  β”Š GC (mean Β± Οƒ):  0.07% Β± 0.91%

  β–ˆβ–ƒ                                                             
  β–ˆβ–ˆβ–ˆβ–‡β–‡β–†β–†β–…β–…β–„β–…β–„β–ƒβ–ƒβ–ƒβ–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–β–‚β–β–β–‚β–β–β–β–β–β–β–β–β–β–β–β–β–β–β–β–β–β–β–β– β–‚
  383 ΞΌs           Histogram: frequency by time          1.2 ms <

 Memory estimate: 10.34 KiB, allocs estimate: 50.




@benchmark sol2 = solve($prob2, $Vern7(), saveat=$[100.0])

BenchmarkTools.Trial: 2566 samples with 1 evaluation.
 Range (min … max):  1.274 ms …  12.947 ms  β”Š GC (min … max): 0.00% … 79.46%
 Time  (median):     1.709 ms               β”Š GC (median):    0.00%
 Time  (mean Β± Οƒ):   1.931 ms Β± 783.272 ΞΌs  β”Š GC (mean Β± Οƒ):  2.63% Β±  7.29%

  β–‡β–ˆβ–†β–ˆβ–‡β–„β–…β–ƒβ–ƒβ–‚β–                                                  
  β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‡β–ˆβ–†β–…β–…β–…β–„β–„β–„β–„β–…β–„β–„β–…β–ƒβ–„β–ƒβ–ƒβ–ƒβ–ƒβ–ƒβ–ƒβ–ƒβ–‚β–‚β–ƒβ–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–β–‚β–β–‚β–‚β–‚β–‚β–‚β–β–‚β–‚ β–„
  1.27 ms         Histogram: frequency by time        4.71 ms <

 Memory estimate: 784.83 KiB, allocs estimate: 2032.

While it doesn’t happen for example with the Tsit5() algorithm:

@benchmark sol1 = solve($prob1, $Tsit5(), saveat=$[100.0])

BenchmarkTools.Trial: 10000 samples with 1 evaluation.
 Range (min … max):  309.600 ΞΌs …   5.215 ms  β”Š GC (min … max): 0.00% … 89.28%
 Time  (median):     392.600 ΞΌs               β”Š GC (median):    0.00%
 Time  (mean Β± Οƒ):   436.892 ΞΌs Β± 136.050 ΞΌs  β”Š GC (mean Β± Οƒ):  0.11% Β±  0.89%

   β–‡β–ˆ                                                            
  β–…β–ˆβ–ˆβ–ˆβ–‡β–†β–ˆβ–‡β–…β–†β–„β–…β–„β–„β–…β–„β–ƒβ–„β–ƒβ–„β–„β–ƒβ–ƒβ–ƒβ–ƒβ–ƒβ–ƒβ–ƒβ–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–β–β–‚β–β–β–β–β–β–β–β–β–β–β–β–β–β– β–‚
  310 ΞΌs           Histogram: frequency by time          859 ΞΌs <

 Memory estimate: 9.44 KiB, allocs estimate: 48.


@benchmark sol2 = solve($prob2, $Tsit5(), saveat=$[100.0])

BenchmarkTools.Trial: 4593 samples with 1 evaluation.
 Range (min … max):  744.500 ΞΌs …   3.187 ms  β”Š GC (min … max): 0.00% … 0.00%
 Time  (median):     968.600 ΞΌs               β”Š GC (median):    0.00%
 Time  (mean Β± Οƒ):     1.079 ms Β± 306.638 ΞΌs  β”Š GC (mean Β± Οƒ):  0.00% Β± 0.00%

    β–ˆβ–ƒβ–ƒ                                                          
  β–ƒβ–†β–ˆβ–ˆβ–ˆβ–‡β–‡β–‡β–‡β–†β–„β–„β–„β–…β–…β–„β–„β–„β–„β–ƒβ–„β–„β–ƒβ–ƒβ–ƒβ–ƒβ–ƒβ–ƒβ–ƒβ–‚β–‚β–‚β–‚β–ƒβ–‚β–‚β–‚β–‚β–‚β–‚β–‚β–‚β–β–‚β–‚β–‚β–‚β–‚β–β–‚β–β–β–β–β–β–β–β–β–β–β– β–‚
  744 ΞΌs           Histogram: frequency by time         2.07 ms <

 Memory estimate: 9.69 KiB, allocs estimate: 48.
1 Like

Yes it’s something odd with type inference. This is a good example of it and I’ll see if someone from the compiler team can help figure out what it is. I’ve seen this for years but haven’t figured out how to handle it yet, but this is a cleaner example than the others I had.

2 Likes
using LinearAlgebra
using SparseArrays
using OrdinaryDiffEq
using DiffEqCallbacks
using BenchmarkTools

A = Diagonal(Array{ComplexF64}(1:20))
A += spdiagm(1=>sqrt.(0.01 .* Array(1:19)))
A -= 0.01im * Diagonal(Array{ComplexF64}(1:20))
u0 = rand(ComplexF64, 20)
prm = (U = A, rand_n = Ref(rand()))

my_condition_discrete(u, t, integrator) = real(dot(u, u)) < integrator.p.rand_n[]
my_condition_continuous(u, t, integrator) = integrator.p.rand_n[] - real(dot(u, u))
my_affect!(integrator) = u_modified!(integrator, false)
cb1 = DiscreteCallback(my_condition_discrete, my_affect!, save_positions=(false, false))
cb2 = ContinuousCallback(my_condition_continuous, my_affect!, nothing, save_positions=(false, false))

prob1 = ODEProblem((du,u,p,t)->mul!(du, p.U, u), u0, (0.0, 100.0), prm, callback=cb1)
prob2 = ODEProblem((du,u,p,t)->mul!(du, p.U, u), u0, (0.0, 100.0), prm, callback=cb2)



@benchmark sol1 = solve($prob1, $Vern7(lazy=false), saveat=$[100.0])

@benchmark sol2 = solve($prob2, $Vern7(lazy=false), saveat=$[100.0])

The actual reason for this is because Vern7 is uses a lazy interpolation which only extends to every f call if the interpolation is used, but if you use a ContinuousCallback it will always extend, in which case simply turning off the lazy interpolation would be more efficient.