Hi, in my project I have to define a sequence of operations that need to performed on some data. Two ways of capturing this in a struct came to my mind: either store them as a vector of operations (FooVec
) (which suggests some typing issues in my @code_warntype calls) or in a recursive definition (FooRec
).
I found that in my toy example, FooVec
is much quicker despite there being some types that are not being inferred in the function. I was wondering: are both my recursive methods of calculating with FooRec
inefficient, or should I expect the vector-version to be much quicker? And in that case, is there a way to improve type inference, or is this unavoidable?
using BenchmarkTools
abstract type OpType end
struct Op1 <: OpType end
run_op!(x, ::Op1) = sqrt(x)
struct Op2 <: OpType end
run_op!(x, ::Op2) = (x+Ο)^2
struct FooVec{T <: OpType}
op_vec::Vector{T}
end
abstract type FooRecType end
struct FooRecEnd <: FooRecType end
struct FooRec{T<:OpType, U<:FooRecType} <: FooRecType
op::T
remainder::U
end
function MakeFooRec(op_vec::Vector{T}) where T<:OpType
op_rec = FooRecEnd()
for op in reverse(op_vec)
op_rec = FooRec(op, op_rec)
end
return op_rec
end
function run_ops(foo_vec::FooVec)
x = 0.
for op in foo_vec.op_vec
x = run_op!(x, op)
end
return x
end
function run_ops(foo_rec::FooRec)
x = 0.
return run_ops_inner!(x, foo_rec)
end
run_ops_inner!(x, ::FooRecEnd) = x
function run_ops_inner!(x, foo_rec::FooRec)
x = run_op!(x, foo_rec.op)
run_ops_inner!(x, foo_rec.remainder)
end
function run_ops_alt(foo_rec::FooRec)
x = 0.
while true
x = run_op!(x, foo_rec.op)
foo_rec = foo_rec.remainder
if foo_rec isa FooRecEnd
break
end
end
return x
end
function RunTestsBasic()
Op1(); #Force compilation of Op1
Op2(); #Force compilation of Op2
end
function RunTests_Vec(N; verbose=false)
op_vec = [n % 2 == 0 ? Op1() : Op2() for n in 1:N]
foo_vec_timed = @timed FooVec(op_vec)
verbose && display("foo_vec took $(foo_vec_timed.time) seconds to compile and create the object. ")
foo_vec_compute = @timed run_ops(foo_vec_timed.value);
verbose && @code_warntype run_ops(foo_vec_timed.value)
verbose && display("foo_vec took $(foo_vec_compute.time) seconds to compute. ")
verbose && display(foo_vec_compute.value)
end
function RunTests_Rec(N; verbose=false)
op_vec = [n % 2 == 0 ? Op1() : Op2() for n in 1:N]
foo_rec_timed = @timed MakeFooRec(op_vec)
verbose && display("foo_rec took $(foo_rec_timed.time) seconds to compile and create the object. ")
foo_rec_compute = @timed run_ops(foo_rec_timed.value);
verbose && @code_warntype run_ops(foo_rec_timed.value)
verbose && display("foo_rec took $(foo_rec_compute.time) seconds to compute. ")
verbose && display(foo_rec_compute.value)
end
function RunTests_Rec2(N; verbose=false)
op_vec = [n % 2 == 0 ? Op1() : Op2() for n in 1:N]
foo_rec = MakeFooRec(op_vec)
verbose && display("FooRec didn't need compilation")
foo_rec_compute = @timed run_ops_alt(foo_rec);
verbose && @code_warntype run_ops_alt(foo_rec)
verbose && display("foo_rec alt took $(foo_rec_compute.time) seconds to compute. ")
verbose && display(foo_rec_compute.value)
end
RunTestsBasic()
N = 5
RunTests_Vec(N, verbose=true)
RunTests_Rec(N, verbose=true)
RunTests_Rec2(N, verbose=true)
N_glob = 400
RunTests_Vec(N_glob)
RunTests_Rec(N_glob)
RunTests_Rec2(N_glob)
bench1 = @benchmark RunTests_Vec($N_glob)
display(bench1)
bench2 = @benchmark RunTests_Rec($N_glob)
display(bench2)
bench3 = @benchmark RunTests_Rec2($N_glob)
display(bench3)
The non-benchmarked lines give the following output, where I added comments to lines that are red:
"foo_vec took 2.5e-7 seconds to compile and create the object. "
MethodInstance for run_ops(::FooVec{OpType})
from run_ops(foo_vec::FooVec) @ Main ~/Documents/ConOptStop_Julia_v0.1/test_compilation_speeds.jl:29
Arguments
#self#::Core.Const(Main.run_ops)
foo_vec::FooVec{OpType}
Locals
@_3::Union{Nothing, Tuple{OpType, Int64}} # RED
x::Float64
op::OpType # RED
Body::Float64
1 β (x = 0.0)
β %2 = Base.getproperty(foo_vec, :op_vec)::Vector{OpType}
β (@_3 = Base.iterate(%2))
β %4 = @_3::Union{Nothing, Tuple{OpType, Int64}} # RED
β %5 = (%4 === nothing)::Bool
β %6 = Base.not_int(%5)::Bool
βββ goto #4 if not %6
2 β %8 = @_3::Tuple{OpType, Int64}
β (op = Core.getfield(%8, 1)) # RED
β %10 = Core.getfield(%8, 2)::Int64
β %11 = x::Float64
β %12 = op::OpType # RED
β (x = Main.run_op!(%11, %12))
β (@_3 = Base.iterate(%2, %10))
β %15 = @_3::Union{Nothing, Tuple{OpType, Int64}} # RED
β %16 = (%15 === nothing)::Bool
β %17 = Base.not_int(%16)::Bool
βββ goto #4 if not %17
3 β goto #2
4 β %20 = x::Float64
βββ return %20
"foo_vec took 6.375e-6 seconds to compute. "
88.82643960980423
"foo_rec took 0.000125958 seconds to compile and create the object. "
MethodInstance for run_ops(::FooRec{Op2, FooRec{Op1, FooRec{Op2, FooRec{Op1, FooRec{Op2, FooRecEnd}}}}})
from run_ops(foo_rec::FooRec) @ Main ~/Documents/ConOptStop_Julia_v0.1/test_compilation_speeds.jl:37
Arguments
#self#::Core.Const(Main.run_ops)
foo_rec::Core.Const(FooRec{Op2, FooRec{Op1, FooRec{Op2, FooRec{Op1, FooRec{Op2, FooRecEnd}}}}}(Op2(), FooRec{Op1, FooRec{Op2, FooRec{Op1, FooRec{Op2, FooRecEnd}}}}(Op1(), FooRec{Op2, FooRec{Op1, FooRec{Op2, FooRecEnd}}}(Op2(), FooRec{Op1, FooRec{Op2, FooRecEnd}}(Op1(), FooRec{Op2, FooRecEnd}(Op2(), FooRecEnd()))))))
Locals
x::Float64
Body::Float64
1 β (x = 0.0)
β %2 = x::Core.Const(0.0)
β %3 = Main.run_ops_inner!(%2, foo_rec)::Core.Const(88.82643960980423)
βββ return %3
"foo_rec took 0.002928167 seconds to compute. "
88.82643960980423
"FooRec didn't need compilation"
MethodInstance for run_ops_alt(::FooRec{Op2, FooRec{Op1, FooRec{Op2, FooRec{Op1, FooRec{Op2, FooRecEnd}}}}})
from run_ops_alt(foo_rec::FooRec) @ Main ~/Documents/ConOptStop_Julia_v0.1/test_compilation_speeds.jl:48
Arguments
#self#::Core.Const(Main.run_ops_alt)
foo_rec@_2::Core.Const(FooRec{Op2, FooRec{Op1, FooRec{Op2, FooRec{Op1, FooRec{Op2, FooRecEnd}}}}}(Op2(), FooRec{Op1, FooRec{Op2, FooRec{Op1, FooRec{Op2, FooRecEnd}}}}(Op1(), FooRec{Op2, FooRec{Op1, FooRec{Op2, FooRecEnd}}}(Op2(), FooRec{Op1, FooRec{Op2, FooRecEnd}}(Op1(), FooRec{Op2, FooRecEnd}(Op2(), FooRecEnd()))))))
Locals
x::Float64
foo_rec@_4::Any # RED
Body::Float64
1 β (foo_rec@_4 = foo_rec@_2)
βββ (x = 0.0)
2 β goto #6 if not true
3 β %4 = Main.run_op!::Core.Const(Main.run_op!)
β %5 = x::Float64
β %6 = foo_rec@_4::Any # RED
β %7 = Base.getproperty(%6, :op)::Any # RED
β (x = (%4)(%5, %7))
β %9 = foo_rec@_4::Any # RED
β (foo_rec@_4 = Base.getproperty(%9, :remainder))
β %11 = foo_rec@_4::Any # RED
β %12 = (%11 isa Main.FooRecEnd)::Bool
βββ goto #5 if not %12
4 β goto #6
5 β goto #2
6 β %16 = x::Float64
βββ return %16
The benchmarking steps give:
BenchmarkTools.Trial: 10000 samples with 10 evaluations.
Range (min β¦ max): 1.829 ΞΌs β¦ 949.221 ΞΌs β GC (min β¦ max): 0.00% β¦ 99.53%
Time (median): 2.054 ΞΌs β GC (median): 0.00%
Time (mean Β± Ο): 2.320 ΞΌs Β± 12.976 ΞΌs β GC (mean Β± Ο): 7.88% Β± 1.41%
ββ
βββββββββββββββββββ β
ββββββββββββββββββββββββββββββββββββββββ
βββββ
ββ
β
β
β
ββ
ββββββ
β
β
1.83 ΞΌs Histogram: log(frequency) by time 3.59 ΞΌs <
Memory estimate: 3.83 KiB, allocs estimate: 18.
BenchmarkTools.Trial: 10000 samples with 1 evaluation.
Range (min β¦ max): 70.166 ΞΌs β¦ 204.625 ΞΌs β GC (min β¦ max): 0.00% β¦ 0.00%
Time (median): 71.500 ΞΌs β GC (median): 0.00%
Time (mean Β± Ο): 72.862 ΞΌs Β± 4.999 ΞΌs β GC (mean Β± Ο): 0.00% Β± 0.00%
ββββββ
ββββββββ β
ββββββββββββββββββ
ββββββββββββββββββ
β
ββββββ
βββ
ββ
ββ
ββ
β
β
ββ
ββ
ββ
β
70.2 ΞΌs Histogram: log(frequency) by time 95.5 ΞΌs <
Memory estimate: 7.00 KiB, allocs estimate: 22.
BenchmarkTools.Trial: 10000 samples with 1 evaluation.
Range (min β¦ max): 206.583 ΞΌs β¦ 568.875 ΞΌs β GC (min β¦ max): 0.00% β¦ 0.00%
Time (median): 213.250 ΞΌs β GC (median): 0.00%
Time (mean Β± Ο): 216.658 ΞΌs Β± 12.829 ΞΌs β GC (mean Β± Ο): 0.00% Β± 0.00%
ββ
βββββββ
β
βββββββββββββββ β
βββββββββββββββββββββββββββββββββββββββββββ
β
β
ββ
β
β
β
β
β
β
ββ
β
βββ
β
β
β
207 ΞΌs Histogram: log(frequency) by time 276 ΞΌs <
Memory estimate: 6.45 KiB, allocs estimate: 9.