Hi!
I recently revised some aspects of a large project Iβm working on, just moving around some data/functions without altering the behavior of the code. However, it resulted in significant decrease in performance in seemingly other parts of the code.
The performance decrease stems from the calcInput! function of a struct I have (see the bottom of this post). There are some changes in how this function behaves (e.g. how \gamma parameter is handled), I am having performance issues in places never expected.
As an example, consider near the last line, u ./= asum
. Profiling in the old code with dt = 0.001 gives:
8 ...BaseController.jl:37; calcInput!(::SpecificationM...
7 ./broadcast.jl:751; materialize!
1 ./abstractarray.jl:75; axes
1 ./array.jl:155; size
6 ./broadcast.jl:792; copyto!
6 ./broadcast.jl:837; copyto!
6 ./simdloop.jl:73; macro expansion
5 ./broadcast.jl:838; macro expansion
2 ./array.jl:769; setindex!
3 ./broadcast.jl:507; getindex
2 ./broadcast.jl:546; _broadcast_getindex
2 ./broadcast.jl:570; _getindex
2 ./broadcast.jl:540; _broadcast_getindex
2 ./array.jl:731; getindex
1 ./broadcast.jl:547; _broadcast_getindex
1 ./broadcast.jl:574; _broadcast_getindex_evalf
1 ./float.jl:401; /
1 ./int.jl:53; macro expansion
While the new code produces:
1117 ...aseController.jl:37; calcInput!(::Specification...
16 ./broadcast.jl:1163; broadcasted(::Function, :...
382 ./broadcast.jl:1166; broadcasted(::Function, :...
330 ./broadcast.jl:1168; broadcasted
31 ./broadcast.jl:176; Base.Broadcast.Broadcaste...
20 ./broadcast.jl:176; Type
20 ./broadcast.jl:167; Type
21 ./broadcast.jl:751; materialize!(::Array{Floa...
11 ./broadcast.jl:792; copyto!
4 ./broadcast.jl:836; copyto!
4 ./broadcast.jl:819; preprocess
4 ./broadcast.jl:822; preprocess_args
4 ./broadcast.jl:823; preprocess_args
4 ./broadcast.jl:820; preprocess
4 ./broadcast.jl:813; broadcast_unalias
6 ./broadcast.jl:837; copyto!
6 ./simdloop.jl:73; macro expansion
6 ./broadcast.jl:838; macro expansion
5 ./broadcast.jl:507; getindex
2 ./broadcast.jl:546; _broadcast_getindex
2 ./broadcast.jl:570; _getindex
2 ./broadcast.jl:540; _broadcast_getindex
2 ./array.jl:731; getindex
3 ./float.jl:401; _broadcast_getindex
1 ./int.jl:53; +
1 ./simdloop.jl:0; copyto!
Just that line of division now takes an entire second in running my code, whereas I am operating on the same data types! Note that the way parameters are allocated and passed to this function, e.g. the input u
, as well as the number of times the function is called, has not changed. Even the line before, checking asum > 0
, now takes 57 ticks instead of the old 1, and profiling gives no indication on where that time is spent:
57 ...aseController.jl:36; calcInput!(::Specification...
6 ./operators.jl:286; >(::Float64, ::Int64)
5 ./float.jl:448; <
1 ./float.jl:488; <
1 ./float.jl:452; <
Any ideas on what I should look into that can produce such a behavior?
Thanks,
Tusike
Here is the code for the struct with the calcInput! function.
Before changes:
mutable struct SimpleBaseController <: AbstractBaseController
BTs::Vector{AbstractBarrierTransform}
Ξ::Vector{Float64}
# pre-allocate variables used in calculations
dΟdx::Vector{Float64}
vi::Vector{Float64}
function SimpleBaseController(BTs::Vector{AbstractBarrierTransform}, Ξ::Vector{Float64})
new(BTs, Ξ)
end
end
function init(specManager::SpecificationManager, bc::SimpleBaseController, agent::Agent)
# pre-allocate variables used in calculations
bc.dΟdx = Vector{Float64}(undef, agent.n)
bc.vi = Vector{Float64}(undef, agent.m)
end
function calcInput!(specManager::SpecificationManager, bc::SimpleBaseController, agent::Agent, Ξ³::Vector{Float64}, Ο::Vector{Float64}, t::Float64, u::Vector{Float64})
u .= 0.0
asum = 0.0
for i = 1:specManager.M
specManager.APs[i].dΟdx!(specManager.APs[i], agent.x, bc.dΟdx)
ΞΊ, Ξ = calcΞΊ(bc.BTs[i], Ξ³[i], Ο[i], t)
agent.dynamics.applyG!(agent.x, bc.dΟdx, bc.vi)
if (Ο[i] < Ξ)
ai = (Ξ - Ο[i])/(Ξ - Ξ³[i])
u .+= (ai*ΞΊ/(bc.Ξ[i] + dot(bc.vi,bc.vi)))*bc.vi
else
ai = 0.0
end
asum += ai
end
if (asum > 0)
u ./= asum
end
end
After changes:
mutable struct SimpleBaseController <: AbstractBaseController
BTs::Vector{AbstractBarrierTransform}
Ξ::Vector{Float64}
# pre-allocate variables used in calculations
dΟdx::Vector{Float64}
vi::Vector{Float64}
function SimpleBaseController(BTs::Vector{AbstractBarrierTransform}, Ξ::Vector{Float64})
new(BTs, Ξ)
end
end
function init(specManager::SpecificationManager, bc::SimpleBaseController, agent::Agent)
# pre-allocate variables used in calculations
bc.dΟdx = Vector{Float64}(undef, agent.dynamics.n)
bc.vi = Vector{Float64}(undef, agent.dynamics.m)
end
function calcInput!(specManager::SpecificationManager, bc::SimpleBaseController, agent::Agent, Ο::Vector{Float64}, tIndex::Int, t::Float64, u::Vector{Float64})
u .= 0.0
asum = 0.0
for i = 1:specManager.M
specManager.TSs[i].AP.dΟdx!(specManager.TSs[i].AP, agent.x, bc.dΟdx)
ΞΊ, Ξ = calcΞΊ(bc.BTs[i], specManager.TSs[i].Ξ³[tIndex], Ο[i], t)
agent.dynamics.applyG!(agent.x, bc.dΟdx, bc.vi)
if (Ο[i] < Ξ)
ai = (Ξ - Ο[i])/(Ξ - specManager.TSs[i].Ξ³[tIndex])
u .+= (ai*ΞΊ/(bc.Ξ[i] + dot(bc.vi,bc.vi)))*bc.vi
else
ai = 0.0
end
asum += ai
end
if (asum > 0)
u ./= asum
end
end