I’m really surprised by some heap allocations being made within a package I maintain. I don’t expect them because:
- I’ve used Cthulhu to ensure that all variables in the code are concretely typed.
- The variables in question are all fairly small (192 bytes max!), including one Int64.
- Some of the variables are mutable structs, but none of these escape the function in which they’re created.
- Several of the variables are immutable return values, and annotating the return type of the relevant functions makes no difference; they’re concretely typed anyway.
As well as using Cthulhu, I’ve used the allocation profiler to see where and what the allocations are, but given what I said above, I’m unable to understand why the allocations are occurring (in Julia Version 1.12.6). It’s my belief that I should be able to avoid them, and that the code should (in principle) require no heap allocations at all.
Here is a screenshot of the allocations being made:
Does anyone have any further tips for understanding the cause of such allocations?
If anyone wants to recreate the problem, you can do so with the following code, which should (using the latest NLLSsolver, v4.0.5) produce the same graph as above:
using NLLSsolver, Random, Static, StaticArrays, LinearAlgebra, Profile, PProf
# Simple affine projection transform: each of the two output coordinates is the
# dot product of one static sub-range of the 6-element pose with the 3D point X.
function NLLSsolver.generatemeasurement(
    pose::EuclideanVector{6, T}, X::EuclideanVector{3, U}
) where {T, U}
    firstrow = @inbounds view(pose, NLLSsolver.SR(1, 3))
    secondrow = @inbounds view(pose, NLLSsolver.SR(4, 6))
    return SVector(dot(firstrow, X), dot(secondrow, X))
end
# Residual (cost) type handed to `NLLSProblem` in `create_ba_problem`.
# Declared `const` because it is read from inside a function: a non-constant
# global has no fixed type, so any code that touches it cannot be inferred.
const MyResType = SimpleError2{2, Float64, EuclideanVector{6, Float64}, EuclideanVector{3, Float64}}
# Build a synthetic bundle-adjustment style NLLSProblem.
#
# Arguments:
#   ncameras    - number of 6-element camera variables to add
#   nlandmarks  - number of 3-element landmark variables to add
#   propvisible - proportion of camera/landmark pairs that receive a measurement
#
# Variables are stored cameras first (indices 1:ncameras), landmarks after
# (indices ncameras+1:ncameras+nlandmarks). Returns the populated problem.
function create_ba_problem(ncameras, nlandmarks, propvisible)
    problem = NLLSProblem(
        Union{EuclideanVector{6, Float64}, EuclideanVector{3, Float64}}, MyResType
    )

    # Cameras: normally distributed 6-vectors shifted by a fixed offset
    cameraoffset = SVector(1.0, 0.0, 0.0, 0.0, 1.0, 0.0)
    for _ in 1:ncameras
        addvariable!(problem, randn(EuclideanVector{6, Float64}) .+ cameraoffset)
    end

    # Landmarks: `rand` samples shifted by a fixed offset
    landmarkoffset = SVector(-0.5, -0.5, 10.0)
    for _ in 1:nlandmarks
        addvariable!(problem, rand(EuclideanVector{3, Float64}) .+ landmarkoffset)
    end

    # Score every (camera, landmark) pair by the distance between the camera index
    # and the landmark's position on a linear scale from 2 to ncameras-1, then keep
    # the pairs whose score is within the `propvisible` proportion of smallest scores.
    cameraindices = repeat(vec(1:ncameras); outer=(1, nlandmarks))
    scores = abs.(cameraindices .- LinRange(2, ncameras - 1, nlandmarks)')
    cutoff = sort(vec(scores))[ceil(Int, length(scores) * propvisible)]
    visibility = scores .<= cutoff

    # Add one cost per visible pair, measured from the current variable values
    for camind in 1:ncameras
        camera = problem.variables[camind]::EuclideanVector{6, Float64}
        for landmark in 1:nlandmarks
            visibility[camind, landmark] || continue
            landmarkind = landmark + ncameras
            point = problem.variables[landmarkind]::EuclideanVector{3, Float64}
            measurement = generatemeasurement(camera, point)
            cost = SimpleError2{EuclideanVector{6, Float64}, EuclideanVector{3, Float64}}(
                measurement, camind, landmarkind
            )
            addcost!(problem, cost)
        end
    end

    return problem
end
# Add Gaussian noise to every variable of `problem`, in place.
#
# 3-element variables receive `randn * pointnoise`; every other variable is
# asserted to be a 6-element variable and receives `randn * posenoise`.
# Returns the same (mutated) problem.
function perturb_ba_problem(problem, pointnoise, posenoise)
    for ind in 1:lastindex(problem.variables)
        current = problem.variables[ind]
        if current isa EuclideanVector{3, Float64}
            noise = randn(SVector{3, Float64}) * pointnoise
            problem.variables[ind] = current + noise
        else
            # Anything that is not a landmark must be a camera; fail loudly otherwise
            pose = current::EuclideanVector{6, Float64}
            noise = randn(SVector{6, Float64}) * posenoise
            problem.variables[ind] = pose + noise
        end
    end
    return problem
end
# Seed the global RNG so the generated problem is the same on every run
Random.seed!(1)
# 3 cameras and 1 landmark with every pair visible (propvisible = 1.0), then
# perturbed with point noise only (the pose noise scale is 0.0)
problem = perturb_ba_problem(create_ba_problem(3, 1, 1.0), 0.003, 0.0)
# Single-threaded solve, with the thread count given as a static value
options = NLLSOptions(; numthreads=static(1))
# Profile the allocations made by one `optimize!` call.
#
# `optimize!` is first run once outside the profiler (presumably a warm-up so
# that compilation is not recorded - confirm), the problem is perturbed again,
# previously collected allocation data is cleared, and a second `optimize!` is
# run under the allocation profiler with a sample rate of 1.0.
# The value of the profiled expression is returned.
function myfun(problem, options)
    optimize!(problem, options, 4)
    perturbed = perturb_ba_problem(problem, 0.003, 0.0)
    Profile.clear_malloc_data()
    Profile.Allocs.clear()
    return Profile.Allocs.@profile sample_rate=1.0 optimize!(perturbed, options, 4)
end
# Collect the allocation profile for the problem set up above
myfun(problem, options)
# Export the collected allocation profile with PProf, with `from_c` disabled
PProf.Allocs.pprof(; from_c=false)
