I’m trying to further optimize performance-critical function that locates values on a grid. Here I have tried using that the values will be sorted. And the bulk of the code is fast and has only 3 allocations, but then the final statement hits me with 2131 allocations Could you help me minimize them?
function locate_vector(
values::AbstractVector{<:Real},
gridpoints;
locate_below::Bool=false,
locate_above::Bool=false,
)
function incrementalsearch(low, value, gridpoints)
while value >= gridpoints[low + 1]
low += 1
end
return low
end
indices = ones(Integer, size(values))
weights = Vector{Tuple}(undef, size(values))
low = 1
high = length(gridpoints) - 1
for ix in eachindex(values)
if values[ix] <= gridpoints[1]
# indices[ix] = low = 1 # No need as indices and low are initialized to 1
if !locate_below
weights[ix] = (1.0, 0.0)
end
elseif values[ix] >= gridpoints[end]
indices[ix:end] .= length(gridpoints) - 1
if !locate_above
weights[ix:end] .= repeat((0.0, 1.0), length(weights[ix:end]))
end
break
elseif low == high
indices[ix] = high
else
indices[ix] = low = incrementalsearch(low, values[ix], gridpoints)
end
end
no_weights = filter(ix -> !isassigned(weights, ix), 1:length(weights))
weights[no_weights] .= map( # Crazy number of allocations
ix -> (
gridpoints[indices[ix] .+ 1] .- values[ix],
values[ix] .- gridpoints[indices[ix]]
) ./ (
gridpoints[indices[ix] .+ 1]
.- gridpoints[indices[ix]]
),
no_weights,
)
return indices, weights
end
using BenchmarkTools
gridpoints = collect(range(0.1, 0.9, 100));
values = sort(rand(101));
@btime locate_vector(values, gridpoints; locate_above=true);
# 256.400 μs (2134 allocations: 63.00 KiB)