materializes a vector of Bools of length(x), hence the slowdown.
But, there is still a bit more to optimize, as:
still searches the vector x
from the start each time, and not from the last position - this is an O(n^2)-time algorithm, and as x
grows it will get worse.
On a different matter, looking at the bin counts in the results:
julia> ans2 = bin2(x, y, xbin_edges)
([0.05250221420254687, 0.15445935346995493, 0.2509019085421154, 0.34946848820087534, 0.4489906779469514, 0.5480592397942313, 0.6459265647762795, 0.7482272000306962, 0.8492190264532677, 0.0], [0.479559946078053; 0.5409834483815402; … ; 0.49189476192543485; 0.0;;], [100, 95, 95, 106, 107, 112, 99, 91, 87, 0])
julia> sum([100, 95, 95, 106, 107, 112, 99, 91, 87, 0])
892
892 isn't a good sum, as it is, eh… less than 1000. This is a correctness bug.
Here is another version, which hopefully addresses this bug (and probably adds some others):
"""
    bin3(x, y, xbin_edges; method = mean)

Group the samples of `x`, and the matching rows of `y`, into the half-open bins
`[xbin_edges[k], xbin_edges[k+1])` and reduce each bin with `method`.

# Arguments
- `x`: sample positions. If not already sorted, a sorted copy is used internally;
  the caller's vector is never modified.
- `y`: vector or matrix with `size(y, 1) == length(x)`; row `i` belongs to `x[i]`.
- `xbin_edges`: non-decreasing bin edges; `length(xbin_edges) - 1` bins are produced.

# Keywords
- `method`: reducer applied per bin. It is called as `method(v)` on the bin's `x`
  values and as `method(m; dims = 1)` on the bin's `y` rows, so it must accept the
  `dims` keyword. The default `mean` has to be in scope (e.g. `using Statistics`).

# Returns
A tuple `(x_binned, y_binned, bin_count)`:
- `x_binned`: `n_bin`-element vector with element type `eltype(x)`,
- `y_binned`: `n_bin × size(y, 2)` matrix with element type `eltype(y)`,
- `bin_count`: number of samples that fell into each bin.

Samples below the first edge, or at or above the last edge, belong to no bin.
Empty bins are not passed to `method`; they keep a zero entry in `x_binned` and
`y_binned` and a count of `0`.

# Throws
`ErrorException` if `xbin_edges` is not sorted, if `y` has more than two dimensions,
or if `x` and `y` differ in their number of rows.
"""
function bin3(
    x::Vector{<:Number},
    y::Array{<:Number},
    xbin_edges::Union{Vector{<:Number},StepRangeLen{}};
    method::F = mean,
) where {F<:Function}
    issorted(xbin_edges; lt = <) || error("xbin_edges not sorted")
    ndims(y) > 2 && error("y must be a vector or matrix")
    size(x, 1) == size(y, 1) || error("x and y must have the same number of rows")

    # Sort along x so that every bin is one contiguous index range.
    if !issorted(x)
        p = sortperm(x)
        x = x[p]
        # Permute whole rows: a plain `y[p]` would index a matrix linearly and
        # silently discard every column but the first.
        y = ndims(y) == 1 ? y[p] : y[p, :]
    end

    # ind[k] is the position of the first sorted sample that is >= edge k, or
    # length(x) + 1 when there is none. Each lookup is independent, so edges that
    # fall between the same two samples correctly resolve to the same position.
    ind = [searchsortedfirst(x, be; lt = <) for be in xbin_edges]

    # initialize outputs
    n_bin = length(xbin_edges) - 1
    x_binned = zeros(eltype(x), n_bin)
    y_binned = zeros(eltype(y), n_bin, size(y, 2))
    bin_count = zeros(Int, size(x_binned))

    # find binned values
    for k in eachindex(x_binned)
        rows = ind[k]:(ind[k+1]-1)
        bin_count[k] = length(rows)
        # Leave the zero placeholders for empty bins; many reducers return NaN
        # or throw when handed an empty collection.
        isempty(rows) && continue
        x_binned[k] = method(@view x[rows])
        y_binned[k, :] = method((@view y[rows, :]); dims = 1)
    end
    return x_binned, y_binned, bin_count
end
Edit: As PeterSimon pointed out, using findnext
would be more readable (should have remembered this).