Sorting in-place several Vector's simultaneously

What’s happening here is that @btime repeats the code block sort!(Zip2(x, y)) multiple times and reports the lowest execution time. But after the first run x and y are already sorted, so subsequent runs will be much faster. In this case you should use setup=..., which will be executed before every run, to set x and y.

Also, the default alg seems to perform really poorly here. (The default by/lt is lexicographical, so essentially the same as by=first.)

julia> @btime sort!(Zip2(xc,yc)) setup=(t=deepcopy.(($x, $y)); xc = t[1]; yc = t[2])  # (or just xc=deepcopy($x); yc=deepcopy($y)
  7.259 s (132638930 allocations: 4.03 GiB)
10000000-element Zip2{Int64, Float64}:
 (1, 6.993507e6)
 (1, 7.686177e6)
 (2, 3.0)
 (2, 2.562542e6)
...

julia> @btime sort!(Zip2(xc,yc), alg=QuickSort) setup=(t=deepcopy.(($x, $y)); xc = t[1]; yc = t[2])  # (or just xc=deepcopy($x); yc=deepcopy($y)
  838.394 ms (0 allocations: 0 bytes)
10000000-element Zip2{Int64, Float64}:
 (1, 6.993507e6)
 (1, 7.686177e6)
 (2, 3.0)
 (2, 2.562542e6)
...

You can check that the problem with Zip lies within setindex!:

julia> z = Zip((x, y));

julia> @btime ($z)[1] = (2, 3.);
  29.719 ns (2 allocations: 48 bytes)

@code_warntype doesn’t show any issues, but I’m pretty sure these are hidden by the foreach. If you use the equivalent

function Base.setindex!(z::Zip, vals::Tuple, i::Int)
    for j in eachindex(vals)
        z.vec[j][i] = vals[j]
    end
end

you get

julia> @code_warntype z[1] = (2, 3.)
MethodInstance for setindex!(::Zip{Tuple{Vector{Int64}, Vector{Float64}}}, ::Tuple{Int64, Float64}, ::Int64)
  from setindex!(z::Zip, vals::Tuple, i::Int64) @ Main REPL[37]:1
Arguments
  #self#::Core.Const(setindex!)
  z::Zip{Tuple{Vector{Int64}, Vector{Float64}}}
  vals::Tuple{Int64, Float64}
  i::Int64
Locals
  @_5::Union{Nothing, Tuple{Int64, Int64}}
  j::Int64
Body::Nothing
1 ─ %1  = Main.eachindex(vals)::Core.Const(Base.OneTo(2))
│         (@_5 = Base.iterate(%1))
│   %3  = @_5::Core.Const((1, 1))
│   %4  = (%3 === nothing)::Core.Const(false)
│   %5  = Base.not_int(%4)::Core.Const(true)
└──       goto #4 if not %5
2 ┄ %7  = @_5::Tuple{Int64, Int64}
│         (j = Core.getfield(%7, 1))
│   %9  = Core.getfield(%7, 2)::Int64
│   %10 = j::Int64
│   %11 = Base.getindex(vals, %10)::Union{Float64, Int64}
│   %12 = Base.getproperty(z, :vec)::Tuple{Vector{Int64}, Vector{Float64}}
│   %13 = j::Int64
│   %14 = Base.getindex(%12, %13)::Union{Vector{Float64}, Vector{Int64}}
│         Base.setindex!(%14, %11, i)
│         (@_5 = Base.iterate(%1, %9))
│   %17 = @_5::Union{Nothing, Tuple{Int64, Int64}}
│   %18 = (%17 === nothing)::Bool
│   %19 = Base.not_int(%18)::Bool
└──       goto #4 if not %19
3 ─       goto #2
4 ┄       return nothing

(it’s clearer in the REPL where you get colour coding). The issue is that the compiler does not know the type of vals[j] (it’s either Int64 or Float64, but no idea which one). The same applies for z.vec[j][i]. This is also the reason that the (Float64, Float64) example does work fine.


Simply unrolling the loop gives the compiler enough information: since vals isa Tuple{Int, Float64} and z.vec isa Tuple{Vector{Int64}, Vector{Float64}}, it knows that e.g. vals[1] isa Int, as is z.vec[1][i].

function Base.setindex!(z::Zip, vals::Tuple, i::Int)
    z.vec[1][i] = vals[1]
    z.vec[2][i] = vals[2]
end
@btime sort!(Zip((x, y)), by=first, alg=QuickSort) setup=(x=rand(1:$n,$n);y=rand(1.0:$n,$n))
#   799.739 ms (0 allocations: 0 bytes)
# 10000000-element Zip{Tuple{Vector{Int64}, Vector{Float64}}}:

The downside is that to unroll the loop you have to know length(z.vec) or length(vals) (which feels like it really should be inferable from Tuple{Int, Float64}, but apparently isn’t) in advance. Zip2 solves the issue essentially in this way, by restricting to precisely two vectors x and y.


To be more generic you could add length information in the Zip type.

struct ZipN{N, T<:Tuple} <: AbstractVector{Tuple}
    vec::T
    ZipN{N, T}(vecs::T) where {N, T} = length(vecs) == N ? new(vecs) : error("Lengths don't match!")
end
	   
ZipN(vecs) = ZipN{length(vecs), typeof(vecs)}(vecs)

Base.length(z::ZipN{N}) where N = minimum(length(z.vec[i]) for i = 1:N)  # truncate to shortest vector
Base.size(z::ZipN) = (length(z),)  #  for AbstractVector compliance
Base.getindex(z::ZipN, i::Int) = map(v -> v[i], z.vec)  # Get tuple (x[i], y[i], z[i], ...)
# (Could probably exploit knowledge of N here, as we did for length)

To unroll the loop in setindex! you can use a generated function.

@generated function Base.setindex!(z::ZipN{N}, vals::Tuple, i::Int) where N
    ex = :()
    for j = 1:N
        ex = :($ex; z.vec[$j][i] = vals[$j])
	end
    return ex
end

This should then be essentially as fast as Zip2, while allowing for arbitrary many elements in the Tuples:

julia> @btime sort!(ZipN((x, y)), alg=QuickSort) setup=(x=rand(1:$n,$n);y=rand(1.0:$n,$n))
  847.480 ms (0 allocations: 0 bytes)
10000000-element ZipN{2, Tuple{Vector{Int64}, Vector{Float64}}}:
 (1, 9.908178e6)
 (2, 8.058796e6)
 (3, 8.157882e6)
 (4, 8.805712e6)
...

julia> @btime sort!(ZipN((x, y, z)), alg=QuickSort) setup=(x=rand(1:$n,$n);y=rand(1.0:$n,$n);z=rand(1:$n, $n))
  965.327 ms (0 allocations: 0 bytes)
10000000-element ZipN{3, Tuple{Vector{Int64}, Vector{Float64}, Vector{Int64}}}:
 (1, 1.640825e6, 2285048)
 (2, 429818.0, 4893870)
 (4, 4.449581e6, 3214969)
 (5, 1.02763e6, 3245078)
...
3 Likes