In C, I pass around pointers and lengths, constructing views with pointer arithmetic. This is annoying to debug, but quite performant. Is it possible to get equivalent performance with Julia views? I am okay with segfaulting on out of bounds reads or writes.
So far, I’ve tried three view-like approaches, and have not been able to match the performance of manual pointer handling.
My implementations and benchmarks
abstract type ViewLike{T} <: AbstractVector{T} end
struct PointerView{T} <: ViewLike{T}
ptr::Ptr{T}
len::Int
end
Base.view(v::PointerView, range::UnitRange) = PointerView(v.ptr+(first(range)-1)*sizeof(v.ptr), length(range))
Base.getindex(v::PointerView, i::Int) = unsafe_load(v.ptr, i)
Base.setindex!(v::PointerView, x, i::Int) = unsafe_store!(v.ptr, x, i)
Base.size(v::PointerView) = (v.len,)
PointerView(v::Vector) = PointerView(pointer(v), length(v))
struct LoHiView{T} <: ViewLike{T}
data::Vector{T}
lo::Int
hi::Int
end
Base.view(v::LoHiView, range::UnitRange) = LoHiView(v.data, v.lo+first(range)-1, v.lo+last(range)-1)
Base.getindex(v::LoHiView, i::Int) = @inbounds v.data[v.lo+i-1]
Base.setindex!(v::LoHiView, x, i::Int) = @inbounds v.data[v.lo+i-1] = x
Base.size(v::LoHiView) = (v.hi-v.lo+1,)
HiLoView(v::AbstractVector) = LoHiView(v, firstindex(v), lastindex(v))
struct ViewView{T} <: ViewLike{T}
data::SubArray{T, 1, Vector{T}, Tuple{UnitRange{Int64}}, true}
end
Base.view(v::ViewView, range::UnitRange) = @inbounds ViewView(view(v.data, range))
Base.getindex(v::ViewView, i::Int) = @inbounds v.data[i]
Base.setindex!(v::ViewView, x, i::Int) = @inbounds v.data[i] = x
Base.size(v::ViewView) = size(v.data)
ViewView(v::AbstractVector) = ViewView(view(v, 1:length(v)))
function merge!(target::ViewLike, source::ViewLike)
target_i = source_i = 1
source_in_target_i = length(source)+1
hi = length(target)
while target_i < source_in_target_i <= hi
if target[source_in_target_i] < source[source_i]
x = target[source_in_target_i]
source_in_target_i += 1
else
x = source[source_i]
source_i += 1
end
target[target_i] = x
target_i += 1
end
while target_i < source_in_target_i
x = source[source_i]
target[target_i] = x
source_i += 1
target_i += 1
end
end
function mergesort_to!(target::ViewLike, source::ViewLike)
len = length(source)
len > 0 || return
if len == 1
target[1] = source[1]
return
end
m = len >> 1
mergesort_to!(view(target, m+1:len), view(source, m+1:len))
mergesort!(view(source, 1:m), view(target, 1:m))
merge!(target, view(source, 1:m))
end
function mergesort!(data::ViewLike, scratch::ViewLike)
len = length(data)
len > 1 || return
m = len >> 1
mergesort!(view(data, m+1:len), scratch)
mergesort_to!(view(scratch, 1:m), view(data, 1:m))
merge!(data, view(scratch, 1:m))
end
function mergesort_pointer!(v::AbstractVector, scratch::AbstractVector=similar(v, length(v)>>1))
GC.@preserve v scratch mergesort!(PointerView(v), PointerView(scratch))
v
end
function mergesort_lohi!(v::AbstractVector, scratch::AbstractVector=similar(v, length(v)>>1))
mergesort!(HiLoView(v), HiLoView(scratch))
v
end
function mergesort_view!(v::AbstractVector, scratch::AbstractVector=similar(v, length(v)>>1))
mergesort!(ViewView(v), ViewView(scratch))
v
end
# -------------------------
function merge!(target::Ptr, source::Ptr, offset::Int, source_len::Int)
# assert source[1:source_len] does not share memory with target[1:offset+source_len]
target_i = source_i = 1
source_in_target_i = 1+offset
hi = source_len + offset
while target_i < source_in_target_i <= hi
if unsafe_load(target, source_in_target_i) < unsafe_load(source, source_i)
x = unsafe_load(target, source_in_target_i)
source_in_target_i += 1
else
x = unsafe_load(source, source_i)
source_i += 1
end
unsafe_store!(target, x, target_i)
target_i += 1
end
while target_i < source_in_target_i
x = unsafe_load(source, source_i)
unsafe_store!(target, x, target_i)
source_i += 1
target_i += 1
end
end
function mergesort_to!(target::Ptr, source::Ptr, len::Int)
len > 0 || return
if len == 1
unsafe_store!(target, unsafe_load(source, 1), 1)
return
end
m = len >> 1
mergesort_to!(target+m*sizeof(target), source+m*sizeof(source), len-m)
mergesort!(source, m, target)
merge!(target, source, m, len-m)
end
function mergesort!(source::Ptr, len::Int, scratch::Ptr)
len > 1 || return
m = len >> 1
mergesort!(source+m*sizeof(source), len-m, scratch)
mergesort_to!(scratch, source, m)
merge!(source, scratch, m, len-m)
end
function mergesort_pointer_manual!(v::AbstractVector, scratch::AbstractVector=similar(v, length(v)>>1))
GC.@preserve v scratch mergesort!(pointer(v), length(v), pointer(scratch))
v
end
# ---------------------
using Random
function test(f, n=500)
for i in 1:n
for _ in 1:n ÷ i
x = randperm(i)
sort(x) == f(copy(x)) || return x
end
end
end
function test(n::Int=500)
for f in (mergesort_pointer_manual!, mergesort_pointer!, mergesort_lohi!, mergesort_view!)
x = test(f, n)
x === nothing || return f, x
end
end
# ---------------------
using BenchmarkTools
@btime mergesort_pointer_manual!(x) setup=(x=rand(100)) evals=1;
# 2.250 μs (1 allocation: 496 bytes)
@btime mergesort_pointer!(x) setup=(x=rand(100)) evals=1;
# 2.583 μs (1 allocation: 496 bytes)
@btime mergesort_lohi!(x) setup=(x=rand(100)) evals=1;
# 3.250 μs (1 allocation: 496 bytes)
@btime mergesort_view!(x) setup=(x=rand(100)) evals=1;
# 3.333 μs (1 allocation: 496 bytes)