I want to load several variables of different bitstypes from a packed byte vector.
For example, Int16
, Int32
, Int16
from 8-byte chunk of memory.
I’ve tried several approaches to find a most efficient way:
- Using
IOBuffer
. - Using raw pointers with manual unrolled loop.
- Using
map
with raw piointers. - Using
generated
function that produce code with raw pointers.
So, I have a couple of questions:
- Why a local IOBuffer cannot be optimized-out and still allocates?
- Why
map
is boxingind
variable from outer scope and how to deal with? - Is that possible to use type inference to write fast function without
@generated
code?
Source code:
function load_io(vect::Vector{UInt8})
io = IOBuffer(vect)
x1 = read(io, Int16)
x2 = read(io, Int32)
x3 = read(io, Int16)
x1, x2, x3
end
function load_unroll(vect::Vector{UInt8})
GC.@preserve vect begin
ind::Int = 1
ptr1::Ptr{Int16} = pointer(vect, ind)
x1 = unsafe_load(ptr1)
ind += sizeof(Int16)
ptr2::Ptr{Int32} = pointer(vect, ind)
x2 = unsafe_load(ptr2)
ind += sizeof(Int32)
ptr3::Ptr{Int16} = pointer(vect, ind)
x3 = unsafe_load(ptr3)
ind += sizeof(Int16)
end
x1, x2, x3
end
function load_map(vect::Vector{UInt8})
GC.@preserve vect begin
ind::Int = 1
out = map((Int16, Int32, Int16)) do T
p::Ptr{T} = pointer(vect, ind)
x = unsafe_load(p)
ind += sizeof(T)
x
end
end
out
end
@generated function load_gen(vect::Vector{UInt8})
exprs = Expr[]
for T in (Int16, Int32, Int16)
ex = quote
let
p::Ptr{$T} = pointer(vect, ind)
x = unsafe_load(p)
ind += sizeof($T)
x
end
end
push!(exprs, ex)
end
loop_unroll = :(tuple($(exprs...)))
out_expr = quote
GC.@preserve vect begin
ind::Int = 1
$loop_unroll
end
end
return out_expr
end
using BenchmarkTools
bytes = UInt8[0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8]
load_io(bytes) == load_unroll(bytes) == load_map(bytes) == load_gen(bytes)
@btime load_io($bytes) # 14.227 ns (1 allocation: 64 bytes)
@btime load_unroll($bytes) # 1.399 ns (0 allocations: 0 bytes)
@btime load_map($bytes) # 866.129 ns (13 allocations: 256 bytes)
@btime load_gen($bytes) # 1.399 ns (0 allocations: 0 bytes)
@code_warntype load_io(bytes)
@code_warntype load_unroll(bytes)
@code_warntype load_map(bytes) # why ind is Core.Box?
@code_warntype load_gen(bytes)