I recently discovered the bijou64 variable-length integer encoding. The reference implementation is in Rust, and it appears to outperform LEB128 for most encoding and decoding. While I donβt have any need for bijou64 in my own work, I saw it as a tractable challenge to learn about profiling, benchmarking, etc.[1]
I put together a small package to implement the specification.[2] I have been reading the performance tips and experimenting with variations to get closer to the Rust performance.
I noticed clear improvement by reducing the number of allocations (see this), but itβs not obvious to me where to look next. Type instability seems to be limited to Base.iterate and pretty minor situations. I added @inbounds in a few places to little effect. Without any supporting data, I was about to explore replacing reinterpret with bit operations when I decided I might have better luck reaching out to the discourse community for suggestions.
Source Code
The full source is here: Bijou64.jl/src/Bijou64.jl at main Β· KyleSJohnston/Bijou64.jl Β· GitHub.
The function in question:
"""
encode(values)
Encode `values` using the bijou64 variable-length integer encoding into a Vector{UInt8}
`values` must be a `Vector{T}`, where `T` may be UInt8, UInt16, UInt32, or UInt64.
"""
function encode(values::Vector{T})::Vector{UInt8} where {T <: UNSIGNED}
if length(values) == 0
return UInt8[]
end
# Pre-allocate an array for the results and fill it.
# `maxbytes` inspired by https://github.com/davidssmith/LittleEndianBase128.jl/blob/85f2c1e6b8041e9bcfbab897e673a0a45186d3db/src/LittleEndianBase128.jl#L38
# Because `bytes` will always be large enough for all of `values`, `@inbounds` can be
# used when indexing into `bytes`.
maxbytes = length(values) * (0x01 + value2tier(typemax(T)))
bytes = Vector{UInt8}(undef, maxbytes)
# Pre-allocate payload array for `reinterpret` in the loop.
# This avoids incurring the cost of temporary array construction
# during each iteration.
# The eltype is UInt64 because `tier2offset` always returns a UInt64.
payload = Vector{UInt64}(undef, 1)
i = firstindex(bytes)
for v in values
if v < 248 # T(248)
@inbounds bytes[i] = v
else
tier = value2tier(v)
@inbounds bytes[i] = tier2tag(tier)
payload[1] = hton(v - tier2offset(tier)) # big-endian unsigned integer
payload_bytes = @views reinterpret(UInt8, payload)[end-tier+1 : end]
for pb in payload_bytes
i = nextind(bytes, i)
@inbounds bytes[i] = pb
end
end
i = nextind(bytes, i)
end
return bytes[begin:prevind(bytes, i)]
end
Type Stability
(Bijou64) pkg> precompile
julia> using Bijou64
julia> const x = rand(UInt64(248):UInt64(65_535), 4086);
julia> @time Bijou64.encode(x);
0.000020 seconds (8 allocations: 48.141 KiB)
julia> @code_warntype Bijou64.encode(x);
MethodInstance for Bijou64.encode(::Vector{UInt64})
from encode(values::Vector{T}) where T<:Union{UInt16, UInt32, UInt64, UInt8} @ Bijou64 ~/vcs/Bijou64.jl/src/Bijou64.jl:143
Static Parameters
T = UInt64
Arguments
#self#::Core.Const(Bijou64.encode)
values::Vector{UInt64}
Locals
@_3::Union{Nothing, Tuple{UInt64, Int64}}
i::Int64
payload::Vector{UInt64}
bytes::Vector{UInt8}
maxbytes::Int64
@_8::Union{Nothing, Tuple{UInt8, Tuple{Base.OneTo{Int64}, Int64}}}
val@_9::UInt8
val@_10::UInt64
v::UInt64
payload_bytes::SubArray{UInt8, 1, Base.ReinterpretArray{UInt8, 1, UInt64, Vector{UInt64}, false}, Tuple{UnitRange{Int64}}, true}
tier::UInt8
S#277::Base.ReinterpretArray{UInt8, 1, UInt64, Vector{UInt64}, false}
val@_15::UInt8
pb::UInt8
@_17::Vector{UInt8}
@_18::Vector{UInt8}
Body::Vector{UInt8}
1 ββ Core.NewvarNode(:(@_3))
β Core.NewvarNode(:(i))
β Core.NewvarNode(:(payload))
β Core.NewvarNode(:(bytes))
β Core.NewvarNode(:(maxbytes))
β %6 = Bijou64.Vector::Core.Const(Vector)
β %7 = Bijou64.UInt8::Core.Const(UInt8)
β %8 = Core.apply_type(%6, %7)::Core.Const(Vector{UInt8})
β %9 = Bijou64.:(==)::Core.Const(==)
β %10 = Bijou64.length::Core.Const(length)
β %11 = (%10)(values)::Int64
β %12 = (%9)(%11, 0)::Bool
ββββ goto #6 if not %12
2 ββ %14 = Bijou64.UInt8::Core.Const(UInt8)
β %15 = Base.getindex(%14)::Vector{UInt8}
β (@_17 = %15)
β %17 = @_17::Vector{UInt8}
β %18 = (%17 isa %8)::Core.Const(true)
ββββ goto #4 if not %18
3 ββ goto #5
4 ββ Core.Const(:(@_17))
β Core.Const(:(Base.convert(%8, %21)))
ββββ Core.Const(:(@_17 = Core.typeassert(%22, %8)))
5 ββ %24 = @_17::Vector{UInt8}
ββββ return %24
6 ββ %26 = Bijou64.:*::Core.Const(*)
β %27 = Bijou64.length::Core.Const(length)
β %28 = (%27)(values)::Int64
β %29 = Bijou64.:+::Core.Const(+)
β %30 = Bijou64.value2tier::Core.Const(Bijou64.value2tier)
β %31 = Bijou64.typemax::Core.Const(typemax)
β %32 = $(Expr(:static_parameter, 1))::Core.Const(UInt64)
β %33 = (%31)(%32)::Core.Const(0xffffffffffffffff)
β %34 = (%30)(%33)::Core.Const(0x08)
β %35 = (%29)(0x01, %34)::Core.Const(0x09)
β (maxbytes = (%26)(%28, %35))
β %37 = Bijou64.Vector::Core.Const(Vector)
β %38 = Bijou64.UInt8::Core.Const(UInt8)
β %39 = Core.apply_type(%37, %38)::Core.Const(Vector{UInt8})
β %40 = Bijou64.undef::Core.Const(UndefInitializer())
β %41 = maxbytes::Int64
β (bytes = (%39)(%40, %41))
β %43 = Bijou64.Vector::Core.Const(Vector)
β %44 = Bijou64.UInt64::Core.Const(UInt64)
β %45 = Core.apply_type(%43, %44)::Core.Const(Vector{UInt64})
β %46 = Bijou64.undef::Core.Const(UndefInitializer())
β (payload = (%45)(%46, 1))
β %48 = Bijou64.firstindex::Core.Const(firstindex)
β %49 = bytes::Vector{UInt8}
β (i = (%48)(%49))
β %51 = values::Vector{UInt64}
β (@_3 = Base.iterate(%51))
β %53 = @_3::Union{Nothing, Tuple{UInt64, Int64}}
β %54 = (%53 === nothing)::Bool
β %55 = Base.not_int(%54)::Bool
ββββ goto #14 if not %55
7 ββ Core.NewvarNode(:(@_8))
β Core.NewvarNode(:(val@_9))
β Core.NewvarNode(:(val@_10))
β Core.NewvarNode(:(payload_bytes))
β Core.NewvarNode(:(tier))
β %62 = @_3::Tuple{UInt64, Int64}
β (v = Core.getfield(%62, 1))
β %64 = Core.getfield(%62, 2)::Int64
β %65 = Bijou64.:<::Core.Const(<)
β %66 = v::UInt64
β %67 = (%65)(%66, 248)::Bool
ββββ goto #9 if not %67
8 ββ nothing
β %70 = bytes::Vector{UInt8}
β %71 = v::UInt64
β %72 = i::Int64
β Base.setindex!(%70, %71, %72)
β %74 = v::UInt64
β (val@_10 = %74)
β nothing
β val@_10
ββββ goto #12
9 ββ %79 = Bijou64.value2tier::Core.Const(Bijou64.value2tier)
β %80 = v::UInt64
β (tier = (%79)(%80))
β nothing
β %83 = Bijou64.tier2tag::Core.Const(Bijou64.tier2tag)
β %84 = tier::UInt8
β %85 = (%83)(%84)::UInt8
β %86 = bytes::Vector{UInt8}
β %87 = i::Int64
β Base.setindex!(%86, %85, %87)
β (val@_9 = %85)
β nothing
β val@_9
β %92 = Bijou64.hton::Core.Const(hton)
β %93 = Bijou64.:-::Core.Const(-)
β %94 = v::UInt64
β %95 = Bijou64.tier2offset::Core.Const(Bijou64.tier2offset)
β %96 = tier::UInt8
β %97 = (%95)(%96)::UInt64
β %98 = (%93)(%94, %97)::UInt64
β %99 = (%92)(%98)::UInt64
β %100 = payload::Vector{UInt64}
β Base.setindex!(%100, %99, 1)
β %102 = Bijou64.reinterpret::Core.Const(reinterpret)
β %103 = Bijou64.UInt8::Core.Const(UInt8)
β %104 = payload::Vector{UInt64}
β %105 = (%102)(%103, %104)::Core.PartialStruct(Base.ReinterpretArray{UInt8, 1, UInt64, Vector{UInt64}, false}, Any[Vector{UInt64}, Core.Const(true), Core.Const(true)])
β (S#277 = %105)
β %107 = S#277::Core.PartialStruct(Base.ReinterpretArray{UInt8, 1, UInt64, Vector{UInt64}, false}, Any[Vector{UInt64}, Core.Const(true), Core.Const(true)])
β %108 = Bijou64.:(:)::Core.Const(Colon())
β %109 = Bijou64.:+::Core.Const(+)
β %110 = Bijou64.:-::Core.Const(-)
β %111 = S#277::Core.PartialStruct(Base.ReinterpretArray{UInt8, 1, UInt64, Vector{UInt64}, false}, Any[Vector{UInt64}, Core.Const(true), Core.Const(true)])
β %112 = (lastindex)(%111)::Int64
β %113 = tier::UInt8
β %114 = (%110)(%112, %113)::Int64
β %115 = (%109)(%114, 1)::Int64
β %116 = S#277::Core.PartialStruct(Base.ReinterpretArray{UInt8, 1, UInt64, Vector{UInt64}, false}, Any[Vector{UInt64}, Core.Const(true), Core.Const(true)])
β %117 = (lastindex)(%116)::Int64
β %118 = (%108)(%115, %117)::UnitRange{Int64}
β (payload_bytes = (Base.maybeview)(%107, %118))
β %120 = payload_bytes::Core.PartialStruct(SubArray{UInt8, 1, Base.ReinterpretArray{UInt8, 1, UInt64, Vector{UInt64}, false}, Tuple{UnitRange{Int64}}, true}, Any[Core.PartialStruct(Base.ReinterpretArray{UInt8, 1, UInt64, Vector{UInt64}, false}, Any[Vector{UInt64}, Core.Const(true), Core.Const(true)]), Tuple{UnitRange{Int64}}, Int64, Core.Const(1)])
β (@_8 = Base.iterate(%120))
β %122 = @_8::Union{Nothing, Tuple{UInt8, Tuple{Base.OneTo{Int64}, Int64}}}
β %123 = (%122 === nothing)::Bool
β %124 = Base.not_int(%123)::Bool
ββββ goto #12 if not %124
10 β %126 = @_8::Tuple{UInt8, Tuple{Base.OneTo{Int64}, Int64}}
β (pb = Core.getfield(%126, 1))
β %128 = Core.getfield(%126, 2)::Tuple{Base.OneTo{Int64}, Int64}
β %129 = Bijou64.nextind::Core.Const(nextind)
β %130 = bytes::Vector{UInt8}
β %131 = i::Int64
β (i = (%129)(%130, %131))
β nothing
β %134 = bytes::Vector{UInt8}
β %135 = pb::UInt8
β %136 = i::Int64
β Base.setindex!(%134, %135, %136)
β %138 = pb::UInt8
β (val@_15 = %138)
β nothing
β val@_15
β (@_8 = Base.iterate(%120, %128))
β %143 = @_8::Union{Nothing, Tuple{UInt8, Tuple{Base.OneTo{Int64}, Int64}}}
β %144 = (%143 === nothing)::Bool
β %145 = Base.not_int(%144)::Bool
ββββ goto #12 if not %145
11 β goto #10
12 β %148 = Bijou64.nextind::Core.Const(nextind)
β %149 = bytes::Vector{UInt8}
β %150 = i::Int64
β (i = (%148)(%149, %150))
β (@_3 = Base.iterate(%51, %64))
β %153 = @_3::Union{Nothing, Tuple{UInt64, Int64}}
β %154 = (%153 === nothing)::Bool
β %155 = Base.not_int(%154)::Bool
ββββ goto #14 if not %155
13 β goto #7
14 β %158 = bytes::Vector{UInt8}
β %159 = Bijou64.:(:)::Core.Const(Colon())
β %160 = bytes::Vector{UInt8}
β %161 = Base.firstindex(%160)::Core.Const(1)
β %162 = Bijou64.prevind::Core.Const(prevind)
β %163 = bytes::Vector{UInt8}
β %164 = i::Int64
β %165 = (%162)(%163, %164)::Int64
β %166 = (%159)(%161, %165)::Core.PartialStruct(UnitRange{Int64}, Any[Core.Const(1), Int64])
β %167 = Base.getindex(%158, %166)::Vector{UInt8}
β (@_18 = %167)
β %169 = @_18::Vector{UInt8}
β %170 = (%169 isa %8)::Core.Const(true)
ββββ goto #16 if not %170
15 β goto #17
16 β Core.Const(:(@_18))
β Core.Const(:(Base.convert(%8, %173)))
ββββ Core.Const(:(@_18 = Core.typeassert(%174, %8)))
17 β %176 = @_18::Vector{UInt8}
ββββ return %176
What are the next tools or evaluations you would recommend? Are there good heuristics for knowing when further optimization is unlikely to be helpful?
Iβd appreciate any pointers or suggestions, and thanks in advance for your thoughts/links/suggestions/etc.
With a few guidelines, I find it pretty easy to write Julia code thatβs as performant as I need. I mostly try to avoid the obvious problems, and I end up mostly happy with the results. β©οΈ
The package is not current registered, but if I would submit it to the general registry if there were interest. β©οΈ