How about this: I wrote a generated function that reads each field from a reinterpreted view of the buffer whose length is exactly one element. I assumed the compiler could optimize this quite well, because it can eliminate the reinterpret machinery when it isn’t needed anywhere beyond the constructor call.
I didn’t set up the example the way you did; instead I just read 100 of these objects from a buffer and added some of their fields together to show that they can be accessed. The whole loop then takes about 4.5 microseconds, so hopefully that is faster than the other solutions?
using BenchmarkTools
"""
    TLM

Example record with one `UInt16` field followed by ten `UInt32` fields.

The field sizes add up to 2 + 10 * 4 = 42 bytes. Note that `sizeof(TLM)` may be
larger than that because of alignment padding in the in-memory layout.
"""
struct TLM
    a::UInt16
    b::UInt32
    c::UInt32
    d::UInt32
    e::UInt32
    f::UInt32
    g::UInt32
    h::UInt32
    i::UInt32
    j::UInt32
    k::UInt32
end
"""
    struct_at_index(T, buf::Vector{UInt8}, i)

Decode the `i`-th (1-based) record of type `T` from the byte buffer `buf`.

Records are assumed to be stored back to back with their fields tightly packed:
field `n` starts right after field `n - 1`, with no alignment padding, so one
record occupies `sum(sizeof, fieldtypes(T))` bytes. Each field is obtained by
reinterpreting a view of exactly `sizeof(fieldtype)` bytes, i.e. in the host's
native byte order, and the values are passed positionally to `T(...)`.

The unrolled, per-field code is generated once per type `T`.

# Throws
- `ArgumentError` if any field type of `T` is not an isbits type.
- `BoundsError` if record `i` does not lie completely inside `buf`.
"""
@generated function struct_at_index(::Type{T}, buf::Vector{UInt8}, i) where {T}
    constructor_args = Expr[]
    # Running byte offset of the current field inside one packed record.
    # After the loop it equals the packed size of a whole record.
    offset = 0
    for ft in fieldtypes(T)
        # `reinterpret` only supports bits types; fail early with a clear message.
        isbitstype(ft) ||
            throw(ArgumentError("field type $ft of $T is not an isbits type"))
        nbytes = sizeof(ft)
        first_off = offset
        last_off = offset + nbytes - 1
        push!(
            constructor_args,
            :(reinterpret($ft, view(buf, (location + $first_off):(location + $last_off)))[1]),
        )
        offset += nbytes
    end
    record_size = offset
    constructor = Expr(:call, T, constructor_args...)
    return quote
        # 1-based index of the first byte of record `i`
        location = $record_size * (i - 1) + 1
        $constructor
    end
end
# Random bytes for 100 packed records of 42 bytes each
# (42 = sum of the field sizes of `TLM`).
buf = rand(UInt8, 100 * 42)
# Decode the first record once; presumably a quick smoke test before benchmarking.
struct_at_index(TLM, buf, 1)
"""
    testfunc(buf)

Decode records 1 through 100 of type `TLM` from `buf` and return the sum of
`a + d` over all of them.
"""
function testfunc(buf)
    # Decode one record and combine two of its fields to show they are accessible.
    function a_plus_d(idx)
        record = struct_at_index(TLM, buf, idx)
        return record.a + record.d
    end
    return sum(a_plus_d, 1:100)
end
julia> @btime testfunc($buf)
4.472 μs (0 allocations: 0 bytes)
0x0000003624182e4c