Aiming to be good citizen, I’ve followed PSA Make it easier to help you and prepared an, as much as possible, stripped down example
Not sure if that matters, I’m on Win10 using Julia 1.5.3
abstract type AbstractField{T} end
pos(this::AbstractField) ::UnitRange{Int} = this.pos
pos(this::AbstractField, start::Int) ::UnitRange{Int} = (start -= 1; start + pos(this).start : start + pos(this).stop)
start_pos(this::AbstractField) ::Int = pos(this).start
length(this::AbstractField) ::Int = pos(this).stop - pos(this).start + 1
get_bytes(this::AbstractField, buf::AbstractVector{UInt8}, start::Int=1) = view(buf, pos(this, start))
# -----------------------------------------------------------------------------
struct AsciiField <: AbstractField{AbstractString}
name::AbstractString
pos::UnitRange{Int}
end
# --------------------------------------------------------------------
struct Record
fields::Vector{AbstractField}
end
get_field(this::Record, idx::Union{AbstractString,Int}) = this.fields[idx]
# --------------------------------------------------------------------
struct FWFile
reclen::Int
buf::Vector{UInt8}
recdef::Record
end
function FWFile(; recdef::Record)
buf = repeat(b"x", 1_000_000_000)
return FWFile(105, buf, recdef)
end
function get_field(file::FWFile, idx::Union{AbstractString,Int})
return get_field(file.recdef, 1)
end
# --------------------------------------------------------------------
values_2(this::FWFile, field::AbstractString) = values_2(this, get_field(this, field))
function values_2(this::FWFile, field::AbstractField)
start::Int = start_pos(field)
reclen::Int = this.reclen
stop::Int = sizeof(this.buf) - reclen
range = start : reclen : stop
flen::Int = length(field)
@show range
return (get_bytes(field, this.buf, i) for i in range)
end
function iter_loop(file::FWFile, field::Union{AbstractField,AbstractString})
for x in values_2(file, field)
# empty
end
end
function my_test()
f1 = AsciiField("A", 1:10)
rec = Record([f1])
fwf = FWFile(recdef=rec)
@time iter_loop(fwf, f1) # (1)
@time iter_loop(fwf, get_field(fwf, "A")) # (2)
@time iter_loop(fwf, "A") # (3)
GC.gc()
end
The output on my laptop looks as follows
julia> my_test()
range = 1:105:999999841
0.105971 seconds (59.44 k allocations: 3.010 MiB)
range = 1:105:999999841
0.037077 seconds (37 allocations: 1.531 KiB)
range = 1:105:999999841
3.323590 seconds (38.10 M allocations: 1.419 GiB, 14.19% gc time)
(1) I fully understand why this version can be best optimized/spezialised by a compiler
(2) Can only be optimized at runtime, but obviously Julia does well and specializes them
(3) From my point of view, not really different to (2). I would think even easier to optimize. But something is clearly wrong here. I must have crossed some borderline, but I don’t know what that is.
My question: I did not find documentation or blogs that explain the rules (or maybe I didn’t understand them) if and when boxing/unboxing applies, or specialisation isn’t possible, leading to more allocations. I did read about function barriers, but as you can see the functions are really tiny, I’m not re-using local variables, and pretty everthing is typed. If somebody could please help me and explain to me the exact rules, or point me to the right documentation, would be very much appreciated.