Using do with file handle somehow causes allocations?

I was implementing some code related to my previous question and came across some behavior that I can’t wrap my head around when reading chunks from a file. I put the reduced code below. Actually, readbytes! doesn’t make the difference; just iterating over buffer in do_open seems to somehow cause these allocations.

# Open `f` with the do-block form of `open`, fill a 100_000_000-byte buffer from
# it ten times, and bump a counter once per buffer element. Returns the counter,
# which starts at 1.
function do_open(f::String)
    b_size = 100_000_000
    buffer=zeros(UInt8, b_size)
    # `index_track` is defined out here but reassigned inside the do-block below,
    # so the anonymous function has to capture it. The `@code_warntype` output
    # for this function reports it as `Core.Box`.
    index_track = 1
    open(f, "r") do handle
        for i in 1:10 
            # The return value of `readbytes!` is not used; the loop below
            # visits every element of `buffer` on each pass.
            readbytes!(handle, buffer, b_size)
            for elem in buffer 
                # Reassignment of the captured variable (`elem` itself is unused).
                index_track +=1 
            end 
        end 
    end 
    return index_track
end
    
# Same work as the do-block variant, but with an explicitly opened and closed
# handle: fill a 100_000_000-byte buffer from `f` ten times and bump a counter
# once per buffer element. Returns the counter, which starts at 1.
function regular_open(f::String)
    b_size = 100_000_000
    buffer=zeros(UInt8, b_size)
    # Plain local variable: no closure is created in this function.
    index_track = 1
    handle = open(f, "r")
    for i in 1:10 
        # The return value of `readbytes!` is not used; the loop below visits
        # every element of `buffer` on each pass.
        readbytes!(handle, buffer, b_size)
        for elem in buffer 
           index_track +=1 
        end 
    end 
    # No try/finally here: this line is only reached if nothing above throws.
    close(handle)   
    return index_track
end

"""
    gen_test()

Create the benchmark input file `remove_test.txt` in the current working
directory. The file holds the 17-character string `"ABCDEFGHIJKLMNOPQ"`
repeated 1_000_000 times. The file is opened in `"w"` mode. Returns `nothing`.
"""
function gen_test()
    # The do-block form of `open` closes the handle even when `write` throws,
    # so a failed write cannot leak the file handle.
    open("remove_test.txt", "w") do test_handle
        write(test_handle, "ABCDEFGHIJKLMNOPQ"^1_000_000)
    end
    return nothing
end

# Create the input file, then benchmark both variants on the same file.
# NOTE: `@btime` comes from the BenchmarkTools.jl package; this snippet does not
# show a `using BenchmarkTools`, so it must be loaded beforehand.
gen_test()
@btime regular_open("remove_test.txt")
@btime do_open("remove_test.txt")

Giving:

40.369 ms (13 allocations: 95.37 MiB)
26.129 s (999999505 allocations: 14.99 GiB)

Am I doing something stupid here causing this behavior?


Some research
I noticed when running @code_warntype that index_track gets a ::Core.Box warning in red:

MethodInstance for do_open(::String)
  from do_open(f::String) in Main at REPL[1]:1
Arguments
  #self#::Core.Const(do_open)
  f::String
Locals
  #1::var"#1#2"{Vector{UInt8}, Int64}
  index_track@_4::Core.Box # <-- HERE
  buffer::Vector{UInt8}
  b_size::Int64
  index_track@_7::Union{}
Body::Any
1 ─       (index_track@_4 = Core.Box()) # <-- HERE
│         (b_size = 100000000)
│         (buffer = Main.zeros(Main.UInt8, b_size::Core.Const(100000000)))
│         Core.setfield!(index_track@_4, :contents, 1)
│   %5  = Main.:(var"#1#2")::Core.Const(var"#1#2")
│   %6  = Core.typeof(buffer)::Core.Const(Vector{UInt8})
│   %7  = Core.typeof(b_size::Core.Const(100000000))::Core.Const(Int64)
│   %8  = Core.apply_type(%5, %6, %7)::Core.Const(var"#1#2"{Vector{UInt8}, Int64})
│   %9  = index_track@_4::Core.Box
│   %10 = buffer::Vector{UInt8}
│         (#1 = %new(%8, %9, %10, b_size::Core.Const(100000000)))
│   %12 = #1::Core.PartialStruct(var"#1#2"{Vector{UInt8}, Int64}, Any[Core.Box, Vector{UInt8}, Core.Const(100000000)])
│         Main.open(%12, f, "r")
│   %14 = Core.isdefined(index_track@_4, :contents)::Bool
└──       goto #3 if not %14
2 ─       goto #4
3 ─       Core.NewvarNode(:(index_track@_7))
└──       index_track@_7
4 ┄ %19 = Core.getfield(index_track@_4, :contents)::Any # <-- HERE
└──       return %19

I do not see this back for the regular one:

MethodInstance for regular_open(::String)
  from regular_open(f::String) in Main at REPL[3]:1
Arguments
  #self#::Core.Const(regular_open)
  f::String
Locals
  @_3::Union{Nothing, Tuple{Int64, Int64}}
  handle::IOStream
  index_track::Int64
  buffer::Vector{UInt8}
  b_size::Int64
  @_8::Union{Nothing, Tuple{UInt8, Int64}}
  i::Int64
  elem::UInt8
Body::Int64
1 ─       (b_size = 100000000)
│         (buffer = Main.zeros(Main.UInt8, b_size::Core.Const(100000000)))
│         (index_track = 1)
│         (handle = Main.open(f, "r"))
│   %5  = (1:10)::Core.Const(1:10)
│         (@_3 = Base.iterate(%5))
│   %7  = (@_3::Core.Const((1, 1)) === nothing)::Core.Const(false)
│   %8  = Base.not_int(%7)::Core.Const(true)
└──       goto #7 if not %8
2 ┄ %10 = @_3::Tuple{Int64, Int64}
│         (i = Core.getfield(%10, 1))
│   %12 = Core.getfield(%10, 2)::Int64
│         Main.readbytes!(handle, buffer, b_size::Core.Const(100000000))
│   %14 = buffer::Vector{UInt8}
│         (@_8 = Base.iterate(%14))
│   %16 = (@_8 === nothing)::Bool
│   %17 = Base.not_int(%16)::Bool
└──       goto #5 if not %17
3 ┄ %19 = @_8::Tuple{UInt8, Int64}
│         (elem = Core.getfield(%19, 1))
│   %21 = Core.getfield(%19, 2)::Int64
│         (index_track = index_track + 1)
│         (@_8 = Base.iterate(%14, %21))
│   %24 = (@_8 === nothing)::Bool
│   %25 = Base.not_int(%24)::Bool
└──       goto #5 if not %25
4 ─       goto #3
5 ┄       (@_3 = Base.iterate(%5, %12))
│   %29 = (@_3 === nothing)::Bool
│   %30 = Base.not_int(%29)::Bool
└──       goto #7 if not %30
6 ─       goto #2
7 ┄       Main.close(handle)
└──       return index_track

Also when running --track-allocation=all the allocs in do_open indeed seem to happen at index_track +=1:

        - function do_open(f::String)
        -     b_size = 100_000_000
        -     buffer=zeros(UInt8, b_size)
        -     index_track = 1
        -     open(f, "r") do handle
        0         for i in 1:10 
        0             readbytes!(handle, buffer, b_size)
        0             for elem in buffer 
4205588144                 index_track +=1 
        -             end 
        -         end 
        -     end 
        -     return index_track
        - end

So probably somehow a type instability gets triggered (like mentioned here), but I don’t get how the do handle makes this difference, or am I misusing it?

The do construction creates an anonymous function, which is a closure, and you are probably being hit by the infamous issue “performance of captured variables in closures” (Issue #15276 in JuliaLang/julia on GitHub).

1 Like

The easiest workaround in this case is to move the variables into the do block so you don’t need to capture them at all:

"""
    do_open(f::String)

Open `f` for reading with the do-block form of `open`, fill a
100_000_000-byte buffer from it ten times, and increment a counter once per
buffer element on every pass. Return the final counter, which starts at 1.

All working state is created inside the do-block, so the anonymous function
does not capture any variable from the enclosing function.
"""
function do_open(f::String)
    return open(f, "r") do io
        chunk_len = 100_000_000
        buffer = zeros(UInt8, chunk_len)
        index_track = 1
        for _ in 1:10
            readbytes!(io, buffer, chunk_len)
            # Count every slot of the buffer; the element values are not used.
            for _ in buffer
                index_track += 1
            end
        end
        return index_track
    end
end

In particular, I think it’s index_track being captured that causes the allocations here - I can’t check right now, but using index_track = Ref(1) and index_track[] for reading/writing should work too.

1 Like

Yeah, that seems to solve it: with @btime it shows 15 allocs, although with @time it’s still around 3.4K, compared to 13 with the regular handle.