Why does an if statement force a value to be boxed?

Consider this simple function, which is type stable and always returns an Int:

# Draw n = 50 samples with `rand()` and return how many of them are below
# alpha = lambda / 30.0. Baseline version with no branching.
function no_box(lambda::Float64)
	# alpha is assigned exactly once, unconditionally, before the generator below captures it
	alpha = lambda / 30.0
	n = 50
	# the generator wraps `rand() < alpha` in a closure over alpha; summing the Bool results gives the count
	return sum(rand() < alpha for i in 1:n)
end
julia> @code_warntype no_box(25.0)
Variables:
  #self#::#no_box
  lambda::Float64
  #65::##65#66{Float64}
  alpha::Float64
  n::Int64

Body:
  begin
      alpha::Float64 = (Base.div_float)(lambda::Float64, 30.0)::Float64 # line 37:
      #65::##65#66{Float64} = $(Expr(:new, ##65#66{Float64}, :(alpha)))
      SSAValue(1) = $(Expr(:new, UnitRange{Int64}, 1, :((Base.select_value)((Base.sle_int)(1, 50)::Bool, 50, (Base.sub_int)(1, 1)::Int64)::Int64)))
      SSAValue(2) = $(Expr(:new, Base.Generator{UnitRange{Int64},##65#66{Float64}}, :(#65), SSAValue(1)))
      return $(Expr(:invoke, MethodInstance for mapfoldl(::Base.#identity, ::Function, ::Base.Generator{UnitRange{Int64},##65#66{Float64}}), :(Base.mapfoldl), :(Base.identity), :(Base.+), SSAValue(2)))
  end::Int64

If the body of that function is inside an if statement, the resulting function is no longer type stable:

# Same computation as `no_box`, wrapped in an `if`: return the constant 5 when
# lambda >= 30.0, otherwise count how many of n = 50 `rand()` draws fall below alpha.
function forces_box(lambda::Float64)
	if lambda >= 30.0
		return 5
	else
		# alpha is first assigned inside this branch and then captured by the
		# generator's closure; @code_warntype for this version reports
		# alpha::Core.Box and a return type of Any
		alpha = lambda / 30.0
		n = 50
		return sum(rand() < alpha for i in 1:n)
	end
end
julia> @code_warntype forces_box(25.0)
Variables:
  #self#::#forces_box
  lambda::Float64
  #63::##63#64
  alpha::Core.Box
  n::Int64

Body:
  begin
      NewvarNode(:(#63::##63#64))
      alpha::Core.Box = $(Expr(:new, :(Core.Box)))
      unless (Base.le_float)(30.0, lambda::Float64)::Bool goto 6 # line 26:
      return 5
      6:  # line 28:
      SSAValue(0) = (Base.div_float)(lambda::Float64, 30.0)::Float64
      (Core.setfield!)(alpha::Core.Box, :contents, SSAValue(0))::Float64 # line 30:
      #63::##63#64 = $(Expr(:new, :(Main.##63#64), :(alpha)))
      SSAValue(2) = $(Expr(:new, UnitRange{Int64}, 1, :((Base.select_value)((Base.sle_int)(1, 50)::Bool, 50, (Base.sub_int)(1, 1)::Int64)::Int64)))
      SSAValue(3) = $(Expr(:new, Base.Generator{UnitRange{Int64},##63#64}, :(#63), SSAValue(2)))
      return $(Expr(:invoke, MethodInstance for mapfoldl(::Base.#identity, ::Function, ::Base.Generator{UnitRange{Int64},##63#64}), :(Base.mapfoldl), :(Base.identity), :(Base.+), SSAValue(3)))
  end::Any

Given that the compiler knows no_box always returns an Int, I would think the same code with the same types inside a branch of forces_box would be similarly stable. Apparently not. Why is that?

Edit: This is on Julia 0.6

Moving the code in the else branch into its own function (e.g., the one you’ve already written) solves the problem (EDIT: also worth pointing out that the separation stops the boxing):

julia> function forces_box2(lambda::Float64)  # same branch structure as forces_box
               if lambda >= 30.0
                       return 5
               else
                       return no_box(lambda)  # the closure over alpha now lives in no_box, a separate function
               end
       end
forces_box2 (generic function with 1 method)

julia> @code_warntype forces_box2(25.0)
Variables:
  #self#::#forces_box2
  lambda::Float64
  #1::##1#2{Float64}
  alpha::Float64
  n::Int64

Body:
  begin 
      unless (Base.le_float)(30.0, lambda::Float64)::Bool goto 4 # line 3:
      return 5
      4:  # line 5:
      $(Expr(:inbounds, false))
      # meta: location REPL[1] no_box 2
      alpha::Float64 = (Base.div_float)(lambda::Float64, 30.0)::Float64 # line 4:
      #1::##1#2{Float64} = $(Expr(:new, ##1#2{Float64}, :(alpha)))
      SSAValue(1) = $(Expr(:new, UnitRange{Int64}, 1, :((Base.select_value)((Base.sle_int)(1, 50)::Bool, 50, (Base.sub_int)(1, 1)::Int64)::Int64)))
      SSAValue(2) = $(Expr(:new, Base.Generator{UnitRange{Int64},##1#2{Float64}}, :(#1), SSAValue(1)))
      # meta: pop location
      $(Expr(:inbounds, :pop))
      return $(Expr(:invoke, MethodInstance for mapfoldl(::Base.#identity, ::Function, ::Base.Generator{UnitRange{Int64},##1#2{Float64}}), :(Base.mapfoldl), :(Base.identity), :(Base.+), SSAValue(2)))
  end::Int64

Good to know, thanks. But why is it necessary in the first place?

This code is type-stable:

# Variant of `forces_box` with the assignment to alpha hoisted above the `if`;
# the post reports this version as type-stable.
function forces_box(lambda::Float64)
    # alpha is assigned unconditionally here, before any branching
    alpha = lambda / 30.0
	if lambda >= 30.0
		return 5
	else
		n = 50
		# the generator's closure captures the alpha assigned at the top of the function
		return sum(rand() < alpha for i in 1:n)
	end
end

The `rand() < alpha` expression in the `sum` generator is a closure that captures `alpha`. My understanding is that because the assignment to `alpha` happens inside the `if`, the compiler isn’t quite sure whether `alpha` will be bound or not when the closure uses it, and so it stores `alpha` in a `Core.Box` and generates suboptimal code. See also JuliaLang/julia issue #15276, “performance of captured variables in closures”, on GitHub.

2 Likes