Why does an if statement force a value to be boxed?


#1

Consider this simple function, which is type stable and always returns an Int:

# Count how many of n = 50 calls to rand() return a value below alpha = lambda / 30.
# alpha is assigned once, unconditionally, at the top level of the function body;
# the @code_warntype output that follows reports alpha::Float64 and a return
# type of Int64 for this version.
function no_box(lambda::Float64)
	alpha = lambda / 30.0
	n = 50
	# The generator body `rand() < alpha` is lowered to a closure object that
	# captures alpha (the ##65#66{Float64} value in the output that follows).
	return sum(rand() < alpha for i in 1:n)
end
julia> @code_warntype no_box(25.0)
Variables:
  #self#::#no_box
  lambda::Float64
  #65::##65#66{Float64}
  alpha::Float64
  n::Int64

Body:
  begin
      alpha::Float64 = (Base.div_float)(lambda::Float64, 30.0)::Float64 # line 37:
      #65::##65#66{Float64} = $(Expr(:new, ##65#66{Float64}, :(alpha)))
      SSAValue(1) = $(Expr(:new, UnitRange{Int64}, 1, :((Base.select_value)((Base.sle_int)(1, 50)::Bool, 50, (Base.sub_int)(1, 1)::Int64)::Int64)))
      SSAValue(2) = $(Expr(:new, Base.Generator{UnitRange{Int64},##65#66{Float64}}, :(#65), SSAValue(1)))
      return $(Expr(:invoke, MethodInstance for mapfoldl(::Base.#identity, ::Function, ::Base.Generator{UnitRange{Int64},##65#66{Float64}}), :(Base.mapfoldl), :(Base.identity), :(Base.+), SSAValue(2)))
  end::Int64

If the body of that function is moved into the else branch of an if statement, the resulting function is no longer type stable:

# Same computation as no_box, but placed in the else branch of an if that
# returns 5 early when lambda >= 30.0. On Julia 0.6 the @code_warntype output
# that follows reports alpha::Core.Box and a return type of Any for this version.
function forces_box(lambda::Float64)
	if lambda >= 30.0
		return 5
	else
		# alpha is first assigned inside this branch and is then captured by the
		# closure generated for `rand() < alpha` in the generator below.
		alpha = lambda / 30.0
		n = 50
		return sum(rand() < alpha for i in 1:n)
	end
end
julia> @code_warntype forces_box(25.0)
Variables:
  #self#::#forces_box
  lambda::Float64
  #63::##63#64
  alpha::Core.Box
  n::Int64

Body:
  begin
      NewvarNode(:(#63::##63#64))
      alpha::Core.Box = $(Expr(:new, :(Core.Box)))
      unless (Base.le_float)(30.0, lambda::Float64)::Bool goto 6 # line 26:
      return 5
      6:  # line 28:
      SSAValue(0) = (Base.div_float)(lambda::Float64, 30.0)::Float64
      (Core.setfield!)(alpha::Core.Box, :contents, SSAValue(0))::Float64 # line 30:
      #63::##63#64 = $(Expr(:new, :(Main.##63#64), :(alpha)))
      SSAValue(2) = $(Expr(:new, UnitRange{Int64}, 1, :((Base.select_value)((Base.sle_int)(1, 50)::Bool, 50, (Base.sub_int)(1, 1)::Int64)::Int64)))
      SSAValue(3) = $(Expr(:new, Base.Generator{UnitRange{Int64},##63#64}, :(#63), SSAValue(2)))
      return $(Expr(:invoke, MethodInstance for mapfoldl(::Base.#identity, ::Function, ::Base.Generator{UnitRange{Int64},##63#64}), :(Base.mapfoldl), :(Base.identity), :(Base.+), SSAValue(3)))
  end::Any

Given that the compiler knows no_box always returns an Int, I would think the same code with the same types inside a branch of forces_box would be similarly stable. Apparently not. Why is that?

Edit: This is on Julia 0.6


#2

Moving the code in the else branch into its own function (e.g., the one you’ve already written) solves the problem (EDIT: also worth pointing out that the separation stops the boxing):

julia> function forces_box2(lambda::Float64)
               # Same branching as forces_box, but the else branch delegates to
               # no_box instead of assigning alpha and building the generator here.
               if lambda >= 30.0
                       return 5
               else
                       # alpha is a local of no_box; the @code_warntype output that
                       # follows reports alpha::Float64 and a return type of Int64.
                       return no_box(lambda)
               end
       end
forces_box2 (generic function with 1 method)

julia> @code_warntype forces_box2(25.0)
Variables:
  #self#::#forces_box2
  lambda::Float64
  #1::##1#2{Float64}
  alpha::Float64
  n::Int64

Body:
  begin 
      unless (Base.le_float)(30.0, lambda::Float64)::Bool goto 4 # line 3:
      return 5
      4:  # line 5:
      $(Expr(:inbounds, false))
      # meta: location REPL[1] no_box 2
      alpha::Float64 = (Base.div_float)(lambda::Float64, 30.0)::Float64 # line 4:
      #1::##1#2{Float64} = $(Expr(:new, ##1#2{Float64}, :(alpha)))
      SSAValue(1) = $(Expr(:new, UnitRange{Int64}, 1, :((Base.select_value)((Base.sle_int)(1, 50)::Bool, 50, (Base.sub_int)(1, 1)::Int64)::Int64)))
      SSAValue(2) = $(Expr(:new, Base.Generator{UnitRange{Int64},##1#2{Float64}}, :(#1), SSAValue(1)))
      # meta: pop location
      $(Expr(:inbounds, :pop))
      return $(Expr(:invoke, MethodInstance for mapfoldl(::Base.#identity, ::Function, ::Base.Generator{UnitRange{Int64},##1#2{Float64}}), :(Base.mapfoldl), :(Base.identity), :(Base.+), SSAValue(2)))
  end::Int64

#3

Good to know, thanks. But why is it necessary in the first place?


#4

This code is type-stable:

# Variant of forces_box in which the assignment to alpha is hoisted above the
# if, so alpha is assigned unconditionally before either branch runs. The
# accompanying text reports this version as type-stable.
function forces_box(lambda::Float64)
    alpha = lambda / 30.0
	if lambda >= 30.0
		return 5
	else
		n = 50
		# The generator's closure captures an alpha that was already assigned
		# before the if was entered.
		return sum(rand() < alpha for i in 1:n)
	end
end

The generator passed to sum, rand() < alpha for i in 1:n, is lowered to a closure that captures alpha. My understanding is that because alpha is first assigned inside the if, the compiler isn’t quite sure whether alpha will be bound or not when the closure uses it, and so it stores alpha in a Core.Box and generates suboptimal code. See also https://github.com/JuliaLang/julia/issues/15276