Is the if statement inside my for loop body being optimized out or not?

question

#1

Consider the following MWE:

# Common supertype of the simulation structs below; `testfunction` accepts any subtype of it.
abstract type AbstractSimulation end

# Simulation state held as nine 3-D Float64 arrays, grouped into three x/y/z triples.
# The notes on each triple describe how `testfunction` uses it for this type.
struct Simulation <: AbstractSimulation
  ux::Array{Float64,3}    # u triple: multiplied in place by 0.5
  uy::Array{Float64,3}
  uz::Array{Float64,3}
  rhsx::Array{Float64,3}  # rhs triple: only read (used to compute the aux triple)
  rhsy::Array{Float64,3}
  rhsz::Array{Float64,3}
  auxx::Array{Float64,3}  # aux triple: overwritten on every call
  auxy::Array{Float64,3}
  auxz::Array{Float64,3}
end

# Same nine 3-D Float64 arrays as `Simulation`, plus one extra array `ρ`.
# `testfunction` detects this type via `T<:PassiveScalarSimulation` and runs extra updates for it.
struct PassiveScalarSimulation <: AbstractSimulation
  ux::Array{Float64,3}    # u triple: multiplied in place by 0.5
  uy::Array{Float64,3}
  uz::Array{Float64,3}
  rhsx::Array{Float64,3}  # rhs triple: read for the aux triple, then overwritten with u*ρ
  rhsy::Array{Float64,3}
  rhsz::Array{Float64,3}
  auxx::Array{Float64,3}  # aux triple: overwritten on every call
  auxy::Array{Float64,3}
  auxz::Array{Float64,3}
  ρ::Array{Float64,3}     # extra field: multiplied in place by 0.5, then used to rebuild rhs
end

# Update the arrays of `s` in place, one linear index at a time:
#   * ux, uy, uz are multiplied by 0.5;
#   * auxx, auxy, auxz receive 0.5 * (u × rhs), component by component, computed
#     from the already-scaled u values;
#   * only when T <: PassiveScalarSimulation, ρ is multiplied by 0.5 and
#     rhsx, rhsy, rhsz are overwritten with u*ρ.
# The loop is the last expression, so the function returns nothing.
# NOTE(review): every field is indexed with 1:length(s.ux), so this presumably
# assumes all arrays have the same length — confirm where the structs are built.
function testfunction(s::T) where T<:AbstractSimulation
  scale=0.5
  mscale=-0.5  # negated factor, so muladd below yields a difference of two products
  isitscalar = T<:PassiveScalarSimulation  # depends only on the static parameter T, not on values in s
  for i in 1:length(s.ux)  # linear index over the 3-D arrays
    s.ux[i] *= scale
    s.uy[i] *= scale
    s.uz[i] *= scale

    s.auxx[i] = muladd(scale*s.uy[i],s.rhsz[i], mscale*s.uz[i]*s.rhsy[i])  # 0.5*(uy*rhsz - uz*rhsy)
    s.auxy[i] = muladd(scale*s.uz[i],s.rhsx[i], mscale*s.ux[i]*s.rhsz[i])  # 0.5*(uz*rhsx - ux*rhsz)
    s.auxz[i] = muladd(scale*s.ux[i],s.rhsy[i], mscale*s.uy[i]*s.rhsx[i])  # 0.5*(ux*rhsy - uy*rhsx)
    if isitscalar  # same outcome on every iteration for a given T; s.ρ is only touched in this branch
      s.ρ[i] *= scale
      s.rhsx[i] = s.ux[i]*s.ρ[i]  # rhs is overwritten only after the aux values above have read it
      s.rhsy[i] = s.uy[i]*s.ρ[i]
      s.rhsz[i] = s.uz[i]*s.ρ[i]
    end
  end
end

The isitscalar Boolean can be known at compile time, potentially simplifying the compiled function.

Here is what I get on Julia 0.6.2:

julia> s1 = Simulation(rand(10,10,10),rand(10,10,10),rand(10,10,10),rand(10,10,10),rand(10,10,10),rand(10,10,10),rand(10,10,10),rand(10,10,10),rand(10,10,10));

julia> s2 = PassiveScalarSimulation(rand(10,10,10),rand(10,10,10),rand(10,10,10),rand(10,10,10),rand(10,10,10),rand(10,10,10),rand(10,10,10),rand(10,10,10),rand(10,10,10),rand(10,10,10));

julia> @code_warntype testfunction(s1)
Variables:
  #self# <optimized out>
  s::Simulation
  i::Int64
  #temp#::Int64
  scale <optimized out>
  mscale <optimized out>
  isitscalar::Bool

Body:
  begin  # line 4:
      isitscalar::Bool = ($(Expr(:static_parameter, 1)) <: Main.PassiveScalarSimulation)::Bool # line 5:
      SSAValue(12) = (Base.arraylen)((Core.getfield)(s::Simulation, :ux)::Array{Float64,3})::Int64
      SSAValue(13) = (Base.select_value)((Base.sle_int)(1, SSAValue(12))::Bool, SSAValue(12), (Base.sub_int)(1, 1)::Int64)::Int64
      #temp#::Int64 = 1
      7:
      unless (Base.not_int)((#temp#::Int64 === (Base.add_int)(SSAValue(13), 1)::Int64)::Bool)::Bool goto 36
      SSAValue(14) = #temp#::Int64
      SSAValue(15) = (Base.add_int)(#temp#::Int64, 1)::Int64
      i::Int64 = SSAValue(14)
      #temp#::Int64 = SSAValue(15) # line 6:
      SSAValue(2) = (Base.mul_float)((Base.arrayref)((Core.getfield)(s::Simulation, :ux)::Array{Float64,3}, i::Int64)::Float64, 0.5)::Float64
      (Base.arrayset)((Core.getfield)(s::Simulation, :ux)::Array{Float64,3}, SSAValue(2), i::Int64)::Array{Float64,3} # line 7:
      SSAValue(3) = (Base.mul_float)((Base.arrayref)((Core.getfield)(s::Simulation, :uy)::Array{Float64,3}, i::Int64)::Float64, 0.5)::Float64
      (Base.arrayset)((Core.getfield)(s::Simulation, :uy)::Array{Float64,3}, SSAValue(3), i::Int64)::Array{Float64,3} # line 8:
      SSAValue(4) = (Base.mul_float)((Base.arrayref)((Core.getfield)(s::Simulation, :uz)::Array{Float64,3}, i::Int64)::Float64, 0.5)::Float64
      (Base.arrayset)((Core.getfield)(s::Simulation, :uz)::Array{Float64,3}, SSAValue(4), i::Int64)::Array{Float64,3} # line 10:
      SSAValue(5) = (Base.muladd_float)((Base.mul_float)(0.5, (Base.arrayref)((Core.getfield)(s::Simulation, :uy)::Array{Float64,3}, i::Int64)::Float64)::Float64, (Base.arrayref)((Core.getfield)(s::Simulation, :rhsz)::Array{Float64,3}, i::Int64)::Float64, (Base.mul_float)((Base.mul_float)(-0.5, (Base.arrayref)((Core.getfield)(s::Simulation, :uz)::Array{Float64,3}, i::Int64)::Float64)::Float64, (Base.arrayref)((Core.getfield)(s::Simulation, :rhsy)::Array{Float64,3}, i::Int64)::Float64)::Float64)::Float64
      (Base.arrayset)((Core.getfield)(s::Simulation, :auxx)::Array{Float64,3}, SSAValue(5), i::Int64)::Array{Float64,3} # line 11:
      SSAValue(6) = (Base.muladd_float)((Base.mul_float)(0.5, (Base.arrayref)((Core.getfield)(s::Simulation, :uz)::Array{Float64,3}, i::Int64)::Float64)::Float64, (Base.arrayref)((Core.getfield)(s::Simulation, :rhsx)::Array{Float64,3}, i::Int64)::Float64, (Base.mul_float)((Base.mul_float)(-0.5, (Base.arrayref)((Core.getfield)(s::Simulation, :ux)::Array{Float64,3}, i::Int64)::Float64)::Float64, (Base.arrayref)((Core.getfield)(s::Simulation, :rhsz)::Array{Float64,3}, i::Int64)::Float64)::Float64)::Float64
      (Base.arrayset)((Core.getfield)(s::Simulation, :auxy)::Array{Float64,3}, SSAValue(6), i::Int64)::Array{Float64,3} # line 12:
      SSAValue(7) = (Base.muladd_float)((Base.mul_float)(0.5, (Base.arrayref)((Core.getfield)(s::Simulation, :ux)::Array{Float64,3}, i::Int64)::Float64)::Float64, (Base.arrayref)((Core.getfield)(s::Simulation, :rhsy)::Array{Float64,3}, i::Int64)::Float64, (Base.mul_float)((Base.mul_float)(-0.5, (Base.arrayref)((Core.getfield)(s::Simulation, :uy)::Array{Float64,3}, i::Int64)::Float64)::Float64, (Base.arrayref)((Core.getfield)(s::Simulation, :rhsx)::Array{Float64,3}, i::Int64)::Float64)::Float64)::Float64
      (Base.arrayset)((Core.getfield)(s::Simulation, :auxz)::Array{Float64,3}, SSAValue(7), i::Int64)::Array{Float64,3} # line 13:
      unless isitscalar::Bool goto 34 # line 17:
      34:
      goto 7
      36:
      return
  end::Void

julia> @code_warntype testfunction(s2)
Variables:
  #self# <optimized out>
  s::PassiveScalarSimulation
  i::Int64
  #temp#::Int64
  scale <optimized out>
  mscale <optimized out>
  isitscalar::Bool

Body:
  begin  # line 4:
      isitscalar::Bool = ($(Expr(:static_parameter, 1)) <: Main.PassiveScalarSimulation)::Bool # line 5:
      SSAValue(12) = (Base.arraylen)((Core.getfield)(s::PassiveScalarSimulation, :ux)::Array{Float64,3})::Int64
      SSAValue(13) = (Base.select_value)((Base.sle_int)(1, SSAValue(12))::Bool, SSAValue(12), (Base.sub_int)(1, 1)::Int64)::Int64
      #temp#::Int64 = 1
      7:
      unless (Base.not_int)((#temp#::Int64 === (Base.add_int)(SSAValue(13), 1)::Int64)::Bool)::Bool goto 47
      SSAValue(14) = #temp#::Int64
      SSAValue(15) = (Base.add_int)(#temp#::Int64, 1)::Int64
      i::Int64 = SSAValue(14)
      #temp#::Int64 = SSAValue(15) # line 6:
      SSAValue(2) = (Base.mul_float)((Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :ux)::Array{Float64,3}, i::Int64)::Float64, 0.5)::Float64
      (Base.arrayset)((Core.getfield)(s::PassiveScalarSimulation, :ux)::Array{Float64,3}, SSAValue(2), i::Int64)::Array{Float64,3} # line 7:
      SSAValue(3) = (Base.mul_float)((Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :uy)::Array{Float64,3}, i::Int64)::Float64, 0.5)::Float64
      (Base.arrayset)((Core.getfield)(s::PassiveScalarSimulation, :uy)::Array{Float64,3}, SSAValue(3), i::Int64)::Array{Float64,3} # line 8:
      SSAValue(4) = (Base.mul_float)((Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :uz)::Array{Float64,3}, i::Int64)::Float64, 0.5)::Float64
      (Base.arrayset)((Core.getfield)(s::PassiveScalarSimulation, :uz)::Array{Float64,3}, SSAValue(4), i::Int64)::Array{Float64,3} # line 10:
      SSAValue(5) = (Base.muladd_float)((Base.mul_float)(0.5, (Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :uy)::Array{Float64,3}, i::Int64)::Float64)::Float64, (Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :rhsz)::Array{Float64,3}, i::Int64)::Float64, (Base.mul_float)((Base.mul_float)(-0.5, (Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :uz)::Array{Float64,3}, i::Int64)::Float64)::Float64, (Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :rhsy)::Array{Float64,3}, i::Int64)::Float64)::Float64)::Float64
      (Base.arrayset)((Core.getfield)(s::PassiveScalarSimulation, :auxx)::Array{Float64,3}, SSAValue(5), i::Int64)::Array{Float64,3} # line 11:
      SSAValue(6) = (Base.muladd_float)((Base.mul_float)(0.5, (Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :uz)::Array{Float64,3}, i::Int64)::Float64)::Float64, (Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :rhsx)::Array{Float64,3}, i::Int64)::Float64, (Base.mul_float)((Base.mul_float)(-0.5, (Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :ux)::Array{Float64,3}, i::Int64)::Float64)::Float64, (Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :rhsz)::Array{Float64,3}, i::Int64)::Float64)::Float64)::Float64
      (Base.arrayset)((Core.getfield)(s::PassiveScalarSimulation, :auxy)::Array{Float64,3}, SSAValue(6), i::Int64)::Array{Float64,3} # line 12:
      SSAValue(7) = (Base.muladd_float)((Base.mul_float)(0.5, (Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :ux)::Array{Float64,3}, i::Int64)::Float64)::Float64, (Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :rhsy)::Array{Float64,3}, i::Int64)::Float64, (Base.mul_float)((Base.mul_float)(-0.5, (Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :uy)::Array{Float64,3}, i::Int64)::Float64)::Float64, (Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :rhsx)::Array{Float64,3}, i::Int64)::Float64)::Float64)::Float64
      (Base.arrayset)((Core.getfield)(s::PassiveScalarSimulation, :auxz)::Array{Float64,3}, SSAValue(7), i::Int64)::Array{Float64,3} # line 13:
      unless isitscalar::Bool goto 45 # line 14:
      SSAValue(8) = (Base.mul_float)((Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :ρ)::Array{Float64,3}, i::Int64)::Float64, 0.5)::Float64
      (Base.arrayset)((Core.getfield)(s::PassiveScalarSimulation, :ρ)::Array{Float64,3}, SSAValue(8), i::Int64)::Array{Float64,3} # line 15:
      SSAValue(9) = (Base.mul_float)((Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :ux)::Array{Float64,3}, i::Int64)::Float64, (Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :ρ)::Array{Float64,3}, i::Int64)::Float64)::Float64
      (Base.arrayset)((Core.getfield)(s::PassiveScalarSimulation, :rhsx)::Array{Float64,3}, SSAValue(9), i::Int64)::Array{Float64,3} # line 16:
      SSAValue(10) = (Base.mul_float)((Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :uy)::Array{Float64,3}, i::Int64)::Float64, (Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :ρ)::Array{Float64,3}, i::Int64)::Float64)::Float64
      (Base.arrayset)((Core.getfield)(s::PassiveScalarSimulation, :rhsy)::Array{Float64,3}, SSAValue(10), i::Int64)::Array{Float64,3} # line 17:
      SSAValue(11) = (Base.mul_float)((Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :uz)::Array{Float64,3}, i::Int64)::Float64, (Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :ρ)::Array{Float64,3}, i::Int64)::Float64)::Float64
      (Base.arrayset)((Core.getfield)(s::PassiveScalarSimulation, :rhsz)::Array{Float64,3}, SSAValue(11), i::Int64)::Array{Float64,3}
      45:
      goto 7
      47:
      return
  end::Void

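So the variable list doesn’t show <optimized out> for isitscalar, but the bodies of the two compiled functions do suggest that the if statement is being optimized out: for Simulation the contents of the branch are gone, and the remaining `unless` jumps straight to the next label.
Also, on Julia nightly the output does show <optimized out> for isitscalar.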




I know that what I’m trying to do goes against this performance tip, but it really helps me avoid code repetition: for the different AbstractSimulation types most of the for loop is the same, with only a few extra lines for particular kinds of simulation. It would be really cumbersome to write a different method for each type of simulation.


#2

Yeah, the result of that variable is used to decide whether to jump to 34 or…jump to 34, so I agree that it’s actually been optimized out of the loop body. Further optimization passes in LLVM and native code generation may be able to eliminate the trivially useless `unless`, though I don’t personally know. It’s likely not worth worrying about.

> I know what I’m trying to do goes against this performance tip, but it really helps me to avoid code repetition, since for different AbstractSimulation types most of the for loop is the same, with small addition of lines for different kinds of Simulations

If most of the body is the same, with small additions for different kinds of simulations, then it seems likely that you could write one generic testfunction and have it call other functions at the points where you want the two types to behave differently. Then just implement those inner functions for each of your types (some of those might even be no-op functions, but that’s fine: inlining will mean that those no-op functions don’t have a run-time cost).


#3

Correct. Most control flow optimizations happen at the LLVM IR level.