# Is the if statement inside my for loop body being optimized out or not?

Consider the following MWE:

``````julia
abstract type AbstractSimulation end

# Concrete `AbstractSimulation` holding nine 3-D `Float64` arrays.
# `testfunction` scales `ux`, `uy` and `uz` in place; for this type it does not
# touch the remaining fields.
struct Simulation <: AbstractSimulation
# Scaled in place by `testfunction`.
# NOTE(review): presumably the x/y/z components of one vector field — confirm.
ux::Array{Float64,3}
uy::Array{Float64,3}
uz::Array{Float64,3}
# NOTE(review): the `rhs*` names suggest right-hand-side terms — confirm.
rhsx::Array{Float64,3}
rhsy::Array{Float64,3}
rhsz::Array{Float64,3}
# NOTE(review): the `aux*` names suggest auxiliary/work arrays — confirm.
auxx::Array{Float64,3}
auxy::Array{Float64,3}
auxz::Array{Float64,3}
end

# Concrete `AbstractSimulation` with the same nine 3-D `Float64` arrays as
# `Simulation` plus one extra array, `Ļ`.
# `testfunction` takes its extra branch for this type: it scales `Ļ` and
# overwrites `rhsx`, `rhsy` and `rhsz`.
struct PassiveScalarSimulation <: AbstractSimulation
# Scaled in place by `testfunction`.
ux::Array{Float64,3}
uy::Array{Float64,3}
uz::Array{Float64,3}
# Overwritten by `testfunction` with the products `ux*Ļ`, `uy*Ļ`, `uz*Ļ`.
rhsx::Array{Float64,3}
rhsy::Array{Float64,3}
rhsz::Array{Float64,3}
# NOTE(review): the `aux*` names suggest auxiliary/work arrays — confirm.
auxx::Array{Float64,3}
auxy::Array{Float64,3}
auxz::Array{Float64,3}
# Extra array, scaled in place by `testfunction`.
# NOTE(review): the field name looks like a mis-encoded Greek letter, and the type
# name suggests this is the passive scalar field — confirm both.
Ļ::Array{Float64,3}
end

# Scale the arrays of `s` in place, one linear index at a time.
#
# For every index `i` in `1:length(s.ux)`, `ux[i]`, `uy[i]` and `uz[i]` are
# multiplied by 0.5. When `T <: PassiveScalarSimulation`, `Ļ[i]` is also multiplied
# by 0.5, and `rhsx[i]`, `rhsy[i]`, `rhsz[i]` are overwritten with the products of
# the already-scaled `ux[i]`/`uy[i]`/`uz[i]` and `Ļ[i]`.
#
# The function ends with the `for` loop and has no explicit `return`, so it
# returns `nothing`.
function testfunction(s::T) where T<:AbstractSimulation
scale=0.5
# NOTE(review): `mscale` is assigned but never read in this body.
mscale=-0.5
# Computed from the static type parameter `T` only, never from a runtime value
# of `s`, so it is constant across all loop iterations of a given method instance.
isitscalar = T<:PassiveScalarSimulation
# The index range comes from `ux` alone.
# NOTE(review): assumes every other array has at least as many elements — confirm.
for i in 1:length(s.ux)
s.ux[i] *= scale
s.uy[i] *= scale
s.uz[i] *= scale

# Type-dependent extra work; `s.Ļ` is only accessed inside this branch.
if isitscalar
s.Ļ[i] *= scale
# These products use the values of `ux`, `uy`, `uz` and `Ļ` after scaling.
s.rhsx[i] = s.ux[i]*s.Ļ[i]
s.rhsy[i] = s.uy[i]*s.Ļ[i]
s.rhsz[i] = s.uz[i]*s.Ļ[i]
end
end
end
``````

The `isitscalar` Boolean can be known at compile time, potentially simplifying the compiled function.

Here is what I get for Julia 0.6.2:

``````
julia> s1 = Simulation(rand(10,10,10),rand(10,10,10),rand(10,10,10),rand(10,10,10),rand(10,10,10),rand(10,10,10),rand(10,10,10),rand(10,10,10),rand(10,10,10));

julia> s2 = PassiveScalarSimulation(rand(10,10,10),rand(10,10,10),rand(10,10,10),rand(10,10,10),rand(10,10,10),rand(10,10,10),rand(10,10,10),rand(10,10,10),rand(10,10,10),rand(10,10,10));

julia> @code_warntype testfunction(s1)
Variables:
#self# <optimized out>
s::Simulation
i::Int64
#temp#::Int64
scale <optimized out>
mscale <optimized out>
isitscalar::Bool

Body:
begin  # line 4:
isitscalar::Bool = ($(Expr(:static_parameter, 1)) <: Main.PassiveScalarSimulation)::Bool # line 5:
SSAValue(12) = (Base.arraylen)((Core.getfield)(s::Simulation, :ux)::Array{Float64,3})::Int64
SSAValue(13) = (Base.select_value)((Base.sle_int)(1, SSAValue(12))::Bool, SSAValue(12), (Base.sub_int)(1, 1)::Int64)::Int64
#temp#::Int64 = 1
7:
unless (Base.not_int)((#temp#::Int64 === (Base.add_int)(SSAValue(13), 1)::Int64)::Bool)::Bool goto 36
SSAValue(14) = #temp#::Int64
i::Int64 = SSAValue(14)
#temp#::Int64 = SSAValue(15) # line 6:
SSAValue(2) = (Base.mul_float)((Base.arrayref)((Core.getfield)(s::Simulation, :ux)::Array{Float64,3}, i::Int64)::Float64, 0.5)::Float64
(Base.arrayset)((Core.getfield)(s::Simulation, :ux)::Array{Float64,3}, SSAValue(2), i::Int64)::Array{Float64,3} # line 7:
SSAValue(3) = (Base.mul_float)((Base.arrayref)((Core.getfield)(s::Simulation, :uy)::Array{Float64,3}, i::Int64)::Float64, 0.5)::Float64
(Base.arrayset)((Core.getfield)(s::Simulation, :uy)::Array{Float64,3}, SSAValue(3), i::Int64)::Array{Float64,3} # line 8:
SSAValue(4) = (Base.mul_float)((Base.arrayref)((Core.getfield)(s::Simulation, :uz)::Array{Float64,3}, i::Int64)::Float64, 0.5)::Float64
(Base.arrayset)((Core.getfield)(s::Simulation, :uz)::Array{Float64,3}, SSAValue(4), i::Int64)::Array{Float64,3} # line 10:
SSAValue(5) = (Base.muladd_float)((Base.mul_float)(0.5, (Base.arrayref)((Core.getfield)(s::Simulation, :uy)::Array{Float64,3}, i::Int64)::Float64)::Float64, (Base.arrayref)((Core.getfield)(s::Simulation, :rhsz)::Array{Float64,3}, i::Int64)::Float64, (Base.mul_float)((Base.mul_float)(-0.5, (Base.arrayref)((Core.getfield)(s::Simulation, :uz)::Array{Float64,3}, i::Int64)::Float64)::Float64, (Base.arrayref)((Core.getfield)(s::Simulation, :rhsy)::Array{Float64,3}, i::Int64)::Float64)::Float64)::Float64
(Base.arrayset)((Core.getfield)(s::Simulation, :auxx)::Array{Float64,3}, SSAValue(5), i::Int64)::Array{Float64,3} # line 11:
SSAValue(6) = (Base.muladd_float)((Base.mul_float)(0.5, (Base.arrayref)((Core.getfield)(s::Simulation, :uz)::Array{Float64,3}, i::Int64)::Float64)::Float64, (Base.arrayref)((Core.getfield)(s::Simulation, :rhsx)::Array{Float64,3}, i::Int64)::Float64, (Base.mul_float)((Base.mul_float)(-0.5, (Base.arrayref)((Core.getfield)(s::Simulation, :ux)::Array{Float64,3}, i::Int64)::Float64)::Float64, (Base.arrayref)((Core.getfield)(s::Simulation, :rhsz)::Array{Float64,3}, i::Int64)::Float64)::Float64)::Float64
(Base.arrayset)((Core.getfield)(s::Simulation, :auxy)::Array{Float64,3}, SSAValue(6), i::Int64)::Array{Float64,3} # line 12:
SSAValue(7) = (Base.muladd_float)((Base.mul_float)(0.5, (Base.arrayref)((Core.getfield)(s::Simulation, :ux)::Array{Float64,3}, i::Int64)::Float64)::Float64, (Base.arrayref)((Core.getfield)(s::Simulation, :rhsy)::Array{Float64,3}, i::Int64)::Float64, (Base.mul_float)((Base.mul_float)(-0.5, (Base.arrayref)((Core.getfield)(s::Simulation, :uy)::Array{Float64,3}, i::Int64)::Float64)::Float64, (Base.arrayref)((Core.getfield)(s::Simulation, :rhsx)::Array{Float64,3}, i::Int64)::Float64)::Float64)::Float64
(Base.arrayset)((Core.getfield)(s::Simulation, :auxz)::Array{Float64,3}, SSAValue(7), i::Int64)::Array{Float64,3} # line 13:
unless isitscalar::Bool goto 34 # line 17:
34:
goto 7
36:
return
end::Void

julia> @code_warntype testfunction(s2)
Variables:
#self# <optimized out>
s::PassiveScalarSimulation
i::Int64
#temp#::Int64
scale <optimized out>
mscale <optimized out>
isitscalar::Bool

Body:
begin  # line 4:
isitscalar::Bool = ($(Expr(:static_parameter, 1)) <: Main.PassiveScalarSimulation)::Bool # line 5:
SSAValue(12) = (Base.arraylen)((Core.getfield)(s::PassiveScalarSimulation, :ux)::Array{Float64,3})::Int64
SSAValue(13) = (Base.select_value)((Base.sle_int)(1, SSAValue(12))::Bool, SSAValue(12), (Base.sub_int)(1, 1)::Int64)::Int64
#temp#::Int64 = 1
7:
unless (Base.not_int)((#temp#::Int64 === (Base.add_int)(SSAValue(13), 1)::Int64)::Bool)::Bool goto 47
SSAValue(14) = #temp#::Int64
i::Int64 = SSAValue(14)
#temp#::Int64 = SSAValue(15) # line 6:
SSAValue(2) = (Base.mul_float)((Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :ux)::Array{Float64,3}, i::Int64)::Float64, 0.5)::Float64
(Base.arrayset)((Core.getfield)(s::PassiveScalarSimulation, :ux)::Array{Float64,3}, SSAValue(2), i::Int64)::Array{Float64,3} # line 7:
SSAValue(3) = (Base.mul_float)((Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :uy)::Array{Float64,3}, i::Int64)::Float64, 0.5)::Float64
(Base.arrayset)((Core.getfield)(s::PassiveScalarSimulation, :uy)::Array{Float64,3}, SSAValue(3), i::Int64)::Array{Float64,3} # line 8:
SSAValue(4) = (Base.mul_float)((Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :uz)::Array{Float64,3}, i::Int64)::Float64, 0.5)::Float64
(Base.arrayset)((Core.getfield)(s::PassiveScalarSimulation, :uz)::Array{Float64,3}, SSAValue(4), i::Int64)::Array{Float64,3} # line 10:
SSAValue(5) = (Base.muladd_float)((Base.mul_float)(0.5, (Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :uy)::Array{Float64,3}, i::Int64)::Float64)::Float64, (Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :rhsz)::Array{Float64,3}, i::Int64)::Float64, (Base.mul_float)((Base.mul_float)(-0.5, (Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :uz)::Array{Float64,3}, i::Int64)::Float64)::Float64, (Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :rhsy)::Array{Float64,3}, i::Int64)::Float64)::Float64)::Float64
(Base.arrayset)((Core.getfield)(s::PassiveScalarSimulation, :auxx)::Array{Float64,3}, SSAValue(5), i::Int64)::Array{Float64,3} # line 11:
SSAValue(6) = (Base.muladd_float)((Base.mul_float)(0.5, (Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :uz)::Array{Float64,3}, i::Int64)::Float64)::Float64, (Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :rhsx)::Array{Float64,3}, i::Int64)::Float64, (Base.mul_float)((Base.mul_float)(-0.5, (Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :ux)::Array{Float64,3}, i::Int64)::Float64)::Float64, (Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :rhsz)::Array{Float64,3}, i::Int64)::Float64)::Float64)::Float64
(Base.arrayset)((Core.getfield)(s::PassiveScalarSimulation, :auxy)::Array{Float64,3}, SSAValue(6), i::Int64)::Array{Float64,3} # line 12:
SSAValue(7) = (Base.muladd_float)((Base.mul_float)(0.5, (Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :ux)::Array{Float64,3}, i::Int64)::Float64)::Float64, (Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :rhsy)::Array{Float64,3}, i::Int64)::Float64, (Base.mul_float)((Base.mul_float)(-0.5, (Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :uy)::Array{Float64,3}, i::Int64)::Float64)::Float64, (Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :rhsx)::Array{Float64,3}, i::Int64)::Float64)::Float64)::Float64
(Base.arrayset)((Core.getfield)(s::PassiveScalarSimulation, :auxz)::Array{Float64,3}, SSAValue(7), i::Int64)::Array{Float64,3} # line 13:
unless isitscalar::Bool goto 45 # line 14:
SSAValue(8) = (Base.mul_float)((Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :Ļ)::Array{Float64,3}, i::Int64)::Float64, 0.5)::Float64
(Base.arrayset)((Core.getfield)(s::PassiveScalarSimulation, :Ļ)::Array{Float64,3}, SSAValue(8), i::Int64)::Array{Float64,3} # line 15:
SSAValue(9) = (Base.mul_float)((Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :ux)::Array{Float64,3}, i::Int64)::Float64, (Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :Ļ)::Array{Float64,3}, i::Int64)::Float64)::Float64
(Base.arrayset)((Core.getfield)(s::PassiveScalarSimulation, :rhsx)::Array{Float64,3}, SSAValue(9), i::Int64)::Array{Float64,3} # line 16:
SSAValue(10) = (Base.mul_float)((Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :uy)::Array{Float64,3}, i::Int64)::Float64, (Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :Ļ)::Array{Float64,3}, i::Int64)::Float64)::Float64
(Base.arrayset)((Core.getfield)(s::PassiveScalarSimulation, :rhsy)::Array{Float64,3}, SSAValue(10), i::Int64)::Array{Float64,3} # line 17:
SSAValue(11) = (Base.mul_float)((Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :uz)::Array{Float64,3}, i::Int64)::Float64, (Base.arrayref)((Core.getfield)(s::PassiveScalarSimulation, :Ļ)::Array{Float64,3}, i::Int64)::Float64)::Float64
(Base.arrayset)((Core.getfield)(s::PassiveScalarSimulation, :rhsz)::Array{Float64,3}, SSAValue(11), i::Int64)::Array{Float64,3}
45:
goto 7
47:
return
end::Void

``````

So, the output doesn't show `<optimized out>` for `isitscalar`, but the body of the functions does suggest that the if statement is being optimized out.
Also, on Julia nightly it does show `<optimized out>` for `isitscalar`.

I know what I'm trying to do goes against this performance tip, but it really helps me to avoid code repetition, since for different `AbstractSimulation` types most of the for loop is the same, with a small addition of lines for different kinds of `Simulations`. It would be really cumbersome to write a different method for each type of simulation.

Yeah, the result of that variable is used to decide whether to jump to 34 or… jump to 34, so I agree that it's actually been optimized out of the loop body. Further optimization passes in the LLVM and native code generation may be able to eliminate the trivially useless `unless`, though I don't personally know. It's likely not worth worrying about.

> I know what I'm trying to do goes against this performance tip, but it really helps me to avoid code repetition, since for different `AbstractSimulation` types most of the for loop is the same, with a small addition of lines for different kinds of `Simulations`

If most of the body is the same, with small additions for different kinds of simulations, then it seems likely that you could write one generic `testfunction` and have it call other functions at the points where you want the two types to behave differently. Then just implement those inner functions for each of your types (some of those might even be no-op functions, but that's fine: inlining will mean that those no-op functions don't have a run-time cost).

1 Like

Correct. Most control flow optimizations happen at the LLVM IR level.

1 Like