julia> function f()
a = 1
function g()
a += 1
return a
end
return g()+a+2
end
f (generic function with 1 method)
julia> @code_warntype f()
Variables:
a@_2::Core.Box
g::getfield(Main, Symbol("#g#1"))
a@_4::Any
Body:
begin
a@_2::Core.Box = $(Expr(:new, :(Core.Box)))
(Core.setfield!)(a@_2::Core.Box, :contents, 1)::Int64
#= line 3 =#
g::getfield(Main, Symbol("#g#1")) = $(Expr(:new, :(Main.#g#1), :(a@_2)))
#= line 7 =#
SSAValue(0) = $(Expr(:invoke, MethodInstance for (::getfield(Main, Symbol("#g#1")))(), :(g)))::Any
unless (Core.isdefined)(a@_2::Core.Box, :contents)::Bool goto 9
goto 12
9:
NewvarNode(:(a@_4::Any))
a@_4::Any
12:
SSAValue(1) = (Core.getfield)(a@_2::Core.Box, :contents)::Any
# meta: location operators.jl + 469
SSAValue(2) = ((SSAValue(0) + SSAValue(1))::Any + 2)::Any
# meta: pop location
return SSAValue(2)
end::Any
So an inner function should not change an outer function’s variable; any such changes should instead be communicated through function arguments and returned objects, not by modifying the captured variable directly. The let workaround seems to work only for read-only cases where the inner function is reading but not modifying the outer function’s variable.
Are there other styles you would not recommend to ensure type stability with little or no changes to the initial coding attempt? I am interested to hear your thoughts on this.
There’s nothing wrong with that style of programming, the compiler just happens to be confused by it for now, but of course we’d like to address that at some point. I would argue that this example is not actually type unstable since the result type is completely predictable from the input types. The only issue is that the compiler doesn’t know that.
The closure bug is the #1 performance killer in my Julia code (mainly mathematical models with lots of parameters & variables using JuMP and DifferentialEquations). In fact, I’d argue that it’s so hard to avoid in these applications that we should consider acknowledging the bug in the main Julia docs with some tips for workarounds, especially if it’s still alive and kicking in the 1.0 release.
The bug doesn’t only trigger for explicit closures (inner functions), but also array comprehensions and generator expressions. I actually wrote this code snippet yesterday just to avoid the bug in a hot inner loop:
# y = sum((a[k] + b[k]*T)*x^k for k=1:5)
y = (a[1] + b[1]*T)*x^1 +
(a[2] + b[2]*T)*x^2 +
(a[3] + b[3]*T)*x^3 +
(a[4] + b[4]*T)*x^4 +
(a[5] + b[5]*T)*x^5
I can’t even begin to express the pain my DRY OCD causes me every time I see this in my code. (Yes, I know I could have wrapped it in a let block instead, but that is painful too. EDIT: Vectorization is also an alternative for the expression above, but it would become too messy in my real code.)
Have you tried making a mutable struct callable to emulate a closure? That usually works for me. It is also easier to debug. Of course more hassle to set up than a closure.
Hmm I doubt it can always (easily) replace the above programming style if there are multiple closures sharing the same variables and modifying them. But entertaining the possibility:
julia> mutable struct G
a
end
julia> function f()
g = G(1)
function (g::G)()
g.a += 1
return g.a
end
return g()+g.a+2
end
f (generic function with 1 method)
julia> @code_warntype f()
Variables:
#self#::#f
g::G
Body:
begin
g::G = $(Expr(:new, :(Main.G), 1)) # line 3:
$(Expr(:method, false, :((Core.svec)((Core.svec)(Main.G)::Any, (Core.svec)()::Any)::Any), CodeInfo(:(begin # REPL[2], line 4:
SSAValue(0) = ((Core.getfield)(g::Any, :a)::Any + 1)::Any
(Core.setfield!)(g::Any, :a, (Base.convert)((Core.fieldtype)((Core.typeof)(g::Any)::Any, :a)::Any, SSAValue(0))::Any)::Any # line 5:
return (Core.getfield)(g::Any, :a)::Any
end::Any)), false)) # line 7:
SSAValue(1) = (g::G)()::Any
SSAValue(0) = (Core.getfield)(g::G, :a)::Any
$(Expr(:inbounds, false))
# meta: location operators.jl + 424
SSAValue(2) = ((SSAValue(1) + SSAValue(0))::Any + 2)::Any
# meta: pop location
$(Expr(:inbounds, :pop))
return SSAValue(2)
end::Any
This transformation is always possible since that’s exactly how closures are implemented internally. It may be slightly less convenient since you have to define the type yourself, but otherwise it’s exactly the same.
Since this is the case, is there some benefit to defining your own (immutable) struct for read-only captured variables instead of using a closure? I believe structs are supposed to be more performant than mutable structs. Usually when I use a closure I have no want to modify the captured variables.
For example, I rewrote this to return a new G instead of mutating the contents.
julia> struct G
a::Int
end
julia> function (g::G)()
newg = G(g.a + 1)
return newg
end
julia> function f()
g = G(1)
g = g()
return g.a + g.a + 2
end
f (generic function with 1 method)
julia> f()
6
julia> @code_warntype f()
Variables:
#self# <optimized out>
g::G
newg::G
Body:
begin
g::G = $(Expr(:new, :(Main.G), 1)) # line 3:
$(Expr(:inbounds, false))
# meta: location REPL[2] G 2
newg::G = $(Expr(:new, :(Main.G), :((Base.add_int)((Core.getfield)(g, :a)::Int64, 1)::Int64)))
# meta: pop location
$(Expr(:inbounds, :pop))
g::G = newg::G # line 4:
return (Base.add_int)((Base.add_int)((Core.getfield)(g::G, :a)::Int64, (Core.getfield)(g::G, :a)::Int64)::Int64, 2)::Int64
end::Int64
julia> using BenchmarkTools
julia> @btime f()
1.817 ns (0 allocations: 0 bytes)
6
Well on nightly this particular case is completely constant folded:
julia> @code_llvm f()
; Function f
; Location: REPL[1]:2
define i64 @julia_f_53210() {
top:
; Location: REPL[1]:7
ret i64 6
}
The compiler is getting better and better, so we’ll have to be more careful about ensuring that the simplified example is still representative of the actual use case. I would check that these results extrapolate to more complicated fs and gs.
# Benchmark variant 1/3: emulate the closure with an IMMUTABLE callable struct.
# The functor never mutates captured state; it returns a fresh G, which keeps
# everything isbits/type-stable and allocation-free.
using BenchmarkTools
# Immutable holder for the two "captured" variables.
struct G
a::Int
b::Int
end
# Functor: build a new G from the current fields.
# NOTE(review): both sums here read the OLD fields, whereas the mutable and
# Ref variants below compute their second sum from the already-updated first
# value — so the three benchmarks do not compute identical results.
# @noinline prevents the compiler from constant-folding the whole call away.
@noinline (g::G)() = G(sum(i*g.b for i in 1:5), sum(i*g.a for i in 1:5))
# Driver: "update" g twice by rebinding the local name, then reduce to an Int.
function f()
g = G(1,3)
g = g()
g = G(g.a*g.b, g.b)
return g.a + g.b + 2
end
@btime f()
# 17.144 ns (0 allocations: 0 bytes)
# Benchmark variant 2/3: same idea via a MUTABLE struct whose functor updates
# the fields in place (closest in spirit to the original closure).
using BenchmarkTools
# Mutable holder for the two "captured" variables.
mutable struct G
a::Int
b::Int
end
# Functor mutates g in place; @noinline keeps the call observable to @btime.
# Note: g.b is computed from the freshly assigned g.a (sequential update).
@noinline function (g::G)()
g.a = sum(i*g.b for i in 1:5)
g.b = sum(i*g.a for i in 1:5)
return
end
function f()
g = G(1,3)
g()
g.a *= g.b
return g.a + g.b + 2
end
@btime f()
# 23.755 ns (5 allocations: 128 bytes)
# Benchmark variant 3/3: a real inner function (closure), with the shared
# state wrapped in Refs so the captured bindings themselves are never
# reassigned — the standard workaround for the closure boxing issue.
using BenchmarkTools
function f()
a = Ref(1)
b = Ref(3)
# Inner closure mutates the Ref CONTENTS only; the captured bindings a and b
# are never rebound. b[] is computed from the already-updated a[].
@noinline function g()
a[] = sum(i*b[] for i in 1:5)
b[] = sum(i*a[] for i in 1:5)
return
end
g()
a[] *= b[]
return a[] + b[] + 2
end
@btime f()
# 31.792 ns (7 allocations: 160 bytes)
This ranking is similar to that obtained by running the simple example on v0.6.1, so it seems that using the callable immutable struct in more complicated cases may actually be worth it.
# Parameterized rerun of variant 1 (immutable callable struct): the inputs
# arrive as function arguments so the compiler cannot constant-fold the
# whole benchmark away.
using BenchmarkTools
struct G
a::Int
b::Int
end
# Functor returns a new G. Unlike the first immutable variant, b here IS
# computed from the fresh a, matching the sequential-update semantics of the
# mutable and Ref variants.
@noinline function (g::G)(c)
a = sum(i*g.b for i in 1:c)
b = sum(i*a for i in 1:c)
return G(a, b)
end
# Driver: rebind g to each new value, then reduce to an Int.
function f(c1, c2, c3)
g = G(c1,c2)
g = g(c3)
g = G(g.a*g.b, g.b)
return g.a + g.b + 2
end
@btime f(1, 3, 5)
# 17.144 ns (0 allocations: 0 bytes)
Mutable:
# Parameterized rerun of variant 2 (mutable struct, in-place field updates).
using BenchmarkTools
mutable struct G
a::Int
b::Int
end
# Functor mutates g in place; g.b is computed from the freshly assigned g.a.
@noinline function (g::G)(c)
g.a = sum(i*g.b for i in 1:c)
g.b = sum(i*g.a for i in 1:c)
return
end
function f(c1, c2, c3)
g = G(c1,c2)
g(c3)
g.a *= g.b
return g.a + g.b + 2
end
@btime f(1, 3, 5)
# 23.766 ns (5 allocations: 128 bytes)
Ref:
# Parameterized rerun of variant 3 (closure over Refs).
using BenchmarkTools
function f(c1, c2, c3)
a = Ref(c1)
b = Ref(c2)
# Closure mutates the Ref contents only; the captured bindings a and b are
# never reassigned, so they should not be boxed.
@noinline function g(c)
a[] = sum(i*b[] for i in 1:c)
b[] = sum(i*a[] for i in 1:c)
return
end
g(c3)
a[] *= b[]
return a[] + b[] + 2
end
@btime f(1, 3, 5)
# 32.289 ns (7 allocations: 160 bytes)