To give some reference, this is what Julia compiles iterate
on a matrix to:
julia> @code_native iterate(a)
.text
; ┌ @ array.jl:704 within `iterate' @ array.jl:704
cmpq $0, 8(%rsi)
jle L29
; │┌ @ array.jl:728 within `getindex'
movq (%rsi), %rax
movq (%rax), %rax
; │└
; │ @ array.jl:704 within `iterate'
movq %rax, (%rdi)
movq $2, 8(%rdi)
movb $2, %dl
xorl %eax, %eax
retq
L29:
movb $1, %dl
xorl %eax, %eax
; │ @ array.jl:704 within `iterate'
retq
nopw %cs:(%rax,%rax)
; └
It is basically nothing.
And here is an interpreter session stepping through the same
julia> a = rand(2,2); @enter iterate(a)
In iterate(A, i) at array.jl:704
>704 iterate(A::Array, i=1) = (@_inline_meta; (i % UInt) - 1 < length(A) ? (@inbounds A[i], i + 1) : nothing)
About to run: (rem)(1, UInt64)
1|debug> s
In rem(x, #unused#) at int.jl:464
>464 @eval rem(x::($from), ::Type{$to}) = bitcast($to, x)
About to run: (bitcast)(UInt64, 1)
1|debug>
In rem(x, #unused#) at int.jl:464
>464 @eval rem(x::($from), ::Type{$to}) = bitcast($to, x)
About to run: return 0x0000000000000001
1|debug>
In iterate(A, i) at array.jl:704
>704 iterate(A::Array, i=1) = (@_inline_meta; (i % UInt) - 1 < length(A) ? (@inbounds A[i], i + 1) : nothing)
About to run: (-)(0x0000000000000001, 1)
1|debug>
In -(a, b) at int.jl:871
870 @eval function $op(a::Integer, b::Integer)
>871 T = promote_typeof(a, b)
872 aT, bT = a % T, b % T
873 not_sametype((a, b), (aT, bT))
874 return $op(aT, bT)
875 end
About to run: (Base.promote_typeof)(0x0000000000000001, 1)
1|debug>
In promote_typeof(x, xs) at promotion.jl:264
>264 promote_typeof(x, xs...) = (@_inline_meta; promote_type(typeof(x), promote_typeof(xs...)))
About to run: (typeof)(0x0000000000000001)
1|debug>
In promote_typeof(x, xs) at promotion.jl:264
>264 promote_typeof(x, xs...) = (@_inline_meta; promote_type(typeof(x), promote_typeof(xs...)))
About to run: (Base.promote_typeof)(1)
1|debug>
In promote_typeof(x) at promotion.jl:263
>263 promote_typeof(x) = typeof(x)
About to run: (typeof)(1)
1|debug>
In promote_typeof(x) at promotion.jl:263
>263 promote_typeof(x) = typeof(x)
About to run: return Int64
1|debug>
In promote_typeof(x, xs) at promotion.jl:264
>264 promote_typeof(x, xs...) = (@_inline_meta; promote_type(typeof(x), promote_typeof(xs...)))
About to run: (promote_type)(UInt64, Int64)
1|debug>
In promote_type(#unused#, #unused#) at promotion.jl:215
217 # and there is a fallback returning Bottom below, so the common case is
218 # promote_type(T, S) =>
219 # promote_result(T, S, result, Bottom) =>
220 # typejoin(result, Bottom) => result
>221 promote_result(T, S, promote_rule(T,S), promote_rule(S,T))
222 end
About to run: (promote_rule)(UInt64, Int64)
1|debug>
In promote_rule(#unused#, #unused#) at int.jl:628
>628 promote_rule(::Type{UInt64}, ::Type{Int64} ) = UInt64
About to run: return UInt64
1|debug>
In promote_type(#unused#, #unused#) at promotion.jl:215
217 # and there is a fallback returning Bottom below, so the common case is
218 # promote_type(T, S) =>
219 # promote_result(T, S, result, Bottom) =>
220 # typejoin(result, Bottom) => result
>221 promote_result(T, S, promote_rule(T,S), promote_rule(S,T))
222 end
About to run: (promote_rule)(Int64, UInt64)
1|debug>
In promote_rule(#unused#, #unused#) at promotion.jl:233
>233 promote_rule(::Type{<:Any}, ::Type{<:Any}) = Bottom
About to run: return Union{}
1|debug>
In promote_type(#unused#, #unused#) at promotion.jl:215
217 # and there is a fallback returning Bottom below, so the common case is
218 # promote_type(T, S) =>
219 # promote_result(T, S, result, Bottom) =>
220 # typejoin(result, Bottom) => result
>221 promote_result(T, S, promote_rule(T,S), promote_rule(S,T))
222 end
About to run: (Base.promote_result)(UInt64, Int64, UInt64, Union{})
1|debug>
In promote_result(#unused#, #unused#, #unused#, #unused#) at promotion.jl:239
>239 promote_result(::Type{<:Any},::Type{<:Any},::Type{T},::Type{S}) where {T,S} = (@_inline_meta; promote_type(T,S))
About to run: (promote_type)(UInt64, Union{})
1|debug>
In promote_type(#unused#, #unused#) at promotion.jl:211
>211 promote_type(::Type{T}, ::Type{Bottom}) where {T} = T
About to run: return UInt64
1|debug>
In promote_result(#unused#, #unused#, #unused#, #unused#) at promotion.jl:239
>239 promote_result(::Type{<:Any},::Type{<:Any},::Type{T},::Type{S}) where {T,S} = (@_inline_meta; promote_type(T,S))
About to run: return UInt64
1|debug>
In promote_type(#unused#, #unused#) at promotion.jl:215
217 # and there is a fallback returning Bottom below, so the common case is
218 # promote_type(T, S) =>
219 # promote_result(T, S, result, Bottom) =>
220 # typejoin(result, Bottom) => result
>221 promote_result(T, S, promote_rule(T,S), promote_rule(S,T))
222 end
About to run: return UInt64
1|debug>
In promote_typeof(x, xs) at promotion.jl:264
>264 promote_typeof(x, xs...) = (@_inline_meta; promote_type(typeof(x), promote_typeof(xs...)))
About to run: return UInt64
1|debug>
In -(a, b) at int.jl:871
870 @eval function $op(a::Integer, b::Integer)
871 T = promote_typeof(a, b)
>872 aT, bT = a % T, b % T
873 not_sametype((a, b), (aT, bT))
874 return $op(aT, bT)
875 end
About to run: (rem)(0x0000000000000001, UInt64)
1|debug>
In rem(x, #unused#) at int.jl:493
>493 rem(x::T, ::Type{T}) where {T<:Integer} = x
About to run: return 0x0000000000000001
1|debug>
In -(a, b) at int.jl:871
870 @eval function $op(a::Integer, b::Integer)
871 T = promote_typeof(a, b)
>872 aT, bT = a % T, b % T
873 not_sametype((a, b), (aT, bT))
874 return $op(aT, bT)
875 end
About to run: (rem)(1, UInt64)
1|debug>
In rem(x, #unused#) at int.jl:464
>464 @eval rem(x::($from), ::Type{$to}) = bitcast($to, x)
About to run: (bitcast)(UInt64, 1)
1|debug>
In rem(x, #unused#) at int.jl:464
>464 @eval rem(x::($from), ::Type{$to}) = bitcast($to, x)
About to run: return 0x0000000000000001
1|debug>
In -(a, b) at int.jl:871
870 @eval function $op(a::Integer, b::Integer)
871 T = promote_typeof(a, b)
872 aT, bT = a % T, b % T
>873 not_sametype((a, b), (aT, bT))
874 return $op(aT, bT)
875 end
About to run: (tuple)(0x0000000000000001, 1)
1|debug>
In -(a, b) at int.jl:871
870 @eval function $op(a::Integer, b::Integer)
871 T = promote_typeof(a, b)
872 aT, bT = a % T, b % T
>873 not_sametype((a, b), (aT, bT))
874 return $op(aT, bT)
875 end
About to run: (tuple)(0x0000000000000001, 0x0000000000000001)
1|debug>
In -(a, b) at int.jl:871
870 @eval function $op(a::Integer, b::Integer)
871 T = promote_typeof(a, b)
872 aT, bT = a % T, b % T
>873 not_sametype((a, b), (aT, bT))
874 return $op(aT, bT)
875 end
About to run: (Base.not_sametype)((0x0000000000000001, 1), (0x0000000000000001, 0x0000000000000001))
1|debug>
In not_sametype(x, y) at promotion.jl:304
>304 not_sametype(x, y) = nothing
About to run: return
1|debug>
In -(a, b) at int.jl:871
870 @eval function $op(a::Integer, b::Integer)
871 T = promote_typeof(a, b)
872 aT, bT = a % T, b % T
873 not_sametype((a, b), (aT, bT))
>874 return $op(aT, bT)
875 end
About to run: (-)(0x0000000000000001, 0x0000000000000001)
1|debug>
In -(x, y) at int.jl:52
>52 (-)(x::T, y::T) where {T<:BitInteger} = sub_int(x, y)
About to run: (sub_int)(0x0000000000000001, 0x0000000000000001)
1|debug>
In -(x, y) at int.jl:52
>52 (-)(x::T, y::T) where {T<:BitInteger} = sub_int(x, y)
About to run: return 0x0000000000000000
1|debug>
In -(a, b) at int.jl:871
870 @eval function $op(a::Integer, b::Integer)
871 T = promote_typeof(a, b)
872 aT, bT = a % T, b % T
873 not_sametype((a, b), (aT, bT))
>874 return $op(aT, bT)
875 end
About to run: return 0x0000000000000000
1|debug>
In iterate(A, i) at array.jl:704
>704 iterate(A::Array, i=1) = (@_inline_meta; (i % UInt) - 1 < length(A) ? (@inbounds A[i], i + 1) : nothing)
About to run: (length)([0.1916999763887164 0.5944419303038921; 0.3440468271194841 0.062447438819570156])
1|debug>
In length(a) at array.jl:200
>200 length(a::Array) = arraylen(a)
About to run: (arraylen)([0.1916999763887164 0.5944419303038921; 0.3440468271194841 0.062447438819570156])
1|debug>
In length(a) at array.jl:200
>200 length(a::Array) = arraylen(a)
About to run: return 4
1|debug>
In iterate(A, i) at array.jl:704
>704 iterate(A::Array, i=1) = (@_inline_meta; (i % UInt) - 1 < length(A) ? (@inbounds A[i], i + 1) : nothing)
About to run: (<)(0x0000000000000000, 4)
1|debug>
In <(x, y) at int.jl:430
>430 <( x::BitUnsigned, y::BitSigned ) = (y >= 0) & (x < unsigned(y))
About to run: (>=)(4, 0)
1|debug>
In >=(x, y) at operators.jl:341
>341 >=(x, y) = (y <= x)
About to run: (<=)(0, 4)
1|debug>
In <=(x, y) at int.jl:424
>424 (<=)(x::T, y::T) where {T<:BitSigned} = sle_int(x, y)
About to run: (sle_int)(0, 4)
1|debug>
In <=(x, y) at int.jl:424
>424 (<=)(x::T, y::T) where {T<:BitSigned} = sle_int(x, y)
About to run: return true
1|debug>
In >=(x, y) at operators.jl:341
>341 >=(x, y) = (y <= x)
About to run: return true
1|debug>
In <(x, y) at int.jl:430
>430 <( x::BitUnsigned, y::BitSigned ) = (y >= 0) & (x < unsigned(y))
About to run: (unsigned)(4)
1|debug>
In unsigned(x) at essentials.jl:372
>372 unsigned(x::Int) = reinterpret(UInt, x)
About to run: (reinterpret)(UInt64, 4)
1|debug>
In reinterpret(#unused#, x) at essentials.jl:417
>417 reinterpret(::Type{T}, x) where {T} = bitcast(T, x)
About to run: (bitcast)(UInt64, 4)
1|debug>
In reinterpret(#unused#, x) at essentials.jl:417
>417 reinterpret(::Type{T}, x) where {T} = bitcast(T, x)
About to run: return 0x0000000000000004
1|debug>
In unsigned(x) at essentials.jl:372
>372 unsigned(x::Int) = reinterpret(UInt, x)
About to run: return 0x0000000000000004
1|debug>
In <(x, y) at int.jl:430
>430 <( x::BitUnsigned, y::BitSigned ) = (y >= 0) & (x < unsigned(y))
About to run: (<)(0x0000000000000000, 0x0000000000000004)
1|debug>
In <(x, y) at int.jl:423
>423 (< )(x::T, y::T) where {T<:BitUnsigned} = ult_int(x, y)
About to run: (ult_int)(0x0000000000000000, 0x0000000000000004)
1|debug>
In <(x, y) at int.jl:423
>423 (< )(x::T, y::T) where {T<:BitUnsigned} = ult_int(x, y)
About to run: return true
1|debug>
In <(x, y) at int.jl:430
>430 <( x::BitUnsigned, y::BitSigned ) = (y >= 0) & (x < unsigned(y))
About to run: (&)(true, true)
1|debug>
In &(x, y) at bool.jl:40
>40 (&)(x::Bool, y::Bool) = and_int(x, y)
About to run: (and_int)(true, true)
1|debug>
In &(x, y) at bool.jl:40
>40 (&)(x::Bool, y::Bool) = and_int(x, y)
About to run: return true
1|debug>
In <(x, y) at int.jl:430
>430 <( x::BitUnsigned, y::BitSigned ) = (y >= 0) & (x < unsigned(y))
About to run: return true
1|debug>
In iterate(A, i) at array.jl:704
>704 iterate(A::Array, i=1) = (@_inline_meta; (i % UInt) - 1 < length(A) ? (@inbounds A[i], i + 1) : nothing)
About to run: (getindex)(<suppressed 80 bytes of output>, 1)
1|debug>
In getindex(A, i1) at array.jl:728
>728 @eval getindex(A::Array, i1::Int) = arrayref($(Expr(:boundscheck)), A, i1)
About to run: (Core.arrayref)(true, <suppressed 80 bytes of output>, 1)
1|debug>
In getindex(A, i1) at array.jl:728
>728 @eval getindex(A::Array, i1::Int) = arrayref($(Expr(:boundscheck)), A, i1)
About to run: return 0.1916999763887164
1|debug>
In iterate(A, i) at array.jl:704
>704 iterate(A::Array, i=1) = (@_inline_meta; (i % UInt) - 1 < length(A) ? (@inbounds A[i], i + 1) : nothing)
About to run: (+)(1, 1)
1|debug>
In +(x, y) at int.jl:53
>53 (+)(x::T, y::T) where {T<:BitInteger} = add_int(x, y)
About to run: (add_int)(1, 1)
1|debug>
In +(x, y) at int.jl:53
>53 (+)(x::T, y::T) where {T<:BitInteger} = add_int(x, y)
About to run: return 2
1|debug>
In iterate(A, i) at array.jl:704
>704 iterate(A::Array, i=1) = (@_inline_meta; (i % UInt) - 1 < length(A) ? (@inbounds A[i], i + 1) : nothing)
About to run: (tuple)(0.1916999763887164, 2)
1|debug>
In iterate(A, i) at array.jl:704
>704 iterate(A::Array, i=1) = (@_inline_meta; (i % UInt) - 1 < length(A) ? (@inbounds A[i], i + 1) : nothing)
About to run: $(Expr(:inbounds, :pop))
1|debug>
(0.1916999763887164, 2)
That is 62 statements, each that need to be matched against. For each function call, data structures for local variables and temporary variables etc need to be setup. For each of the intrinsics a ccall into the runtime is made to evaluate it.
Julia code is designed with the fact that there is an optimizing compiler in mind. No one worries about cost of function calls or stuff like typeof
that are computed statically. On the other hand, the interpreter (with its current implementation) deals with it all. What if someone put a breakpoint in the convert(x, ::Any) = x
method? We have put in some work to make the cost per instruction not too big but as Tim already said, when there is a huge number of instructions it will take a long time no matter what. So to get to the next order of magnitude speedups, more sophisticated optimization techniques are needed.
Personally, I feel it is a bit silly to compare a ~6 man month effort into interpreting a compiled language against Python which was designed from the start for interpretation, has decades of work done on it with support of many very large companies etc.