Hi there
I spotred possibly a strange bug using Tullio on CPU. Here is my package status (Julia-1.8.2)
[bdcacae8] LoopVectorization v0.12.141
[bc48ee85] Tullio v0.3.5
And my minimal non-wroking example
using LoopVectorization, Tullio
function mytest_tullio(wdelta,probnew)
q = size(wdelta,3)
@tullio mat[a,b,j] := wdelta[j,m,a]*probnew[m,a] (a in 1:q, b in 1:q)
end
to generate the error (at least on a linux machine see below for more info)
julia> a = rand(2,3,4); b= rand(3,4); mytest_tullio(a,b)
here below the output
raised error
ERROR: MethodError: no method matching reduced_add(::Float64, ::VectorizationBase.Vec{8, Float64})
Closest candidates are:
reduced_add(::Union{Bool, Float16, Float32, Float64, Int16, Int32, Int64, Int8, UInt16, UInt32, UInt64, UInt8, SIMDTypes.Bit}, ::Union{Bool, Float16, Float32, Float64, Int16, Int32, Int64, Int8, UInt16, UInt32, UInt64, UInt8, SIMDTypes.Bit}) at ~/.julia/packages/VectorizationBase/e4FnQ/src/llvm_intrin/intrin_funcs.jl:690
reduced_add(::VectorizationBase.AbstractSIMD{W}, ::VectorizationBase.AbstractSIMD{W}) where W at ~/.julia/packages/VectorizationBase/e4FnQ/src/llvm_intrin/intrin_funcs.jl:691
reduced_add(::VectorizationBase.AbstractSIMD, ::VectorizationBase.AbstractSIMD) at ~/.julia/packages/VectorizationBase/e4FnQ/src/llvm_intrin/intrin_funcs.jl:692
Stacktrace:
[1] macro expansion
@ ~/.julia/packages/LoopVectorization/DDH6Z/src/reconstruct_loopset.jl:965 [inlined]
[2] _turbo_!(::Val{(false, 0, 0, 0, false, 8, 64, 32, 64, 0x0000000000000001, 1, true)}, ::Val{(Symbol("##DROPPED#CONSTANT##"), Symbol("##DROPPED#CONSTANT##"), LoopVectorization.OperationStruct(0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, LoopVectorization.constant, 0x0001, 0x00), Symbol("##GLOBAL##CONSTANT##"), :nothing, LoopVectorization.OperationStruct(0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, LoopVectorization.constant, 0x0002, 0x00), Symbol("##DROPPED#CONSTANT##"), Symbol("##DROPPED#CONSTANT##"), LoopVectorization.OperationStruct(0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, LoopVectorization.constant, 0x0003, 0x00), :numericconstant, Symbol("###zero###18###"), LoopVectorization.OperationStruct(0x00000000000000000000000000000123, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, LoopVectorization.constant, 0x0004, 0x00), :LoopVectorization, :conditionalload, LoopVectorization.OperationStruct(0x00000000000000000000000000000321, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000006, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, LoopVectorization.memload, 0x0005, 0x01), :LoopVectorization, :LOOPCONSTANTINSTRUCTION, LoopVectorization.OperationStruct(0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, LoopVectorization.constant, 0x0006, 0x00), :LoopVectorization, :getindex, LoopVectorization.OperationStruct(0x00000000000000000000000000000143, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, LoopVectorization.memload, 0x0007, 0x02), :LoopVectorization, :getindex, LoopVectorization.OperationStruct(0x00000000000000000000000000000043, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, LoopVectorization.memload, 0x0008, 0x03), :numericconstant, Symbol("###reduction##zero###29###"), LoopVectorization.OperationStruct(0x00000000000000000000000000000321, 0x00000000000000000000000000000000, 0x00000000000000000000000000000004, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, LoopVectorization.constant, 0x0009, 0x00), :LoopVectorization, :vfmadd_fast, LoopVectorization.OperationStruct(0x00000000000000000000000000000143, 0x00000000000000000000000000000004, 0x00000000000000000000000000000000, 0x00000000000000000000000700080009, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, LoopVectorization.compute, 0x0009, 0x00), :LoopVectorization, :reduced_add, LoopVectorization.OperationStruct(0x00000000000000000000000000000321, 0x00000000000000000000000000000004, 0x00000000000000000000000000000000, 0x000000000000000000000000000a0005, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, LoopVectorization.compute, 0x0005, 0x00), :LoopVectorization, :setindex!, LoopVectorization.OperationStruct(0x00000000000000000000000000000321, 0x00000000000000000000000000000004, 0x00000000000000000000000000000000, 0x0000000000000000000000000000000b, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, LoopVectorization.memstore, 0x000a, 0x01))}, ::Val{(LoopVectorization.ArrayRefStruct{:β, Symbol("##vptr##_β")}(0x00000000000000000000000000010101, 0x00000000000000000000000000030201, 0x00000000000000000000000000000000, 0x00000000000000000000000000010101), LoopVectorization.ArrayRefStruct{:wdelta, Symbol("##vptr##_wdelta")}(0x00000000000000000000000000010101, 0x00000000000000000000000000010403, 0x00000000000000000000000000000000, 0x00000000000000000000000000010101), LoopVectorization.ArrayRefStruct{:probnew, Symbol("##vptr##_probnew")}(0x00000000000000000000000000000101, 0x00000000000000000000000000000403, 0x00000000000000000000000000000000, 0x00000000000000000000000000000101))}, ::Val{(0, (), (1, 3, 6), (), (), ((4, LoopVectorization.IntOrFloat), (9, LoopVectorization.IntOrFloat)), ())}, ::Val{(:j, :b, :a, :m)}, ::Val{Tuple{NTuple{4, Static.OptionallyStaticUnitRange{Static.StaticInt{0}, Int64}}, Tuple{LayoutPointers.GroupedStridedPointers{Tuple{Ptr{Float64}, Ptr{Float64}, Ptr{Float64}}, (1, 1, 1), (0, 0, 0), ((1, 2, 3), (1, 2, 3), (1, 2)), ((1, 2, 3), (4, 5, 6), (7, 8)), Tuple{Static.StaticInt{8}, Int64, Int64, Static.StaticInt{8}, Int64, Int64, Static.StaticInt{8}, Int64}, NTuple{8, Static.StaticInt{0}}}, Bool}}}, ::Int64, ::Int64, ::Int64, ::Int64, ::Ptr{Float64}, ::Ptr{Float64}, ::Ptr{Float64}, ::Int64, ::Int64, ::Int64, ::Int64, ::Int64, ::Bool)
@ LoopVectorization ~/.julia/packages/LoopVectorization/DDH6Z/src/reconstruct_loopset.jl:965
[3] ππΈπ!
@ ~/.julia/packages/Tullio/NGyNM/src/macro.jl:1093 [inlined]
[4] tile_halves(fun!::AttentionBasedPlmDCA.var"#ππΈπ!#814", ::Type{Array{Float64}}, As::Tuple{OffsetArrays.OffsetArray{Float64, 3, Array{Float64, 3}}, Array{Float64, 3}, Matrix{Float64}, UnitRange{Int64}}, Is::Tuple{UnitRange{Int64}, UnitRange{Int64}, UnitRange{Int64}}, Js::Tuple{UnitRange{Int64}}, keep::Nothing, final::Bool)
@ Tullio ~/.julia/packages/Tullio/NGyNM/src/threads.jl:139
[5] tile_halves
@ ~/.julia/packages/Tullio/NGyNM/src/threads.jl:136 [inlined]
[6] threader
@ ~/.julia/packages/Tullio/NGyNM/src/threads.jl:65 [inlined]
[7] (::AttentionBasedPlmDCA.var"#β³πΆπβ―#817"{AttentionBasedPlmDCA.var"#ππΈπ!#814"})(wdelta::Array{Float64, 3}, probnew::Matrix{Float64}, βͺ1:qβ«::UnitRange{Int64})
@ AttentionBasedPlmDCA ~/CODE/AttentionBasedPlmDCA.jl/src/wipattention.jl:140
[8] (::Tullio.Eval{AttentionBasedPlmDCA.var"#β³πΆπβ―#817"{AttentionBasedPlmDCA.var"#ππΈπ!#814"}, AttentionBasedPlmDCA.var"#8942#ββ³πΆπβ―#816"{AttentionBasedPlmDCA.var"#βππΈπ!#815"}})(::Array{Float64, 3}, ::Vararg{Any})
@ Tullio ~/.julia/packages/Tullio/NGyNM/src/eval.jl:20
[9] mytest_tullio(wdelta::Array{Float64, 3}, probnew::Matrix{Float64})
@ AttentionBasedPlmDCA ~/CODE/AttentionBasedPlmDCA.jl/src/wipattention.jl:303
[10] top-level scope
@ REPL[17]:1
On my mac it does not error but produces a strange
julia> mytest_tullio(a,b) # on mac os x
4Γ4Γ2 OffsetArray(::Array{Float64, 3}, 1:4, 1:4, 1:2) with eltype Float64 with indices 1:4Γ1:4Γ1:2:
[:, :, 1] =
0.718021 0.718021 0.718021 0.718021
0.747013 0.747013 0.747013 0.747013
0.596645 0.596645 0.596645 0.596645
0.852708 0.852708 0.852708 0.852708
[:, :, 2] =
0.33938 0.33938 0.33938 0.33938
0.93867 0.93867 0.93867 0.93867
0.341837 0.341837 0.341837 0.341837
0.26481 0.26481 0.26481 0.26481
Which is an offset array (???) rather than a 4x4x2 Array{Float64,3}
.
This is just a minimal nonworking example; I could do it in another way. But I find it odd:
- The error on linux
- The strange offset array output on mac
Before bothering the Tullio devs, maybe some of you see some evident mistakes on my side.
Thanks
A