Yes and yes. Thank you for adding the (let’s say) clarification.
While you’re here: I tried running your example on the current 1.6 nightly and got the error below. What do I need to fix locally?
julia> using LoopVectorization
julia> function AmulB!(C, A, B)
# Fill C in place with the matrix product of A and B, then return C.
# No dimension checks are done here: m and n come from the axes of C, while k
# comes from axes(B,1) and is also used as the column index into A.
# The whole loop nest is handed to LoopVectorization's @avx macro.
@avx for m ∈ axes(C,1), n ∈ axes(C,2)
# Start each output entry from a zero of C's element type.
Cₘₙ = zero(eltype(C))
for k ∈ axes(B,1)
Cₘₙ += A[m,k] * B[k,n]
end
# Store the accumulated sum for this (m, n) entry.
C[m,n] = Cₘₙ
end
C
end
AmulB! (generic function with 1 method)
julia> M = K = N = 4;
julia> A = rand(M, K); B = rand(K, N); C = Matrix{Float64}(undef, M, N);
julia> AmulB!(C, A, B)
ERROR: Module IR does not contain specified entry function
Stacktrace:
[1] assume
@ ~\.julia\packages\SIMDPirates\EVSvY\src\llvm_utils.jl:308 [inlined]
[2] macro expansion
@ ~\.julia\packages\LoopVectorization\pHMnJ\src\reconstruct_loopset.jl:503 [inlined]
[3] _avx_!(::Val{(0, 0, 0, 4)}, ::Type{Tuple{:numericconstant, Symbol("##zero#276"), LoopVectorization.OperationStruct(0x0000000000000012, 0x0000000000000000, 0x0000000000000003, 0x0000000000000000, LoopVectorization.constant, 0x00, 0x01), :LoopVectorization, :getindex, LoopVectorization.OperationStruct(0x0000000000000013, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, LoopVectorization.memload, 0x01, 0x02), :LoopVectorization, :getindex, LoopVectorization.OperationStruct(0x0000000000000032, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, LoopVectorization.memload, 0x02, 0x03), :LoopVectorization, :vfmadd_fast, LoopVectorization.OperationStruct(0x0000000000000132, 0x0000000000000003, 0x0000000000000000, 0x0000000000020301, LoopVectorization.compute, 0x00, 0x01), :LoopVectorization, :identity, LoopVectorization.OperationStruct(0x0000000000000012, 0x0000000000000003, 0x0000000000000000, 0x0000000000000004, LoopVectorization.compute, 0x00, 0x01), :LoopVectorization, :setindex!, LoopVectorization.OperationStruct(0x0000000000000012, 0x0000000000000003, 0x0000000000000000, 0x0000000000000005, LoopVectorization.memstore, 0x03, 0x04)}}, ::Type{Tuple{LoopVectorization.ArrayRefStruct{:A, Symbol("##vptr##_A")}(0x0000000000000101, 0x0000000000000103, 0x0000000000000000), LoopVectorization.ArrayRefStruct{:B, Symbol("##vptr##_B")}(0x0000000000000101, 0x0000000000000302, 0x0000000000000000), LoopVectorization.ArrayRefStruct{:C, Symbol("##vptr##_C")}(0x0000000000000101, 0x0000000000000102, 0x0000000000000000)}}, ::Type{Tuple{0, Tuple{}, Tuple{}, Tuple{}, Tuple{}, Tuple{(1, LoopVectorization.IntOrFloat)}, Tuple{}}}, ::Type{Tuple{:m, :n, :k}}, ::Tuple{VectorizationBase.StaticLowerUnitRange{1}, VectorizationBase.StaticLowerUnitRange{1}, VectorizationBase.StaticLowerUnitRange{1}}, ::VectorizationBase.PackedStridedPointer{Float64, 1}, ::VectorizationBase.PackedStridedPointer{Float64, 1}, ::VectorizationBase.PackedStridedPointer{Float64, 1})
@ LoopVectorization ~\.julia\packages\LoopVectorization\pHMnJ\src\reconstruct_loopset.jl:503
[4] AmulB!(C::Matrix{Float64}, A::Matrix{Float64}, B::Matrix{Float64})
@ Main .\REPL[2]:2
[5] top-level scope
@ REPL[5]:1