For the following code
using CUDA
using Tullio
using BenchmarkTools
A = rand(500, 500)
B = rand(500, 500)
A_d = CUDA.CuArray(A)
B_d = CUDA.CuArray(B)
@btime @tullio C_d[i,k] := A_d[i,j] * B_d[j,k]
C = Array(C_d)
I tried to run from julia 1.9.2
and I got
ERROR: LoadError: Scalar indexing is disallowed.
Invocation of getindex resulted in scalar indexing of a GPU array.
This is typically caused by calling an iterating implementation of a method.
Such implementations *do not* execute on the GPU, but very slowly on the CPU,
and therefore are only permitted from the REPL for prototyping purposes.
If you did intend to index this array, annotate the caller with @allowscalar.
Stacktrace:
[1] error(s::String)
@ Base ./error.jl:35
[2] assertscalar(op::String)
@ GPUArraysCore ~/.julia/packages/GPUArraysCore/uOYfN/src/GPUArraysCore.jl:103
[3] getindex(::CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}, ::Int64, ::Int64)
@ GPUArrays ~/.julia/packages/GPUArrays/5XhED/src/host/indexing.jl:9
[4] ππΈπ!
@ ~/.julia/packages/Tullio/NGyNM/src/macro.jl:1211 [inlined]
[5] ππΈπ!
@ ~/.julia/packages/Tullio/NGyNM/src/macro.jl:1041 [inlined]
[6] threader(fun!::var"#ππΈπ!#3", ::Type{CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}}, Z::CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}, As::Tuple{CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}}, Is::Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}, Js::Tuple{Base.OneTo{Int64}}, redfun::Function, block::Int64, keep::Nothing)
@ Tullio ~/.julia/packages/Tullio/NGyNM/src/eval.jl:104
[7] (::var"#β³πΆπβ―#6"{var"#ππΈπ!#3"})(A_d::CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}, B_d::CuArray{Float64, 2, CUDA.Mem.DeviceBuffer})
@ Main ~/.julia/packages/Tullio/NGyNM/src/macro.jl:807
[8] (::Tullio.Eval{var"#β³πΆπβ―#6"{var"#ππΈπ!#3"}, var"#14#ββ³πΆπβ―#5"{var"#βππΈπ!#4"}})(::CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}, ::Vararg{CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}})
@ Tullio ~/.julia/packages/Tullio/NGyNM/src/eval.jl:20
[9] macro expansion
@ ~/.julia/packages/Tullio/NGyNM/src/macro.jl:976 [inlined]
[10] var"##core#297"()
@ Main ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:489
[11] var"##sample#298"(::Tuple{}, __params::BenchmarkTools.Parameters)
@ Main ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:495
[12] _run(b::BenchmarkTools.Benchmark, p::BenchmarkTools.Parameters; verbose::Bool, pad::String, kwargs::Base.Pairs{Symbol, Integer, NTuple{4, Symbol}, NamedTuple{(:samples, :evals, :gctrial, :gcsample), Tuple{Int64, Int64, Bool, Bool}}})
@ BenchmarkTools ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:99
[13] #invokelatest#2
@ ./essentials.jl:818 [inlined]
[14] invokelatest
@ ./essentials.jl:813 [inlined]
[15] #run_result#45
@ ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:34 [inlined]
[16] run_result
@ ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:34 [inlined]
[17] run(b::BenchmarkTools.Benchmark, p::BenchmarkTools.Parameters; progressid::Nothing, nleaves::Float64, ndone::Float64, kwargs::Base.Pairs{Symbol, Integer, NTuple{5, Symbol}, NamedTuple{(:verbose, :samples, :evals, :gctrial, :gcsample), Tuple{Bool, Int64, Int64, Bool, Bool}}})
@ BenchmarkTools ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:117
[18] run (repeats 2 times)
@ ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:117 [inlined]
[19] #warmup#54
@ ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:169 [inlined]
[20] warmup(item::BenchmarkTools.Benchmark)
@ BenchmarkTools ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:168
[21] top-level scope
@ ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:575
what is the reason for it and how to fix it? Thank you very much