I’ve finished setting up CUDA and wanted to run some simple benchmarks. With a few somewhat contrived examples I was getting worse running times with the |> gpu option, so I decided to benchmark the simplest neural network I could come up with:
using Flux        # provides param, params, Chain, ADAM, train!, gpu
using CuArrays    # so that |> gpu actually moves things onto the GPU

learn_p = param([7.71]) |> gpu

function f(x)
    x .* learn_p
end

# Single layer neural network
model_chain = Chain(x -> f(x)) |> gpu
tracking = params(learn_p)
loss(x, y) = sum(abs2, model_chain(x) - y)

rand_data = rand((1.0:0.1:100.0), 1, 100)
label = rand_data .* [2]
data = Iterators.repeated((rand_data, label), 5000)

@time Flux.train!(loss, tracking, data, ADAM(0.1))
But that gives me:
GPU compilation of #23(CuArrays.CuKernelState, CUDAnative.CuDeviceArray{Float64,2,CUDAnative.AS.Global}, Base.Broadcast.Broadcasted{Nothing,Tuple{Base.OneTo{Int64},Base.OneTo{Int64}},typeof(*),Tuple{Base.Broadcast.Extruded{Array{Float64,2},Tuple{Bool,Bool},Tuple{Int64,Int64}},Base.Broadcast.Extruded{CUDAnative.CuDeviceArray{Float32,1,CUDAnative.AS.Global},Tuple{Bool},Tuple{Int64}}}}) failed
KernelError: passing and using non-bitstype argument
Argument 4 to your kernel function is of type Base.Broadcast.Broadcasted{Nothing,Tuple{Base.OneTo{Int64},Base.OneTo{Int64}},typeof(*),Tuple{Base.Broadcast.Extruded{Array{Float64,2},Tuple{Bool,Bool},Tuple{Int64,Int64}},Base.Broadcast.Extruded{CUDAnative.CuDeviceArray{Float32,1,CUDAnative.AS.Global},Tuple{Bool},Tuple{Int64}}}}.
That type is not isbits, and such arguments are only allowed when they are unused by the kernel.
in top-level scope at base/util.jl:156
in train! at Flux/8XpDt/src/optimise/train.jl:70
in #train!#12 at Flux/8XpDt/src/optimise/train.jl:72
in macro expansion at Juno/nDCSn/src/progress.jl:124
in macro expansion at Flux/8XpDt/src/optimise/train.jl:74
in loss at flux_example.jl:286
in at Flux/8XpDt/src/layers/basic.jl:33
in applychain at Flux/8XpDt/src/layers/basic.jl:31
in #218 at flux_example.jl:284
in f at flux_example.jl:281
in materialize at Flux/8XpDt/src/tracker/lib/array.jl:465
in ∇broadcast at Flux/8XpDt/src/tracker/lib/array.jl:434
in broadcast at base/broadcast.jl:707
in materialize at base/broadcast.jl:753
in copy at base/broadcast.jl:773
in copyto! at base/broadcast.jl:797
in copyto! at GPUArrays/t8tJB/src/broadcast.jl:48
in gpu_call at GPUArrays/t8tJB/src/abstract_gpu_interface.jl:128
in gpu_call at GPUArrays/t8tJB/src/abstract_gpu_interface.jl:151
in _gpu_call at CuArrays/PD3UJ/src/gpuarray_interface.jl:59
in macro expansion at CUDAnative/Mdd3w/src/execution.jl:202
in macro expansion at base/gcutils.jl:87
in macro expansion at CUDAnative/Mdd3w/src/execution.jl:205
in cufunction at CUDAnative/Mdd3w/src/execution.jl:237
in #cufunction#110 at CUDAnative/Mdd3w/src/execution.jl:237
in macro expansion at CUDAnative/Mdd3w/src/execution.jl:266
in compile at CUDAnative/Mdd3w/src/compiler/driver.jl:16
in #compile#95 at CUDAnative/Mdd3w/src/compiler/driver.jl:18
in compile at CUDAnative/Mdd3w/src/compiler/driver.jl:36
in #compile#96 at CUDAnative/Mdd3w/src/compiler/driver.jl:38
in compile at CUDAnative/Mdd3w/src/compiler/driver.jl:87
in check_invocation at CUDAnative/Mdd3w/src/compiler/validation.jl:35
So from this I gather that the .* dot operator is still not supported, is that it?
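Before settling on that conclusion, a sanity check I suppose would help (an untested sketch, just my guess about where the problem lies): the kernel error lists an Extruded{Array{Float64,2}} next to a CuDeviceArray, which makes me wonder whether the real issue is mixing my CPU-side rand_data with the GPU-side learn_p rather than .* itself. Something like this should tell the two cases apart:

using CuArrays

# Untested sketch: broadcast two arrays that both live on the GPU.
# If this works, the failure above is probably the CPU/GPU mix, not .* as such.
a = cu(ones(Float32, 3))
b = cu(fill(2.0f0, 3))
a .* b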
Anyhow, since I’m only trying to benchmark for now, I removed the dot operator:
function f(x)
    # x .* learn_p
    for i in 1:length(x)
        x[i] *= learn_p
    end
    x
end
Which in turn gets me:
MethodError: Cannot `convert` an object of type TrackedArray{…,CuArray{Float64,1}} to an object of type Float64
Closest candidates are:
convert(::Type{Float64}, !Matched::SymEngine.BasicType{Val{:RealDouble}}) at /home/andrew/.julia/packages/SymEngine/6KyFJ/src/numerics.jl:35
convert(::Type{Float64}, !Matched::SymEngine.Basic) at /home/andrew/.julia/packages/SymEngine/6KyFJ/src/numerics.jl:160
convert(::Type{Float64}, !Matched::LLVM.ConstantFP) at /home/andrew/.julia/packages/LLVM/tPWXv/src/core/value/constant.jl:86
...
in top-level scope at base/util.jl:156
in train! at Flux/8XpDt/src/optimise/train.jl:70
in #train!#12 at Flux/8XpDt/src/optimise/train.jl:72
in macro expansion at Juno/nDCSn/src/progress.jl:124
in macro expansion at Flux/8XpDt/src/optimise/train.jl:74
in loss at flux_example.jl:290
in at Flux/8XpDt/src/layers/basic.jl:33
in applychain at Flux/8XpDt/src/layers/basic.jl:31
in #254 at flux_example.jl:288
in f at flux_example.jl:283
in setindex! at base/array.jl:767
So I figured I should do the conversion explicitly:
function f(x)
    # x .* learn_p
    x = convert(typeof(learn_p), x)
    for i in 1:length(x)
        x[i] *= learn_p
    end
    x
end
However:
MethodError: no method matching CuArray{Float32,1}(::UndefInitializer, ::Tuple{Int64,Int64})
Closest candidates are:
CuArray{Float32,1}(::UndefInitializer, ::Tuple{Vararg{Int64,N}}) where {T, N} at /home/andrew/.julia/packages/CuArrays/PD3UJ/src/array.jl:33
CuArray{Float32,1}(!Matched::LinearAlgebra.UniformScaling, ::Tuple{Int64,Int64}) at /home/andrew/.julia/packages/GPUArrays/t8tJB/src/construction.jl:30
CuArray{Float32,1}(!Matched::CUDAdrv.Mem.Buffer, ::Tuple{Vararg{Int64,N}}; offset, own) where {T, N} at /home/andrew/.julia/packages/CuArrays/PD3UJ/src/array.jl:11
...
in top-level scope at base/util.jl:156
in train! at Flux/8XpDt/src/optimise/train.jl:70
in #train!#12 at Flux/8XpDt/src/optimise/train.jl:72
in macro expansion at Juno/nDCSn/src/progress.jl:124
in macro expansion at Flux/8XpDt/src/optimise/train.jl:74
in loss at flux_example.jl:291
in at Flux/8XpDt/src/layers/basic.jl:33
in applychain at Flux/8XpDt/src/layers/basic.jl:31
in #256 at flux_example.jl:289
in f at flux_example.jl:282
in convert at Flux/8XpDt/src/tracker/lib/array.jl:41
in convert at GPUArrays/t8tJB/src/construction.jl:80
in similar at base/abstractarray.jl:618
So I’m a bit lost right now. Any help would be appreciated.
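For reference, this is roughly what I thought a fully-GPU version of the benchmark should look like, though I’m not sure I’m applying |> gpu (or the Float32 conversions) in the right places, so treat it as an untested sketch of what I’m trying to achieve:

using Flux
using CuArrays

# Everything in Float32 and on the GPU (my assumption about what's expected)
learn_p = param(Float32[7.71]) |> gpu
f(x) = x .* learn_p

model_chain = Chain(f) |> gpu
loss(x, y) = sum(abs2, model_chain(x) - y)

# Move the training data onto the GPU as well, instead of leaving it as a CPU Array
rand_data = rand(Float32.(1.0:0.1:100.0), 1, 100) |> gpu
label = rand_data .* 2.0f0
data = Iterators.repeated((rand_data, label), 5000)

@time Flux.train!(loss, params(learn_p), data, ADAM(0.1))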