Hi,
I am updating my library PseudoArcLengthContinuation.jl and one of the tutorials based on CuArrays
does not work anymore. It used to work by the end of December.
Can someone explain me the error please?
Thank you for your help,
The error I am getting is the following:
julia> F_shfft(sol0, -.1, 1.3, shlop = L)
ERROR: GPU compilation of #25(CuArrays.CuKernelState, CUDAnative.CuDeviceArray{Complex{Float64},2,CUDAnative.AS.Global}, Base.Broadcast.Broadcasted{Nothing,Tuple{Base.OneTo{Int64},Base.OneTo{Int64}},Type{Complex},Tuple{Base.Broadcast.Extruded{Array{Float64,2},Tuple{Bool,Bool},Tuple{Int64,Int64}}}}) failed
KernelError: passing and using non-bitstype argument
Argument 4 to your kernel function is of type Base.Broadcast.Broadcasted{Nothing,Tuple{Base.OneTo{Int64},Base.OneTo{Int64}},Type{Complex},Tuple{Base.Broadcast.Extruded{Array{Float64,2},Tuple{Bool,Bool},Tuple{Int64,Int64}}}}.
That type is not isbits, and such arguments are only allowed when they are unused by the kernel. .f is of type Type{Complex} which is not isbits.
.args is of type Tuple{Base.Broadcast.Extruded{Array{Float64,2},Tuple{Bool,Bool},Tuple{Int64,Int64}}} which is not isbits.
.1 is of type Base.Broadcast.Extruded{Array{Float64,2},Tuple{Bool,Bool},Tuple{Int64,Int64}} which is not isbits.
.x is of type Array{Float64,2} which is not isbits.
The code is as follows. Note that I remove the parts used by PseudoArcLengthContinuation.jl
so the code is independant of PseudoArcLengthContinuation
.
using AbstractFFTs, FFTW, KrylovKit
using LinearAlgebra, Plots
struct SHLinearOp
tmp_real # temporary
tmp_complex # temporary
l1
fftplan
ifftplan
end
function SHLinearOp(Nx, lx, Ny, ly; AF = Array{TY})
# AF is a type, it could be CuArray{TY} to run the following on GPU
k1 = vcat(collect(0:Nx/2), collect(Nx/2+1:Nx-1) .- Nx)
k2 = vcat(collect(0:Ny/2), collect(Ny/2+1:Ny-1) .- Ny)
d2 = [(1-(pi/lx * kx)^2 - (pi/ly * ky)^2)^2 + 1. for kx in k1, ky in k2]
tmpc = Complex.(AF(zeros(Nx,Ny)))
return SHLinearOp(AF(zeros(Nx,Ny)),tmpc,AF(d2),plan_fft!(tmpc),plan_ifft!(tmpc))
end
import Base: *, \
function *(c::SHLinearOp, u)
c.tmp_complex .= Complex.(u)
c.fftplan * c.tmp_complex
c.tmp_complex .= c.l1 .* c.tmp_complex
c.ifftplan * c.tmp_complex
c.tmp_real .= real.(c.tmp_complex)
return copy(c.tmp_real)
end
function \(c::SHLinearOp, u)
c.tmp_complex .= Complex.(u)
c.fftplan * c.tmp_complex
c.tmp_complex .= c.tmp_complex ./ c.l1
c.ifftplan * c.tmp_complex
c.tmp_real .= real.(c.tmp_complex)
return copy(c.tmp_real)
end
function F_shfft(u, l = -0.15, ν = 1.3; shlop::SHLinearOp)
return -(shlop * u) .+ ((l+1) .* u .+ ν .* u.^2 .- u.^3)
end
using CuArrays
CuArrays.allowscalar(false)
import LinearAlgebra: mul!, axpby!
mul!(x::CuArray, y::CuArray, α::T) where {T <: Number} = (x .= α .* y)
mul!(x::CuArray, α::T, y::CuArray) where {T <: Number} = (x .= α .* y)
axpby!(a::T, X::CuArray, b::T, Y::CuArray) where {T <: Number} = (Y .= a .* X .+ b .* Y)
TY = Float64
AF = CuArray{TY}
Nx = 2^10
Ny = 2^10
lx = 8pi * 2
ly = 2*2pi/sqrt(3) * 2
X = -lx .+ 2lx/(Nx) * collect(0:Nx-1)
Y = -ly .+ 2ly/(Ny) * collect(0:Ny-1)
sol0 = [(cos(x) .+ cos(x/2) * cos(sqrt(3) * y/2) ) for x in X, y in Y]
sol0 .= sol0 .- minimum(vec(sol0))
sol0 ./= maximum(vec(sol0))
sol0 = sol0 .- 0.25
sol0 .*= 1.7
L = SHLinearOp(Nx, lx, Ny, ly, AF = AF)
F_shfft(sol0, -.1, 1.3, shlop = L);