Hello,
I am trying to compute the Hessian-vector product of a Flux model (with some vector) on the GPU, using ForwardDiff dual numbers over a Zygote pullback (forward-over-reverse). This is giving me some errors, which I think are related specifically to ForwardDiff. Any help would be greatly appreciated!
The following defines an Hvp operator which works fine on the CPU:
using ForwardDiff: partials, Dual
using Zygote: pullback
using LinearAlgebra

mutable struct HvpOperator{F, T, I}
    f::F
    x::AbstractArray{T, 1}
    dualCache1::AbstractArray{Dual{Nothing, T, 1}}
    size::I
    nProd::I
end

function HvpOperator(f, x::AbstractVector)
    dualCache1 = Dual.(x, similar(x))  # dual cache; the partials are overwritten on every product
    return HvpOperator(f, x, dualCache1, size(x, 1), 0)
end

Base.eltype(op::HvpOperator{F, T, I}) where {F, T, I} = T
Base.size(op::HvpOperator) = (op.size, op.size)

function LinearAlgebra.mul!(result::AbstractVector, op::HvpOperator, v::AbstractVector)
    op.nProd += 1
    op.dualCache1 .= Dual.(op.x, v)            # seed the duals with x and the direction v
    val, back = pullback(op.f, op.dualCache1)  # reverse-mode gradient at the dual input
    result .= partials.(back(one(val))[1], 1)  # forward-mode part of the gradient is H*v
end
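For context, this is the kind of CPU check I am running (g, x, and v here are just made-up examples; for g(z) = sum(abs2, z)/2 the Hessian is the identity, so mul! should simply give back v):

x = randn(Float32, 4)
g(z) = sum(abs2, z) / 2    # toy objective whose Hessian is the identity
op = HvpOperator(g, x)
v = randn(Float32, 4)
Hv = similar(x)
LinearAlgebra.mul!(Hv, op, v)
Hv ≈ v                     # holds, since H*v = v here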
Ignore the Nothing tags for now; I know I shouldn't be doing that. The following code defines an MWE that uses the Hvp operator on the GPU:
using Flux, CUDA
data = randn(10, 4) |> gpu
model = Dense(10, 1, σ) |> gpu
ps, re = Flux.destructure(model)
f(θ) = re(θ)(data)
Hop = HvpOperator(f, ps)
v, res = similar(ps), similar(ps)
LinearAlgebra.mul!(res, Hop, v)
This results in the following error:
ERROR: LoadError: MethodError: no method matching cudnnDataType(::Type{Dual{Nothing, Float32, 1}})
Closest candidates are:
cudnnDataType(::Type{Float16}) at /home/rs-coop/.julia/packages/CUDA/nYggH/lib/cudnn/util.jl:7
cudnnDataType(::Type{Float32}) at /home/rs-coop/.julia/packages/CUDA/nYggH/lib/cudnn/util.jl:8
cudnnDataType(::Type{Float64}) at /home/rs-coop/.julia/packages/CUDA/nYggH/lib/cudnn/util.jl:9
...
Stacktrace:
[1] CUDA.CUDNN.cudnnTensorDescriptor(array::CuArray{Dual{Nothing, Float32, 1}, 2, CUDA.Mem.DeviceBuffer}; format::CUDA.CUDNN.cudnnTensorFormat_t, dims::Vector{Int32})
@ CUDA.CUDNN ~/.julia/packages/CUDA/nYggH/lib/cudnn/tensor.jl:9
[2] CUDA.CUDNN.cudnnTensorDescriptor(array::CuArray{Dual{Nothing, Float32, 1}, 2, CUDA.Mem.DeviceBuffer})
@ CUDA.CUDNN ~/.julia/packages/CUDA/nYggH/lib/cudnn/tensor.jl:8
[3] cudnnActivationForward!(y::CuArray{Dual{Nothing, Float32, 1}, 2, CUDA.Mem.DeviceBuffer}, x::CuArray{Dual{Nothing, Float32, 1}, 2, CUDA.Mem.DeviceBuffer}; o::Base.Iterators.Pairs{Symbol, CUDA.CUDNN.cudnnActivationMode_t, Tuple{Symbol}, NamedTuple{(:mode,), Tuple{CUDA.CUDNN.cudnnActivationMode_t}}})
@ CUDA.CUDNN ~/.julia/packages/CUDA/nYggH/lib/cudnn/activation.jl:22
[4] (::NNlibCUDA.var"#65#69")(src::CuArray{Dual{Nothing, Float32, 1}, 2, CUDA.Mem.DeviceBuffer}, dst::CuArray{Dual{Nothing, Float32, 1}, 2, CUDA.Mem.DeviceBuffer})
@ NNlibCUDA ~/.julia/packages/NNlibCUDA/IeeBk/src/cudnn/activations.jl:11
[5] materialize(bc::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{2}, Nothing, typeof(σ), Tuple{CuArray{Dual{Nothing, Float32, 1}, 2, CUDA.Mem.DeviceBuffer}}})
@ NNlibCUDA ~/.julia/packages/NNlibCUDA/IeeBk/src/cudnn/activations.jl:30
[6] rrule(#unused#::typeof(Base.Broadcast.broadcasted), #unused#::typeof(σ), x::CuArray{Dual{Nothing, Float32, 1}, 2, CUDA.Mem.DeviceBuffer})
@ NNlib ~/.julia/packages/NNlib/y5z4i/src/activations.jl:813
[7] rrule(::Zygote.ZygoteRuleConfig{Zygote.Context}, ::Function, ::Function, ::CuArray{Dual{Nothing, Float32, 1}, 2, CUDA.Mem.DeviceBuffer})
@ ChainRulesCore ~/.julia/packages/ChainRulesCore/oBjCg/src/rules.jl:134
[8] chain_rrule
@ ~/.julia/packages/Zygote/FPUm3/src/compiler/chainrules.jl:216 [inlined]
[9] macro expansion
@ ~/.julia/packages/Zygote/FPUm3/src/compiler/interface2.jl:0 [inlined]
[10] _pullback(::Zygote.Context, ::typeof(Base.Broadcast.broadcasted), ::typeof(σ), ::CuArray{Dual{Nothing, Float32, 1}, 2, CUDA.Mem.DeviceBuffer})
@ Zygote ~/.julia/packages/Zygote/FPUm3/src/compiler/interface2.jl:9
[11] _pullback
@ ~/.julia/packages/Flux/BPPNj/src/layers/basic.jl:158 [inlined]
[12] _pullback(ctx::Zygote.Context, f::Dense{typeof(σ), CuArray{Dual{Nothing, Float32, 1}, 2, CUDA.Mem.DeviceBuffer}, CuArray{Dual{Nothing, Float32, 1}, 1, CUDA.Mem.DeviceBuffer}}, args::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer})
@ Zygote ~/.julia/packages/Zygote/FPUm3/src/compiler/interface2.jl:0
[13] _pullback
@ ~/Documents/research/masters-thesis/CubicNewton/experiments/test.jl:43 [inlined]
[14] _pullback(ctx::Zygote.Context, f::typeof(f), args::CuArray{Dual{Nothing, Float32, 1}, 1, CUDA.Mem.DeviceBuffer})
@ Zygote ~/.julia/packages/Zygote/FPUm3/src/compiler/interface2.jl:0
[15] _pullback(f::Function, args::CuArray{Dual{Nothing, Float32, 1}, 1, CUDA.Mem.DeviceBuffer})
@ Zygote ~/.julia/packages/Zygote/FPUm3/src/compiler/interface.jl:34
[16] pullback(f::Function, args::CuArray{Dual{Nothing, Float32, 1}, 1, CUDA.Mem.DeviceBuffer})
@ Zygote ~/.julia/packages/Zygote/FPUm3/src/compiler/interface.jl:40
[17] mul!(result::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, op::HvpOperator{typeof(f), Float32, Int64}, v::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer})
@ Main ~/Documents/research/masters-thesis/CubicNewton/experiments/test.jl:34
[18] top-level scope
@ ~/Documents/research/masters-thesis/CubicNewton/experiments/test.jl:49
[19] include(fname::String)
@ Base.MainInclude ./client.jl:444
[20] top-level scope
@ REPL[14]:1
[21] top-level scope
@ ~/.julia/packages/CUDA/nYggH/src/initialization.jl:52
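For what it's worth, the failure seems to happen where the σ broadcast is dispatched to cuDNN's activation kernel (cudnnActivationForward! in NNlibCUDA), and cudnnDataType has no method for the Dual element type. One workaround I was considering (untested sketch; mysigmoid is just a placeholder name) is to build the layer with a hand-written sigmoid so the broadcast stays on the generic CUDA kernel path instead of the cuDNN fast path:

mysigmoid(x) = one(x) / (one(x) + exp(-x))   # plain Julia sigmoid, should avoid the cuDNN activation dispatch
model = Dense(10, 1, mysigmoid) |> gpu       # rest of the MWE unchanged

I have no idea yet whether the rest of the pullback then goes through on Dual-valued CuArrays, though.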
I know there is quite a lot of information here, so if I can clarify or add anything let me know. Thanks again for the help!