Yes, you are right.
Google Colab does not automatically download the latest version of the package, which is what caused the earlier issue.
However, now that the code recognizes `abstractdiffy`, I'm running into another error:
```
InvalidIRError: compiling MethodInstance for (::GPUArrays.var"#broadcast_kernel#38")(::CUDA.CuKernelContext, ::CuDeviceVector{Tuple{Vector{Float64}, DifferentiableFlatten.var"#unflatten_to_Real#1"}, 1}, ::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{1}, Tuple{Base.OneTo{Int64}}, DifferentiableFlatten.var"#2#4", Tuple{Base.Broadcast.Extruded{CuDeviceVector{Float64, 1}, Tuple{Bool}, Tuple{Int64}}}}, ::Int64) resulted in invalid LLVM IR
Reason: unsupported call through a literal pointer (call to ijl_alloc_array_1d)
Stacktrace:
[1] Array
@ ./boot.jl:477
[2] Array
@ ./boot.jl:486
[3] similar
@ ./abstractarray.jl:884
[4] similar
@ ./abstractarray.jl:883
[5] _array_for
@ ./array.jl:671
[6] _array_for
@ ./array.jl:674
[7] vect
@ ./array.jl:126
[8] flatten
@ ~/.julia/packages/DifferentiableFlatten/ro7xH/src/DifferentiableFlatten.jl:45
[9] #2
@ ~/.julia/packages/DifferentiableFlatten/ro7xH/src/DifferentiableFlatten.jl:53
[10] _broadcast_getindex_evalf
@ ./broadcast.jl:683
[11] _broadcast_getindex
@ ./broadcast.jl:656
[12] getindex
@ ./broadcast.jl:610
[13] broadcast_kernel
@ ~/.julia/packages/GPUArrays/dAUOE/src/host/broadcast.jl:64
Hint: catch this exception as `err` and call `code_typed(err; interactive = true)` to introspect the erronous code with Cthulhu.jl
Stacktrace:
[1] check_ir(job::GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, args::LLVM.Module)
@ GPUCompiler ~/.julia/packages/GPUCompiler/U36Ed/src/validation.jl:147
[2] macro expansion
@ ~/.julia/packages/GPUCompiler/U36Ed/src/driver.jl:440 [inlined]
[3] macro expansion
@ ~/.julia/packages/TimerOutputs/RsWnF/src/TimerOutput.jl:253 [inlined]
[4] macro expansion
@ ~/.julia/packages/GPUCompiler/U36Ed/src/driver.jl:439 [inlined]
[5] emit_llvm(job::GPUCompiler.CompilerJob; libraries::Bool, toplevel::Bool, optimize::Bool, cleanup::Bool, only_entry::Bool, validate::Bool)
@ GPUCompiler ~/.julia/packages/GPUCompiler/U36Ed/src/utils.jl:92
[6] emit_llvm
@ ~/.julia/packages/GPUCompiler/U36Ed/src/utils.jl:86 [inlined]
[7] codegen(output::Symbol, job::GPUCompiler.CompilerJob; libraries::Bool, toplevel::Bool, optimize::Bool, cleanup::Bool, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing)
@ GPUCompiler ~/.julia/packages/GPUCompiler/U36Ed/src/driver.jl:129
[8] codegen
@ ~/.julia/packages/GPUCompiler/U36Ed/src/driver.jl:110 [inlined]
[9] compile(target::Symbol, job::GPUCompiler.CompilerJob; libraries::Bool, toplevel::Bool, optimize::Bool, cleanup::Bool, strip::Bool, validate::Bool, only_entry::Bool)
@ GPUCompiler ~/.julia/packages/GPUCompiler/U36Ed/src/driver.jl:106
[10] compile
@ ~/.julia/packages/GPUCompiler/U36Ed/src/driver.jl:98 [inlined]
[11] #1075
@ ~/.julia/packages/CUDA/YIj5X/src/compiler/compilation.jl:247 [inlined]
[12] JuliaContext(f::CUDA.var"#1075#1077"{GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}})
@ GPUCompiler ~/.julia/packages/GPUCompiler/U36Ed/src/driver.jl:47
[13] compile(job::GPUCompiler.CompilerJob)
@ CUDA ~/.julia/packages/CUDA/YIj5X/src/compiler/compilation.jl:246
[14] actual_compilation(cache::Dict{Any, CuFunction}, src::Core.MethodInstance, world::UInt64, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::typeof(CUDA.compile), linker::typeof(CUDA.link))
@ GPUCompiler ~/.julia/packages/GPUCompiler/U36Ed/src/execution.jl:125
[15] cached_compilation(cache::Dict{Any, CuFunction}, src::Core.MethodInstance, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::Function, linker::Function)
@ GPUCompiler ~/.julia/packages/GPUCompiler/U36Ed/src/execution.jl:103
[16] macro expansion
@ ~/.julia/packages/CUDA/YIj5X/src/compiler/execution.jl:382 [inlined]
[17] macro expansion
@ ./lock.jl:267 [inlined]
[18] cufunction(f::GPUArrays.var"#broadcast_kernel#38", tt::Type{Tuple{CUDA.CuKernelContext, CuDeviceVector{Tuple{Vector{Float64}, DifferentiableFlatten.var"#unflatten_to_Real#1"}, 1}, Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{1}, Tuple{Base.OneTo{Int64}}, DifferentiableFlatten.var"#2#4", Tuple{Base.Broadcast.Extruded{CuDeviceVector{Float64, 1}, Tuple{Bool}, Tuple{Int64}}}}, Int64}}; kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ CUDA ~/.julia/packages/CUDA/YIj5X/src/compiler/execution.jl:377
[19] cufunction
@ ~/.julia/packages/CUDA/YIj5X/src/compiler/execution.jl:374 [inlined]
[20] macro expansion
@ ~/.julia/packages/CUDA/YIj5X/src/compiler/execution.jl:104 [inlined]
[21] #launch_heuristic#1120
@ ~/.julia/packages/CUDA/YIj5X/src/gpuarrays.jl:17 [inlined]
[22] launch_heuristic
@ ~/.julia/packages/CUDA/YIj5X/src/gpuarrays.jl:15 [inlined]
[23] _copyto!
@ ~/.julia/packages/GPUArrays/dAUOE/src/host/broadcast.jl:70 [inlined]
[24] copyto!
@ ~/.julia/packages/GPUArrays/dAUOE/src/host/broadcast.jl:51 [inlined]
[25] copy
@ ~/.julia/packages/GPUArrays/dAUOE/src/host/broadcast.jl:42 [inlined]
[26] materialize(bc::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{1}, Nothing, DifferentiableFlatten.var"#2#4", Tuple{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}})
@ Base.Broadcast ./broadcast.jl:873
[27] map(::Function, ::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer})
@ GPUArrays ~/.julia/packages/GPUArrays/dAUOE/src/host/broadcast.jl:89
[28] flatten(x::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer})
@ DifferentiableFlatten ~/.julia/packages/DifferentiableFlatten/ro7xH/src/DifferentiableFlatten.jl:53
[29] (::DifferentiableFlatten.var"#2#4")(val::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer})
@ DifferentiableFlatten ~/.julia/packages/DifferentiableFlatten/ro7xH/src/DifferentiableFlatten.jl:53
[30] iterate
@ ./generator.jl:47 [inlined]
[31] _collect
@ ./array.jl:802 [inlined]
[32] collect_similar(cont::Vector{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}, itr::Base.Generator{Vector{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}, DifferentiableFlatten.var"#2#4"})
@ Base ./array.jl:711
[33] map(f::Function, A::Vector{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}})
@ Base ./abstractarray.jl:3263
[34] flatten(x::Vector{Any})
@ DifferentiableFlatten ~/.julia/packages/DifferentiableFlatten/ro7xH/src/DifferentiableFlatten.jl:53
[35] #8
@ ~/.julia/packages/DifferentiableFlatten/ro7xH/src/DifferentiableFlatten.jl:70 [inlined]
[36] map
@ ./tuple.jl:273 [inlined]
[37] flatten(x::Tuple{Vector{Any}})
@ DifferentiableFlatten ~/.julia/packages/DifferentiableFlatten/ro7xH/src/DifferentiableFlatten.jl:70
[38] tovecfunc(f::Function, x::Vector{Any}; flatteny::Bool)
@ NonconvexCore ~/.julia/packages/NonconvexCore/TFoWG/src/models/vec_model.jl:106
[39] tovecfunc
@ ~/.julia/packages/NonconvexCore/TFoWG/src/models/vec_model.jl:105 [inlined]
[40] abstractdiffy(f::Function, backend::AbstractDifferentiation.ReverseDiffBackend, x::Vector{Any})
@ NonconvexUtils ~/.julia/packages/NonconvexUtils/i3gzf/src/abstractdiff.jl:24
[41] abstractdiffy(model::Model{Vector{Any}}, backend::AbstractDifferentiation.ReverseDiffBackend; objective::Bool, ineq_constraints::Bool, eq_constraints::Bool, sd_constraints::Bool)
@ NonconvexUtils ~/.julia/packages/NonconvexUtils/i3gzf/src/abstractdiff.jl:31
[42] abstractdiffy(model::Model{Vector{Any}}, backend::AbstractDifferentiation.ReverseDiffBackend)
@ NonconvexUtils ~/.julia/packages/NonconvexUtils/i3gzf/src/abstractdiff.jl:28
[43] optimization(x0::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer})
@ Main ./In[36]:68
[44] macro expansion
@ ./In[39]:3 [inlined]
[45] top-level scope
@ ./timing.jl:273 [inlined]
[46] top-level scope
@ ./In[39]:0
```
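From what I can tell, the failure is not in `abstractdiffy` itself. The key line is `unsupported call through a literal pointer (call to ijl_alloc_array_1d)`, which means the broadcast kernel is trying to allocate a regular CPU `Array` at runtime. Frames [7]-[9] show `DifferentiableFlatten.flatten` building `Vector`s element by element, and frames [27]-[28] show that closure being `map`ped over the `CuArray`, so the CPU allocation ends up inside a CUDA kernel, which GPUCompiler refuses to compile.

Here is a minimal sketch that I believe reproduces the same class of failure (any closure that allocates a `Vector`, mapped over a `CuArray`):

```julia
using CUDA

xs = CUDA.rand(Float64, 4)
# The closure allocates a CPU Vector for every element; compiling it into
# a GPU broadcast kernel fails with the same ijl_alloc_array_1d error.
map(x -> [x], xs)
```

As a tentative workaround (assuming the optimizer does not actually need the starting point on the GPU, and that my `optimization` function from In[36] accepts a plain `Vector`), copying `x0` back to host memory before the call should keep `flatten` off device arrays entirely:

```julia
x0_cpu = Array(x0)    # device-to-host copy: CuArray -> Vector{Float64}
optimization(x0_cpu)  # hypothetical call to my own routine from In[36]
```

Is there a supported way to keep the whole pipeline on the GPU instead?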