When trying to run the following code (taken from an example):
using DeepQLearning
using POMDPs
using Flux
using POMDPModels
using POMDPSimulators
using POMDPPolicies
# load MDP model from POMDPModels or define your own!
mdp = SimpleGridWorld();
# Define the Q network (see Flux.jl documentation)
# the gridworld state is represented by a 2-dimensional vector.
model = Chain(Dense(2, 32), Dense(32, length(POMDPs.actions(mdp))))
exploration = EpsGreedyPolicy(mdp, LinearDecaySchedule(start=1.0, stop=0.01, steps=10000/2))
solver = DeepQLearningSolver(qnetwork = model, max_steps=10000,
                             exploration_policy = exploration,
                             learning_rate=0.005, log_freq=500,
                             recurrence=false, double_q=true, dueling=true, prioritized_replay=true)
policy = solve(solver, mdp)
sim = RolloutSimulator(max_steps=30)
r_tot = simulate(sim, mdp, policy)
println("Total discounted reward for 1 simulation: $r_tot")
I am getting the following error:
ERROR: LoadError: Can't differentiate loopinfo expression
Stacktrace:
[1] error(s::String)
@ Base .\error.jl:33
[2] macro expansion
@ .\simdloop.jl:79 [inlined]
[3] Pullback
@ .\reduce.jl:243 [inlined]
[4] (::typeof(∂(mapreduce_impl)))(Δ::Float32)
@ Zygote C:\Users\Tyler\.julia\packages\Zygote\bRa8J\src\compiler\interface2.jl:0
[5] Pullback
@ .\reduce.jl:257 [inlined]
[6] (::typeof(∂(mapreduce_impl)))(Δ::Float32)
@ Zygote C:\Users\Tyler\.julia\packages\Zygote\bRa8J\src\compiler\interface2.jl:0
[7] Pullback
@ .\reduce.jl:415 [inlined]
[8] (::typeof(∂(_mapreduce)))(Δ::Float32)
@ Zygote C:\Users\Tyler\.julia\packages\Zygote\bRa8J\src\compiler\interface2.jl:0
[9] Pullback
@ .\reducedim.jl:318 [inlined]
[10] Pullback (repeats 2 times)
@ .\reducedim.jl:310 [inlined]
[11] (::typeof(∂(mapreduce)))(Δ::Float32)
@ Zygote C:\Users\Tyler\.julia\packages\Zygote\bRa8J\src\compiler\interface2.jl:0
[12] Pullback
@ .\reducedim.jl:878 [inlined]
[13] (::typeof(∂(#_sum#682)))(Δ::Float32)
@ Zygote C:\Users\Tyler\.julia\packages\Zygote\bRa8J\src\compiler\interface2.jl:0
[14] Pullback
@ .\reducedim.jl:878 [inlined]
[15] (::typeof(∂(_sum)))(Δ::Float32)
@ Zygote C:\Users\Tyler\.julia\packages\Zygote\bRa8J\src\compiler\interface2.jl:0
[16] Pullback (repeats 2 times)
@ .\reducedim.jl:874 [inlined]
[17] (::typeof(∂(sum)))(Δ::Float32)
@ Zygote C:\Users\Tyler\.julia\packages\Zygote\bRa8J\src\compiler\interface2.jl:0
[18] Pullback
@ C:\Users\Tyler\.julia\packages\DeepQLearning\Uet74\src\solver.jl:223 [inlined]
[19] (::typeof(∂(λ)))(Δ::Float32)
@ Zygote C:\Users\Tyler\.julia\packages\Zygote\bRa8J\src\compiler\interface2.jl:0
[20] (::Zygote.var"#54#55"{Zygote.Params, typeof(∂(λ)), Zygote.Context})(Δ::Float32)
@ Zygote C:\Users\Tyler\.julia\packages\Zygote\bRa8J\src\compiler\interface.jl:172
[21] gradient(f::Function, args::Zygote.Params)
@ Zygote C:\Users\Tyler\.julia\packages\Zygote\bRa8J\src\compiler\interface.jl:49
[22] batch_train!(solver::DeepQLearningSolver{EpsGreedyPolicy{LinearDecaySchedule{Float64}, Random._GLOBAL_RNG, NTuple{4, Symbol}}}, env::POMDPModelTools.MDPCommonRLEnv{AbstractArray{Float32, N} where N, SimpleGridWorld, StaticArrays.SVector{2, Int64}}, policy::NNPolicy{SimpleGridWorld, DeepQLearning.DuelingNetwork, Symbol}, optimizer::ADAM, target_q::DeepQLearning.DuelingNetwork, replay::PrioritizedReplayBuffer{Int32, Float32, CartesianIndex{2}, StaticArrays.SVector{2, Float32}, Matrix{Float32}}; discount::Float64)
@ DeepQLearning C:\Users\Tyler\.julia\packages\DeepQLearning\Uet74\src\solver.jl:219
[23] batch_train!
@ C:\Users\Tyler\.julia\packages\DeepQLearning\Uet74\src\solver.jl:200 [inlined]
[24] dqn_train!(solver::DeepQLearningSolver{EpsGreedyPolicy{LinearDecaySchedule{Float64}, Random._GLOBAL_RNG, NTuple{4, Symbol}}}, env::POMDPModelTools.MDPCommonRLEnv{AbstractArray{Float32, N} where N, SimpleGridWorld, StaticArrays.SVector{2, Int64}}, policy::NNPolicy{SimpleGridWorld, DeepQLearning.DuelingNetwork, Symbol}, replay::PrioritizedReplayBuffer{Int32, Float32, CartesianIndex{2}, StaticArrays.SVector{2, Float32}, Matrix{Float32}})
@ DeepQLearning C:\Users\Tyler\.julia\packages\DeepQLearning\Uet74\src\solver.jl:138
[25] solve(solver::DeepQLearningSolver{EpsGreedyPolicy{LinearDecaySchedule{Float64}, Random._GLOBAL_RNG, NTuple{4, Symbol}}}, env::POMDPModelTools.MDPCommonRLEnv{AbstractArray{Float32, N} where N, SimpleGridWorld, StaticArrays.SVector{2, Int64}})
@ DeepQLearning C:\Users\Tyler\.julia\packages\DeepQLearning\Uet74\src\solver.jl:56
[26] solve(solver::DeepQLearningSolver{EpsGreedyPolicy{LinearDecaySchedule{Float64}, Random._GLOBAL_RNG, NTuple{4, Symbol}}}, problem::SimpleGridWorld)
@ DeepQLearning C:\Users\Tyler\.julia\packages\DeepQLearning\Uet74\src\solver.jl:32
[27] top-level scope
@ C:\Users\Tyler\Documents\Julia\Learning\GridWorldMDP.jl:21
in expression starting at C:\Users\Tyler\Documents\Julia\Learning\GridWorldMDP.jl:21
This error occurs on Windows (note: I also tried on Julia 1.6.2 with the same result):
Julia Version 1.6.3
Commit ae8452a9e0 (2021-09-23 17:34 UTC)
Platform Info:
OS: Windows (x86_64-w64-mingw32)
CPU: Intel(R) Core(TM) i5-4590 CPU @ 3.30GHz
WORD_SIZE: 64
LIBM: libopenlibm
LLVM: libLLVM-11.0.1 (ORCJIT, haswell)
and on Linux:
Julia Version 1.6.2
Commit 1b93d53fc4 (2021-07-14 15:36 UTC)
Platform Info:
OS: Linux (x86_64-pc-linux-gnu)
CPU: 11th Gen Intel(R) Core(TM) i7-11700KF @ 3.60GHz
WORD_SIZE: 64
LIBM: libopenlibm
LLVM: libLLVM-11.0.1 (ORCJIT, icelake-client)
Possibly it's an issue with SIMD on the Intel x86_64 arch? I am not sure; nothing looks obviously wrong with the code. Any idea what could be causing this?
If someone can try it on a different arch, that would be great; that is my best guess.
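For what it's worth, the stack trace appears to point at Zygote trying to differentiate through the @simd loop inside Base's mapreduce (reached via the sum call at DeepQLearning's solver.jl:223). Here is a minimal sketch, independent of DeepQLearning, that I believe should trigger the same kind of error if that hypothesis is right (the simd_sum function is just for illustration):

using Zygote

# Hand-written sum with an explicit @simd loop. The @simd macro emits
# loopinfo expressions, which Zygote reports it cannot differentiate.
function simd_sum(x)
    s = zero(eltype(x))
    @simd for i in eachindex(x)
        s += x[i]
    end
    return s
end

# If the hypothesis is correct, this should fail with
# "Can't differentiate loopinfo expression".
Zygote.gradient(simd_sum, rand(Float32, 8))

If that snippet fails the same way on other machines, it would suggest the problem lies in how Zygote handles Base's reductions rather than anything specific to my architecture.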