I’m a beginner with Julia and I’m trying to model an inventory management problem using the SARSA and Q-learning algorithms. The code gives me the following KeyError (I shortened the long parametric types to {…} for readability):

ERROR: LoadError: KeyError: key [0.01781510003808362, 0.10435077985604246, 0.6513233559305432, 0.026058479739375584, 0.5718719213047856, 0.4894292308985352, 0.8914244892776971, 0.6229389399919408, 0.9589445614036257, 0.1799414868226339] not found
Stacktrace:
 [1] getindex(h::Dict{Int64, Int64}, key::Vector{Float64})
   @ Base .\dict.jl:477
 [2] _call(v::Val{:stateindex}, d::Dict{Int64, Int64}, args::Tuple{Vector{Float64}}, kwargs::@Kwargs{})
   @ QuickPOMDPs C:\Users\loren\.julia\packages\QuickPOMDPs\WmJG9\src\quick.jl:206
 [3] _call(namev::Val{:stateindex}, m::QuickMDP{…}, args::Tuple{Vector{Float64}}, kwargs::@Kwargs{})
   @ QuickPOMDPs C:\Users\loren\.julia\packages\QuickPOMDPs\WmJG9\src\quick.jl:196
 [4] stateindex(m::QuickMDP{…}, args::Vector{Float64}; kwargs::@Kwargs{})
   @ QuickPOMDPs C:\Users\loren\.julia\packages\QuickPOMDPs\WmJG9\src\quick.jl:211
 [5] action(p::ValuePolicy{QuickMDP{…}, Matrix{Float64}, Int64}, s::Vector{Float64})
   @ POMDPTools.Policies C:\Users\loren\.julia\packages\POMDPTools\7Rekv\src\Policies\vector.jl:62
 [6] action(p::EpsGreedyPolicy{…}, on_policy::ValuePolicy{…}, k::Int64, s::Vector{Float64})
   @ POMDPTools.Policies C:\Users\loren\.julia\packages\POMDPTools\7Rekv\src\Policies\exploration_policies.jl:77
 [7] solve(solver::QLearningSolver{…}, mdp::QuickMDP{…})
   @ TabularTDLearning C:\Users\loren\.julia\packages\TabularTDLearning\IPIqw\src\q_learn.jl:57
 [8] top-level scope
   @ d:\OneDrive\Desktop\Punto.jl:77
in expression starting at d:\OneDrive\Desktop\Punto.jl:77
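One thing I noticed: the key that is "not found" is a 10-element Vector{Float64}, and my initial state is the integer 10. As far as I can tell, the solver draws the initial state with rand(rng, initialstate(mdp)), and calling rand with a plain integer n just produces n uniform numbers:

using Random
rand(Random.default_rng(), 10)  # 10-element Vector{Float64}, same shape as the failing key

So the "state" being looked up in the stateindex Dict seems to be ten random numbers rather than an inventory level.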
This is the code:
### Pluto notebook for LINMA2470 - Punto 3
### 1) Package imports
using Random
using StatsPlots # for possible plots
using POMDPs
using QuickPOMDPs
using TabularTDLearning
using POMDPTools
using POMDPModels
using POMDPTools.Policies # for EpsGreedyPolicy
### 2) Environment parameters
const INITIAL_STATE = 10
const STATES = collect(0:20)
const ACTIONS = [0, 1] # 0 = no order, 1 = order
const DEMAND_DIST = collect(0:10)
const DISCOUNT = 0.99
const ORDER_AMOUNT = 5
const MAX_STORE = 10
const MAX_CAPACITY = 20
const TRANSPORT_COST = 20.0
const HOLD_STORE = 2.0
const HOLD_PARKING = 4.0
const STOCKOUT_COST = 50.0
### 3) MDP definition with QuickPOMDPs
bike_inventory_mdp = QuickMDP(
    states = STATES,
    actions = ACTIONS,
    discount = DISCOUNT,
    initialstate = INITIAL_STATE,
    transition = (s, a, sp) -> begin
        # probability of moving from s to sp under action a
        order = a == 1 ? min(ORDER_AMOUNT, MAX_CAPACITY - s) : 0
        post_order = s + order
        p = 1 / length(DEMAND_DIST)
        sum(d -> (max(0, post_order - d) == sp ? p : 0), DEMAND_DIST)
    end,
    reward = (s, a, sp) -> begin
        # expected reward given s and a
        order = a == 1 ? min(ORDER_AMOUNT, MAX_CAPACITY - s) : 0
        post_order = s + order
        transport = a == 1 ? TRANSPORT_COST : 0.0
        exp_penalty = sum(d -> max(0, d - post_order) * STOCKOUT_COST, DEMAND_DIST) / length(DEMAND_DIST)
        exp_hold = sum(d -> begin
            inv = max(0, post_order - d)
            min(inv, MAX_STORE) * HOLD_STORE + max(0, inv - MAX_STORE) * HOLD_PARKING
        end, DEMAND_DIST) / length(DEMAND_DIST)
        -(transport + exp_penalty + exp_hold)
    end
)
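(Aside: re-reading the QuickPOMDPs documentation, I think the transition keyword is supposed to take (s, a) and return a distribution object such as SparseCat from POMDPTools, not take (s, a, sp) and return a probability. This is a sketch of what I would pass as the transition keyword instead, aggregating duplicate next states because several demand values can drive the inventory to the same level; I'm not sure it's the intended style:)

# sketch: transition as (s, a) -> distribution over next states
transition = (s, a) -> begin
    order = a == 1 ? min(ORDER_AMOUNT, MAX_CAPACITY - s) : 0
    post_order = s + order
    # each demand value is equally likely; sum the probabilities of all
    # demands that lead to the same next state sp
    probs = Dict{Int, Float64}()
    for d in DEMAND_DIST
        sp = max(0, post_order - d)
        probs[sp] = get(probs, sp, 0.0) + 1 / length(DEMAND_DIST)
    end
    SparseCat(collect(keys(probs)), collect(values(probs)))
end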
### 4) ε-greedy exploration policy
rng = Random.default_rng()
# epsilon as the second positional argument
exploration = EpsGreedyPolicy(bike_inventory_mdp, 0.1; rng=rng)
### 5) Q-learning and SARSA solvers from TabularTDLearning
q_solver_pkg = QLearningSolver(
    n_episodes = 5000,
    max_episode_length = 50,
    learning_rate = 0.1,
    exploration_policy = exploration
)
sarsa_solver_pkg = SARSASolver(
    n_episodes = 5000,
    max_episode_length = 50,
    learning_rate = 0.1,
    exploration_policy = exploration
)
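(Aside: as a sanity check I ran the same solver settings on SimpleGridWorld from POMDPModels, roughly the example from the TabularTDLearning README if I remember it correctly, and that does not throw the KeyError, so I don't think the solvers themselves are the problem:)

using POMDPModels, POMDPTools, TabularTDLearning
gw = SimpleGridWorld()
gw_exploration = EpsGreedyPolicy(gw, 0.1)
gw_solver = QLearningSolver(
    n_episodes = 1000,
    max_episode_length = 50,
    learning_rate = 0.1,
    exploration_policy = gw_exploration
)
gw_policy = solve(gw_solver, gw)  # completes without the KeyError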
### 6) Solving and comparing the policies
policy_q_pkg = solve(q_solver_pkg, bike_inventory_mdp)
policy_sarsa_pkg = solve(sarsa_solver_pkg, bike_inventory_mdp)
println("Politica Q-Learning (pacchetto):")
for s in STATES
println("s = ", s, " → a = ", action(policy_q_pkg, s))
end
println("\nPolitica SARSA (pacchetto):")
for s in STATES
println("s = ", s, " → a = ", action(policy_sarsa_pkg, s))
end
I think the problem is in the definition of the MDP, but I don't know how to fix it.
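My current suspicion, building on the rand observation above: since initialstate is the plain integer 10, rand(rng, initialstate(mdp)) returns ten uniform numbers, and that Vector{Float64} is then looked up in the stateindex Dict, which only has Int64 keys. Wrapping the initial state in Deterministic from POMDPTools looks like it would at least give back an Int:

using POMDPTools, Random
# possible fix inside QuickMDP(...): initialstate = Deterministic(INITIAL_STATE)
rand(Random.default_rng(), Deterministic(INITIAL_STATE))  # -> 10 (an Int), not a Vector{Float64}

Is Deterministic(...) the right way to declare the initial state here, and do I also need to change transition so that it returns a distribution (as in the sketch after the MDP definition)?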