Thanks for looking at it. Here is the complete source code.
A simple Markov model
P = [
    0.8 0.1 0.2;
    0.1 0.7 0.2;
    0.1 0.2 0.6
]
3×3 Matrix{Float64}:
 0.8  0.1  0.2
 0.1  0.7  0.2
 0.1  0.2  0.6
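Note that P is column-stochastic: column j holds the next-state distribution given state j. A quick sanity check (my addition, not in the original code):

all(sum(P, dims=1) .≈ 1)   # every column should sum to one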
Helper function to sample from a categorical distribution (inverse-CDF method)
function sample(outcomes, weights)
    # Inverse-CDF sampling: draw r ~ U(0,1) and return the first outcome
    # whose cumulative weight reaches r (weights are assumed to sum to 1)
    cumsum_weights = cumsum(weights)
    r = rand()
    for i in eachindex(outcomes)
        if r <= cumsum_weights[i]
            return outcomes[i]
        end
    end
    return outcomes[end]
end
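As a quick illustration of the helper (my addition), the empirical frequencies should approach the weights:

draws = [sample(1:3, [0.2, 0.5, 0.3]) for _ in 1:100_000]
[count(==(k), draws) / length(draws) for k in 1:3]   # ≈ [0.2, 0.5, 0.3]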
Simulate
seqlen = 10
obscnt = 100
catcount = 3
catrange = Int16(1):Int16(catcount)
x = zeros(Int16, seqlen * obscnt)
x[1] = 1
for t in 2:length(x)
    # column x[t-1] of P is the next-state distribution given the current state
    x[t] = sample(1:3, P[:, x[t - 1]])
end
x = permutedims(reshape(x, seqlen, obscnt))
100×10 Matrix{Int16}:
 1  1  1  1  1  1  1  1  1  1
 1  1  3  2  2  2  2  2  1  1
 […]
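As a further plausibility check (my addition), the empirical state frequencies of a long chain should approach the stationary distribution of P, i.e. its eigenvector for eigenvalue 1, normalised to sum to one:

using LinearAlgebra
vals, vecs = eigen(P)              # run this before P is overwritten below
k = argmax(real.(vals))            # the Perron eigenvalue 1
pstat = real.(vecs[:, k])
pstat ./= sum(pstat)
pfreq = [count(==(s), x) / length(x) for s in 1:3]
pstat, pfreq                       # the two vectors should be close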
Simple maximum-likelihood (ML) estimation
P0 = zeros(Float64, 3)     # initial-state counts
P = zeros(Float64, 3, 3)   # transition counts (note: this overwrites the true P)
for j in 1:obscnt
    P0[x[j, 1]] += 1
    for i in 2:seqlen
        P[x[j, i], x[j, i - 1]] += 1   # count transition x[j, i-1] -> x[j, i]
    end
end
P ./ sum(P, dims=1)   # column-normalise counts into transition probabilities
3×3 Matrix{Float64}:
 0.809399   0.0918728  0.179487
 0.0861619  0.70318    0.209402
 0.104439   0.204947   0.611111
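Since the loop above overwrites P with counts, comparing against the truth needs the original matrix under another name; a minimal sketch, with Ptrue being my re-entry of the true transition matrix:

Ptrue = [
    0.8 0.1 0.2;
    0.1 0.7 0.2;
    0.1 0.2 0.6
]
maximum(abs.(P ./ sum(P, dims=1) .- Ptrue))   # shrinks as obscnt grows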
An RNN model should give the same result
using Flux
using OneHotArrays
catcount = 3
catrange = Int16(1):Int16(catcount)
xoh = [Flux.onehotbatch(x[:, i], catrange) for i = 1:seqlen]
xoh[1]
3×100 OneHotMatrix(::Vector{UInt32}) with eltype Bool:
[…]
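A quick way to verify the encoding (my addition) is to decode it back with Flux.onecold and compare with the original column:

Flux.onecold(xoh[1], catrange) == x[:, 1]   # expect true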
struct MyRNN{H,C}
    h0::H
    W::C
end

Flux.@layer MyRNN trainable=(h0, W)

function MyRNN(nc::Int)
    return MyRNN(
        zeros(Float32, nc),
        zeros(Float32, nc, nc)
    )
end
function (m::MyRNN)(xoh)
    y = []
    # repeat() does not work with gradient()
    # ht = repeat(m.h0, 1, size(xoh[1], 2))
    ht = m.h0 * ones(1, size(xoh[1], 2))
    for xt in xoh  # loop over seqlen, all sequences in parallel
        y = [y; [ht]]  # prediction for this step
        ht = m.W * xt  # next prediction (the last one is unused :-))
    end
    return y
end
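The outer product with a row of ones simply replicates h0 across columns; a small check (my addition) that it matches what repeat() would give:

h = Float32[1, 2, 3]
h * ones(1, 4) == repeat(h, 1, 4)   # true: both give a 3×4 matrix of copies of h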
model = MyRNN(catcount)
ŷ = model(xoh)
size(ŷ), size(ŷ[1]), size(ŷ[2]), model
((10,), (3, 100), (3, 100), MyRNN{Vector{Float32}, Matrix{Float32}}(Float32[0.0, 0.0, 0.0], Float32[0.0 0.0 0.0; 0.0 0.0 0.0; 0.0 0.0 0.0]))
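With h0 and W initialised to zero, all logits are zero, so each prediction should be the uniform distribution after a softmax; a small check (my addition):

Flux.softmax(ŷ[2])[:, 1]   # ≈ [1/3, 1/3, 1/3]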
using Optimisers: AdamW

function loss(model, xoh)
    ŷ = model(xoh)
    return Flux.logitcrossentropy(hcat(ŷ...), hcat(xoh...))
end

opt_state = Flux.setup(AdamW(1e-3), model)
g = gradient(m -> loss(m, xoh), model)[1]
# Flux.update!(opt_state, model, g)
MethodError: no method matching (::ChainRulesCore.ProjectTo{AbstractArray, @NamedTuple{elements::Vector{ChainRulesCore.ProjectTo{AbstractArray, @NamedTuple{element::ChainRulesCore.ProjectTo{Float32, @NamedTuple{}}, axes::Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}}}, axes::Tuple{Base.OneTo{Int64}}}})(::Tuple{ChainRulesCore.InplaceableThunk{ChainRulesCore.Thunk{ChainRules.var"#rrule##90#rrule##91"{ChainRulesCore.ProjectTo{AbstractArray, @NamedTuple{element::ChainRulesCore.ProjectTo{Float32, @NamedTuple{}}, axes::Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}}, Tuple{Colon, UnitRange{Int64}}, Matrix{Float64}}}, ChainRules.var"#rrule##88#rrule##89"{Tuple{Colon, UnitRange{Int64}}, Matrix{Float64}}}})
The object of type ChainRulesCore.ProjectTo{AbstractArray, @NamedTuple{elements::Vector{ChainRulesCore.ProjectTo{AbstractArray, @NamedTuple{element::ChainRulesCore.ProjectTo{Float32, @NamedTuple{}}, axes::Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}}}, axes::Tuple{Base.OneTo{Int64}}}} exists, but no method is defined for this combination of argument types when trying to treat it as a callable object.
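For what it's worth, the failure appears to involve differentiating through the untyped vector grown by y = [y; [ht]]. I am not certain that is the root cause, but since each prediction here depends only on the current input, the whole forward pass can be written as one matrix product, avoiding the accumulation entirely. A sketch of that workaround (my rewrite, not a confirmed fix; note it returns one wide matrix instead of a vector of matrices):

function (m::MyRNN)(xoh)
    # Column block t is the prediction for step t, as before.
    h1 = m.h0 * ones(Float32, 1, size(xoh[1], 2))   # prediction for step 1
    rest = m.W * hcat(xoh[1:end-1]...)              # predictions for steps 2..end
    return hcat(h1, rest)
end

function loss(model, xoh)
    return Flux.logitcrossentropy(model(xoh), hcat(xoh...))
end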