Hi,
I’m trying to train a simple autoencoder with the objective of using it for data imputation. The idea is that I have a matrix of counts whose zero values are caused by a lack of sensitivity of the detector, and I want to replace these zeros based on information learned from the other samples. This is tightly related to collaborative filtering. So far I have used the autoencoder from the Flux model zoo as a starting point. I wanted to see the effect of the median-of-means (MOM) estimator on the mean error, so I modified the loss function of the autoencoder to use the MOM estimator of the error on the non-missing (non-zero) values (a standalone sketch of the estimator is included right after the full code below). However, when training I get this error that I don’t manage to solve:
ERROR: MethodError: no method matching (::var"#16#20"{Args,Chain{Tuple{Dense{typeof(leakyrelu),Array{Float32,2},Array{Float32,1}},Dense{typeof(leakyrelu),Array{Float32,2},Array{Float32,1}},var"#15#19"}}})(::Array{Float64,2}, ::BitArray{2})
Here is my code so far:
# use an Autoencoder to impute missing values
using Flux
using Flux: @epochs, onehotbatch, mse, throttle
using Base.Iterators: partition
using Parameters: @with_kw
using Random
using Statistics
using CUDA
if has_cuda() # false on my laptop so I don't use the GPU
    @info "CUDA is on"
    CUDA.allowscalar(false)
end
@with_kw mutable struct Args
    lr::Float64 = 1e-3     # Learning rate
    epochs::Int = 10       # Number of epochs
    N::Int = 100           # Size of the encoding
    batchsize::Int = 500   # Batch size for training
    sample_len::Int = 34   # Length of each sample (number of columns of X)
    throttle::Int = 5      # Throttle timeout
    k::Int = 100           # Number of blocks for the MOM estimation of the average error
end
function get_processed_data(X, args)
    # locations of the non-zero (observed) values
    Ω = X .!= 0
    # Partition into batches of size batchsize
    perm = randperm(size(X, 1))
    train_data = [float(permutedims(X[x, :])) for x in partition(perm, args.batchsize)]
    train_nz = [permutedims(Ω[x, :]) for x in partition(perm, args.batchsize)]
    #train_data = gpu.(train_data)
    return train_data, train_nz
end
function MOMloss(x, m, k)
    # x[1] is a batch of data (features × samples), x[2] the mask of observed (non-zero) entries
    if k == 1
        return mse(m(x[1])[x[2]], x[1][x[2]])
    else
        # squared reconstruction errors on the observed entries only
        err = (m(x[1])[x[2]] - x[1][x[2]]) .^ 2
        n = length(err)
        # median of the mean errors over (roughly) k random blocks
        return median([sum(err[idx]) / length(idx) for idx in partition(randperm(n), cld(n, k))])
    end
end
function train(X; kws...)
    args = Args(; kws...)
    train_data, train_nz = get_processed_data(X, args)

    @info("Constructing model......")
    encoder = Dense(args.sample_len, args.N, leakyrelu) #|> gpu
    decoder = Dense(args.N, args.sample_len, leakyrelu) #|> gpu
    non_neg = x -> max.(x, 0.0)  # counts are non-negative, so clamp the reconstruction
    # Defining main model as a Chain of encoder and decoder models
    m = Chain(encoder, decoder, non_neg)

    @info("Training model.....")
    loss = x -> MOMloss(x, m, args.k)

    ## Training
    evalcb = throttle(() -> @show(loss((train_data[1], train_nz[1]))), args.throttle)
    opt = ADAM(args.lr)
    @epochs args.epochs Flux.train!(loss, params(m), zip(train_data, train_nz), opt; cb = evalcb)

    return m, args
end
# synthetic count data: values around 100, with small values zeroed out to mimic detector dropout
X = round.(Int, 100 .+ 100 .* randn(17334, 34))
X[X .< 10] .= 0
m, args = train(X)
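(As mentioned at the top, here is the MOM estimator in isolation, independent of the model, just to make the loss above concrete: split the values into k blocks at random, average within each block, and take the median of the block means.)

using Statistics, Random
using Base.Iterators: partition

# median-of-means: a robust estimate of mean(err) that is much less
# sensitive to a few huge outliers than the plain mean
function mom(err, k)
    n = length(err)
    blocks = partition(randperm(n), cld(n, k))  # k blocks of roughly equal size
    return median([mean(err[idx]) for idx in blocks])
end

mom(randn(10_000) .^ 2, 100)  # ≈ 1, the mean of a squared standard normal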
I have checked that the loss function actually works:
loss((train_data[1], train_nz[1]))
18504.937187175092
for i in zip(train_data, train_nz)
    println(loss(i))
end
18492.903468052522
18227.281440521285
18409.256767663464
...
I would be really thankful for any help in solving this issue!
EDIT: I somehow managed to find the mistake: Flux.train! actually “opens” (splats) each element of the zip object before passing it to the loss function, because when I change loss to take x and y as two separate arguments I no longer get the MethodError. However, I now get the error ERROR: Mutating arrays is not supported, which I still need to solve.
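For reference, the fix for the MethodError is just giving the loss the signature that Flux.train! expects, keeping everything else as in the script above:

# Flux.train! calls loss(d...) on each element d of the data iterator,
# so with zip(train_data, train_nz) the loss must accept two arguments
loss = (x, y) -> MOMloss((x, y), m, args.k)
evalcb = throttle(() -> @show(loss(train_data[1], train_nz[1])), args.throttle)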
EDIT2: Working on the ERROR: Mutating arrays is not supported issue, I’m almost there: I rewrote median by hand and used deepcopy and Tullio.jl. However, I now get the error ERROR: MethodError: no method matching zero(::Type{Array{Float64,1}}), and I have a really hard time figuring out where it comes from… Here are the modifications I made to the loss function:
# median of a pre-sorted vector, written by hand to avoid the
# mutating sort inside Statistics.median
function median_(x)
    n = length(x)
    if n % 2 == 1
        return 1.0 * x[div(n, 2) + 1]
    else
        return 0.5 * (x[div(n, 2)] + x[div(n, 2) + 1])
    end
end
function MOMloss(x, y, m, k)
    tmp = deepcopy(x)   # deepcopy to avoid mutating the training data
    tmp2 = m(tmp)[y]    # model output on the observed entries
    tmp = tmp[y]        # observed entries of the input
    if k == 1
        return mse(tmp2, tmp)
    else
        err = (tmp2 - tmp) .^ 2
        n = length(err)
        # split the indices into k blocks of roughly equal size
        tmp3 = collect(partition(randperm(n), cld(n, k)))
        # median of the per-block mean errors; note that inside @tullio, k is re-used as the block index
        @tullio (median_) tmp4 := .+(getindex(err, tmp3[k])) / length(tmp3[k]) (k in 1:length(tmp3)) grad = Dual
        return tmp4
    end
end
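In case it helps others, the direction I’m now trying is a fully non-mutating version of the block medians, along the lines of the sketch below (my assumption is that Zygote only needs the in-place operations gone, since sort, unlike sort!, returns a new array; the names median_nonmut and mom_nonmut are just for illustration and I have not verified the gradients yet):

using Statistics: mean
using Random
using Base.Iterators: partition

# median without in-place mutation: sort (not sort!) allocates a new array
function median_nonmut(v)
    s = sort(v)
    n = length(s)
    return isodd(n) ? 1.0 * s[div(n, 2) + 1] : 0.5 * (s[div(n, 2)] + s[div(n, 2) + 1])
end

# median-of-means over k blocks, written without any in-place operation
function mom_nonmut(err, k)
    n = length(err)
    blocks = collect(partition(randperm(n), cld(n, k)))
    block_means = [mean(err[idx]) for idx in blocks]
    return median_nonmut(block_means)
end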