I am trying to create multivariate distributions from the mean and standard deviation produced by a neural network, but I am getting the following error:
MethodError: no method matching MvNormal(::TrackedArray{…,Array{Float32,1}}, ::TrackedArray{…,Array{Float32,1}})
Closest candidates are:
MvNormal(::AbstractArray{T<:Real,1}, !Matched::PDMats.AbstractPDMat{T<:Real}) where T<:Real at /Users/elitalobo/.julia/packages/Distributions/Iltex/src/multivariate/mvnormal.jl:190
MvNormal(::AbstractArray{T,1} where T, !Matched::PDMats.AbstractPDMat) at /Users/elitalobo/.julia/packages/Distributions/Iltex/src/multivariate/mvnormal.jl:195
MvNormal(::AbstractArray{#s164,1} where #s164<:Real, !Matched::Array{#s163,2} where #s163<:Real) at /Users/elitalobo/.julia/packages/Distributions/Iltex/src/multivariate/mvnormal.jl:203
Also, I am unable to clamp TrackedArray values. Both of these calls work fine when I apply Tracker.data to the arrays first. However, I need to backpropagate the error through this distribution/matrix, so I do not want to untrack the variables. Can someone please tell me how to resolve this issue?
I am using Flux to backpropagate the error.
Thanks.
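Here is roughly what I mean, stripped out of the network code (a minimal sketch with made-up placeholder values, just to show the calls that fail):

using Flux, Distributions
using Flux.Tracker   # or "using Tracker", depending on the Flux version

mu    = Flux.param(Float32[0.0, 0.0])   # tracked stand-ins for the network outputs
sigma = Flux.param(Float32[1.0, 1.0])

MvNormal(mu, sigma)                     # throws the MethodError above
clamp!(sigma, 0.1f0, 2.0f0)             # also fails on a TrackedArray
MvNormal(Tracker.data(mu), Tracker.data(sigma))   # works, but the gradient is lost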
…
PolicyNetwork(num_inputs, num_actions, hidden_size, init_w, log_std_min::Float32, log_std_max::Float32) = (
    # Taken from https://github.com/jacobcvt12/reparam
    linear1 = Dense(num_inputs, hidden_size, relu);
    linear2 = Dense(hidden_size, hidden_size, relu);
    mean_linear = Dense(hidden_size, num_actions);
    log_std_linear = Dense(hidden_size, num_actions);
    model = Chain(linear1, linear2);
    function forward(x)
        mean = mean_linear(model(transpose(x)));
        log_std = log_std_linear(model(transpose(x)));
        # TODO: clamp!(log_std, log_std_min, log_std_max) -- this is the clamp that fails on a TrackedArray
        return transpose(mean), transpose(log_std);
    end;
    function evaluate(x)
        epsilon = 1e-6;
        mean, log_std = forward(x);
        std = exp.(log_std);
        z = rand(Normal(0, 1));
        temp = mean + std .* z;
        mean_temp = slicematrix(mean);
        std_temp = slicematrix(std);
        # untracking the arrays works, but breaks backpropagation:
        # normal_temp = MvNormal.(Tracker.data.(mean_temp), Tracker.data.(std_temp));
        # this is what I actually need, and it throws the MethodError above:
        normal_temp = MvNormal.(mean_temp, std_temp);
        temp = slicematrix(temp);
        action = tanh.(mean .+ std .* z);
        log_prob = loglikelihood.(normal_temp, temp);
        deriv = log.(1.0 .- action.^2 .+ epsilon);
        log_prob = reshape(log_prob, (1, length(log_prob))) - deriv;
        log_prob_sum = sum(log_prob, dims=2);
        return transpose(action), transpose(log_prob), z, transpose(mean), transpose(log_std);
    end;
    function get_action(x)
        epsilon = 1e-6;
        mean = mean_linear(model(x));
        log_std = log_std_linear(model(x));
        std = exp.(log_std);
        z = rand(Normal(0, 1));
        temp = mean + std .* z;
        mean_temp = reshape(mean, (size(mean)[2]));
        std_temp = reshape(std, (size(std)[2]));
        normal_temp = Normal.(mean_temp, std_temp);
        temp = reshape(temp, (size(temp)[2], 1));
        temp = slicematrix(temp);
        action = tanh.(mean .+ std .* z);
        return Tracker.data(transpose(action)[1, :]);
    end;
    new(log_std_min, log_std_max, linear1, linear2, mean_linear, log_std_linear, forward, evaluate, get_action)
)
end
PolicyNetwork(num_inputs, num_actions, hidden_size) = PolicyNetwork(num_inputs, num_actions, hidden_size, 1e-3, Float32(-20.0), Float32(2.0));
# Split a matrix into a vector of its rows (used to broadcast MvNormal over samples).
function slicematrix(A::AbstractMatrix)
    return [A[i, :] for i in 1:size(A, 1)]
end
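For what it is worth, the only workaround I have come up with so far (I am not sure it is the intended way) is to avoid MvNormal entirely and write the log-density of the diagonal Gaussian by hand, since broadcasted operations stay tracked. Something like the sketch below, where diag_gaussian_logpdf is just a helper name I made up, and mean, std and x are matrices with one row per sample:

# hand-written log-density of a diagonal Gaussian, summed over the action dimension
function diag_gaussian_logpdf(mean, std, x)
    return sum(-0.5f0 .* ((x .- mean) ./ std).^2 .- log.(std) .- 0.5f0 * log(Float32(2pi)), dims=2)
end

# and an out-of-place clamp instead of clamp!, which I think should go through
# Tracker's broadcast machinery (not verified):
# log_std = clamp.(log_std, log_std_min, log_std_max)

But I would rather keep using the Distributions API if there is a proper way to make it work with tracked arrays.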