@longemen3000 Hi Andres and thank you for helping me with my problem. Following is the code:
using MultivariateStats, ForwardDiff, StatsBase, MLKernels, MAT, Random, SparseArrays, Plots, Optim, LinearAlgebra, Distributions, Random
file8 = matopen("digit8.mat") # loading the dataset
FA8 = read(file8)
close(file8)
FA8_Data = FA8["true"]
rng = MersenneTwister(1234) # fixing the seed for the random generation
variance = 1 # the variance of the Normal Distribution used to generate the noise
X = FA8_Data + rand(Normal(0, sqrt(variance)), size(FA8_Data)) # adding noise to images
rho_m = 1 # the constant for the exp(-norm2(x,y)/rho_m) RBF kernel
L = [4] # the vector that contains the number of principal eigenvectors for the subspace used in each run/experiment (in our case is just one experiment with 4 principal eigenvectors)
Xp1 = myKPCA(X, rho_m, L, 1:size(X, 1))
function myKPCA(X::Array{Float64,2},
rho_m::Int64,
nL_vector::Array{Int64,1},
whidx::UnitRange{Int64})
# myKPCA KPCA with Gaussian RBF kernel
#
# 2 methods: (1) Scholkopf, and (2) Countour gradient
N, n = size(X) # N: sample size; n: dimension
oneOverN = 1/N
ones1n = ones(1, n)
Ntest = length(whidx) # whidx = 1:size(X, 1)
# inter-distances
norm2 = zeros(N, N)
for i in 1:N
for j in (i+1):N
norm2[i, j] = transpose(X[i, :] - X[j, :]) * (X[i, :] - X[j, :])
norm2[j, i] = norm2[i, j]
end
end
# rho = mean_component_variance * rho_m
mean_comp_var = mean(var(X, dims=1))
rho = mean_comp_var * size(X, 2) * rho_m # the same with rho = sum(var(X, dims=1)) * rho_m
# compute the kernel matrix
K = zeros(N, N)
for i in 1:N
for j in i:N
K[i, j] = exp(-norm2[i, j]/rho)
K[j, i] = K[i, j]
end
end
# centering the kernel matrix
unit = ones(N, 1)
unit2 = ones(N, N) # buffer
Kone = K * unit
oneKone = transpose(unit) * Kone # buffer
oneKoneUnit2 = oneOverN^2 .* oneKone .* unit
K_tilde = K - unit2 * K / N - K * unit2 / N + unit * oneKone * transpose(unit) / N^2
K_tilde = min.(K_tilde, transpose(K_tilde))
# K_tilde = min.(K_tilde, transpose(K_tilde), dims=1)
# find eigenvalues and eigenvectors of the centered kernel matrix
F = LinearAlgebra.eigen(K_tilde)
evecs = F.vectors
evals = real(F.values)
# denoising by 2 different methods
# loop over different choices of the number of rettained eigenvectors
for vv in 1:length(nL_vector)
@show vv
nL = nL_vector[vv] # 'nL_vector' is the vector that contains the number of the principal vectors used in every run
large_idx = N - nL .+ (1:nL) # grab the last nL index to be used later to grab the last nL eigenvalues in 'evals'
alphaA = evecs[:, large_idx] # grab the principal eigenvectors
levals = evals[large_idx] # grab the eigenvalues corresponding to the primcipal eigenvectors
alphaA = alphaA * Diagonal(levals .^ (-0.5)) # normalization of principal eigenvectors (e-vectors in feature space)
Xp1 = zeros(Ntest, n)
for ii in 1:Ntest
i = whidx[ii]
# x = X[i, :]
x = transpose(X[i, :])
Kx = zeros(N, 1)
for j in 1:N
Kx[j, i] = exp(-(x - transpose(X[j, :])) * transpose(x - transpose(X[j, :]))/rho)
end
Kx_tilde = Kx - 1/N .* K * unit - 1/N .* unit * transpose(unit) * Kx + 1/(N^2) .* (transpose(unit) * K * unit) .* unit
cpK = (transpose(Kx_tilde) * alphaA)
# optimization
# xp1 = optimize(x -> stage2_rbf(x, X, K, cp, alphaA, rho; nargout=2), x, BFGS()) #
f_stage2_rbf(x) = stage2_rbf(x, X, K, cpK, alphaA, rho; nargout=2)
xp1 = optimize(f_stage2_rbf, x, BFGS())
Xp1[ii, :] = transpose(xp1)
end
return Xp1
end
function stage2_rbf(x::Transpose{Float64,Array{Float64,1}},
X::Array{Float64,2},
K::Array{Float64,2},
cpK::Array{Float64,2},
alphaA::Array{Float64,2},
rho::Float64;
nargout=1)
N = size(X, 1) # the number of samples
n = size(X, 2) # the number of features for each sample
Kx = zeros(N, 1)
for i in 1:N
Kx[i, 1] = exp(-(x - transpose(X[i, :])) * transpose(x - transpose(X[i, :]))/rho)
end
unit = ones(N, 1)
JKx = -2/rho * (Kx * ones(1, n)) .* (unit * x - X)
Kx_tilde = Kx - 1/N * K * unit - 1/N * unit * transpose(unit) * Kx + 1/(N^2) * (transpose(unit) * K * unit) .* unit
JKx_tilde = JKx - 1/N * unit * transpose(unit) * JKx
f = 1 .- 2/N * transpose(unit) * Kx + 1/(N^2) * transpose(unit) * K * unit - 2 * (transpose(Kx_tilde) * alphaA) * transpose(cpK)
if nargout > 1
gradf = -2/N * transpose(transpose(unit) * JKx) - 2 * (transpose(JKx_tilde) * alphaA) * transpose(cpK)
end
return f, gradf
end
function stage2_rbf(x::Array{Float64,1},
X::Array{Float64,2},
K::Array{Float64,2},
cpK::Array{Float64,2},
alphaA::Array{Float64,2},
rho::Float64;
nargout=1)
N = size(X, 1) # the number of samples
n = size(X, 2) # the number of features for each sample
Kx = zeros(N, 1)
for i in 1:N
Kx[i, 1] = exp(-transpose(x - transpose(X[i, :])) * (x - transpose(X[i, :]))/rho)
end
unit = ones(N, 1)
JKx = -2/rho * (Kx * ones(1, n)) .* (unit * transpose(x) - X)
Kx_tilde = Kx - 1/N * K * unit - 1/N * unit * transpose(unit) * Kx + 1/(N^2) * (transpose(unit) * K * unit) * unit
JKx_tilde = JKx - 1/N * unit * transpose(unit) * JKx
f = 1 - 2/N * transpose(unit) * Kx + 1/(N^2) * transpose(unit) * K * unit - 2 * (transpose(Kx_tilde) * alphaA) * cpK
if nargout > 1
gradf = -2/N * transpose(transpose(unit) * JKx) - 2 * (transpose(JKx_tilde) * alphaA) * cpK
end
return f, gradf
end
I do not know how to upload the dataset ’ digit8.mat’, but it is a set of 1000 samples of handwritten number eight vectorized images (784 pixels vectorized). Hence the dimensions of the dataset is 1000x784 pixels. If you know any way how to upload please let me know so I upload them for you.
Once more thank you very much for your support.
Cheers.
Ergnoor