I thought I’d try another method. Inverse regression methods are useful if one is interested in a low-rank predictor subspace:
using DataFrames, LinearAlgebra, Statistics
function indmat(Y::AbstractVector; nq=4)
# Slice Y using quantiles
q = quantile(Y, range(0, 1.0, length=nq+1))
@show q
z = [zeros(nq-1); 0.1]
ind = (Y .>= q[1:end-1]') .& (Y .< (q[2:end] .+ z)')
return ind*1.0
end
function lda(X, Y)
# Inverse regression
# similar to linear discriminant analysis
μX = mean(X, dims=1)
Xw = svd(X.-μX).U # whiten X
Xw ./= std(Xw, dims=1) # make unit variance
Ya = indmat(Y) # slice Y
Xhat = Ya*(cov(Ya)\cov(Ya,Xw))
# Try a truncated or approx SVD here instead?
F = svd(Xhat)
DC = Xw*F.V[:, axes(Ya, 2)] # Features or Direction Components
df = DataFrame(DC.-mean(DC, dims=1), [:DC1, :DC2, :DC3, :DC4])
df.Y = Y
return df
end
m, n, N = 100, 500, 100
X, Z, Y = rand(N, n), rand(N, m), rand(N)
Wᵀ =reshape([conj(X[i,k] * Z[i,ℓ]) for ℓ in 1:m, k in 1:n, i in 1:N], m*n,N)
@show size(Wᵀ);
df = lda(Wᵀ', Y)
dfs = stack(df, Not(:Y))
plot(dfs, x=:value, y=:Y, xgroup=:variable, Geom.subplot_grid(
Geom.point, free_x_axis=true), Guide.xlabel(nothing))
These random numbers have interesting features! If you want a predictive model, then simply build it from the DCs.
