Best way for linear regression problem on product features

I thought I’d try another method. Inverse regression methods are useful if one is interested in a low-rank predictor subspace:

using DataFrames, LinearAlgebra, Statistics

function indmat(Y::AbstractVector; nq=4)
    # Slice Y using quantiles
    q = quantile(Y, range(0, 1.0, length=nq+1))
    @show q
    z = [zeros(nq-1); 0.1]
    ind = (Y .>= q[1:end-1]')  .&  (Y .< (q[2:end] .+ z)')
    return ind*1.0
end


function lda(X, Y)
    # Inverse regression
    # similar to linear discriminant analysis
    μX = mean(X, dims=1)
    Xw = svd(X.-μX).U # whiten X
    Xw ./= std(Xw, dims=1) # make unit variance
    Ya = indmat(Y) # slice Y
    Xhat = Ya*(cov(Ya)\cov(Ya,Xw))
    # Try a truncated or approx SVD here instead?
    F = svd(Xhat)
    DC = Xw*F.V[:, axes(Ya, 2)]     # Features or Direction Components
    df = DataFrame(DC.-mean(DC, dims=1), [:DC1, :DC2, :DC3, :DC4])
    df.Y = Y  
    return df
end


m, n, N = 100, 500, 100
X, Z, Y = rand(N, n), rand(N, m), rand(N)
Wᵀ =reshape([conj(X[i,k] * Z[i,ℓ]) for ℓ in 1:m, k in 1:n, i in 1:N], m*n,N)
@show size(Wᵀ);

df = lda(Wᵀ', Y)
dfs = stack(df, Not(:Y)) 
plot(dfs, x=:value, y=:Y, xgroup=:variable,  Geom.subplot_grid(
Geom.point, free_x_axis=true), Guide.xlabel(nothing))

These random numbers have interesting features! If you want a predictive model, then simply build it from the DCs.