Hi!
I created a structure for ML (for a generalized linear modell) and i want to store the linking function by initializing the structure using the name of the distribution.
The code of the structure:
mutable struct GLM
    learning_input::Matrix{Float64}
    learning_output::Vector{Float64}
    weights::Vector{Float64}
    distribution::String          # Distribution (normal, exponential, etc.)
    link_function::Function       # Link function
    cost_convergence_error::Float64
    cost_history::Vector{Float64}
    name::String
    description::String
    function GLM(learning_input::Matrix{Float64}, learning_output::Vector{Float64}, distribution::String="normal",name::String="Generalized Linear Modell", description::String="This modell is for a generalized linear modell!")
        @assert size(learning_input, 1) == length(learning_output) "Inputs and outputs sizes do not match"
        n_features = size(learning_input, 2)
        weights = zeros(n_features)               # Generate initial weights as zeros
        cost_convergence_error = 0.0            # Default cost convergence error
        cost_history = Float64[]                # Empty cost history vector
        link_fn=get_link_function(distribution, weights)
        new(learning_input, learning_output, weights, distribution, link_fn, cost_convergence_error, cost_history, name, description)
    end
end
The code for the get_link_function:
    function get_link_function(distribution::String, weights)
        if distribution == "normal"
            return (
                x -> dot(x, weights)                     # Identity with weights
            )
        elseif distribution in ["exponential", "poisson"]
            return (
                x -> log(dot(x, weights))                # Log with weighted input
            )
        elseif distribution == "gamma"
            return (
                x -> 1 / dot(x, weights)                 # Inverse with weights
            )
        elseif distribution == "inverse_gaussian"
            return (
                x -> 1 / (dot(x, weights)^2)             # Inverse square with weights
            )
        elseif distribution in ["bernoulli", "binomial"]
            return (
                x -> log(dot(x, weights) / (1 - dot(x, weights)))  # Logit with weights
            )
        elseif distribution in ["categorical", "multinomial"]
            return (
                x -> log.(dot(x, weights))               # Log-softmax
            )
        else
            error("Unsupported distribution: ($distribution). Supported options: normal, exponential, poisson, gamma, inverse_gaussian, bernoulli, binomial, categorical, multinomial.")
        end
    end
Additional function for generating data:
function generate_data(n_samples, n_features, distribution, noise=0.1)
    Random.seed!(42)  # For reproducibility
    X = rand(n_samples, n_features)              # Random features
    # Add bias term consistently (if needed)
    #X = hcat(ones(n_samples), X)                 # Add bias term
    true_weights = randn(n_features)  # This should create a vector with size n_features
    link_fn, link_inv_fn = get_link_fn(distribution)
    # Generate dependent variable
    linear_predictor = X * true_weights
    if distribution == "normal"
        y = linear_predictor .+ noise * randn(n_samples)  # Add Gaussian noise
    elseif distribution in ["exponential", "poisson"] || distribution == "bernoulli" || distribution == "binomial"
        y = exp.(linear_predictor) .+ noise               # Non-negative
    elseif distribution == "gamma"
        y = 1 ./ (linear_predictor .+ 1e-6)
    else
        error("Unsupported distribution: $distribution")
    end
    return X, y, true_weights
end
And the main() looks like:
function main()
    # Parameters
    n_samples = 100
    n_features = 20
    noise = 0.2
    width = 72  # For printing
    decimals = 20
    distribution = "normal"
    # Generate synthetic data
    X, y, true_weights = generate_data(n_samples, n_features, distribution, noise)
    # Initialize GLM with generated data
    glm = GLM(X, y, distribution)
end
I always receive the following error for the returning the function of normal distribution in get_link_function():
ERROR: DimensionMismatch: dot product arguments have lengths 2000 and 20
I would like to have the function inside the structure so I do not have to call it in the main() to give value to link_fn of GLM. Could you tell what is the problem with my code?
Thank you for your help