Lux LSTM network with multiple inputs

Can anyone help me use an LSTM to predict temperature? I have four input features and I want to predict the temperature at future time steps.

using Random, Statistics, CSV, DataFrames, MLDatasets, ComponentArrays, DataInterpolations, Lux, MLUtils, Optimization, Zygote
using Optimisers, OptimizationOptimisers, Optim, OptimizationOptimJL, Plots, Flux
using MLJ
# File path
file_path = raw"real_values_ex_00_fixed_time_step.csv"
data_frame = CSV.read(file_path, DataFrame; header = true)
# add a fourth column named t_ext with every row set to 20.0
data_frame.t_ext = fill(20.0, size(data_frame, 1))
Data = DataFrames.DataFrame(data_frame);

# Define the proportions for the split
train_proportion = 0.7

# Split the dataset
train, test = partition(1:size(Data, 1), train_proportion, shuffle=false)

# Extract training and test data
train_data = Data[train, :]
test_data = Data[test, :]

println(size(train_data))
println(size(test_data))
println(typeof(train_data))
println(typeof(test_data))

# the training data has 4 input features and 1 output

x_train = train_data[1:25201, 1:4]
y_train = train_data[1:25201, 2]
x_test = test_data[1:10800, 1:4]
y_test = test_data[1:10800, 2]
model = Lux.Chain(Lux.Dense(4 => 8), Lux.Recurrence(Lux.LSTMCell(8 => 16)), Lux.Dense(16 => 1))
rng = Random.default_rng()
Random.seed!(rng, 0)
ps, st = Lux.setup(rng, model)
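
As far as I understand it, Lux.Recurrence with the default BatchLastIndex ordering expects a (features, seq_len, batch) array and, since return_sequence defaults to false, returns only the last time step. A quick sanity check on dummy data (not my CSV), just to confirm the shapes:

# dummy forward pass: 4 features, 20 time steps, batch of 8
x_dummy = rand(Float32, 4, 20, 8)
y_dummy, _ = model(x_dummy, ps, st)
size(y_dummy) # expected: (1, 8)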

Could you tell me a bit more about the data you’re trying to model? Is it annual data, monthly data, hourly data, or something else? What are you measuring the temperature of? What are the other inputs? And could I ask why you’re using a neural network for this? A neural network feels like overkill.

I have water-temperature data for a pot, the gas flow rate, a timestamp every second, and the external temperature. I want to use a neural network so I can compare it with a neural ODE, that is, with predictions coming from the solution of my ODE, which incorporates the same exogenous variables I feed to the NN.

OK. In that case a simple physics-based ODE model is probably going to outperform either a neural ODE or even a plain neural network; that said, you can certainly build an LSTM model using Lux or Flux. What are you having trouble with that the documentation hasn't been able to explain?
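
To give you an idea of what I mean by a physics-based model: for a heated pot, a single lumped energy balance of the form dT/dt = a*q(t) - b*(T - T_ext) is often enough, where q(t) is the gas flow rate, T_ext is the external temperature, and a and b are coefficients you fit to your measurements. A rough sketch; gas_flow and timestamps stand in for your measured series, the parameter values are placeholders, and you would need OrdinaryDiffEq, which is not in your using list:

using OrdinaryDiffEq, DataInterpolations

# interpolate the measured gas flow so the solver can evaluate it at any t
q = LinearInterpolation(gas_flow, timestamps)

# lumped energy balance: heat input from the gas, Newton-style cooling to ambient
function pot!(dT, T, p, t)
    a, b, T_ext = p
    dT[1] = a * q(t) - b * (T[1] - T_ext)
end

p = (0.5, 0.01, 20.0) # placeholder coefficients; fit these to your data
prob = ODEProblem(pot!, [20.0], (timestamps[1], timestamps[end]), p)
sol = solve(prob, Tsit5(), saveat = timestamps)

Fitting a and b by least squares against the measured water temperature would give you a strong baseline to compare both the LSTM and the neural ODE against.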

Can you take a look at this code and tell me how to create my LSTM?

using Random, Statistics, CSV, DataFrames, MLDatasets, ComponentArrays, DataInterpolations, Lux, MLUtils, Optimization, Zygote
using Optimisers, OptimizationOptimisers, Optim, OptimizationOptimJL, Plots, Flux
using MLJ
# File path
file_path = raw"es_02_real_data_.csv"
data_frame = CSV.read(file_path, DataFrame; header = true)
df = DataFrames.DataFrame(data_frame);

# save column 4's mean and std so predictions can be denormalized later
mean_df4 = mean(df[:, 4])
std_df4 = std(df[:, 4])

# standardize each feature column (z-score normalization)
df[:, 2] = (df[:, 2] .- mean(df[:, 2])) ./ std(df[:, 2])
df[:, 3] = (df[:, 3] .- mean(df[:, 3])) ./ std(df[:, 3])
df[:, 4] = (df[:, 4] .- mean(df[:, 4])) ./ std(df[:, 4])


# Define the proportions for the split
train_proportion = 0.80

# Split the dataset
train, test = partition(1:(size(df, 1)-1), train_proportion, shuffle=false)

# Extract training and test data
train_data = df[train, :]
test_data = df[test, :]

# drop the timestamp column and transpose to a features × time matrix
train_data = permutedims(Matrix(train_data[:, 2:end]))
test_data = permutedims(Matrix(test_data[:, 2:end]))

x_train = train_data[1:3, :]
y_train = train_data[3, :]
x_test = test_data[1:3, :]
y_test = test_data[3, :]
batchsize = 10

println(size(test_data, 2))

# reshape the features × time matrix into a 3D array with dimensions
# features × seq_len × batch, i.e. 3 × (time ÷ batchsize) × batchsize
x_train = reshape(x_train, (3, Int(size(train_data, 2) / batchsize), batchsize))


y_train = reshape(y_train, (1, Int(size(train_data, 2) / batchsize), batchsize))
# the test set is kept as single-step sequences: 3 × 1 × n_test
x_test = reshape(x_test, (3, 1, size(test_data, 2)))
y_test = reshape(y_test, (1, 1, size(test_data, 2)))


# mean squared error (the training loop below uses the equivalent loss_fn)
function compute_loss(predicted, actual)
    return mean((predicted .- actual) .^ 2)
end


# the model: a Dense input layer, an LSTM over the sequence, and a Dense output head
model = Lux.Chain(Lux.Dense(3 => 3), Lux.Recurrence(Lux.LSTMCell(3 => 8)), Lux.Dense(8 => 1))

rng = Random.default_rng()
ps, st = Lux.setup(rng, model)

num_batches = Int(size(train_data, 2) / batchsize)
batch_size = 1 # process one batch (batchsize time steps × 3 features) per update
loss_fn(y_pred, y_true) = Flux.mse(y_pred, y_true)


losses = Float64[]
opt = Optimisers.Adam(0.05)
opt_state = Optimisers.setup(opt, ps)

for start in 1:batch_size:num_batches
    bend = min(start + batch_size - 1, num_batches)
    x_batch = x_train[:, start:bend, :]
    y_batch = y_train[:, start:bend, :]
    y_batch = reshape(y_batch, (1, size(y_batch, 3)))
    global ps, opt_state, losses
    # withgradient returns the loss together with the gradient, so the loss
    # can be logged and stored without a second forward pass
    loss_value, gs = Zygote.withgradient(ps) do ps
        y_pred = model(x_batch, ps, st)[1]
        return loss_fn(y_pred, y_batch)
    end
    opt_state, ps = Optimisers.update!(opt_state, ps, gs[1])
    push!(losses, loss_value)
    println("Loss: $loss_value")
end

display(plot(losses))

predicted_vals = model(x_test, ps, st)[1]

plot(predicted_vals[1,:], label="Prediction")
display(plot!(y_test[1,1,:], label="Test data normalized"))

denormalized_predicted_vals = predicted_vals .* std_df4 .+ mean_df4
denormalized_actual_vals = y_test .* std_df4 .+ mean_df4

plot(denormalized_predicted_vals[1,:], label="Prediction")
display(plot!(denormalized_actual_vals[1,1,:], label="Test_data"))

x_train = reshape(x_train, (3, 1, size(train_data, 2)))
y_train = reshape(y_train, (1, 1, size(train_data, 2)))
trained_val = model(x_train, ps, st)[1]

plot(trained_val[1,:], label="Prediction")
display(plot!(y_train[1,1,:], label="Training_data normalized"))

denormalized_trained_val = trained_val .* std_df4 .+ mean_df4
denormalized_y_train = y_train .* std_df4 .+ mean_df4

plot(denormalized_trained_val[1,:], label="Prediction")
display(plot!(denormalized_y_train[1,1,:], label="Training_data"))