Modelling heating curves with a PINN

I am trying to model some temperature curves with a physics-informed neural network (PINN).

The physical model I am trying to apply is a first-order plus dead time (FOPDT) model.
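For reference, the FOPDT model is

τ_p \frac{dy(t)}{dt} = −y(t) + K_p u(t − θ_p)

with gain K_p, time constant τ_p, and dead time θ_p.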

NN = Chain(
	Dense(1 => 32, tanh),
	Dense(32 => 32, tanh),
	Dense(32 => 1)) |> gpu
	
function lossₚ(i, y)
	run = floor(Int, i / 130)   # each run is 130 samples long
	i₀ = run * 130              # index where the current run starts
	iᵣ = i - i₀                 # index within the current run
	U = (iᵣ < 60) ? 100 : 0     # heat input: fully on, or off

	# FOPDT parameters, currently from scipy.optimize fsolve
	Κ = 66.3
	τ = 145.7
	θ = 73.4

	yₑ = grad.(y)               # time derivative of the NN output
	yₘ = (.-y .+ Κ .* U .* (i₀ .- θ)) ./ τ

	return sum((yₑ .- yₘ) .^ 2)
end

lossₘ(y_hat, y) = sum((y_hat .- y) .^ 2)

loss_ratio = 1.0 / 5.0
lossₜ(y_hat, y) = (1 - loss_ratio) * lossₘ(y_hat, y) + loss_ratio * lossₚ(enumerate(y))

	
train_mask = 1:trunc(Int, length(static_df.T1) // 2)

X_train = static_df[train_mask, :Q1]
y_train = static_df[train_mask, :T1]
train_set = [(x, y) for x in X_train, y in y_train]

η = 0.001
opt = Flux.setup(Adam(η), NN)
	
for epoch in 1:100
	Flux.train!(NN, train_set, opt) do m, x, y
		lossₜ(m.(x) .+ y_train[1], y)
	end
end

Besides the normal MSE loss, I am trying to add a term for the ODE. But I am not sure how to add the time-dependent term, which is why I used enumerate. In the training I also add the start condition to the NN output.
The current problem is something with the type passed to the tanh layers, but I don't get it:

MethodError: no method matching (::Flux.Dense{typeof(tanh), Matrix{Float32}, Vector{Float32}})(::Float64)

So my immediate question is how to solve this bug, but in addition I would like to know whether I am on the right track, or whether you have any other tips.

It’s saying that your neural network is being given a Float64 scalar, which isn’t allowed, since the input is supposed to be a vector or matrix.

You don’t need to broadcast the model evaluation m; just put the different samples in different columns, i.e. use x'.
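For example, something like this (a sketch, assuming X_train and y_train are plain vectors and NN is on the CPU):

x = Float32.(X_train)   # the samples as a Vector{Float32}
ŷ = NN(x')              # x' is a 1×N row matrix: one column per sample, one forward pass
loss = sum((ŷ .- Float32.(y_train)') .^ 2)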

Sorry, I did not really understand your answer. What might be the reason for the problem?

What I did now: I substituted the real data with a mock-up, used a simpler loss function without the casting, and changed the definition of train_set.
I commented out the old code and added the new version on the line below.
All I can say is that something is computed, slowly.


# X_train = static_df[train_mask, :Q1]
X_train = 1:130
# y_train = static_df[train_mask, :T1]
y_train = [x^2 + 21.453 for x in 1:130]
# train_set = [(x, y) for x in X_train, y in y_train]
train_set = [([x], y) for x in X_train, y in y_train]

η = 0.001
opt = Flux.setup(Adam(η), NN)
	
for epoch in 1:100
	Flux.train!(NN, train_set, opt) do m, x, y
		# lossₜ(m(x) .+ y_train[1], y)
		sum((m(x) .- y) .^ 2)
	end
end

I have tried to go from the toy example to the real data. My net is below, with 21.543 °C being the starting condition:

NN = Chain(
	Dense(1 => 16, tanh_fast),
	Dense(16 => 16, tanh_fast),
	Dense(16 => 1))

g(t) = NN(t) .+ 21.543

I have adapted this from @ChrisRackauckas’ course.

Well, and it uses only a single core. Hence, I tried:

  • GPU usage
  • mini-batching

For batching, I am struggling with the documentation on how to go from:

train_set = [([x], y) for x in X_train, y in y_train]

to using the DataLoader. My best guess was:

begin
	train_mask = 1:Int(length(static_df.T1) // 2)

	x_train = Float32.(static_df[train_mask, :Q1])
	y_train = Float32.(static_df[train_mask, :T1])

	loader = Flux.DataLoader((data=x_train, label=y_train), batchsize=64, shuffle=false)

	η = 0.001
	opt = Flux.setup(Adam(η), NN)

	for epoch ∈ 1:5
		for mini_batch ∈ loader
			train!(g, mini_batch, opt) do m, x, y
				# lossₜ(m(x) .+ y_train[1], y)
				ŷ = m(x)
				sum((ŷ .- y) .^ 2)
			end
		end
	end
end

When I execute it, I get the error:

MethodError: no method matching (::var"#57#58"{typeof(-), typeof(^), typeof(sum)})(::typeof(Main.var"workspace#30".g), ::Vector{Float32})
I understand it as saying that I have to convert my vector X into a Matrix? But I thought I already had.

About the GPU: I understood that when I want to train on the GPU, I only have to transfer the net onto the GPU:


NN = Chain(
	Dense(1 => 16, tanh_fast),
	Dense(16 => 16, tanh_fast),
	Dense(16 => 1)) |> gpu

But this results in the error:

no method matching unsafe_convert(::Type{Ptr{Float32}}, ::CUDA.CuPtr{Float32})

I wanted to include the model as a second term in the loss function. I hope I have written the formula correctly.

function lossₚ((i, x)::Tuple{Int, Float32})
	run = trunc(Int, i / 130)   # each run is 130 samples long
	i₀ = run * 130              # index where the current run starts
	iᵣ = i - i₀                 # index within the current run
	U = (iᵣ < 60) ? 100 : 0     # input heat is either fully on, or off

	# FOPDT parameters
	Κ = 0.997887
	τ = 1439.8
	θ = -661.1

	ŷ = g(x)
	Δyₗ = ŷ - g(x - 1)          # change relative to the previous point

	yᵣ = (.-ŷ .+ Κ .* U .* (i₀ .- θ)) ./ τ

	return sum((Δyₗ .- yᵣ) .^ 2)
end

lossₘ(ŷ, y) = sum((ŷ .- y) .^ 2)

loss_ratio = 1.0 / 5.0
lossₜ(ŷ, y, x) = (1 - loss_ratio) * lossₘ(ŷ, y) + loss_ratio * lossₚ.(enumerate(y))

But the code breaks, probably at ŷ = g(x), with the error: MethodError: no method matching (::Flux.Dense{typeof(NNlib.tanh_fast), Matrix{Float32}, Vector{Float32}})(::Float32)

  1. I do not understand why m(x) works in the training loop, then.
  2. I am also not sure whether enumerate is the correct thing, because I need the index of the training data set, as it marks the point in time. Or should I include the index in the training dataset?

To be honest, I find the documentation of Flux confusing, because it mixes several styles: training is done either with train! in a loop, or functionally with withgradient. It would be nice to have some paragraphs about these styles, and about how to do the same things (for instance saving and printing the loss) in each of these ways.
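For instance, the two styles I mean look roughly like this (my own sketch, assuming a model m, an optimiser state opt from Flux.setup, and one batch (x, y)):

# style 1: Flux.train! drives the loop and the gradients
Flux.train!(m, [(x, y)], opt) do m, x, y
	sum((m(x) .- y) .^ 2)
end

# style 2: explicit gradients with withgradient and update!
loss, grads = Flux.withgradient(m) do m
	sum((m(x) .- y) .^ 2)
end
Flux.update!(opt, m, grads[1])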

@xtalax can you take a look at this one? It’s been sitting in my list too long.

The big thing I can see, though, is that it’s mixing Float32 and Float64.

It should be g(t) = NN(t) .+ Float32(21.543), and there are a few other places like that. @Ordens_Ritter did you already correct that? I know someone asked a similar question on Slack with a bunch of Float64 values mixed into a Float32 computation.
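For example (a sketch of the kind of changes I mean):

g(t) = NN(t) .+ 21.543f0      # 21.543f0 is a Float32 literal, same as Float32(21.543)
x_train = Float32.(x_train)   # keep the data Float32 too, matching the Dense weights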

Hi @ChrisRackauckas,

I did a total overhaul. I changed the input and am able to get the PINN running without the physical loss term, with the caveat that I am not able to use mini-batches or the GPU.

The network is:

NN = Chain(
	Dense(2 => 16, relu),
	Dense(16 => 1)) |> f64

Streaming the network to the GPU would still be nice, but without the physical loss everything is more than fast enough.

The X is the heating (either 0 or 100) and the previous temperature, and y is the resulting updated temperature (in Kelvin). I get the data from a DataFrame.

train_mask = 1:Int(length(static_df.T1) // 2)

train_df = static_df[train_mask, :]

x_train_df = train_df[:, [:Q1, :T1prev]]
x_train_m = x_train_df |> Matrix
x_train_m = [x for x in eachrow(x_train_m)]  # one input vector per sample

y_train = train_df[:, :T1]

train_set = [(x, y) for (x, y) in zip(x_train_m, y_train)]
	
η = 1e-5
opt = Flux.setup(Adam(η), NN)
epochs = 20

losses = []

for i in 1:epochs
	local_loss = []
	for (x, y) in train_set
		loss, grads = Flux.withgradient(NN) do m
			ŷ = m(x)
			lossₜ(ŷ, y)
		end
		Flux.update!(opt, NN, grads[1])
		push!(local_loss, loss)
	end
	push!(losses, mean(local_loss))  # mean from the Statistics stdlib
end

I try to take the physical loss not from the training data set but from a set of interesting points; therefore I selected data points where the first and second derivatives are zero.
For each data point I compute:

function loss_physical(x)
	# FOPDT parameters
	Κ = 0.997887
	τ = 1439.8
	θ = -661.1

	# Q1 in the df is index 4 in the vector, T1prev is index 10
	ŷ = [x[4], x[10]] |> NN |> first
	prev_index = convert(Int, x[1]) - 1
	xₚᵣₑᵥ = static_m[prev_index, :]

	ŷₚᵣₑᵥ = [xₚᵣₑᵥ[4], xₚᵣₑᵥ[10]] |> NN |> first
	Δyₗ = ŷ - ŷₚᵣₑᵥ   # change relative to the previous point
	U = x[4]
	i₀ = x[8]  # duration

	yᵣ = (.-ŷ .+ Κ .* U .* (i₀ .- θ)) ./ τ

	return sum((Δyₗ .- yᵣ) .^ 2)
end

Perhaps there is a better way of expressing

τ_p \frac{dy(t)}{dt} = −y(t) + K_p u(t − θ_p)

The overall loss is:

loss_physical() = eachrow(sample_time_m) .|> loss_physical |> sum

loss_ratio = 1 / size(sample_time_df)[1]

loss_total(ŷ, y) = (1 - loss_ratio) * mae(ŷ, y) + loss_ratio * loss_physical()

The current problem, from my understanding, is that I compute the complete physical loss for every training point; therefore I would need to get batching working.
In fact it is so slow that I stopped it after half an hour.

Instead of doing it point-by-point via broadcast, make the input to the NN a matrix where each column is a sample and GPU it. It should be a rather quick computation like that. You’ll need to adjust [x[4], x[10]] |> NN |> first to accommodate that form.
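A sketch of that, assuming sample_time_m has Q1 in column 4 and T1prev in column 10 (as in loss_physical above), and that the network is kept Float32 and moved over with |> gpu:

X = Float32.(sample_time_m[:, [4, 10]]') |> gpu   # 2×N Matrix: one column per sample
Ŷ = NN(X)                                         # one batched forward pass → 1×N
# then compute the Δy and FOPDT residuals column-wise on Ŷ instead of per point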