I am working with some MLPs and noticed that TensorFlow is much faster than Flux. In the examples below, Flux requires about 20 minutes and TensorFlow requires just over a minute. Am I doing something incorrectly?

Thank you in advance for your feedback

**Flux**

## Summary

```julia
using MKL, Flux, Distributions, Random, ProgressMeter
using Flux: params
Random.seed!(85955)
"""
    rand_parms()

Draw one random parameter pair for a normal distribution.

Returns a named tuple `(μ, σ′)` where `μ` is sampled from `Uniform(-3, 3)` and
`σ′` from `Uniform(0.1, 2)`.
"""
function rand_parms()
    mean_draw = rand(Uniform(-3, 3))
    sd_draw = rand(Uniform(0.1, 2))
    return (μ = mean_draw, σ′ = sd_draw)
end
"""
    make_training_data(n)

Build a `3 × n` matrix of training inputs for a single random parameter pair.

One `(μ, σ′)` pair is drawn with `rand_parms()`, then `n` samples are drawn from
`Normal(μ, σ′)`. Every column of the result is `[μ, σ′, sample]`.
"""
function make_training_data(n)
    μ, σ′ = rand_parms()
    draws = rand(Normal(μ, σ′), n)
    output = Matrix{Float64}(undef, 3, n)
    # Rows 1 and 2 repeat the parameters; row 3 holds the individual samples.
    output[1, :] .= μ
    output[2, :] .= σ′
    output[3, :] .= draws
    return output
end
# number of parameter vectors for training
n_parms = 2500
# number of data points per parameter vector
n_samples = 250
# training data: one 3 × n_samples block per parameter vector, concatenated
# column-wise. `reduce(hcat, blocks)` concatenates all blocks in one pass instead
# of re-copying the growing matrix on every step as `mapreduce(..., hcat, ...)` does.
train_x = reduce(hcat, [make_training_data(n_samples) for _ in 1:n_parms])
# true values: the normal density of each sample (row 3) under its own
# parameters (rows 1 and 2), evaluated in Float64 before any down-conversion.
train_y = @views pdf.(Normal.(train_x[1, :], train_x[2, :]), train_x[3, :])
# Flux layers create Float32 parameters by default. Feeding them Float64 data
# forces mixed-precision arithmetic, which is far slower than the pure Float32
# path, so convert inputs and targets once up front.
train_x = Float32.(train_x)
train_y = Float32.(reshape(train_y, 1, :))
train_data = Flux.Data.DataLoader((train_x, train_y), batchsize=1000)
# MLP with 3 inputs, three tanh hidden layers (100, 100, 120 units) and a single
# linear output.
model = Chain(
    Dense(3, 100, tanh),
    Dense(100, 100, tanh),
    Dense(100, 120, tanh),
    Dense(120, 1, identity)
)
# loss function
loss_fn(a, b) = Flux.huber_loss(model(a), b)
# optimization algorithm
opt = ADAM(0.002)
n_epochs = 50
# Trainable parameters are collected once rather than on every epoch.
ps = params(model)
# One explicitly managed progress bar. The loop is deliberately NOT wrapped in
# `@showprogress`, which would create a second bar competing with this one.
meter = Progress(n_epochs)
train_loss = zeros(n_epochs)
for i in 1:n_epochs
    Flux.train!(loss_fn, ps, train_data, opt)
    # Full-dataset loss after this epoch, shown next to the progress bar.
    train_loss[i] = loss_fn(train_x, train_y)
    next!(meter; showvalues = [(:loss, train_loss[i])])
end
```

**TensorFlow**

## Summary

```python
import tensorflow as tf
import numpy as np
from scipy.stats import norm
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense
import matplotlib.pyplot as plt
import time
# Number of (mu, sigma) parameter pairs and number of samples drawn per pair.
n_parms = 2_500
n_points = 250
np.random.seed(5584)
# One row per sample; columns are (mu, sigma, x).
x_train = np.zeros((n_parms * n_points, 3))
for i in range(n_parms):
    mu = np.random.uniform(-3, 3)
    sigma = np.random.uniform(.1, 2)
    # Fill this parameter pair's block of rows in one go: the parameters are
    # broadcast down the block and all n_points samples come from one
    # vectorised draw instead of a Python-level inner loop.
    rows = slice(i * n_points, (i + 1) * n_points)
    x_train[rows, 0] = mu
    x_train[rows, 1] = sigma
    x_train[rows, 2] = np.random.normal(mu, sigma, size=n_points)
# Target: normal density of each sample under its own (mu, sigma).
y_train = norm.pdf(x_train[:,2], x_train[:,0], x_train[:,1])
tf.random.set_seed(63236)
# MLP with 3 inputs, three tanh hidden layers (100, 100, 120 units) and a single
# linear output.
model = Sequential([
    # Each sample is a flat vector of 3 features, matching the (n, 3) layout of
    # x_train; the previous (3, 1) declaration did not match the data fed to fit().
    Flatten(input_shape = (3,)),
    Dense(100, activation = 'tanh'),
    Dense(100, activation = 'tanh'),
    Dense(120, activation = 'tanh'),
    Dense(1, activation = 'linear')
])
model.compile(optimizer=tf.optimizers.Adam(learning_rate=.002),
              loss='huber', metrics=[tf.keras.metrics.RootMeanSquaredError()])
# Time only the call to fit(); data generation and model construction are excluded.
start_time = time.time()
losses = model.fit(x_train, y_train, epochs = 50,
                   batch_size = 1000)
end_time = time.time()
print('run time: ', end_time - start_time)
# Per-epoch training RMSE recorded by the metric configured in compile().
plt.plot(losses.history['root_mean_squared_error'])