I just translated a neural network from Python to Julia. It is already significantly faster than the Python NN, but a version written in Pascal is still almost 20 times faster than the Julia one. By using the `@time`

macro I identified this code block as the main source of slow performance:

```
"""
    dosomething(weights, bias, lR, dError, inputs)

Apply one in-place gradient-descent step per layer:
`bias[i] .-= lR .* dError[i]` and
`weights[i][m, n] -= lR * dError[i][m] * inputs[i][n]`.

Mutates `weights` and `bias`; returns `nothing`.
"""
function dosomething(weights::Vector{Array{Float64,2}}, bias::Vector{Array{Float64,1}}, lR::Real, dError::Vector{Array{Float64,1}}, inputs::Vector{Array{Float64,1}})
    lLay = length(dError)
    # BUG FIX: the original `m, n = size(weights)` threw a BoundsError —
    # `weights` is a Vector, so `size` returns a 1-tuple that cannot be
    # destructured into two values (and `m`, `n` were unused anyway).
    @inbounds for i in 1:lLay
        W = weights[i]       # hoist per-layer arrays out of the inner loops
        b = bias[i]
        dE = dError[i]
        inp = inputs[i]
        for m in axes(W, 1)
            # Loop-invariant coefficient; bit-identical to the original
            # `lR * dE[m] * inp[n]` because `*` is left-associative.
            coef = lR * dE[m]
            b[m] -= coef
            @simd for n in axes(W, 2)
                W[m, n] -= coef * inp[n]
            end
        end
    end
    return nothing
end
```

This code block takes 0.2 seconds on my computer whereas all other parts of the NN take less than 0.1 seconds to run. I tried to get better performance with `@simd`

and `@inbounds`

but that didn’t improve anything. Neither did switching the loops from row-major to column-major traversal:

```
"""
    dosomething2(weights, bias, lR, dError, inputs)

Apply one in-place gradient-descent step per layer, traversing each
weight matrix in column-major order. Mutates `weights` and `bias`.
"""
function dosomething2(weights::Vector{Array{Float64,2}}, bias::Vector{Array{Float64,1}}, lR::Real, dError::Vector{Array{Float64,1}}, inputs::Vector{Array{Float64,1}})
    nlayers = length(dError)
    # First pass: bias update, one entry per weight-matrix row.
    for layer in 1:nlayers
        W = weights[layer]
        b = bias[layer]
        err = dError[layer]
        for row in axes(W, 1)
            b[row] -= lR * err[row]
        end
    end
    # Second pass: weight update, inner loop over the first index so
    # memory is visited in storage (column-major) order.
    for layer in 1:nlayers
        W = weights[layer]
        err = dError[layer]
        x = inputs[layer]
        for col in axes(W, 2), row in axes(W, 1)
            W[row, col] -= lR * err[row] * x[col]
        end
    end
end
```

Can someone show me how to get efficient code?