I am trying to optimize the performance of the following function:
"""
    iterateOPs!(::IterateOPs_basic, wf::WindFarm, sim::Sim, floris::Floris,
                floridyn::FloriDyn)

Advance the rows of `wf.States_OP`, `wf.States_T` and `wf.States_WF` by one step,
mutating `wf`, and return `nothing`.

The steps performed are:

1. Save the rows at `wf.StartI` of all three state matrices.
2. Add the downwind step `sim.time_step * wf.States_WF[:, 1] * sim.dyn.advection` to
   column 4 of `wf.States_OP`.
3. Overwrite columns 5:6 of `wf.States_OP` with the result of `centerline(...)` and use
   the difference to the previous values as the crosswind step.
4. Update columns 1:3 of `wf.States_OP` (world coordinates) using the angle returned by
   `angSOWFA2world` applied to column 2 of `wf.States_WF`.
5. Circularly shift all three matrices down by one row and write the saved rows back at
   `wf.StartI`.
6. For every turbine, reorder its block of `wf.nOP` rows so that column 4 of
   `wf.States_OP` is ascending.

# Arguments
- `wf`: holds the state matrices, `StartI`, `nT`, `nOP` and `D`. The three `States_*`
  fields are rebound to new matrices by the circular shift.
- `sim`: provides `time_step` and `dyn.advection`.
- `floris`: forwarded unchanged to `centerline`.
- `floridyn`: not used by this method.

NOTE(review): column 1 of `States_WF` is presumably the wind speed and column 2 the wind
direction; confirm against the definition of `WindFarm`.
"""
function iterateOPs!(
    ::IterateOPs_basic, wf::WindFarm, sim::Sim, floris::Floris, floridyn::FloriDyn
)
    # Save turbine OPs. Indexing with a slice already returns a fresh array, so an
    # additional `copy` would only allocate a second time.
    tmpOPStates = wf.States_OP[wf.StartI, :]
    tmpTStates = wf.States_T[wf.StartI, :]
    tmpWFStates = wf.States_WF[wf.StartI, :]

    # Shift states
    # Downwind step (views avoid temporary copies of the columns)
    step_dw = sim.time_step .* view(wf.States_WF, :, 1) .* sim.dyn.advection
    op_dw = view(wf.States_OP, :, 4)
    op_dw .+= step_dw

    # Crosswind step
    deflection = centerline(wf.States_OP, wf.States_T, wf.States_WF, floris, wf.D[1])
    op_cw = view(wf.States_OP, :, 5:6)
    step_cw = deflection .- op_cw
    op_cw .= deflection

    # World coordinate system adjustment
    phiW = angSOWFA2world.(view(wf.States_WF, :, 2))
    step_cw1 = view(step_cw, :, 1)
    step_cw2 = view(step_cw, :, 2)
    op_x = view(wf.States_OP, :, 1)
    op_y = view(wf.States_OP, :, 2)
    op_z = view(wf.States_OP, :, 3)
    op_x .+= cos.(phiW) .* step_dw .- sin.(phiW) .* step_cw1
    op_y .+= sin.(phiW) .* step_dw .+ cos.(phiW) .* step_cw1
    op_z .+= step_cw2

    # Circshift & init first OPs. The fields are rebound to the shifted matrices
    # (rather than shifted in place) so external references to the old matrices
    # keep their previous contents.
    # OPs
    wf.States_OP = circshift(wf.States_OP, (1, 0))
    wf.States_OP[wf.StartI, :] = tmpOPStates
    # Turbines
    wf.States_T = circshift(wf.States_T, (1, 0))
    wf.States_T[wf.StartI, :] = tmpTStates
    # Wind Farm
    wf.States_WF = circshift(wf.States_WF, (1, 0))
    wf.States_WF[wf.StartI, :] = tmpWFStates

    # Check if OPs are in order
    for iT in 1:wf.nT
        inds = wf.StartI[iT]:(wf.StartI[iT] + wf.nOP - 1)
        dw = view(wf.States_OP, inds, 4)
        # `sortperm` is stable, so it yields the identity permutation exactly when
        # the column is already sorted; skip the sort and its allocations then.
        issorted(dw) && continue
        perm = inds[sortperm(dw)]
        # The right-hand sides must be copies: source and destination rows overlap.
        wf.States_OP[inds, :] = wf.States_OP[perm, :]
        wf.States_T[inds, :] = wf.States_T[perm, :]
        wf.States_WF[inds, :] = wf.States_WF[perm, :]
    end
    return nothing
end
Any suggestions?
If you want to benchmark it, I suggest the following steps:
mkdir tmp
cd tmp
git clone https://github.com/ufechner7/FLORIDyn.jl
cd FLORIDyn.jl
julia --project
And then in Julia:
using Pkg
Pkg.instantiate()
include("test/bench_iterateops.jl")
On a Ryzen 7950X I get 149 µs.
While this is not bad (it is already 2.5 times faster than Matlab), I want more speed. This example simulates a wind farm with 9 turbines, but in reality I need to simulate 100 turbines, and the execution time scales with the square of the number of turbines. In the end I need to run optimizations, and a single optimization will probably require 1000 simulations or more.
In the end I want to optimize the performance of the function `runFLORIDyn(set, wf, wind, sim, con, floridyn, floris)`. The function above is the sub-function with the highest execution time.
`runFLORIDyn` currently needs about 0.16 s to execute, but it also allocates far too much memory:
include("examples/main.jl")
(1.40 M allocations: 617.218 MiB, 20.38% gc time)