I cloned your repo and changed the functions :
@views function iterateOPs!(::IterateOPs_basic, wf::WindFarm, sim::Sim, floris::Floris, floridyn::FloriDyn)
# Save turbine OPs
tmpOPStates = copy(wf.States_OP[wf.StartI, :])
tmpTStates = copy(wf.States_T[wf.StartI, :])
tmpWFSTates = copy(wf.States_WF[wf.StartI, :])
# Shift states
# Downwind step
step_dw = sim.time_step .* sim.dyn.advection .* wf.States_WF[:, 1]
wf.States_OP[:, 4] .+= step_dw
# Crosswind step
deflection = centerline(wf.States_OP, wf.States_T, wf.States_WF, floris, wf.D[1])
step_cw = deflection .- wf.States_OP[:, 5:6]
wf.States_OP[:, 5:6] .= deflection
# World coordinate system adjustment
@inbounds @simd for i in axes(wf.States_OP,1)
phiwi = angSOWFA2world(wf.States_WF[i, 2]) # Convert wind direction to world coordinates
cphiwi = cos(phiwi)
sphiwi = sin(phiwi)
ai = step_cw[i, 1]
sdwi = step_dw[i]
wf.States_OP[i, 1] += cphiwi * sdwi - sphiwi * ai
wf.States_OP[i, 2] += sphiwi * sdwi + cphiwi * ai
wf.States_OP[i, 3] += step_cw[i, 2]
end
# Circshift & init first OPs
# OPs
wf.States_OP = circshift(wf.States_OP, (1, 0))
wf.States_OP[wf.StartI, :] .= tmpOPStates
# Turbines
wf.States_T = circshift(wf.States_T, (1, 0))
wf.States_T[wf.StartI, :] .= tmpTStates
# Wind Farm
wf.States_WF = circshift(wf.States_WF, (1, 0))
wf.States_WF[wf.StartI, :] .= tmpWFSTates
# Check if OPs are in order
buf = zeros(Int,size(wf.States_OP, 1))
for iT in 1:wf.nT
inds = wf.StartI[iT]:(wf.StartI[iT] + wf.nOP - 1)
indOP = buf[inds]
sortperm!(indOP,wf.States_OP[inds, 4])
if ! issorted(indOP) # check if already sorted
wf.States_OP[inds, :] .= wf.States_OP[inds[indOP], :]
wf.States_T[inds, :] .= wf.States_T[inds[indOP], :]
wf.States_WF[inds, :] .= wf.States_WF[inds[indOP], :]
end
end
return nothing
end
and
function centerline(states_op, states_t, states_wf, floris, d_rotor)
N = size(states_op, 1) # number of rows
Δ = Matrix{ComplexF64}(undef, N, 2) # output
is8 = 1 / sqrt(8)
s2 = sqrt(2)
α, β = floris.alpha, floris.beta
k_a, k_b = floris.k_a, floris.k_b
for i ∈ 1:N # ← one tight SIMD loop
############# 1. unpack state rows ####################################
a = states_t[i, 1]
b = states_t[i, 2]
c = states_t[i, 3]
d = states_wf[i, 3]
OP = states_op[i, 4]
############# 2. basic derived quantities #############################
C_T = calcCt(a, b)
yaw = -deg2rad(b)
cosyaw = cos(yaw)
I = sqrt(c*c + d*d)
############# 3. core length x₀ #######################################
x₀ = (cosyaw * (1 + sqrt(1 - C_T)) /
(s2 * (α * I + β * (1 - sqrt(1 - C_T))))) * d_rotor
############# 4. wake expansion (σ_y / σ_z) ###########################
k_y = k_a * I + k_b
k_z = k_y # identical expression
diff = OP - x₀
f1 = max(diff, 0.0)
f2 = min(OP / x₀, 1.0)
σ_y = f1 * k_y + f2 * cosyaw * d_rotor * is8
σ_z = f1 * k_z + f2 * d_rotor * is8
############# 5. deflection angles ####################################
Θ = 0.3 * yaw / cosyaw * (1 - sqrt(1 - C_T * cosyaw))
############# 6. near‑field / far‑field pieces #########################
Δ_nfw = Θ * min(OP, x₀)
Δ_fw₁ = Θ / 14.7 * sqrt(cosyaw / (k_y * k_z * C_T)) *
(2.9 + 1.3 * sqrt(1 - C_T) - C_T)
num = (1.6 + sqrt(C_T)) *
(1.6 * sqrt((8 * σ_y * σ_z) / (d_rotor^2 * cosyaw)) - sqrt(C_T))
den = (1.6 - sqrt(C_T)) *
(1.6 * sqrt((8 * σ_y * σ_z) / (d_rotor^2 * cosyaw)) + sqrt(C_T))
Δ_fw₂ = log(complex(num / den, 0.0)) # keep domain identical
factor = sign(OP - x₀) / 2 + 0.5 # 0, 0.5, or 1
Δy = Δ_nfw + factor * Δ_fw₁ * Δ_fw₂ * d_rotor
############# 7. store result ##########################################
Δ[i, 1] = Δy
Δ[i, 2] = 0 # Δz was always zero in the original
end
return Δ
end
down to 27 allocs and around 1.5x performance.
what takes most of the time is the log in the centerline and the 3 circshift which I’m sure we could make so much better doing all 3 at the same time