Multithreading with GC fails

I have the following function:

function getMeasurementsP(mx, my, nM, zh, wf::WindFarm, set::Settings, floris::Floris, wind::Wind)
    size_mx = size(mx)
    mz = zeros(size_mx[1], size_mx[2], nM)
    
    # Create thread-local buffers to avoid race conditions
    thread_buffers = [deepcopy(wf) for _ in 1:nthreads()]
    
    # Pre-allocate arrays for each thread buffer
    original_nT = wf.nT
    
    # Pre-allocate computation buffers for each thread
    thread_comp_buffers = Vector{NamedTuple}(undef, nthreads())
    
    for tid in 1:nthreads()
        GP = thread_buffers[tid]
        GP.nT = original_nT + 1  # Original turbines + 1 grid point
        
        # Pre-allocate dependency structure
        GP.dep = Vector{Vector{Int64}}(undef, GP.nT)
        for i in 1:original_nT
            GP.dep[i] = Int64[]  # Original turbines are independent (no dependencies)
        end
        GP.dep[end] = collect(1:original_nT)  # Grid point depends on all original turbines
        
        # Pre-allocate extended arrays
        GP.StartI = hcat(wf.StartI, [wf.StartI[end] + 1])
        GP.posBase = vcat(wf.posBase, zeros(1, 3))  # Will be updated for each grid point
        GP.posNac = vcat(wf.posNac, zeros(1, 3))   # Will be updated for each grid point
        GP.States_T = vcat(wf.States_T, zeros(1, size(wf.States_T, 2)))
        GP.D = vcat(wf.D, [0.0])  # Grid point has 0 diameter
        
        # Ensure all necessary fields are copied to avoid shared references
        if hasfield(typeof(wf), :intOPs)
            GP.intOPs = deepcopy(wf.intOPs)
        end
        
        # Pre-allocate buffers for non-allocating interpolateOPs!
        GP.intOPs = [zeros(length(GP.dep[iT]), 4) for iT in 1:GP.nT]
        dist_buffer = zeros(wf.nOP)
        sorted_indices_buffer = zeros(Int, wf.nOP)
        
        thread_comp_buffers[tid] = (
            dist_buffer = dist_buffer,
            sorted_indices_buffer = sorted_indices_buffer
        )
    end
    
    # Parallel loop using @threads
    GC.enable(false)
    @threads for iGP in 1:length(mx)
        # Get thread-local buffers
        tid = threadid()
        GP = thread_buffers[tid]
        buffers = thread_comp_buffers[tid]
        
        xGP = mx[iGP]
        yGP = my[iGP]
        
        # Thread-safe updates: create copies to avoid modifying shared arrays
        GP.posBase[end, 1] = xGP
        GP.posBase[end, 2] = yGP
        GP.posBase[end, 3] = 0.0
        
        GP.posNac[end, 1] = 0.0
        GP.posNac[end, 2] = 0.0
        GP.posNac[end, 3] = zh
        
        # Reset the grid point state (thread-safe element-wise assignment)
        for j in 1:size(GP.States_T, 2)
            GP.States_T[end, j] = 0.0
        end
        
        # Recalculate interpolated OPs for the updated geometry (non-allocating)
        interpolateOPs!(GP.intOPs, GP, buffers.dist_buffer, buffers.sorted_indices_buffer)

        tmpM, _ = setUpTmpWFAndRun(set, GP, floris, wind)
        
        # Extract only the result for the grid point (last "turbine")
        @views gridPointResult = tmpM[end, :]
        
        # Convert linear index to subscripts (thread-safe)
        rw, cl = divrem(iGP - 1, size_mx[1])
        rw += 1
        cl += 1
        
        # Thread-safe assignment using @views to avoid race conditions
        @views mz[rw, cl, 1:3] .= gridPointResult
    end
    GC.enable(true)
    
    return mz

It is a bit ugly because I try to pre-allocate everything before the loop, but it works. On the other hand, it only works with GC disabled. I disable it before the loop and enable it after the loop. If I don’t do that, it crashes.

So I think the loop is still not really threadsafe. What am I doing wrong?

I know I should use OMyThreads.jl, I will try that next, but even with normal threads it should be possible to write this function in a thread-safe way, or not?

Two ways you could try :
1- use @threads :static to make sure the threadid points to the hardware thread and never get rescheduled.
2- use the new OncePerThread() function and remove all call to threadid which is discouraged

Sidenote : can you make sure your struct are well parameterized feels weird the GC need to do something in there and yes GC leads to rescheduling of the ids but it can happen randomly too which is why 1 or better 2 is important

1 Like

As Yolhan explained, the use of threadid here is quite dangerous. Tasks can migrate between threads.

See this blog post for additional details:

1 Like

This doesn’t help, it still crashes with GC enabled.

Ok keep it though, remove the @threads and try to do either a @profview_allocs on vscode or simply a @code_warntype

julia> @code_warntype measurements = FLORIDyn.getMeasurementsP(mx, my, nM, zh, wf, set, floris, wind)
MethodInstance for FLORIDyn.getMeasurementsP(::Matrix{Float64}, ::Matrix{Float64}, ::Int64, ::Float64, ::WindFarm, ::Settings, ::Floris, ::Wind)
  from getMeasurementsP(mx, my, nM, zh, wf::WindFarm, set::Settings, floris::Floris, wind::Wind) @ FLORIDyn ~/repos/FLORIDyn.jl/src/visualisation/calc_flowfield.jl:193
Arguments
  #self#::Core.Const(FLORIDyn.getMeasurementsP)
  mx::Matrix{Float64}
  my::Matrix{Float64}
  nM::Int64
  zh::Float64
  wf::WindFarm
  set::Settings
  floris::Floris
  wind::Wind
Locals
  @_10::Union{Nothing, Tuple{Int64, Int64}}
  @_11::Union{Nothing, Tuple{Int64, Int64}}
  #69::FLORIDyn.var"#69#71"{WindFarm}
  thread_comp_buffers::Vector{NamedTuple}
  original_nT::Int64
  thread_buffers::Vector{WindFarm}
  mz::Array{Float64, 3}
  size_mx::Tuple{Int64, Int64}
  #70::FLORIDyn.var"#70#72"{WindFarm}
  @_19::Union{Nothing, Tuple{Int64, Int64}}
  tid@_20::Int64
  sorted_indices_buffer::Vector{Int64}
  dist_buffer::Vector{Float64}
  GP@_23::WindFarm
  i::Int64
  @_25::Int64
  @_26::Int64
  @_27::Union{Nothing, Tuple{Int64, Int64}}
  iGP::Int64
  cl::Int64
  rw::Int64
  gridPointResult::SubArray{Float64, 1, Matrix{Float64}, Tuple{Int64, Base.Slice{Base.OneTo{Int64}}}, true}
  tmpM::Matrix{Float64}
  yGP::Float64
  xGP::Float64
  buffers::NamedTuple
  GP@_36::WindFarm
  tid@_37::Int64
  j::Int64
Body::Array{Float64, 3}
1 ──        Core.NewvarNode(:(@_10))
β”‚    %2   = FLORIDyn.size::Core.Const(size)
β”‚           (size_mx = (%2)(mx))
β”‚    %4   = FLORIDyn.zeros::Core.Const(zeros)
β”‚    %5   = size_mx::Tuple{Int64, Int64}
β”‚    %6   = Base.getindex(%5, 1)::Int64
β”‚    %7   = size_mx::Tuple{Int64, Int64}
β”‚    %8   = Base.getindex(%7, 2)::Int64
β”‚           (mz = (%4)(%6, %8, nM))
β”‚    %10  = FLORIDyn.:(var"#69#71")::Core.Const(FLORIDyn.var"#69#71")
β”‚    %11  = Core.typeof(wf)::Core.Const(WindFarm)
β”‚    %12  = Core.apply_type(%10, %11)::Core.Const(FLORIDyn.var"#69#71"{WindFarm})
β”‚           (#69 = %new(%12, wf))
β”‚    %14  = #69::FLORIDyn.var"#69#71"{WindFarm}
β”‚    %15  = FLORIDyn.:(:)::Core.Const(Colon())
β”‚    %16  = FLORIDyn.nthreads::Core.Const(Base.Threads.nthreads)
β”‚    %17  = (%16)()::Int64
β”‚    %18  = (%15)(1, %17)::Core.PartialStruct(UnitRange{Int64}, Any[Core.Const(1), Int64])
β”‚    %19  = Base.Generator(%14, %18)::Core.PartialStruct(Base.Generator{UnitRange{Int64}, FLORIDyn.var"#69#71"{WindFarm}}, Any[FLORIDyn.var"#69#71"{WindFarm}, Core.PartialStruct(UnitRange{Int64}, Any[Core.Const(1), Int64])])
β”‚           (thread_buffers = Base.collect(%19))
β”‚           (original_nT = Base.getproperty(wf, :nT))
β”‚    %22  = FLORIDyn.NamedTuple::Core.Const(NamedTuple)
β”‚    %23  = Core.apply_type(FLORIDyn.Vector, %22)::Core.Const(Vector{NamedTuple})
β”‚    %24  = FLORIDyn.undef::Core.Const(UndefInitializer())
β”‚    %25  = FLORIDyn.nthreads::Core.Const(Base.Threads.nthreads)
β”‚    %26  = (%25)()::Int64
β”‚           (thread_comp_buffers = (%23)(%24, %26))
β”‚    %28  = FLORIDyn.:(:)::Core.Const(Colon())
β”‚    %29  = FLORIDyn.nthreads::Core.Const(Base.Threads.nthreads)
β”‚    %30  = (%29)()::Int64
β”‚    %31  = (%28)(1, %30)::Core.PartialStruct(UnitRange{Int64}, Any[Core.Const(1), Int64])
β”‚           (@_11 = Base.iterate(%31))
β”‚    %33  = @_11::Union{Nothing, Tuple{Int64, Int64}}
β”‚    %34  = (%33 === nothing)::Bool
β”‚    %35  = Base.not_int(%34)::Bool
└───        goto #9 if not %35
2 ┄─        Core.NewvarNode(:(#70))
β”‚           Core.NewvarNode(:(sorted_indices_buffer))
β”‚           Core.NewvarNode(:(dist_buffer))
β”‚    %40  = @_11::Tuple{Int64, Int64}
β”‚           (tid@_20 = Core.getfield(%40, 1))
β”‚    %42  = Core.getfield(%40, 2)::Int64
β”‚    %43  = thread_buffers::Vector{WindFarm}
β”‚    %44  = tid@_20::Int64
β”‚           (GP@_23 = Base.getindex(%43, %44))
β”‚    %46  = FLORIDyn.:+::Core.Const(+)
β”‚    %47  = original_nT::Int64
β”‚    %48  = (%46)(%47, 1)::Int64
β”‚    %49  = GP@_23::WindFarm
β”‚           Base.setproperty!(%49, :nT, %48)
β”‚    %51  = FLORIDyn.Vector::Core.Const(Vector)
β”‚    %52  = Core.apply_type(FLORIDyn.Vector, FLORIDyn.Int64)::Core.Const(Vector{Int64})
β”‚    %53  = Core.apply_type(%51, %52)::Core.Const(Vector{Vector{Int64}})
β”‚    %54  = FLORIDyn.undef::Core.Const(UndefInitializer())
β”‚    %55  = GP@_23::WindFarm
β”‚    %56  = Base.getproperty(%55, :nT)::Int64
β”‚    %57  = (%53)(%54, %56)::Vector{Vector{Int64}}
β”‚    %58  = GP@_23::WindFarm
β”‚           Base.setproperty!(%58, :dep, %57)
β”‚    %60  = FLORIDyn.:(:)::Core.Const(Colon())
β”‚    %61  = original_nT::Int64
β”‚    %62  = (%60)(1, %61)::Core.PartialStruct(UnitRange{Int64}, Any[Core.Const(1), Int64])
β”‚           (@_19 = Base.iterate(%62))
β”‚    %64  = @_19::Union{Nothing, Tuple{Int64, Int64}}
β”‚    %65  = (%64 === nothing)::Bool
β”‚    %66  = Base.not_int(%65)::Bool
└───        goto #5 if not %66
3 ┄─ %68  = @_19::Tuple{Int64, Int64}
β”‚           (i = Core.getfield(%68, 1))
β”‚    %70  = Core.getfield(%68, 2)::Int64
β”‚    %71  = Base.getindex(FLORIDyn.Int64)::Vector{Int64}
β”‚    %72  = GP@_23::WindFarm
β”‚    %73  = Base.getproperty(%72, :dep)::Vector{Vector{Int64}}
β”‚    %74  = i::Int64
β”‚           Base.setindex!(%73, %71, %74)
β”‚           (@_19 = Base.iterate(%62, %70))
β”‚    %77  = @_19::Union{Nothing, Tuple{Int64, Int64}}
β”‚    %78  = (%77 === nothing)::Bool
β”‚    %79  = Base.not_int(%78)::Bool
└───        goto #5 if not %79
4 ──        goto #3
5 ┄─ %82  = GP@_23::WindFarm
β”‚    %83  = Base.getproperty(%82, :dep)::Vector{Vector{Int64}}
β”‚    %84  = FLORIDyn.collect::Core.Const(collect)
β”‚    %85  = FLORIDyn.:(:)::Core.Const(Colon())
β”‚    %86  = original_nT::Int64
β”‚    %87  = (%85)(1, %86)::Core.PartialStruct(UnitRange{Int64}, Any[Core.Const(1), Int64])
β”‚    %88  = (%84)(%87)::Vector{Int64}
β”‚    %89  = Base.lastindex(%83)::Int64
β”‚           Base.setindex!(%83, %88, %89)
β”‚    %91  = FLORIDyn.hcat::Core.Const(hcat)
β”‚    %92  = Base.getproperty(wf, :StartI)::Matrix{Int64}
β”‚    %93  = FLORIDyn.:+::Core.Const(+)
β”‚    %94  = Base.getproperty(wf, :StartI)::Matrix{Int64}
β”‚    %95  = Base.lastindex(%94)::Int64
β”‚    %96  = Base.getindex(%94, %95)::Int64
β”‚    %97  = (%93)(%96, 1)::Int64
β”‚    %98  = Base.vect(%97)::Vector{Int64}
β”‚    %99  = (%91)(%92, %98)::Matrix{Int64}
β”‚    %100 = GP@_23::WindFarm
β”‚           Base.setproperty!(%100, :StartI, %99)
β”‚    %102 = FLORIDyn.vcat::Core.Const(vcat)
β”‚    %103 = Base.getproperty(wf, :posBase)::Matrix{Float64}
β”‚    %104 = FLORIDyn.zeros::Core.Const(zeros)
β”‚    %105 = (%104)(1, 3)::Matrix{Float64}
β”‚    %106 = (%102)(%103, %105)::Matrix{Float64}
β”‚    %107 = GP@_23::WindFarm
β”‚           Base.setproperty!(%107, :posBase, %106)
β”‚    %109 = FLORIDyn.vcat::Core.Const(vcat)
β”‚    %110 = Base.getproperty(wf, :posNac)::Matrix{Float64}
β”‚    %111 = FLORIDyn.zeros::Core.Const(zeros)
β”‚    %112 = (%111)(1, 3)::Matrix{Float64}
β”‚    %113 = (%109)(%110, %112)::Matrix{Float64}
β”‚    %114 = GP@_23::WindFarm
β”‚           Base.setproperty!(%114, :posNac, %113)
β”‚    %116 = FLORIDyn.vcat::Core.Const(vcat)
β”‚    %117 = Base.getproperty(wf, :States_T)::Matrix{Float64}
β”‚    %118 = FLORIDyn.zeros::Core.Const(zeros)
β”‚    %119 = FLORIDyn.size::Core.Const(size)
β”‚    %120 = Base.getproperty(wf, :States_T)::Matrix{Float64}
β”‚    %121 = (%119)(%120, 2)::Int64
β”‚    %122 = (%118)(1, %121)::Matrix{Float64}
β”‚    %123 = (%116)(%117, %122)::Matrix{Float64}
β”‚    %124 = GP@_23::WindFarm
β”‚           Base.setproperty!(%124, :States_T, %123)
β”‚    %126 = FLORIDyn.vcat::Core.Const(vcat)
β”‚    %127 = Base.getproperty(wf, :D)::Vector{Float64}
β”‚    %128 = Base.vect(0.0)::Vector{Float64}
β”‚    %129 = (%126)(%127, %128)::Vector{Float64}
β”‚    %130 = GP@_23::WindFarm
β”‚           Base.setproperty!(%130, :D, %129)
β”‚    %132 = FLORIDyn.hasfield::Core.Const(hasfield)
β”‚    %133 = FLORIDyn.typeof::Core.Const(typeof)
β”‚    %134 = (%133)(wf)::Core.Const(WindFarm)
β”‚    %135 = (%132)(%134, :intOPs)::Core.Const(true)
└───        goto #7 if not %135
6 ── %137 = FLORIDyn.deepcopy::Core.Const(deepcopy)
β”‚    %138 = Base.getproperty(wf, :intOPs)::Vector{Matrix{Float64}}
β”‚    %139 = (%137)(%138)::Vector{Matrix{Float64}}
β”‚    %140 = GP@_23::WindFarm
└───        Base.setproperty!(%140, :intOPs, %139)
7 ┄─ %142 = FLORIDyn.:(var"#70#72")::Core.Const(FLORIDyn.var"#70#72")
β”‚    %143 = GP@_23::WindFarm
β”‚    %144 = Core.typeof(%143)::Core.Const(WindFarm)
β”‚    %145 = Core.apply_type(%142, %144)::Core.Const(FLORIDyn.var"#70#72"{WindFarm})
β”‚    %146 = GP@_23::WindFarm
β”‚           (#70 = %new(%145, %146))
β”‚    %148 = #70::FLORIDyn.var"#70#72"{WindFarm}
β”‚    %149 = FLORIDyn.:(:)::Core.Const(Colon())
β”‚    %150 = GP@_23::WindFarm
β”‚    %151 = Base.getproperty(%150, :nT)::Int64
β”‚    %152 = (%149)(1, %151)::Core.PartialStruct(UnitRange{Int64}, Any[Core.Const(1), Int64])
β”‚    %153 = Base.Generator(%148, %152)::Core.PartialStruct(Base.Generator{UnitRange{Int64}, FLORIDyn.var"#70#72"{WindFarm}}, Any[FLORIDyn.var"#70#72"{WindFarm}, Core.PartialStruct(UnitRange{Int64}, Any[Core.Const(1), Int64])])
β”‚    %154 = Base.collect(%153)::Vector{Matrix{Float64}}
β”‚    %155 = GP@_23::WindFarm
β”‚           Base.setproperty!(%155, :intOPs, %154)
β”‚    %157 = FLORIDyn.zeros::Core.Const(zeros)
β”‚    %158 = Base.getproperty(wf, :nOP)::Int64
β”‚           (dist_buffer = (%157)(%158))
β”‚    %160 = FLORIDyn.zeros::Core.Const(zeros)
β”‚    %161 = FLORIDyn.Int::Core.Const(Int64)
β”‚    %162 = Base.getproperty(wf, :nOP)::Int64
β”‚           (sorted_indices_buffer = (%160)(%161, %162))
β”‚    %164 = (:dist_buffer, :sorted_indices_buffer)::Core.Const((:dist_buffer, :sorted_indices_buffer))
β”‚    %165 = Core.apply_type(Core.NamedTuple, %164)::Core.Const(NamedTuple{(:dist_buffer, :sorted_indices_buffer)})
β”‚    %166 = dist_buffer::Vector{Float64}
β”‚    %167 = sorted_indices_buffer::Vector{Int64}
β”‚    %168 = Core.tuple(%166, %167)::Tuple{Vector{Float64}, Vector{Int64}}
β”‚    %169 = (%165)(%168)::@NamedTuple{dist_buffer::Vector{Float64}, sorted_indices_buffer::Vector{Int64}}
β”‚    %170 = thread_comp_buffers::Vector{NamedTuple}
β”‚    %171 = tid@_20::Int64
β”‚           Base.setindex!(%170, %169, %171)
β”‚           (@_11 = Base.iterate(%31, %42))
β”‚    %174 = @_11::Union{Nothing, Tuple{Int64, Int64}}
β”‚    %175 = (%174 === nothing)::Bool
β”‚    %176 = Base.not_int(%175)::Bool
└───        goto #9 if not %176
8 ──        goto #2
9 ┄─ %179 = FLORIDyn.:(:)::Core.Const(Colon())
β”‚    %180 = FLORIDyn.length::Core.Const(length)
β”‚    %181 = (%180)(mx)::Int64
β”‚    %182 = (%179)(1, %181)::Core.PartialStruct(UnitRange{Int64}, Any[Core.Const(1), Int64])
β”‚           (@_10 = Base.iterate(%182))
β”‚    %184 = @_10::Union{Nothing, Tuple{Int64, Int64}}
β”‚    %185 = (%184 === nothing)::Bool
β”‚    %186 = Base.not_int(%185)::Bool
└───        goto #15 if not %186
10 β”„        Core.NewvarNode(:(@_25))
β”‚           Core.NewvarNode(:(@_26))
β”‚           Core.NewvarNode(:(cl))
β”‚           Core.NewvarNode(:(rw))
β”‚           Core.NewvarNode(:(gridPointResult))
β”‚           Core.NewvarNode(:(tmpM))
β”‚    %194 = @_10::Tuple{Int64, Int64}
β”‚           (iGP = Core.getfield(%194, 1))
β”‚    %196 = Core.getfield(%194, 2)::Int64
β”‚    %197 = FLORIDyn.threadid::Core.Const(Base.Threads.threadid)
β”‚           (tid@_37 = (%197)())
β”‚    %199 = thread_buffers::Vector{WindFarm}
β”‚    %200 = tid@_37::Int64
β”‚           (GP@_36 = Base.getindex(%199, %200))
β”‚    %202 = thread_comp_buffers::Vector{NamedTuple}
β”‚    %203 = tid@_37::Int64
β”‚           (buffers = Base.getindex(%202, %203))
β”‚    %205 = iGP::Int64
β”‚           (xGP = Base.getindex(mx, %205))
β”‚    %207 = iGP::Int64
β”‚           (yGP = Base.getindex(my, %207))
β”‚    %209 = GP@_36::WindFarm
β”‚    %210 = Base.getproperty(%209, :posBase)::Matrix{Float64}
β”‚    %211 = xGP::Float64
β”‚    %212 = Base.lastindex(%210, 1)::Int64
β”‚           Base.setindex!(%210, %211, %212, 1)
β”‚    %214 = GP@_36::WindFarm
β”‚    %215 = Base.getproperty(%214, :posBase)::Matrix{Float64}
β”‚    %216 = yGP::Float64
β”‚    %217 = Base.lastindex(%215, 1)::Int64
β”‚           Base.setindex!(%215, %216, %217, 2)
β”‚    %219 = GP@_36::WindFarm
β”‚    %220 = Base.getproperty(%219, :posBase)::Matrix{Float64}
β”‚    %221 = Base.lastindex(%220, 1)::Int64
β”‚           Base.setindex!(%220, 0.0, %221, 3)
β”‚    %223 = GP@_36::WindFarm
β”‚    %224 = Base.getproperty(%223, :posNac)::Matrix{Float64}
β”‚    %225 = Base.lastindex(%224, 1)::Int64
β”‚           Base.setindex!(%224, 0.0, %225, 1)
β”‚    %227 = GP@_36::WindFarm
β”‚    %228 = Base.getproperty(%227, :posNac)::Matrix{Float64}
β”‚    %229 = Base.lastindex(%228, 1)::Int64
β”‚           Base.setindex!(%228, 0.0, %229, 2)
β”‚    %231 = GP@_36::WindFarm
β”‚    %232 = Base.getproperty(%231, :posNac)::Matrix{Float64}
β”‚    %233 = Base.lastindex(%232, 1)::Int64
β”‚           Base.setindex!(%232, zh, %233, 3)
β”‚    %235 = FLORIDyn.:(:)::Core.Const(Colon())
β”‚    %236 = FLORIDyn.size::Core.Const(size)
β”‚    %237 = GP@_36::WindFarm
β”‚    %238 = Base.getproperty(%237, :States_T)::Matrix{Float64}
β”‚    %239 = (%236)(%238, 2)::Int64
β”‚    %240 = (%235)(1, %239)::Core.PartialStruct(UnitRange{Int64}, Any[Core.Const(1), Int64])
β”‚           (@_27 = Base.iterate(%240))
β”‚    %242 = @_27::Union{Nothing, Tuple{Int64, Int64}}
β”‚    %243 = (%242 === nothing)::Bool
β”‚    %244 = Base.not_int(%243)::Bool
└───        goto #13 if not %244
11 β”„ %246 = @_27::Tuple{Int64, Int64}
β”‚           (j = Core.getfield(%246, 1))
β”‚    %248 = Core.getfield(%246, 2)::Int64
β”‚    %249 = GP@_36::WindFarm
β”‚    %250 = Base.getproperty(%249, :States_T)::Matrix{Float64}
β”‚    %251 = Base.lastindex(%250, 1)::Int64
β”‚    %252 = j::Int64
β”‚           Base.setindex!(%250, 0.0, %251, %252)
β”‚           (@_27 = Base.iterate(%240, %248))
β”‚    %255 = @_27::Union{Nothing, Tuple{Int64, Int64}}
β”‚    %256 = (%255 === nothing)::Bool
β”‚    %257 = Base.not_int(%256)::Bool
└───        goto #13 if not %257
12 ─        goto #11
13 β”„ %260 = FLORIDyn.interpolateOPs!::Core.Const(FLORIDyn.interpolateOPs!)
β”‚    %261 = GP@_36::WindFarm
β”‚    %262 = Base.getproperty(%261, :intOPs)::Vector{Matrix{Float64}}
β”‚    %263 = GP@_36::WindFarm
β”‚    %264 = buffers::NamedTuple
β”‚    %265 = Base.getproperty(%264, :dist_buffer)::Any
β”‚    %266 = buffers::NamedTuple
β”‚    %267 = Base.getproperty(%266, :sorted_indices_buffer)::Any
β”‚           (%260)(%262, %263, %265, %267)
β”‚    %269 = GP@_36::WindFarm
β”‚    %270 = FLORIDyn.setUpTmpWFAndRun(set, %269, floris, wind)::Tuple{Matrix{Float64}, WindFarm}
β”‚    %271 = Base.indexed_iterate(%270, 1)::Core.PartialStruct(Tuple{Matrix{Float64}, Int64}, Any[Matrix{Float64}, Core.Const(2)])
β”‚           (tmpM = Core.getfield(%271, 1))
β”‚           (@_26 = Core.getfield(%271, 2))
β”‚    %274 = @_26::Core.Const(2)
β”‚    %275 = Base.indexed_iterate(%270, 2, %274)::Core.PartialStruct(Tuple{WindFarm, Int64}, Any[WindFarm, Core.Const(3)])
β”‚           Core.getfield(%275, 1)
β”‚    %277 = tmpM::Matrix{Float64}
β”‚    %278 = tmpM::Matrix{Float64}
β”‚    %279 = (lastindex)(%278, 1)::Int64
β”‚    %280 = FLORIDyn.:(:)::Core.Const(Colon())
β”‚           (gridPointResult = (Base.maybeview)(%277, %279, %280))
β”‚    %282 = FLORIDyn.divrem::Core.Const(divrem)
β”‚    %283 = FLORIDyn.:-::Core.Const(-)
β”‚    %284 = iGP::Int64
β”‚    %285 = (%283)(%284, 1)::Int64
β”‚    %286 = size_mx::Tuple{Int64, Int64}
β”‚    %287 = Base.getindex(%286, 1)::Int64
β”‚    %288 = (%282)(%285, %287)::Tuple{Int64, Int64}
β”‚    %289 = Base.indexed_iterate(%288, 1)::Core.PartialStruct(Tuple{Int64, Int64}, Any[Int64, Core.Const(2)])
β”‚           (rw = Core.getfield(%289, 1))
β”‚           (@_25 = Core.getfield(%289, 2))
β”‚    %292 = @_25::Core.Const(2)
β”‚    %293 = Base.indexed_iterate(%288, 2, %292)::Core.PartialStruct(Tuple{Int64, Int64}, Any[Int64, Core.Const(3)])
β”‚           (cl = Core.getfield(%293, 1))
β”‚    %295 = FLORIDyn.:+::Core.Const(+)
β”‚    %296 = rw::Int64
β”‚           (rw = (%295)(%296, 1))
β”‚    %298 = FLORIDyn.:+::Core.Const(+)
β”‚    %299 = cl::Int64
β”‚           (cl = (%298)(%299, 1))
β”‚    %301 = mz::Array{Float64, 3}
β”‚    %302 = rw::Int64
β”‚    %303 = cl::Int64
β”‚    %304 = FLORIDyn.:(:)::Core.Const(Colon())
β”‚    %305 = (%304)(1, 3)::Core.Const(1:3)
β”‚    %306 = Base.dotview(%301, %302, %303, %305)::Core.PartialStruct(SubArray{Float64, 1, Array{Float64, 3}, Tuple{Int64, Int64, UnitRange{Int64}}, true}, Any[Array{Float64, 3}, Core.PartialStruct(Tuple{Int64, Int64, UnitRange{Int64}}, Any[Int64, Int64, Core.Const(1:3)]), Int64, Int64])
β”‚    %307 = gridPointResult::SubArray{Float64, 1, Matrix{Float64}, Tuple{Int64, Base.Slice{Base.OneTo{Int64}}}, true}
β”‚    %308 = Base.broadcasted(Base.identity, %307)::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(identity), Tuple{SubArray{Float64, 1, Matrix{Float64}, Tuple{Int64, Base.Slice{Base.OneTo{Int64}}}, true}}}
β”‚           Base.materialize!(%306, %308)
β”‚           (@_10 = Base.iterate(%182, %196))
β”‚    %311 = @_10::Union{Nothing, Tuple{Int64, Int64}}
β”‚    %312 = (%311 === nothing)::Bool
β”‚    %313 = Base.not_int(%312)::Bool
└───        goto #15 if not %313
14 ─        goto #10
15 β”„ %316 = mz::Array{Float64, 3}
└───        return %316

In red:
β”‚ %264 = buffers::NamedTuple
β”‚ %265 = Base.getproperty(%264, :dist_buffer)::Any
β”‚ %266 = buffers::NamedTuple
β”‚ %267 = Base.getproperty(%266, :sorted_indices_buffer)::Any
β”‚ (%260)(%262, %263, %265, %267)

seems not bad, if you’re on 1.12, you may need to add a thread this works :

mutable struct WindFarm
    nT ::Int64
    StartI ::Matrix{Int64}
    posBase ::Matrix{Float64}
    posNac :: Matrix{Float64}
    States_T ::Matrix{Float64}
    D ::Vector{Float64}
    dep ::Vector{Vector{Int64}}
    intOPs ::Vector{Matrix{Float64}}
    nOP ::Int64
end

mx = zeros(10, 10)
my = zeros(10, 10)
nM = 10
wf = WindFarm(1, zeros(1, 3), zeros(1, 3), zeros(1, 3), zeros(1, 3), [1.0], [Int64[]], [zeros(1,3)],3)
zh = 1.0
using Base.Threads

function getMeasurementsP(mx, my, nM, zh, wf::WindFarm)
    size_mx = size(mx)
    mz = zeros(size_mx[1], size_mx[2], nM)
    nth = nthreads() + 1
    
    # Create thread-local buffers to avoid race conditions
    thread_buffers = [deepcopy(wf) for _ in 1:nth]
    
    # Pre-allocate arrays for each thread buffer
    original_nT = wf.nT
    # Pre-allocate computation buffers for each thread
    thread_comp_buffers = Vector{NamedTuple}(undef, nth)
    
    for tid in 1:nth
        GP = thread_buffers[tid]
        GP.nT = original_nT + 1  # Original turbines + 1 grid point
        
        # Pre-allocate dependency structure
        GP.dep = Vector{Vector{Int64}}(undef, GP.nT)
        for i in 1:original_nT
            GP.dep[i] = Int64[]  # Original turbines are independent (no dependencies)
        end
        GP.dep[end] = collect(1:original_nT)  # Grid point depends on all original turbines
        
        # Pre-allocate extended arrays
        GP.StartI = hcat(wf.StartI, [wf.StartI[end] + 1])
        GP.posBase = vcat(wf.posBase, zeros(1, 3))  # Will be updated for each grid point
        GP.posNac = vcat(wf.posNac, zeros(1, 3))   # Will be updated for each grid point
        GP.States_T = vcat(wf.States_T, zeros(1, size(wf.States_T, 2)))
        GP.D = vcat(wf.D, [0.0])  # Grid point has 0 diameter
        
        # Ensure all necessary fields are copied to avoid shared references
        if hasfield(typeof(wf), :intOPs)
            GP.intOPs = deepcopy(wf.intOPs)
        end
        
        # Pre-allocate buffers for non-allocating interpolateOPs!
        GP.intOPs = [zeros(length(GP.dep[iT]), 4) for iT in 1:GP.nT]
        dist_buffer = zeros(wf.nOP)
        sorted_indices_buffer = zeros(Int, wf.nOP)
        
        thread_comp_buffers[tid] = (
            dist_buffer = dist_buffer,
            sorted_indices_buffer = sorted_indices_buffer
        )
    end
    
    # Parallel loop using @threads
    # GC.enable(false)
    @threads :static for iGP in 1:length(mx)
        # Get thread-local buffers
        tid = threadid()
        GP = thread_buffers[tid]
        buffers = thread_comp_buffers[tid]
        
        xGP = mx[iGP]
        yGP = my[iGP]
        
        # Thread-safe updates: create copies to avoid modifying shared arrays
        GP.posBase[end, 1] = xGP
        GP.posBase[end, 2] = yGP
        GP.posBase[end, 3] = 0.0
        
        GP.posNac[end, 1] = 0.0
        GP.posNac[end, 2] = 0.0
        GP.posNac[end, 3] = zh
        
        # Reset the grid point state (thread-safe element-wise assignment)
        for j in 1:size(GP.States_T, 2)
            GP.States_T[end, j] = 0.0
        end
        
        # Recalculate interpolated OPs for the updated geometry (non-allocating)
        # interpolateOPs!(GP.intOPs, GP, buffers.dist_buffer, buffers.sorted_indices_buffer)
        # tmpM, _ = setUpTmpWFAndRun(set, GP, floris, wind)
        
        # Extract only the result for the grid point (last "turbine")
         #tmpM[end, :]
        
        # Convert linear index to subscripts (thread-safe)
        rw, cl = divrem(iGP - 1, size_mx[1])
        rw += 1
        cl += 1
        gridPointResult = 0.0
        
        # Thread-safe assignment using @views to avoid race conditions
        @views mz[rw, cl, 1:3] .= gridPointResult
    end
    # GC.enable(true)
    return mz
end

getMeasurementsP(mx, my, nM, zh, wf)

try to add nth = nthreads() + 1 and use that everywhere you did nthreads

can you show us the structs though just to make sure

Fixing the type instability also fixed the problem of crashing with GC active. It also improved the speed.

So for now, on Julia 1.11 all works fine. Still need to test on Julia 1.12.

EDIT: While it works on my desktop, it still crashes on my laptop.

I found the reason: I am using ControlPlots.jl, which is based on PyPlot.jl for live plotting. When the GC collects a Python object from another thread than the main thread, Julia crashes.

Workaround: Disable the GC during the multi-threaded operations. Not nice, because it reduces the performance a lot.

So I need to look for alternatives, either using another plotting library, or (more likely) to do plotting in a separate process.

1 Like