CUDA performing scalar indexing when used along with Distributed

Hello @eldee ,

Thanks a lot for the suggestions! I finally arrived at a solution that works well for my needs: using a struct to hold the “fixed params”. Here is an MWE that runs fine:

@everywhere begin
    # Callable container for the two "fixed" matrices shared by every evaluation.
    # The fields are parametric (instead of untyped, i.e. `Any`) so the concrete
    # array types are part of the struct type and the call method below can be
    # specialized on them.
    struct my_dummy_struct{A,B}
        mat_one::A
        mat_two::B
    end

    # Convenience constructor: wrap `mat_one` and `mat_two` in a `my_dummy_struct`.
    function initialize_dummy_struct(mat_one, mat_two)
        return my_dummy_struct(mat_one, mat_two)
    end

    # Evaluate the struct on `x`, with `m.mat_one` and `m.mat_two` as "fixed inputs".
    # `x` is read at positions 1 and 2 (for example a 2-tuple produced by `zip`).
    # Returns the tuple `(a, b)` where
    #   a = sum(x[1] .* (m.mat_one * m.mat_two))
    #   b = mean(x[2] .* m.mat_one)
    # NOTE: `mean` is not defined in this block; it must already be in scope on
    # every process that calls this method (e.g. `@everywhere using Statistics`).
    function (m::my_dummy_struct)(x)
        # Extract inputs from x
        variable1 = x[1]
        variable2 = x[2]

        a = sum(variable1 .* (m.mat_one * m.mat_two))
        b = mean(variable2 .* m.mat_one)
        return a, b
    end
end

# Fixed inputs, built with CUDA.jl array constructors on the main process
mat_one = CUDA.ones(2, 2)
mat_two = CuArray([2.5 3.0; 2.7 4.5])

# Build the callable struct on the main process
pmap_dummy_struct = initialize_dummy_struct(mat_one, mat_two)

# Bind the struct to the global name that the `pmap` closure below refers to
# (a plain top-level assignment already creates a global binding)
my_pmap_dummy_struct = pmap_dummy_struct

# One (variable1, variable2) pair is evaluated per `pmap` call
variable1_vec = [1.0, 2.0, 2.5]
variable2_vec = [3.0, 2.0, 3.5]

pmap(zip(variable1_vec, variable2_vec)) do x
    my_pmap_dummy_struct(x)
end
#=
3-element Vector{Tuple{Float64, Float64}}:
 (25.4, 3.0)
 (50.8, 2.0)
 (63.5, 3.5)
=#

For many of the points you asked about, the answer is simply that I am really bad at coding! (It was great to receive the comments, though, because I learnt a few things from them. Thanks for that as well!)

1 Like