One-sided MPI with Subarrays

I’m having some issues getting a one-sided subarray MPI.Put to work. I’m trying to do a 2D halo exchange with an arbitrary number of halo cells. I’d like to move the jlo_edge subarray on processor 1 to the jhi_halo_edge location on processor 0 (see the listings below, where the braces mark the source and destination regions):

proc: 1
j: 10   [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
j: 9    [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
j: 8    [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
j: 7    [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
j: 6    [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
j: 5    [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
j: 4    [1.0, 1.0, {1.0, 1.0, 1.0, 1.0}, 1.0, 1.0]
j: 3    [1.0, 1.0, {1.0, 1.0, 1.0, 1.0}, 1.0, 1.0]
j: 2    [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
j: 1    [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
          ------------------------------------
i :       1    2    3    4    5    6    7    8

to

proc: 0
j: 10   [0.0, 0.0, {1.0, 1.0, 1.0, 1.0}, 0.0, 0.0]
j: 9    [0.0, 0.0, {1.0, 1.0, 1.0, 1.0}, 0.0, 0.0]
j: 8    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
j: 7    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
j: 6    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
j: 5    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
j: 4    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
j: 3    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
j: 2    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
j: 1    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
          ------------------------------------
i :       1    2    3    4    5    6    7    8

The MWE is listed here:

using MPI
# Initialize MPI before any other MPI call is made.
MPI.Init()

# Global handles read by print_array and by the script below:
# the world communicator, the number of ranks in it, and this process's rank.
const comm = MPI.COMM_WORLD
const nprocs = MPI.Comm_size(comm)
const me = MPI.Comm_rank(comm)

"""
    print_array(U)

Print the local matrix `U` of every rank, one rank at a time.

The ranks take turns in increasing rank order, with an `MPI.Barrier` on `comm` after
each turn, so every rank must call this function. For the rank whose turn it is, the
columns `U[:, j]` are printed from the largest `j` down to `j = 1`, followed by a
ruler listing the `i` indices. Relies on the globals `comm`, `nprocs` and `me`.

Returns `nothing`.
"""
function print_array(U)
    # Ranks are numbered 0:nprocs-1; iterating up to `nprocs` would only add a
    # turn that no rank owns (one superfluous barrier).
    for proc in 0:(nprocs - 1)
        if me == proc
            println()
            println("proc: ", proc)
            # Highest j first, so the printout reads like a plot with j pointing up.
            for j in size(U, 2):-1:1
                println("j: ", j, "\t", U[:, j])
            end
            # `join` accepts the range directly; no need to materialize it.
            istr = join(1:size(U, 1), "    ")
            println("\t  " * "-"^length(istr))
            println("i :\t  ", istr)
            println()
        end
        # Hold every rank here until the current one has finished its turn.
        MPI.Barrier(comm)
    end
    return nothing
end

# Local grid: ni x nj cells, of which nhalo cells on each side are meant as halo.
ni = 8
nj = 10
nhalo = 2
U = zeros(ni, nj)
# Fill the array with this rank's number so the origin of each value is visible.
U .= me

println("Before")
print_array(U)

# Expose the whole local array U to the other ranks through an RMA window.
win = MPI.Win_create(U, comm)

# Source strip: rows nhalo+1:end-nhalo, columns nhalo:2nhalo-1.
# NOTE(review): for nhalo = 2 this selects columns 2:3, whereas the sketch in the
# post marks j = 3 and 4 -- confirm whether nhalo+1:2nhalo was intended.
jlo_edge = @view U[nhalo+1:end-nhalo, nhalo:nhalo + nhalo - 1]
jlo_buf = MPI.Buffer(jlo_edge)
# Destination strip. NOTE(review): end-nhalo:end spans nhalo+1 columns (8:10 here),
# not nhalo; this view is not used anywhere below.
jhi_halo_edge = @view U[nhalo+1:end-nhalo, end-nhalo:end]

target_proc = 0
# Number of elements in the source view.
count = length(jlo_edge)
# Zero-based linear offset of element (nhalo+1, end-nhalo+1) inside U.
target_disp = LinearIndices(U)[nhalo+1, end-nhalo+1] - 1

# Halo exchange
# The two fences delimit the access epoch in which the Put is issued.
MPI.Win_fence(0, win)

# This works, but does not put the subarray like I expect it should
# NOTE(review): there is no rank guard, so every rank (rank 0 included) issues this
# Put towards rank 0 -- confirm that is intended.
MPI.Put(jlo_edge, count, target_proc, target_disp, win)

# This results in an error
# MPI.Put(jlo_buf, count, target_proc, target_disp, win)

MPI.Win_fence(0, win)

println("After")
print_array(U)

# NOTE(review): `win` is not explicitly freed before MPI is finalized.
MPI.Finalize()

which gives me

proc: 0
j: 10   [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
j: 9    [0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
j: 8    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
j: 7    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
j: 6    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
j: 5    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
j: 4    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
j: 3    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
j: 2    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
j: 1    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
          ------------------------------------
i :       1    2    3    4    5    6    7    8

This makes it look as if the subarray is just made into a 1D vector. I don’t want to include the corner halo region. If I try to use the Buffer type and un-comment the line MPI.Put(jlo_buf, count, target_proc, target_disp, win), I get the error:

ERROR: LoadError: MethodError: Cannot `convert` an object of type MPI.Buffer{Matrix{Float64}} to an object of type MPI.MPIPtr

What is the correct way to do this?

What version of MPI.jl are you using? I recently changed how MPI.Put works (https://github.com/JuliaParallel/MPI.jl/pull/467).

If you are using the latest version, then count is no longer part of the API:
https://juliaparallel.github.io/MPI.jl/stable/onesided/#MPI.Put
(it is determined automatically from the length of the SubArray)

Apologies for such a late response! This was a side project that got put on the back-burner. Updating MPI.jl did indeed fix the issue. Thanks for your help and work on MPI.jl!

1 Like

The essential functionality is provided by the following function, which uses MPI.Get and MPI.Put:

"""
    sync_2d_edges(A::AbstractArray, nhalo::Int)

Exchange the `nhalo`-wide edge strips of `A` along its last two dimensions (`i`, `j`)
with the neighboring ranks, using one-sided MPI.

Within one fence epoch on a window over `A`, each rank

- `MPI.Get`s into its own `ilo`/`jlo` halo strips from its `ilo`/`jlo` neighbor, and
- `MPI.Put`s its own `ilo`/`jlo` edge strips to the same neighbors.

Corner regions are not part of any strip. Index ranges come from `lo_indices` and
`hi_indices`; neighbors and the communicator come from `global_domain()`.

The call is collective over `global_domain().comm`: the window creation, both fences
and the window release must be executed by every rank.

Returns `nothing`.
"""
function sync_2d_edges(A::AbstractArray, nhalo::Int)
    # Get the start/end indices for subarray/view extraction of the halo regions
    nd = ndims(A) # local name chosen so that Base.ndims is not shadowed
    ilo_halo_start, ilo_halo_end, ilo_dom_start, _ = lo_indices(nhalo)
    jlo_halo_start, jlo_halo_end, jlo_dom_start, _ = lo_indices(nhalo)
    _, ihi_dom_end, ihi_halo_start, _ = hi_indices(A, nd - 1, nhalo)
    _, jhi_dom_end, jhi_halo_start, _ = hi_indices(A, nd, nhalo)

    # Create the edge (sent) and halo (received) views; corners are excluded
    ilo_edge = @view A[.., ilo_dom_start:ilo_dom_start + nhalo - 1, jlo_dom_start:jhi_dom_end]
    jlo_edge = @view A[.., ilo_dom_start:ihi_dom_end, jlo_dom_start:jlo_dom_start + nhalo - 1]
    ilo_halo_edge = @view A[.., ilo_halo_start:ilo_halo_end, jlo_dom_start:jhi_dom_end]
    jlo_halo_edge = @view A[.., ilo_dom_start:ihi_dom_end, jlo_halo_start:jlo_halo_end]

    # Linear positions within the window of the first element of each region.
    # With leading dimensions `LinearIndices(A)[.., i, j]` is an array rather than a
    # single index; `first` picks its first entry (and is a no-op on the scalar that
    # results for a 2-D `A`), so the displacements below are always plain integers.
    dom_corner_pos = first(LinearIndices(A)[.., ilo_dom_start, jlo_dom_start])
    ihi_halo_pos = first(LinearIndices(A)[.., ihi_halo_start, jlo_dom_start])
    jhi_halo_pos = first(LinearIndices(A)[.., ilo_dom_start, jhi_halo_start])

    # Displacement, in elements, applied on the target side of each transfer.
    # The same value is used for the Get and the Put of one direction, exactly as in
    # the previous version of this function.
    # NOTE(review): that is only right if the lo halo starts `nhalo` cells before the
    # domain and the hi halo directly after it -- confirm against lo_indices and
    # hi_indices.
    i_disp = ihi_halo_pos - dom_corner_pos
    j_disp = jhi_halo_pos - dom_corner_pos

    # Create the MPI subarray buffers for transfer
    ilo_buf = MPI.Buffer(ilo_edge)
    jlo_buf = MPI.Buffer(jlo_edge)
    ilo_halo_buf = MPI.Buffer(ilo_halo_edge)
    jlo_halo_buf = MPI.Buffer(jlo_halo_edge)

    # Neighbor ranks: row 1 holds the lo-side neighbors, row 2 the hi-side ones
    domain = global_domain()
    ilo_neighbor, jlo_neighbor, _ = domain.neighbors[1, :]
    ihi_neighbor, jhi_neighbor, _ = domain.neighbors[2, :]

    # Halo exchange inside a single fence epoch
    win = MPI.Win_create(A, domain.comm)
    MPI.Win_fence(0, win)

    MPI.Get(ilo_halo_buf, ilo_neighbor, i_disp, win) # ilo halo update
    MPI.Get(jlo_halo_buf, jlo_neighbor, j_disp, win) # jlo halo update
    MPI.Put(jlo_buf,      jlo_neighbor, j_disp, win) # jhi halo update
    MPI.Put(ilo_buf,      ilo_neighbor, i_disp, win) # ihi halo update

    MPI.Win_fence(0, win)

    # A window is created on every call, so release it here; otherwise one window
    # per call stays allocated. Freeing is collective, like the fences above.
    MPI.free(win)

    return nothing
end