One-sided MPI with Subarrays

I’m having some issues getting a one-sided subarray MPI.Put to work. I’m trying to do a 2D halo exchange with an arbitrary number of halo cells. I’d like to move the jlo_edge subarray on processor 1 to the jhi_halo_edge location on processor 0 (see the listing below; the braces mark the subarray on each processor).

proc: 1
j: 10   [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
j: 9    [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
j: 8    [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
j: 7    [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
j: 6    [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
j: 5    [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
j: 4    [1.0, 1.0, {1.0, 1.0, 1.0, 1.0}, 1.0, 1.0]
j: 3    [1.0, 1.0, {1.0, 1.0, 1.0, 1.0}, 1.0, 1.0]
j: 2    [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
j: 1    [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
          ------------------------------------
i :       1    2    3    4    5    6    7    8

to

proc: 0
j: 10   [0.0, 0.0, {1.0, 1.0, 1.0, 1.0}, 0.0, 0.0]
j: 9    [0.0, 0.0, {1.0, 1.0, 1.0, 1.0}, 0.0, 0.0]
j: 8    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
j: 7    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
j: 6    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
j: 5    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
j: 4    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
j: 3    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
j: 2    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
j: 1    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
          ------------------------------------
i :       1    2    3    4    5    6    7    8

The MWE is listed here:

using MPI
# Initialize MPI before querying the communicator below.
MPI.Init()

# World communicator, the number of processes in it, and this process's rank.
# `print_array` compares `me` against process ids starting at 0.
const comm = MPI.COMM_WORLD
const nprocs = MPI.Comm_size(comm)
const me = MPI.Comm_rank(comm)

"""
    print_array(U)

Print the 2D array `U` one rank at a time, in rank order.

Each rank prints its id, then the columns of `U` from the last `j` down to `j = 1`
(so the highest `j` is at the top), followed by a ruler with the `i` indices.
All ranks hit a barrier after every turn so that the output of different ranks is
requested in rank order.

Relies on the file-level constants `comm`, `nprocs` and `me`.
"""
function print_array(U)
    # Ranks are numbered 0:nprocs-1; iterating up to `nprocs` would add a pass
    # in which no rank prints.
    for proc in 0:(nprocs - 1)
        if me == proc
            println()
            println("proc: ", proc)
            for j in size(U, 2):-1:1
                println("j: ", j, "\t", U[:, j])
            end
            # Ruler of i indices; `join` accepts the range directly.
            istr = join(1:size(U, 1), "    ")
            println("\t  " * "-"^length(istr))
            println("i :\t  ", istr)
            println()
        end
        # Collective: every rank must reach this once per turn.
        MPI.Barrier(comm)
    end
    return nothing
end

# Local array extents (ni x nj) and the halo width used by this example.
ni = 8
nj = 10
nhalo = 2
# Fill the local array with this process's rank so the source of data is visible.
U = zeros(ni, nj)
U .= me

println("Before")
print_array(U)

# Create a one-sided communication window over U on `comm`.
win = MPI.Win_create(U, comm)

# NOTE(review): with nhalo = 2 the column range below is 2:3, whereas the diagram
# above marks j = 3 and 4 (i.e. nhalo+1:2nhalo) — confirm which is intended.
jlo_edge = @view U[nhalo+1:end-nhalo, nhalo:nhalo + nhalo - 1]
jlo_buf = MPI.Buffer(jlo_edge)
# NOTE(review): `end-nhalo:end` spans nhalo + 1 columns, and this view is not
# used anywhere below.
jhi_halo_edge = @view U[nhalo+1:end-nhalo, end-nhalo:end]

target_proc = 0
count = length(jlo_edge)
# Zero-based linear offset of element (nhalo+1, end-nhalo+1) within U.
target_disp = LinearIndices(U)[nhalo+1, end-nhalo+1] - 1

# Halo exchange
MPI.Win_fence(0, win)

# This works, but does not put the subarray like I expect it should
# NOTE(review): there is no `me` check here, so every rank (including rank 0
# itself) issues this Put to `target_proc`.
MPI.Put(jlo_edge, count, target_proc, target_disp, win)

# This results in an error
# MPI.Put(jlo_buf, count, target_proc, target_disp, win)

MPI.Win_fence(0, win)

println("After")
print_array(U)

MPI.Finalize()

which gives me

proc: 0
j: 10   [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
j: 9    [0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
j: 8    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
j: 7    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
j: 6    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
j: 5    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
j: 4    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
j: 3    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
j: 2    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
j: 1    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
          ------------------------------------
i :       1    2    3    4    5    6    7    8

This makes it look as if the subarray is just made into a 1D vector. I don’t want to include the corner halo region. If I try to use the Buffer type and un-comment the line MPI.Put(jlo_buf, count, target_proc, target_disp, win), I get the error:

ERROR: LoadError: MethodError: Cannot `convert` an object of type MPI.Buffer{Matrix{Float64}} to an object of type MPI.MPIPtr

What is the correct way to do this?

What version of MPI.jl are you using? I recently changed how MPI.Put works (https://github.com/JuliaParallel/MPI.jl/pull/467).

If you are using the latest version, then count is no longer part of the API:
https://juliaparallel.github.io/MPI.jl/stable/onesided/#MPI.Put
(it is determined automatically from the length of the SubArray)

Apologies for such a late response! This was a side project that got put on the back burner. Updating MPI.jl did indeed fix the issue. Thanks for your help and work on MPI.jl!

The essential functionality is implemented by the following function, which uses MPI.Get and MPI.Put with subarray buffers:

"""
    sync_2d_edges(A::AbstractArray, nhalo::Int)

Exchange edge data of `A` with the low-side `i` and `j` neighbors using one-sided
`MPI.Get`/`MPI.Put` calls issued between two window fences.

The last two dimensions of `A` are treated as the `i` and `j` dimensions; any
leading dimensions are carried along via `..`. For each of `i` and `j`:

- the low halo view of `A` is the origin buffer of a `Get` from the low neighbor;
- the low edge view of `A` is the origin buffer of a `Put` to the low neighbor.

Displacements passed to `Get`/`Put` are differences of linear indices into `A`.

# Arguments
- `A`: local array; a window is created over it on `global_domain().comm`.
- `nhalo`: halo width, forwarded to `lo_indices`/`hi_indices` and used as the
  width of the edge views.

# Returns
`nothing`. `A` is modified in place by the one-sided operations.

This is a collective call: it creates, fences and frees an MPI window.
"""
function sync_2d_edges(A::AbstractArray, nhalo::Int)
    # Get the start/end indices for subarray/view extraction of the halo regions
    nd = ndims(A)  # do not shadow Base.ndims with a local of the same name
    ilo_halo_start, ilo_halo_end, ilo_dom_start, _ = lo_indices(nhalo)
    jlo_halo_start, jlo_halo_end, jlo_dom_start, _ = lo_indices(nhalo)
    _, ihi_dom_end, ihi_halo_start, _ = hi_indices(A, nd - 1, nhalo)
    _, jhi_dom_end, jhi_halo_start, _ = hi_indices(A, nd, nhalo)

    # Create the edge and halo region views (corners are excluded by using the
    # domain range in the transverse direction)
    ilo_edge = @view A[.., ilo_dom_start:(ilo_dom_start + nhalo - 1), jlo_dom_start:jhi_dom_end]
    jlo_edge = @view A[.., ilo_dom_start:ihi_dom_end, jlo_dom_start:(jlo_dom_start + nhalo - 1)]
    ilo_halo_edge = @view A[.., ilo_halo_start:ilo_halo_end, jlo_dom_start:jhi_dom_end]
    jlo_halo_edge = @view A[.., ilo_dom_start:ihi_dom_end, jlo_halo_start:jlo_halo_end]

    # Define the positions w/in the window for Get/Put operations
    ilo_edge_pos = LinearIndices(A)[.., ilo_dom_start, jlo_dom_start]
    jlo_edge_pos = LinearIndices(A)[.., ilo_dom_start, jlo_dom_start]
    ihi_halo_pos = LinearIndices(A)[.., ihi_halo_start, jlo_dom_start]
    jhi_halo_pos = LinearIndices(A)[.., ilo_dom_start, jhi_halo_start]

    # Create the MPI subarray buffers for transfer
    ilo_buf = MPI.Buffer(ilo_edge)
    jlo_buf = MPI.Buffer(jlo_edge)
    ilo_halo_buf = MPI.Buffer(ilo_halo_edge)
    jlo_halo_buf = MPI.Buffer(jlo_halo_edge)

    # Calculate the offset to move the buffers
    ilo_to_ihi_halo = ihi_halo_pos - ilo_edge_pos
    jlo_to_jhi_halo = jhi_halo_pos - jlo_edge_pos
    # NOTE(review): the Get offsets reuse the same expressions as the Put offsets
    # above — confirm this is intended rather than a copy/paste slip.
    ihi_halo_to_ilo = ihi_halo_pos - ilo_edge_pos
    jhi_halo_to_jlo = jhi_halo_pos - jlo_edge_pos

    # Halo exchange: only the low-side neighbors are addressed from this rank
    domain = global_domain()
    ilo_neighbor, jlo_neighbor, _ = domain.neighbors[1, :]

    win = MPI.Win_create(A, domain.comm)
    MPI.Win_fence(0, win)

    MPI.Get(ilo_halo_buf, ilo_neighbor, ihi_halo_to_ilo, win) # ilo halo update
    MPI.Get(jlo_halo_buf, jlo_neighbor, jhi_halo_to_jlo, win) # jlo halo update
    MPI.Put(jlo_buf,      jlo_neighbor, jlo_to_jhi_halo, win) # jhi halo update
    MPI.Put(ilo_buf,      ilo_neighbor, ilo_to_ihi_halo, win) # ihi halo update

    MPI.Win_fence(0, win)

    # A new window is created on every call; release it explicitly (collective)
    # instead of leaking one window per exchange.
    MPI.free(win)

    return nothing
end