@spawnat and remotecall equivalence?

Hi,
I am trying to understand Julia’s distributed parallel computing API, and I wonder whether I can always use remotecall in place of @spawnat.

In the following MWE, I initialize a vector on np procs and I modify these vectors with a long function before displaying the result on the master process.

It works with @spawnat but not with remotecall. I suspect that it is due to the implicit caching of fetched futures. Would it be possible to use remotecall inside the test1D_wait function?

Here is the MWE:

using Distributed

"""
    test1D(n::Int)

Create an `n`-element zero vector on every process, add `Float64(p)` in place
to the vector owned by process `p`, then display each vector from the calling
process.

Every in-place update is tracked by its own `Future` and waited on before the
vectors are fetched, so the displayed values always include the update.
Returns `nothing`.
"""
function test1D(n::Int)
   # Create an n-element array on each process
   f = Future[]
   for i in 1:nprocs()
      push!(f, remotecall(zeros, i, n))
   end
   # Add Float64(process index) to each array element on the owning process.
   # Keep the future of every update so that completion can be awaited.
   updates = Future[]
   for i in 1:nprocs()
      done = @spawnat i begin
         fetch(f[i]) .+= Float64(i)
         nothing   # the update is in place; no result needs to be stored
      end
      push!(updates, done)
   end
   # Make sure all additions are finished before reading the arrays back
   foreach(wait, updates)
   # Recover and display the arrays of all processes
   for i in 1:nprocs()
      @show i,fetch(f[i])
   end
   return nothing
end

"""
    test1D_nowait(n::Int)

Create an `n`-element zero vector on every process, launch `longfunction!` on
each vector on its owning process, and immediately display the vectors from the
calling process.

The futures returned by `@spawnat` are deliberately discarded: nothing waits
for `longfunction!` to finish, so the displayed vectors may or may not contain
its modifications.
"""
function test1D_nowait(n::Int)
   # One remotely allocated n-element zero vector per process
   f = Any[remotecall(zeros, p, n) for p in 1:nprocs()]

   # Start the long-running in-place update on each owner, fire-and-forget
   for p in 1:nprocs()
      @spawnat p longfunction!(fetch(f[p]))
   end

   # Recover and display the (possibly not yet modified) arrays.
   # N.B. there is no named future for the previous update tasks,
   # so one cannot be sure that the operations are completed!
   for i in 1:nprocs()
      @show i,fetch(f[i])
   end
end

"""
    test1D_wait(n::Int)

Create an `n`-element zero vector on every process, run `longfunction!` on each
vector on its owning process, wait for all of those tasks to complete (timing
the wait with `@time`), and then display every vector from the calling process.
Returns `nothing`.
"""
function test1D_wait(n::Int)

   # Create an n-element array on each process (the length comes from `n`)
   f = Future[]
   for i in 1:nprocs()
      push!(f, remotecall(zeros, i, n))
   end

   fadd = Future[]
   for i in 1:nprocs()
      #Question: is it possible to use remote_call in place of @spawnat
      push!(fadd, @spawnat i longfunction!(fetch(f[i])))
   end
   # Wait for all long functions to finish
   @time begin
      foreach(wait, fadd)
   end
   # Recover and display the modified arrays of all processes
   for i in 1:nprocs()
      @show i,fetch(f[i])
   end

   return nothing
end

# Start three additional worker processes
addprocs(3)

# Artificially slow in-place update, defined on every process:
# sleeps for 0.1 s, then adds 1.0 to each even-indexed element of `a`.
@everywhere function longfunction!(a)
   sleep(0.1)
   # Step through the even linear indices 2, 4, ... up to length(a)
   for k in 2:2:length(a)
      a[k] += 1.0
   end
end

# Run the three variants in turn, each with 10-element vectors
for demo in (test1D, test1D_nowait, test1D_wait)
   demo(10)
end
1 Like

This seems to work:

"""
    test1D_wait(n::Int)

Create an `n`-element zero vector on every process, run `longfunction!` on each
vector on its owning process via `remotecall` with a `do` block, wait for all of
those calls to complete (timing the wait with `@time`), and then display every
vector from the calling process. Returns `nothing`.
"""
function test1D_wait(n::Int)

    # Create an n-element array on each process (the length comes from `n`)
    f = Future[]
    for i in 1:nprocs()
        push!(f, remotecall(zeros, i, n))
    end

    fadd = Future[]
    for i in 1:nprocs()
        # Question: is it possible to use remote_call in place of @spawnat?

        # Version 1: does not work
        # (fetch(f[i]) is an argument, so it is evaluated by the caller
        #  before remotecall runs)
        # fut = remotecall(longfunction!, i, fetch(f[i]))

        # Version 2: works
        # (the closure is what gets sent, so fetch(f[i]) runs on process i)
        fut = remotecall(i) do
            longfunction!(fetch(f[i]))
        end
        push!(fadd, fut)
    end

    # Wait for all long functions to finish
    @time begin
        foreach(wait, fadd)
    end

    # Recover and display the modified arrays of all processes
    for i in 1:nprocs()
        @show i,fetch(f[i])
    end

    return nothing
end
julia> test1D_wait(10)
  0.109823 seconds (222 allocations: 7.594 KiB)
(i, fetch(f[i])) = (1, [0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0])
(i, fetch(f[i])) = (2, [0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0])
(i, fetch(f[i])) = (3, [0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0])
(i, fetch(f[i])) = (4, [0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0])

My interpretation (and I’d love if someone more experienced could confirm this):

  • in Version 1 above (commented out), fetch(f[i]) is evaluated early (and on the master process): since it is used as an argument to remotecall, it has to be evaluated even before remotecall begins executing. When results are finally printed out, the second call to fetch(f[i]) probably merely recalls a cached result.

  • in Version 2 above (the working one), remotecall is passed an anonymous function which closes over the future itself. fetch(f[i]) is only evaluated much later, directly on process i. When results are printed out in the end, the second call to fetch(f[i]) is actually the first to run on the master process. The new value is fetched from process i and correctly displayed.

WARNING: as I said above, I’m no expert at all on these questions, and it might very well be that my answer or my explanations are incorrect.

1 Like

Thanx !