Type inference for parallel computing

I have a small program, whose prototype looks like:

# Return the product of every entry of `kpts`. `@everywhere` evaluates the definition on
# all processes, so it is available wherever `@spawn` runs it.
@everywhere _calculate(kpts::Matrix{Float64}) = prod(kpts)

# Build a 3 x prod(nkmesh) matrix of grid points, split its columns into one contiguous
# chunk per worker, run `_calculate` on every chunk via `@spawn`, and return the sum of
# the fetched results.
#
# `nkmesh` is indexed at positions 1, 2 and 3 (the number of grid steps along each axis).
# This is the version whose `@code_warntype` output is shown below: `sigma` is inferred
# as `Any`, and `iworker` / `wnkpts` appear as `Core.Box`.
function calculate(nkmesh::Vector{Int64})
    nkpts = prod(nkmesh)
    kpts = zeros(3, nkpts)
    # `ik` is the column of `kpts` that the next grid point is written to.
    ik = 1
    for ikx in 1:nkmesh[1]
        for iky in 1:nkmesh[2]
            for ikz in 1:nkmesh[3]
                # Each coordinate is (index - 1) / (steps along that axis).
                kpts[:, ik] = [(ikx-1)/nkmesh[1], (iky-1)/nkmesh[2], (ikz-1)/nkmesh[3]]
                ik += 1
            end
        end
    end
    # Number of columns handed to each worker, rounded down.
    wnkpts = floor(Int64, nkpts/nworkers())
    sigma = 0.0
    results = Vector{Future}()
    for iworker in 1:nworkers()
        # `@spawn` wraps its expression in a closure; that closure captures `kpts`,
        # `wnkpts` and (in the first branch) `iworker`.
        if iworker < nworkers()
            r = @spawn _calculate(kpts[:, ((iworker-1)*wnkpts+1):(iworker)*wnkpts])
        else
            # The last worker's range runs to `nkpts`, so it also takes the leftover
            # columns when `nkpts` is not a multiple of the worker count.
            r = @spawn _calculate(kpts[:, ((nworkers()-1)*wnkpts+1):nkpts])
        end
        append!(results, [r])
    end
    for iworker in 1:nworkers()
        # The value returned by `fetch` has no inferred concrete type here, which is
        # what turns `sigma` into `Any` in the output below.
        sigma += fetch(results[iworker])
    end
    return sigma
end

However, the compiler does not seem to be able to infer the types in `calculate` (note `sigma::Any` and the `Core.Box` variables in the output below):

@code_warntype calculate([10, 10, 10])
Variables:
  #self# <optimized out>
  nkmesh::Array{Int64,1}
  ikz::Int64
  #temp#@_4::Int64
  iky::Int64
  #temp#@_6::Int64
  ikx::Int64
  #temp#@_8::Int64
  #83::##83#85{Array{Float64,2}}
  #84::##84#86{Int64,Array{Float64,2}}
  iworker@_11::Core.Box
  r::Future
  #temp#@_13::Int64
  iworker@_14::Int64
  #temp#@_15::Int64
  nkpts::Int64
  kpts::Array{Float64,2}
  ik::Int64
  wnkpts::Core.Box
  sigma::Any
  results::Array{Future,1}
  n@_22::Int64
  #temp#@_23::Int64
  n@_24::Int64
  #temp#@_25::Int64
  n@_26::Int64
  #temp#@_27::Int64
  n@_28::Int64
  #temp#@_29::Int64

Body:
  begin 
      wnkpts::Core.Box = $(Expr(:new, :(Core.Box)))
      NewvarNode(:(sigma::Any))
      NewvarNode(:(results::Array{Future,1}))
      nkpts::Int64 = $(Expr(:invoke, MethodInstance for _mapreduce(::Base.#identity, ::Base.#*, ::IndexLinear, ::Array{Int64,1}), :(Base._mapreduce), :(Base.identity), :(Base.*), :($(QuoteNode(IndexLinear()))), :(nkmesh))) # line 7:
      SSAValue(46) = nkpts::Int64
      $(Expr(:inbounds, false))
      # meta: location array.jl zeros 266
      # meta: location array.jl zeros 264
      # meta: location array.jl zeros 263
      SSAValue(19) = 3
      SSAValue(20) = SSAValue(46)
      # meta: pop location
      # meta: pop location
      # meta: pop location
      $(Expr(:inbounds, :pop))
      kpts::Array{Float64,2} = $(Expr(:invoke, MethodInstance for fill!(::Array{Float64,2}, ::Float64), :(Base.fill!), :($(Expr(:foreigncall, :(:jl_alloc_array_2d), Array{Float64,2}, svec(Any, Int64, Int64), Array{Float64,2}, 0, SSAValue(19), 0, SSAValue(20), 0))), :((Base.sitofp)(Float64, 0)::Float64))) # line 8:
      ik::Int64 = 1 # line 9:
      SSAValue(21) = (Base.arrayref)(nkmesh::Array{Int64,1}, 1)::Int64
      SSAValue(47) = (Base.select_value)((Base.sle_int)(1, SSAValue(21))::Bool, SSAValue(21), (Base.sub_int)(1, 1)::Int64)::Int64
      #temp#@_8::Int64 = 1
      24: 
      unless (Base.not_int)((#temp#@_8::Int64 === (Base.add_int)(SSAValue(47), 1)::Int64)::Bool)::Bool goto 112
      SSAValue(48) = #temp#@_8::Int64
      SSAValue(49) = (Base.add_int)(#temp#@_8::Int64, 1)::Int64
      ikx::Int64 = SSAValue(48)
      #temp#@_8::Int64 = SSAValue(49) # line 10:
      SSAValue(22) = (Base.arrayref)(nkmesh::Array{Int64,1}, 2)::Int64
      SSAValue(50) = (Base.select_value)((Base.sle_int)(1, SSAValue(22))::Bool, SSAValue(22), (Base.sub_int)(1, 1)::Int64)::Int64
      #temp#@_6::Int64 = 1
      34: 
      unless (Base.not_int)((#temp#@_6::Int64 === (Base.add_int)(SSAValue(50), 1)::Int64)::Bool)::Bool goto 110
      SSAValue(51) = #temp#@_6::Int64
      SSAValue(52) = (Base.add_int)(#temp#@_6::Int64, 1)::Int64
      iky::Int64 = SSAValue(51)
      #temp#@_6::Int64 = SSAValue(52) # line 11:
      SSAValue(23) = (Base.arrayref)(nkmesh::Array{Int64,1}, 3)::Int64
      SSAValue(53) = (Base.select_value)((Base.sle_int)(1, SSAValue(23))::Bool, SSAValue(23), (Base.sub_int)(1, 1)::Int64)::Int64
      #temp#@_4::Int64 = 1
      44: 
      unless (Base.not_int)((#temp#@_4::Int64 === (Base.add_int)(SSAValue(53), 1)::Int64)::Bool)::Bool goto 108
      SSAValue(54) = #temp#@_4::Int64
      SSAValue(55) = (Base.add_int)(#temp#@_4::Int64, 1)::Int64
      ikz::Int64 = SSAValue(54)
      #temp#@_4::Int64 = SSAValue(55) # line 12:
      SSAValue(6) = $(Expr(:invoke, MethodInstance for vect(::Float64, ::Vararg{Float64,N} where N), :(Base.vect), :((Base.div_float)((Base.sitofp)(Float64, (Base.sub_int)(ikx, 1)::Int64)::Float64, (Base.sitofp)(Float64, (Base.arrayref)(nkmesh, 1)::Int64)::Float64)::Float64), :((Base.div_float)((Base.sitofp)(Float64, (Base.sub_int)(iky, 1)::Int64)::Float64, (Base.sitofp)(Float64, (Base.arrayref)(nkmesh, 2)::Int64)::Float64)::Float64), :((Base.div_float)((Base.sitofp)(Float64, (Base.sub_int)(ikz, 1)::Int64)::Float64, (Base.sitofp)(Float64, (Base.arrayref)(nkmesh, 3)::Int64)::Float64)::Float64)))
      # meta: location abstractarray.jl setindex! 968
      $(Expr(:inbounds, false))
      # meta: location indices.jl to_indices 213
      # meta: location abstractarray.jl indices 64
      SSAValue(27) = (Base.arraysize)(kpts::Array{Float64,2}, 1)::Int64
      (Base.arraysize)(kpts::Array{Float64,2}, 2)::Int64
      # meta: pop location
      # meta: location multidimensional.jl to_indices 469
      # meta: location multidimensional.jl uncolon 479
      # meta: location indices.jl Type 233
      # meta: location indices.jl Type 233
      # meta: location range.jl convert 764
      SSAValue(29) = (Base.select_value)((Base.slt_int)(SSAValue(27), 0)::Bool, 0, SSAValue(27))::Int64
      # meta: pop location
      # meta: pop location
      # meta: pop location
      # meta: pop location
      # meta: pop location
      # meta: pop location
      $(Expr(:inbounds, :pop))
      SSAValue(40) = $(Expr(:new, Base.Slice{Base.OneTo{Int64}}, :($(Expr(:new, Base.OneTo{Int64}, :((Base.select_value)((Base.slt_int)(SSAValue(29), 0)::Bool, 0, SSAValue(29))::Int64))))))
      SSAValue(41) = ik::Int64
      $(Expr(:inbounds, false))
      # meta: location multidimensional.jl _setindex! 537
      # meta: location multidimensional.jl # line 540:
      SSAValue(32) = (Core.tuple)(SSAValue(40), SSAValue(41))::Tuple{Base.Slice{Base.OneTo{Int64}},Int64}
      # meta: location abstractarray.jl checkbounds 362
      # meta: location abstractarray.jl checkbounds 342
      # meta: location abstractarray.jl indices 64
      (Base.arraysize)(kpts::Array{Float64,2}, 1)::Int64
      SSAValue(38) = (Base.arraysize)(kpts::Array{Float64,2}, 2)::Int64
      # meta: pop location
      # meta: location abstractarray.jl checkbounds_indices 389
      # meta: location abstractarray.jl checkbounds_indices 402
      SSAValue(39) = (Core.getfield)(SSAValue(32), 2)::Int64
      # meta: pop location
      # meta: pop location
      # meta: pop location
      SSAValue(33) = (Base.and_int)(true, (Base.and_int)((Base.sle_int)(1, SSAValue(39))::Bool, (Base.sle_int)(SSAValue(39), (Base.select_value)((Base.slt_int)(SSAValue(38), 0)::Bool, 0, SSAValue(38))::Int64)::Bool)::Bool)::Bool
      unless SSAValue(33) goto 94
      goto 96
      94: 
      $(Expr(:invoke, MethodInstance for throw_boundserror(::Array{Float64,2}, ::Tuple{Base.Slice{Base.OneTo{Int64}},Int64}), :(Base.throw_boundserror), :(kpts), SSAValue(32)))
      96: 
      # meta: pop location # line 541:
      $(Expr(:invoke, MethodInstance for _unsafe_setindex!(::IndexLinear, ::Array{Float64,2}, ::Array{Float64,1}, ::Base.Slice{Base.OneTo{Int64}}, ::Int64), :(Base._unsafe_setindex!), :($(QuoteNode(IndexLinear()))), :(kpts), SSAValue(6), SSAValue(40), SSAValue(41)))
      # meta: pop location
      # meta: pop location
      $(Expr(:inbounds, :pop))
      # meta: pop location # line 13:
      ik::Int64 = (Base.add_int)(ik::Int64, 1)::Int64
      106: 
      goto 44
      108: 
      goto 34
      110: 
      goto 24
      112:  # line 17:
      $(Expr(:inbounds, false))
      # meta: location distributed\cluster.jl nworkers 676
      n@_22::Int64 = $(Expr(:invoke, MethodInstance for nprocs(), :(Base.Distributed.nprocs))) # line 677:
      unless (n@_22::Int64 === 1)::Bool goto 121
      #temp#@_23::Int64 = 1
      goto 123
      121: 
      #temp#@_23::Int64 = (Base.sub_int)(n@_22::Int64, 1)::Int64
      123: 
      # meta: pop location
      $(Expr(:inbounds, :pop))
      SSAValue(7) = $(Expr(:invoke, MethodInstance for trunc(::Type{Int64}, ::Float64), :(Base.trunc), Int64, :((Base.floor_llvm)((Base.div_float)((Base.sitofp)(Float64, nkpts)::Float64, (Base.sitofp)(Float64, #temp#@_23)::Float64)::Float64)::Float64)))
      (Core.setfield!)(wnkpts::Core.Box, :contents, SSAValue(7))::Int64 # line 18:
      sigma::Any = 0.0 # line 19:
      results::Array{Future,1} = $(Expr(:foreigncall, :(:jl_alloc_array_1d), Array{Future,1}, svec(Any, Int64), Array{Future,1}, 0, 0, 0)) # line 20:
      $(Expr(:inbounds, false))
      # meta: location distributed\cluster.jl nworkers 676
      n@_24::Int64 = $(Expr(:invoke, MethodInstance for nprocs(), :(Base.Distributed.nprocs))) # line 677:
      unless (n@_24::Int64 === 1)::Bool goto 140
      #temp#@_25::Int64 = 1
      goto 142
      140: 
      #temp#@_25::Int64 = (Base.sub_int)(n@_24::Int64, 1)::Int64
      142: 
      # meta: pop location
      $(Expr(:inbounds, :pop))
      SSAValue(56) = (Base.select_value)((Base.sle_int)(1, #temp#@_25::Int64)::Bool, #temp#@_25::Int64, (Base.sub_int)(1, 1)::Int64)::Int64
      #temp#@_13::Int64 = 1
      147: 
      unless (Base.not_int)((#temp#@_13::Int64 === (Base.add_int)(SSAValue(56), 1)::Int64)::Bool)::Bool goto 196
      NewvarNode(:(#83::##83#85{Array{Float64,2}}))
      NewvarNode(:(#84::##84#86{Int64,Array{Float64,2}}))
      iworker@_11::Core.Box = $(Expr(:new, :(Core.Box)))
      NewvarNode(:(r::Future))
      SSAValue(57) = #temp#@_13::Int64
      SSAValue(58) = (Base.add_int)(#temp#@_13::Int64, 1)::Int64
      SSAValue(10) = SSAValue(57)
      (Core.setfield!)(iworker@_11::Core.Box, :contents, SSAValue(10))::Int64
      #temp#@_13::Int64 = SSAValue(58) # line 21:
      SSAValue(43) = (Core.getfield)(iworker@_11::Core.Box, :contents)::Any
      $(Expr(:inbounds, false))
      # meta: location distributed\cluster.jl nworkers 676
      n@_26::Int64 = $(Expr(:invoke, MethodInstance for nprocs(), :(Base.Distributed.nprocs))) # line 677:
      unless (n@_26::Int64 === 1)::Bool goto 167
      #temp#@_27::Int64 = 1
      goto 169
      167: 
      #temp#@_27::Int64 = (Base.sub_int)(n@_26::Int64, 1)::Int64
      169: 
      # meta: pop location
      $(Expr(:inbounds, :pop))
      unless (SSAValue(43) < #temp#@_27::Int64)::Any goto 182 # line 22:
      #83::##83#85{Array{Float64,2}} = $(Expr(:new, ##83#85{Array{Float64,2}}, :(iworker@_11), :(kpts), :(wnkpts)))
      $(Expr(:inbounds, false))
      # meta: location distributed\macros.jl spawn_somewhere 17
      SSAValue(44) = $(Expr(:invoke, MethodInstance for nextproc(), :(Base.Distributed.nextproc)))
      # meta: pop location
      $(Expr(:inbounds, :pop))
      r::Future = $(Expr(:invoke, MethodInstance for sync_add(::Future), :(Base.Distributed.sync_add), :((Base.Distributed.remotecall)(#83, SSAValue(44))::Future)))
      goto 191
      182:  # line 24:
      #84::##84#86{Int64,Array{Float64,2}} = $(Expr(:new, ##84#86{Int64,Array{Float64,2}}, :(nkpts), :(kpts), :(wnkpts)))
      $(Expr(:inbounds, false))
      # meta: location distributed\macros.jl spawn_somewhere 17
      SSAValue(45) = $(Expr(:invoke, MethodInstance for nextproc(), :(Base.Distributed.nextproc)))
      # meta: pop location
      $(Expr(:inbounds, :pop))
      r::Future = $(Expr(:invoke, MethodInstance for sync_add(::Future), :(Base.Distributed.sync_add), :((Base.Distributed.remotecall)(#84, SSAValue(45))::Future)))
      191:  # line 26:
      $(Expr(:invoke, MethodInstance for append!(::Array{Future,1}, ::Array{Future,1}), :(Main.append!), :(results), :($(Expr(:invoke, MethodInstance for vect(::Future, ::Vararg{Future,N} where N), :(Base.vect), :(r))))))
      194: 
      goto 147
      196:  # line 28:
      $(Expr(:inbounds, false))
      # meta: location distributed\cluster.jl nworkers 676
      n@_28::Int64 = $(Expr(:invoke, MethodInstance for nprocs(), :(Base.Distributed.nprocs))) # line 677:
      unless (n@_28::Int64 === 1)::Bool goto 205
      #temp#@_29::Int64 = 1
      goto 207
      205: 
      #temp#@_29::Int64 = (Base.sub_int)(n@_28::Int64, 1)::Int64
      207: 
      # meta: pop location
      $(Expr(:inbounds, :pop))
      SSAValue(59) = (Base.select_value)((Base.sle_int)(1, #temp#@_29::Int64)::Bool, #temp#@_29::Int64, (Base.sub_int)(1, 1)::Int64)::Int64
      #temp#@_15::Int64 = 1
      212: 
      unless (Base.not_int)((#temp#@_15::Int64 === (Base.add_int)(SSAValue(59), 1)::Int64)::Bool)::Bool goto 222
      SSAValue(60) = #temp#@_15::Int64
      SSAValue(61) = (Base.add_int)(#temp#@_15::Int64, 1)::Int64
      iworker@_14::Int64 = SSAValue(60)
      #temp#@_15::Int64 = SSAValue(61) # line 29:
      sigma::Any = (sigma::Any + $(Expr(:invoke, MethodInstance for fetch(::Future), :(Main.fetch), :((Base.arrayref)(results, iworker@_14)::Future))))::Any
      220: 
      goto 212
      222:  # line 31:
      return sigma::Any
  end::Any

I assume this is because Julia doesn't know the return type of `fetch`. (Maybe I am wrong here.) If so, how can I fix this?

The causes seem to be `fetch` and Julia issue #15276 (variables captured by a closure get boxed). The version below is mostly type-stable; only the intermediate result of `fetch` is not.

"""
    calculate(nkmesh::Vector{Int64})

Build the 3 x `prod(nkmesh)` matrix of grid points `(i - 1) / nkmesh[d]`, split its columns
into one contiguous chunk per worker, evaluate `_calculate` on each chunk remotely, and
return the sum of the per-chunk results as a `Float64`.

`nkmesh` is read at indices 1, 2 and 3 (grid steps along each axis). The last worker also
receives the leftover columns when the column count is not a multiple of `nworkers()`.
"""
function calculate(nkmesh::Vector{Int64})
    nkpts = prod(nkmesh)
    kpts = zeros(3, nkpts)
    ik = 1  # column of `kpts` that the next grid point is written to
    for ikx in 1:nkmesh[1], iky in 1:nkmesh[2], ikz in 1:nkmesh[3]
        # Write the three components directly instead of allocating a temporary
        # 3-element vector for every grid point.
        kpts[1, ik] = (ikx - 1) / nkmesh[1]
        kpts[2, ik] = (iky - 1) / nkmesh[2]
        kpts[3, ik] = (ikz - 1) / nkmesh[3]
        ik += 1
    end

    # Read the pool size once, on the calling process, so every chunk boundary is
    # computed from the same value.
    nw = nworkers()
    wnkpts = div(nkpts, nw)  # columns per worker; exact integer division

    results = Vector{Future}()
    for iworker in 1:nw
        lo = (iworker - 1) * wnkpts + 1
        hi = iworker < nw ? iworker * wnkpts : nkpts  # last worker takes the remainder
        # Slice on the caller and bind the chunk to a fresh `let` variable: the closure
        # created by `@spawn` then captures only this chunk (not the whole `kpts`), and
        # the captured binding is never reassigned, so it is not boxed.
        let chunk = kpts[:, lo:hi]
            r = @spawn _calculate(chunk)
            push!(results, r)
        end
    end

    sigma = 0.0
    for r in results
        # The type of a fetched value is not inferred; convert and assert so that
        # `sigma` stays a `Float64`.
        sigma += convert(Float64, fetch(r))::Float64
    end
    return sigma
end