The outer loop will be partitioned across the workers.
For example, with a 10xN array and 5 workers, each worker processes 2 iterations of the outer loop (i.e. each worker executes the full inner loop twice).
In general, this should work if (outer loop dimension) > (number of workers), so that every worker receives at least one outer iteration.

# parallel calculation
# Simulated unit of work for one (i, j) pair: pause for 0.01 s, then print the
# two indices back-to-back on one line (no separator between them).
# Declared through `@everywhere` so the definition exists on every process.
@everywhere function test(i, j)
    sleep(0.01)
    println(i, j)
    return nothing
end
# Inner loop for a fixed outer index `i`: the range 1:m is handed to
# `@parallel`, and `@sync` blocks until all of the spawned work has finished.
# Declared through `@everywhere` because `test_parallel_double_loop` calls it
# from inside its own parallel loop.
# NOTE(review): `@parallel` is the pre-0.7 spelling; newer Julia releases name
# it `@distributed` (Distributed stdlib) -- confirm the targeted Julia version.
@everywhere function test1(i, m)
    @sync @parallel for j in 1:m
        test(i, j)
    end
end
# Serial baseline: call `test(i, j)` for every pair with i in 1:n (outer,
# slowest-varying) and j in 1:m (inner), all on the calling process.
function test_series(n, m)
    for i in 1:n, j in 1:m
        test(i, j)
    end
    return nothing
end
# Parallelise only the outer loop: `@parallel` splits i = 1:n, and each outer
# iteration runs the whole inner loop j = 1:m serially. `@sync` waits for all
# of the spawned work before the function returns.
function test_parallel_single_loop(n, m)
    @sync @parallel for i in 1:n
        for j in 1:m
            test(i, j)
        end
    end
end
# Parallelise both levels: `@parallel` splits the outer range i = 1:n, and each
# outer iteration calls `test1(i, m)`, which runs its own `@sync @parallel`
# loop over j = 1:m. The outer `@sync` waits for all outer work to finish.
function test_parallel_double_loop(n, m)
    @sync @parallel for i in 1:n
        test1(i, m)
    end
end
# Problem size read by `calc`: n is the outer-loop length, m the inner-loop length.
n, m = 10, 10
# Time the serial, single-parallel-loop and double-parallel-loop variants, in
# that order, and repeat the whole sequence a second time. Sizes come from the
# file-level `n` and `m`.
# NOTE(review): presumably the first pass absorbs compilation cost so that the
# second pass shows steady-state timings -- confirm intent.
# Returns the value of the final `test_parallel_double_loop` call.
function calc()
    local result
    for pass in 1:2
        @time test_series(n, m)
        @time test_parallel_single_loop(n, m)
        result = @time test_parallel_double_loop(n, m)
    end
    return result
end
# Run the timing comparison when this file is executed.
calc()