I have the following code, my purpose is assigning the tasks to the different workers, and print the progress.
# bug.jl
# a simple calculation task
function cal_one_task(size)
a = rand(size)
sum(a)
end
# manage tasks on different workers and print progress
function auto_manger(task)
# total number of the tasks
num_of_task = length(task)
# Get the number of remote processes
numof_workers = length(workers())
result = []
workersFuture = [Future(1) for i in 1:numof_workers]
# record the position of the completed task ID in the workers() list
c = Channel{Int64}(numof_workers)
progress = 0.0
# Start numof_workers number of processes at the same time
for i in 1:numof_workers
@async begin; put!(workersFuture[i], remotecall_fetch(cal_one_task, workers()[i], task[i])); put!(c, i); end
end
# i is the index where the next data is stored in the result
# (i-1) is the number of recorded data
# and (i-1 + numof_workers) is the number of processes that have been started
# when the number of started processes is less than num_of_task, the result of the process that has completed the task is continuously recorded, and the calculation task is started after the process of completing the task
# after starting num_of_task number of processes, wait for the remaining running process (the last one) and return the result
i = 1
test_num = 0
while true
# index of the process that completed the task
j = take!(c)
# get the result
push!(result, fetch(workersFuture[j]))
workersFuture[j] = Future(1)
println("up $i #")
if i < num_of_task
@async begin; put!(workersFuture[j], remotecall_fetch(cal_one_task, workers()[j], task[i + numof_workers])); put!(c, j); end;
# test_num = test_num + 1
# println("test_num $test_num")
## print progress
if (i - 1)/num_of_task > 0.001 + progress
progress = i/num_of_task
print("\r")
print(round(progress*100, digits = 1))
print("%")
end
## print progress
else
break
end
i = i + 1
end
close(c)
print(" all done @")
println(now())
result
end
Then just run the following commands in REPL, we can start compute
using Distributed
using Dates
@everywhere include("./bug.jl")
my_tasks = [10^8 for i in 1:10];
auto_manger(my_tasks)
The incredible thing is that the above program is always stuck on the last task, always in the waiting state for j = take! (c)
What is even more bizarre is that when I remove the #
below, that is, after @async
, print the variable test_num
# test_num = test_num + 1
# println("test_num $test_num")
Then the program will end normally!