Hello folks!
I’m experimenting with HTTP.jl and I’m trying to put in a place a watchdog task that would regularly check on the server, and spin up a new one if it goes down.
The issue I’m encountering is that I get this error: “Base.IOError(“listen: address already in use (EADDRINUSE)”, -48)” , despite any server being terminated - I’m saving their tasks in an array and can inspect their task status.
The only way to make it work is to restart the Julia process (I’m testing this in Pluto btw).
This is my implementation:
Watchdog function:
function start_ws_watchdog()
## on first run
# terminate any running watchdog(s) - by InterruptException
terminate_active_watchdog!()
# terminate any active websockets
terminate_active_ws!()
sleep(1)
# start new watchdog
watchdog_task = @spawn try
#ws_array |> empty!
while true
try
# start websocket server if there's none or they're all done/closed
if isempty(ws_array) || all(ws -> istaskdone(ws.task), ws_array) || all(ws -> !isopen(ws.listener.server), ws_array)
start_ws_server()
end
catch e
if isa(e, Base.IOError) && occursin("EADDRINUSE", e.msg)
# port already in use, re-start was too early
@warn "WARNING - ws start, port already in use $e"
sleep(1)
terminate_active_ws!()
else
rethrow(e)
end
end
sleep(5)
end
catch e
if isa(e, InterruptException)
@warn "LOG - Websocket watchdog terminated"
else
@error "ERROR - ws watchdog, $e"
rethrow(e)
end
end
@info "LOG - New watchdog started: $watchdog_task"
# save watchdog task reference
push!(ws_watchdog, watchdog_task)
end
WS server:
function start_ws_server()
# start new server (it spawns its on task)
ws_server = WebSockets.listen!(ws_ip, ws_port; verbose = true) do ws
_info = "LOG - New Websocket server started: $(ws_server.task)"
@info _info
for msg in ws
@spawn try
# save received messages as-is
lock(msg_lock)
push!(ws_msg_log_raw, msg)
unlock(msg_lock)
parsed_msg = JSON3.read(msg, Dict) |> dict_keys_to_sym
# save parsed message
setindex!(parsed_msg, "received", :type)
lock(msg_lock)
push!(ws_msg_log, parsed_msg)
unlock(msg_lock)
# pass msg on
msg_handler(ws, parsed_msg)
catch e
@error "ERROR - Message handler error, $e"
end
end
end
# saves server handler & task for reference
push!(ws_array, ws_server)
end
And these are the killer functions:
function terminate_active_ws!()
for ws in filter(ws -> !istaskdone(ws.task) || !isempty(ws.connections) || isopen(ws.listener.server), ws_array)
HTTP.forceclose(ws)
_num_conn = ws.connections |> length
@warn "WARNING - Terminating ws server: $(ws.task) - $_num_conn connections"
end
end
function terminate_active_watchdog!()
for task in ws_watchdog
if !istaskdone(task)
schedule(task, InterruptException(), error=true)
@warn "WARNING - Terminating ws watchdog: $task"
end
end
end
I don’t understand what I’m doing wrong.
It would work fine for a while (<1hr?), but then get unresponsive - even if server still has a running task and it’s open, and finally give this error if I try to trigger server restart through the watchdog.
Anyone encountered similar issues?