I am going through the very excellent Algorithms course, in Coursera, and am trying to implement all of the problems in Julia, so that I can learn about algorithms and practice with Julia. A couple of weeks back, we were asked to implement Kosaraju’s algorithm to find the Strongly Connected Components (SCCs) in a graph. The problem came with a very huge dataset. I basically ripped off most of the code from here. This works for small graphs correctly. However, I get a stack overflow error. I am working with VSCode, and am unable to even debug the code to see what is happening.
Here is the code:
f = open("source.txt");
txt = readlines(f);
close(f);
N = 875714;
# Extract Data
graphTxtArray = split.(txt);
graphNumArray = [parse.(Int64, strArr) for strArr in graphTxtArray];
# Parse data and convert to Dict.
# Creating a Dict for forward and reverse graphs
graph = Dict(i=>Int64[] for i in 1:N);
rev_graph = Dict(i=>Int64[] for i in 1:N);
for numArr in graphNumArray
if numArr[1] ≠ numArr[2]
append!(graph[numArr[1]], numArr[2]);
append!(rev_graph[numArr[2]], numArr[1]);
end
end
# Function to determine finishing times
function fillOrder(g, v, visited, v_stack)
visited[v] = 1
for i in g[v]
if visited[i] == 0
fillOrder(g, i, visited, v_stack)
end
end
return append!(v_stack, v), visited
end
# DFS
function DFSUtil(g, v, j, visited, SCC)
visited[v]= 1
if j in keys(SCC)
append!(SCC[j], v)
else
SCC[j] = [v]
end
for i in g[v]
if visited[i] == 0
DFSUtil(g, i, j, visited, SCC)
end
end
return SCC
end
# Main function
# Run DFS on forward graph and determine fill order
# Use fill order to run DFS on reverse graph
function printSCCs(g1, g2)
v_stack = Int64[];
SCCs = Dict();
visited = zeros(Int64, length(g1));
for i in range(1, length=length(g1))
if visited[i] ≠ 1
v_stack, visited = fillOrder(g1, i, visited, v_stack);
end
end
visited = zeros(Int64, length(g1));
for j in range(1, length=length(g2))
i = pop!(v_stack);
if visited[i] == 0
SCCs = DFSUtil(g2, i, j, visited, SCCs);
end
end
return SCCs
end
SCCs = printSCCs(graph, rev_graph)
top5len = sort([length(val) for val in values(SCCs)], rev=true)
#println(top5len)
println("Done!")
For a graph with 12 nodes, as shown below, I get the following correct output. It is an array containing the lengths of the SCCs in descending order:
Input:
1 2
2 3
2 4
2 5
3 6
4 5
4 7
5 2
5 6
5 7
6 3
6 8
7 8
7 10
8 7
9 7
10 9
10 11
11 12
12 10
Output:
[6, 3, 2, 1]
Done!
However, when I try to run the script with the actual data, I get the following error:
ERROR: LoadError: StackOverflowError:
Stacktrace:
[1] fillOrder(g::Dict{Int64, Vector{Int64}}, v::Int64, visited::Vector{Int64}, v_stack::Vector{Int64})
@ Main C:\Users\DannOfThursday\Desktop\Julia\kosaraju.jl:22
[2] fillOrder(g::Dict{Int64, Vector{Int64}}, v::Int64, visited::Vector{Int64}, v_stack::Vector{Int64}) (repeats 18629 times)
@ Main C:\Users\DannOfThursday\Desktop\Julia\kosaraju.jl:26
[3] printSCCs(g1::Dict{Int64, Vector{Int64}}, g2::Dict{Int64, Vector{Int64}})
@ Main C:\Users\DannOfThursday\Desktop\Julia\kosaraju.jl:55
[4] top-level scope
@ C:\Users\DannOfThursday\Desktop\Julia\kosaraju.jl:69
in expression starting at C:\Users\DannOfThursday\Desktop\Julia\kosaraju.jl:69
Any idea what I am doing wrong? I did solve the problem with a different implementation, but it did not use recursion. This has been bugging for a few weeks now and am curious to know what is going wrong.
The dataset can be found here.