This is Julia 1.6.1. I hit an error that seems really weird right now. The code’s basically like this:
loop
if isempty(some_set)
# Change some_set
# ...
continue
end
local a = first(some_set)
delete!(some_set, a)
# ...
end
In the first
call, I get this error: “ArgumentError: collection must be non-empty”, even though I check for that condition in the if
immediately above.
This is the full code, if anybody is willing to take a look. There’s only one call to first
:
# Copyright © 2021: Neven Sajko
#
# Main input: arguments of function 'search'
# * A file name
# * The file must be a connected partitioned word list.
# * The file name must end in this form:
# list_XXXXXX_YYYYYY.txt
# * XXXXXX is the count of word classes in the list.
# * YYYYYY is just some number unique to the list.
# * Two natural numbers M and N. M must be less than N. The
# numbers define the range M:N, from which the roots of the
# search are taken. E.g., if M is 2 and N is 3: the roots are
# 2 and 3, i.e., the second word class in the file and the
# third word class in the file.
# * A natural number: the minimal length that a Kaladont sequence
# has to have for us to be interested in it.
#
# Main output: ./data/abstract_kaladont_sequences/seq_XXXXXX_YYYYYY_ZZZZZ_TTTT.txt
# * ZZZZZ is the count of words in the sequence.
# * TTTT is the thread ID of the thread which found the sequence.
# This just serves so I wouldn't have to synchronize the
# threads.
#
# Additional I/O: ./data/sequence_root_explored/done_XXXXXX_YYYYYY_CCCCCC
# * All of the files in the directory sequence_root_explored are
# read and need to have that naming scheme.
# * CCCCCC denotes the class of words within the list which was
# already fully explored as a root for the search.
# * As output, this is a sort of progress report.
# * As input, this allows restarting the program without needing
# to unnecessarily explore options which were explored already
# in previous runs.
# * Note: these are just empty files, only their names are
# important.
#
# Additional input: ./data/exit_when_done_with_root
# * If this file is found to exist during this program's
# execution, the program exits after finishing with the current
# roots of the search. (There can be multiple current roots
# because of multiple parallel threads of execution each
# starting from a distinct root.)
module FindAbstractKaladontSequences
using Base.Threads, Printf, StaticArrays
export search
const TwoChars = Tuple{UInt8, UInt8}
# A two-character prefix and a two-character suffix.
const PreSuf = Tuple{TwoChars, TwoChars}
struct Graph
# The count of words in each word class.
presuf_to_class_size :: Dict{PreSuf, Int32}
# The index of each word class.
presuf_to_index :: Dict{PreSuf, Int16}
# The word class that correspond to an index.
index_to_presuf :: Vector{PreSuf}
# A sort of adjacency list.
#
# Maps a prefix to all word classes which have that prefix.
prefix_to_presuf :: SMatrix{27, 27, Set{PreSuf}, 27^2}
end
function encodedCharacter(c)
local plain = UInt8(c)
(plain == UInt8('-')) && (return UInt8(27))
UInt8(plain - UInt8('a') + 1)
end
function decodedCharacter(encoded)
(encoded == UInt8(27)) && (return UInt8('-'))
UInt8(encoded + UInt8('a') - 1)
end
presuf(s::String) = (encodedCharacter.((s[1], s[2])),
encodedCharacter.((s[length(s) - 1], s[length(s)])))
function presuf_for_humans(ps::PreSuf)
local d = (Char.(decodedCharacter.(ps[1])), Char.(decodedCharacter.(ps[2])))
string(d[1][1], d[1][2], d[2][1], d[2][2])
end
function load(input_file::String)
local presuf_to_class_size = Dict{PreSuf, Int32}()
local presuf_to_index = Dict{PreSuf, Int16}()
local index_to_presuf = PreSuf[]
local prefix_to_presuf = @MMatrix[Set{PreSuf}() for _ in 1:27, _ in 1:27]
for line in eachline(input_file)
(length(line) == 0) && continue
local ps = presuf(line)
if !haskey(presuf_to_class_size, ps)
presuf_to_class_size[ps] = 0
push!(index_to_presuf, ps)
presuf_to_index[ps] = length(index_to_presuf)
end
presuf_to_class_size[ps] += 1
push!(prefix_to_presuf[ps[1][1], ps[1][2]], ps)
end
Graph(presuf_to_class_size,
presuf_to_index,
index_to_presuf,
SMatrix(prefix_to_presuf))
end
function report_seq(prefix::String, g::Graph,
walk::Vector{NamedTuple{(:walk_node, :need_traversing),
Tuple{PreSuf, Set{PreSuf}}}})
local file_name = string(prefix, Printf.@sprintf("%05d", length(walk)), "_",
Printf.@sprintf("%04d", Threads.threadid()))
Base.Filesystem.ispath(file_name) && (return nothing)
local s = ""
for node in walk
local ps = node.walk_node
s = string(s, Printf.@sprintf("%05d", g.presuf_to_index[ps]), " ",
presuf_for_humans(ps), "\n")
end
local f = open(file_name, lock = false, write = true, create = true)
write(f, s)
close(f)
return nothing
end
function update_max_visited(vert::PreSuf,
max_visited::Set{PreSuf},
visited::Dict{PreSuf, Int32},
g::Graph)
(visited[vert] == g.presuf_to_class_size[vert]) && push!(max_visited, vert)
return nothing
end
function search(input_file::String, m::Int, n::Int, minimal_length::Int)
# Check the range.
if !(1 <= m && m <= n)
print("invalid range\n")
return nothing
end
local exit_signal_file = "./data/exit_when_done_with_root"
# Check if exit_signal_file exists.
if Base.Filesystem.ispath(exit_signal_file)
print(string("delete ", exit_signal_file, " before starting the search\n"))
return nothing
end
# Load graph from input file.
local g = load(input_file)
# Check the range's upper bound.
if (length(g.index_to_presuf) < n)
print("range out of bounds\n")
return nothing
end
local io_file_name_code = input_file[(length(input_file) - 16) : (length(input_file) - 4)]
local seq_prefix = string("./data/abstract_kaladont_sequences/seq_",
io_file_name_code, "_")
local done_prefix = string("./data/sequence_root_explored/done_",
io_file_name_code, "_")
Threads.@threads for search_root in m:n
local done_signal_file = string(done_prefix, Printf.@sprintf("%06d", search_root))
# Check if the part of the search with this root is
# already done. If so, skip to the next root.
Base.Filesystem.ispath(done_signal_file) && continue
# The root vertex (word class).
local root_vert = g.index_to_presuf[search_root]
# The vertices of a walk through the graph.
local walk_stack = [(walk_node = root_vert,
need_traversing = g.prefix_to_presuf[root_vert[2][1],
root_vert[2][2]])]
# How many times was each vertex (word class) visited.
local visited = Dict{PreSuf, Int32}(root_vert => 1)
# The vertices that can't be visited again, i.e., those
# which are not potential successors.
local max_visited = Set{PreSuf}()
update_max_visited(root_vert, max_visited, visited, g)
# Search.
while !isempty(walk_stack)
if isempty(last(walk_stack).need_traversing)
# We reached a dead end, backtrack!
local v = pop!(walk_stack).walk_node
visited[v] -= 1
(visited[v] == 0) && delete!(visited, v)
delete!(max_visited, v)
continue
end
# Pick a successor node.
local suc = first(last(walk_stack).need_traversing)
delete!(last(walk_stack).need_traversing, suc)
haskey(visited, suc) || (visited[suc] = 0)
visited[suc] += 1
update_max_visited(suc, max_visited, visited, g)
local need_traversing = setdiff(g.prefix_to_presuf[suc[2][1],
suc[2][2]],
max_visited)
push!(walk_stack, (walk_node = suc,
need_traversing = need_traversing))
# Record the walk if it's long enough.
(minimal_length <= length(walk_stack)) &&
report_seq(seq_prefix, g, walk_stack)
end
# Signal that we finished the part of the search that
# starts from this root.
Base.Filesystem.touch(done_signal_file)
# If an exit was requested, kill this thread.
Base.Filesystem.ispath(exit_signal_file) && break
end
return nothing
end
end # module
In case somebody wants to run it, it requires a file like this as input in ./data/connected_partitioned_word_lists/list_000043_000001.txt
:
kabac
aca
caano
inacica
kiabudin
inace
ce-ov
ce
ice
icanovic
naakovic
ana