Combining the vcat
, and the collect
change that I mentioned bring it down to 45 minutes. That’s pretty close to the Rust version.
Answering the questions in reverse order.
The full version of serial code with memoization
using StaticArrays
function _readline(path)::SVector{5, Char}
word = readline(path)
SVector{5, Char}(word...)
end
function _readlines(path)::Vector{SVector{5, Char}}
words = Vector{SVector{5, Char}}()
for word in readlines(path)
push!(words, SVector{5, Char}(word...))
end
println(length(words))
words
end
const words = _readlines("solutions.txt")
const non_solution_words = _readlines("non-solution-guesses.txt")
const allowed_guesses = vcat(words, non_solution_words)
@enum ConstraintType has_no has_but_not_at has_at
struct Constraint
type::ConstraintType
letter::Char
index::Int
end
function isequal(c0::ConstraintType, c1::ConstraintType)::Bool
c0.type == c1.type && c0.letter == c1.letter && (c0.type == has_no || c0.index == c1.index)
end
function hash(c::ConstraintType, h::UInt = 0)::UInt
hash(c.type, h) ⊻ hash(c.letter, h ⊻ 1) ⊻ hash(c.index, h ⊻ 2)
end
function constraints(guess::SVector{5, Char}, actual::SVector{5, Char})::Vector{Constraint}
c = Vector{Constraint}()
for (i, g, a) in zip(1:5, guess, actual)
if g == a
push!(c, Constraint(has_at, g, i))
elseif g ∈ actual
push!(c, Constraint(has_but_not_at, g, i))
else
push!(c, Constraint(has_no, g, i))
end
end
c
end
# Code: . = has_no, x = has_but_not_at, o = has_at
function parse_constraints(template::SVector{5, Char}, guess::SVector{5, Char})::Vector{Constraint}
constraints = Vector{Constraint}()
for (i, c) in zip(1:5, template)
if c == 'x'
push!(constraints, Constraint(has_but_not_at, guess[i], i))
elseif c == 'o'
push!(constraints, Constraint(has_at, guess[i], i))
else
push!(constraints, Constraint(has_no, guess[i], i))
end
end
constraints
end
function match_constraint(word::SVector{5, Char}, constraint::Constraint)::Bool
if constraint.type == has_no
constraint.letter ∉ word
elseif constraint.type == has_but_not_at
(word[constraint.index] != constraint.letter) && (constraint.letter ∈ word)
elseif constraint.type == has_at
word[constraint.index] == constraint.letter
else
@assert false
end
end
function match_constraints(word::SVector{5, Char}, constraints::Vector{Constraint})::Bool
for c in constraints
if !match_constraint(word, c)
return false
end
end
return true
end
struct Choice
word::SVector{5, Char}
avg_remaining::Float64
end
function rank_guesses(guesses::Vector{SVector{5, Char}}, words::Vector{SVector{5, Char}})::Vector{Choice}
wt = length(guesses)
avg_time = 0
memo = Dict()
choices = Vector{Choice}(undef, wt)
for (wi, guess) in enumerate(guesses)
rem = 0
time = @elapsed for actual in words
cs = constraints(guess, actual)
if !haskey(memo, cs)
rem_cs = 0
for w in words
rem_cs += match_constraints(w, cs) ? 1.0 : 0.0
end
memo[cs] = rem_cs
end
rem += memo[cs]
end
choices[wi] = Choice(guess, rem / length(words))
print("\x1b[1K\x1b[GRanking guesses... ", wi, "/", wt, " words (", Int(ceil(avg_time*(wt-wi))), " sec left)")
avg_time = (wi*avg_time + time) / (wi+1)
end
print("\x1b[1K\x1b[G")
sort!(choices, by = c -> c.avg_remaining)
end
function main()
global words; global allowed_guesses
remaining_words = copy(words) # List of words that currently fit all known constraints.
while length(remaining_words) > 1
best_guesses = rank_guesses(allowed_guesses, remaining_words)[1:10]
for (i, g) in zip(1:10, best_guesses)
println(i, ". ", g.word, " (keeps ", g.avg_remaining, " words on average)")
end
println("Insert your guess: ")
guess = _readline(stdin)
println("Insert the results (o = letter at right spot, x = wrong spot, . = not in the word): ")
constraint_template = _readline(stdin)
remaining_words = filter(w -> match_constraints(w, parse_constraints(constraint_template, guess)), remaining_words)
end
println("Solution: ", remaining_words[1], ".")
end
runs in under a minute to the first interactive prompt.
Changing SVector{5, Char}
to String
roughly triples the runtime, corresponding to the micro benchmark in Rust / Julia Comparison Post - #21 by goerch
https://github.com/JuliaLang/julia/pull/43989 brings the runtime of the string version to 29 minutes on my computer. So despite all it’s problems, this post did actually uncover a minor performance bug in Julia.
Are you sure (meaning, someone checked the assembly code generated?). Because the statically compiled code can very well stack allocate arrays which are ‘syntactically’ heap allocated. This is probably the main point where Fortran syntax is simpler than that of Julia, for example, it may be the same with Rust.
I actually have no idea, I don’t know Rust and just went off the Rust documentation saying their String
s are heap allocated. It was growable/mutable so I assumed it was always heap-allocated and that the str
views were used to avoid allocations, but I definitely can’t rule out the compiler eliding allocations.
This is an untyped container Dict{Any,Any}
, rather use memo = Dict{Vector{Constraint},Int64}()
, and
just use ints ? 1 : 0
. That said, your code with memoization runs in an insane 30 sec.
on my computer.
Man, I’m never gonna beat that time in Wordle.
Thanks for checking!
This now looks a bit like a dynamic programming problem to me. The memoization takes advantage of the fact that the number of different constraint combinations is seemingly pretty low. But at a cost: my attempts to parallelize the code failed, I believe this could be a more general problem.
Or just
rem_cs += match_constraints(w, cs)
Yes, sure. Interestingly, however, the branch predictor in this case seems clever enough to eliminate the branch altogether.
your code with memoization runs in an insane
30 sec.
on my computer.
Am I right in understanding that after a couple days here on the forum the code went from ~8575 minutes to 30 seconds?
Yeah. We really dropped the ball on this one. I’m as sad as you we couldn’t get it under 1s
Well, has anyone profiled the current top ranking version?
The code listed under "The full version of serial code with memoization…
" runs on my machine in 32s. This is reduced to 26.5s by making the changes above plus a couple of others: use Dict{Vector{Constraint},Int64}()
and rem_cs += match_constraints(w, cs)
; and also I also used @inbounds
gaining 0.5s. I also made a couple of variables type stable, either Float64
or Int
, but I don’t know if that helped any.
The algorithm can be simplified considerably if we use the fact that the constraints generated by a guess and a target word, which is the pattern of colors on the tiles, can be considered as a 5-digit base-3 number. For example
function constraints(guess, actual)
val = 0
mult = 1
for (g, a) in zip(guess, actual)
val += (g == a ? 2 : (g ∈ actual ? 1 : 0)) * mult
mult *= 3
end
return val
end
Then we just need to keep track of the number of words that generate each of these numbers between 0 and 242.
For a given initial guess we can update an integer vector counts
of length 243 as
function ccounts!(counts, guess, words)
fill!(counts, 0)
for actual in words
counts[constraints(guess, actual) + 1] += 1
end
return counts
end
So, for example,
julia> show(ccounts!(counts, SVector{5,UInt8}("raise"...), words))
[168, 103, 10, 92, 78, 4, 91, 26, 9, 107, 23, 4, 34, 18, 1, 14, 2, 6, 51, 28, 1, 12, 4, 0, 6, 4, 1, 80, 24, 1, 43, 21, 0, 20, 2, 1, 21, 4, 1, 7, 1, 0, 5, 0, 0, 29, 5, 0, 0, 0, 0, 1, 0, 0, 17, 13, 1, 22, 8, 1, 7, 2, 0, 6, 1, 0, 1, 0, 0, 0, 0, 0, 9, 5, 0, 1, 0, 0, 2, 0, 0, 121, 102, 20, 69, 34, 13, 20, 28, 4, 35, 26, 8, 3, 1, 0, 0, 0, 0, 15, 12, 1, 1, 0, 0, 0, 0, 0, 41, 18, 2, 12, 4, 0, 1, 2, 0, 4, 4, 3, 1, 0, 0, 0, 0, 0, 3, 1, 0, 0, 0, 0, 0, 0, 0, 9, 7, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 61, 40, 6, 41, 26, 0, 26, 4, 1, 25, 3, 2, 2, 1, 0, 0, 0, 0, 23, 17, 0, 5, 1, 0, 3, 0, 0, 17, 10, 0, 20, 5, 0, 9, 0, 0, 5, 0, 0, 1, 0, 0, 0, 0, 0, 15, 2, 0, 1, 0, 0, 0, 0, 0, 20, 8, 2, 8, 2, 0, 5, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 1]
This means that there are 168 words that would generate all grey tiles if your initial guess is “raise”, 103 that would give you a yellow tile followed by 4 grey tiles, etc. Now here is the real subtle point - for a given initial guess a word matches a constraint pattern if and only if it generates that pattern. That is, the counts
vector is also the size of the pool after the first guess. Thus the average size of the pool after the first guess of “raise” is
julia> sum(abs2, counts)/sum(counts)
61.00086393088553
which corresponds to the earlier result.
Adding a function
function avgpool(guesses, words)
counts = zeros(Int, 243)
nwords = length(words)
avgs = sizehint!(Float64[], length(guesses))
for guess in guesses
ccounts!(counts, guess, words)
push!(avgs, sum(abs2, counts) / nwords)
end
return sort(DataFrame(initial_guess = String.(guesses), avg_pool = avgs), :avg_pool)
end
gives the desired result of < 1s execution time
julia> @btime avgpool(allowed_guesses, words)
423.376 ms (27048 allocations: 2.39 MiB)
12972×2 DataFrame
Row │ initial_guess avg_pool
│ String Float64
───────┼─────────────────────────
1 │ roate 60.4246
2 │ raise 61.0009
3 │ raile 61.3309
4 │ soare 62.3011
5 │ arise 63.7257
6 │ irate 63.7793
7 │ orate 63.8907
8 │ ariel 65.2877
9 │ arose 66.0212
10 │ raine 67.0562
⋮ │ ⋮ ⋮
12963 │ kudzu 871.555
12964 │ zoppo 873.149
12965 │ susus 873.97
12966 │ yukky 880.128
12967 │ fuffy 882.759
12968 │ gyppy 887.443
12969 │ jugum 900.946
12970 │ jujus 903.489
12971 │ qajaq 940.134
12972 │ immix 969.972
12952 rows omitted
Well done! I knew we could break 1 million times faster!