Rust / Julia Comparison Post

Combining the vcat, and the collect change that I mentioned bring it down to 45 minutes. That’s pretty close to the Rust version.

2 Likes

Answering the questions in reverse order.

The full version of serial code with memoization

using StaticArrays

function _readline(path)::SVector{5, Char}
    word = readline(path)
    SVector{5, Char}(word...)
end
function _readlines(path)::Vector{SVector{5, Char}}
    words = Vector{SVector{5, Char}}()
    for word in readlines(path)
        push!(words, SVector{5, Char}(word...))
    end
    println(length(words))
    words
end

const words = _readlines("solutions.txt")
const non_solution_words = _readlines("non-solution-guesses.txt")
const allowed_guesses = vcat(words, non_solution_words)

@enum ConstraintType has_no has_but_not_at has_at
struct Constraint
  type::ConstraintType
  letter::Char
  index::Int
end

function isequal(c0::ConstraintType, c1::ConstraintType)::Bool
  c0.type == c1.type && c0.letter == c1.letter && (c0.type == has_no || c0.index == c1.index)
end

function hash(c::ConstraintType, h::UInt = 0)::UInt
  hash(c.type, h) ⊻ hash(c.letter, h ⊻ 1) ⊻ hash(c.index, h ⊻ 2)
end

function constraints(guess::SVector{5, Char}, actual::SVector{5, Char})::Vector{Constraint}
  c = Vector{Constraint}()
  for (i, g, a) in zip(1:5, guess, actual)
    if g == a
      push!(c, Constraint(has_at, g, i))
    elseif g ∈ actual
      push!(c, Constraint(has_but_not_at, g, i))
    else
      push!(c, Constraint(has_no, g, i))
    end
  end
  c
end

# Code: . = has_no, x = has_but_not_at, o = has_at
function parse_constraints(template::SVector{5, Char}, guess::SVector{5, Char})::Vector{Constraint}
  constraints = Vector{Constraint}()
  for (i, c) in zip(1:5, template)
    if c == 'x'
      push!(constraints, Constraint(has_but_not_at, guess[i], i))
    elseif c == 'o'
      push!(constraints, Constraint(has_at, guess[i], i))
    else
      push!(constraints, Constraint(has_no, guess[i], i))
    end
  end
  constraints
end

function match_constraint(word::SVector{5, Char}, constraint::Constraint)::Bool
  if constraint.type == has_no
    constraint.letter ∉ word
  elseif constraint.type == has_but_not_at
    (word[constraint.index] != constraint.letter) && (constraint.letter ∈ word)
  elseif constraint.type == has_at
    word[constraint.index] == constraint.letter
  else
    @assert false
  end
end

function match_constraints(word::SVector{5, Char}, constraints::Vector{Constraint})::Bool
  for c in constraints
    if !match_constraint(word, c)
      return false
    end
  end
  return true
end

struct Choice
  word::SVector{5, Char}
  avg_remaining::Float64
end

function rank_guesses(guesses::Vector{SVector{5, Char}}, words::Vector{SVector{5, Char}})::Vector{Choice}
  wt = length(guesses)
  avg_time = 0
  memo = Dict()
  choices = Vector{Choice}(undef, wt)
  for (wi, guess) in enumerate(guesses)
    rem = 0
    time = @elapsed for actual in words
      cs = constraints(guess, actual)
      if !haskey(memo, cs)
        rem_cs = 0
        for w in words
          rem_cs += match_constraints(w, cs) ? 1.0 : 0.0
        end
        memo[cs] = rem_cs
      end
      rem += memo[cs]
    end
    choices[wi] = Choice(guess, rem / length(words))
    print("\x1b[1K\x1b[GRanking guesses... ", wi, "/", wt, " words (", Int(ceil(avg_time*(wt-wi))), " sec left)")
    avg_time = (wi*avg_time + time) / (wi+1)
  end
  print("\x1b[1K\x1b[G")
  sort!(choices, by = c -> c.avg_remaining)
end

function main()
  global words; global allowed_guesses
  remaining_words = copy(words)  # List of words that currently fit all known constraints.
  while length(remaining_words) > 1
    best_guesses = rank_guesses(allowed_guesses, remaining_words)[1:10]
    for (i, g) in zip(1:10, best_guesses)
      println(i, ". ", g.word, " (keeps ", g.avg_remaining, " words on average)")
    end
    println("Insert your guess: ")
    guess = _readline(stdin)
    println("Insert the results (o = letter at right spot, x = wrong spot, . = not in the word): ")
    constraint_template = _readline(stdin)
    remaining_words = filter(w -> match_constraints(w, parse_constraints(constraint_template, guess)), remaining_words)
  end
  println("Solution: ", remaining_words[1], ".")
end

runs in under a minute to the first interactive prompt.

Changing SVector{5, Char} to String roughly triples the runtime, corresponding to the micro benchmark in Rust / Julia Comparison Post - #21 by goerch

4 Likes

https://github.com/JuliaLang/julia/pull/43989 brings the runtime of the string version to 29 minutes on my computer. So despite all it’s problems, this post did actually uncover a minor performance bug in Julia.

13 Likes

Are you sure (meaning, someone checked the assembly code generated?). Because the statically compiled code can very well stack allocate arrays which are ‘syntactically’ heap allocated. This is probably the main point where Fortran syntax is simpler than that of Julia, for example, it may be the same with Rust.

I actually have no idea, I don’t know Rust and just went off the Rust documentation saying their Strings are heap allocated. It was growable/mutable so I assumed it was always heap-allocated and that the str views were used to avoid allocations, but I definitely can’t rule out the compiler eliding allocations.

1 Like

This is an untyped container Dict{Any,Any}, rather use memo = Dict{Vector{Constraint},Int64}(), and

just use ints ? 1 : 0. That said, your code with memoization runs in an insane 30 sec. on my computer.

3 Likes

Man, I’m never gonna beat that time in Wordle.

Thanks for checking!

This now looks a bit like a dynamic programming problem to me. The memoization takes advantage of the fact that the number of different constraint combinations is seemingly pretty low. But at a cost: my attempts to parallelize the code failed, I believe this could be a more general problem.

2 Likes

Or just

rem_cs += match_constraints(w, cs)
3 Likes

Yes, sure. Interestingly, however, the branch predictor in this case seems clever enough to eliminate the branch altogether.

Am I right in understanding that after a couple days here on the forum the code went from ~8575 minutes to 30 seconds?

10 Likes

Yeah. We really dropped the ball on this one. I’m as sad as you we couldn’t get it under 1s :slight_smile:

13 Likes

Well, has anyone profiled the current top ranking version?
:wink:

1 Like

The code listed under "The full version of serial code with memoization…
" runs on my machine in 32s. This is reduced to 26.5s by making the changes above plus a couple of others: use Dict{Vector{Constraint},Int64}() and rem_cs += match_constraints(w, cs); and also I also used @inbounds gaining 0.5s. I also made a couple of variables type stable, either Float64 or Int, but I don’t know if that helped any.

1 Like

The algorithm can be simplified considerably if we use the fact that the constraints generated by a guess and a target word, which is the pattern of colors on the tiles, can be considered as a 5-digit base-3 number. For example

function constraints(guess, actual)
    val = 0
    mult = 1
    for (g, a) in zip(guess, actual)
        val += (g == a ? 2 : (g ∈ actual ? 1 : 0)) * mult
        mult *= 3
    end
    return val
end

Then we just need to keep track of the number of words that generate each of these numbers between 0 and 242.

For a given initial guess we can update an integer vector counts of length 243 as

function ccounts!(counts, guess, words)
    fill!(counts, 0)
    for actual in words
        counts[constraints(guess, actual) + 1] += 1
    end
    return counts
end

So, for example,

julia> show(ccounts!(counts, SVector{5,UInt8}("raise"...), words))
[168, 103, 10, 92, 78, 4, 91, 26, 9, 107, 23, 4, 34, 18, 1, 14, 2, 6, 51, 28, 1, 12, 4, 0, 6, 4, 1, 80, 24, 1, 43, 21, 0, 20, 2, 1, 21, 4, 1, 7, 1, 0, 5, 0, 0, 29, 5, 0, 0, 0, 0, 1, 0, 0, 17, 13, 1, 22, 8, 1, 7, 2, 0, 6, 1, 0, 1, 0, 0, 0, 0, 0, 9, 5, 0, 1, 0, 0, 2, 0, 0, 121, 102, 20, 69, 34, 13, 20, 28, 4, 35, 26, 8, 3, 1, 0, 0, 0, 0, 15, 12, 1, 1, 0, 0, 0, 0, 0, 41, 18, 2, 12, 4, 0, 1, 2, 0, 4, 4, 3, 1, 0, 0, 0, 0, 0, 3, 1, 0, 0, 0, 0, 0, 0, 0, 9, 7, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 61, 40, 6, 41, 26, 0, 26, 4, 1, 25, 3, 2, 2, 1, 0, 0, 0, 0, 23, 17, 0, 5, 1, 0, 3, 0, 0, 17, 10, 0, 20, 5, 0, 9, 0, 0, 5, 0, 0, 1, 0, 0, 0, 0, 0, 15, 2, 0, 1, 0, 0, 0, 0, 0, 20, 8, 2, 8, 2, 0, 5, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 1]

This means that there are 168 words that would generate all grey tiles if your initial guess is “raise”, 103 that would give you a yellow tile followed by 4 grey tiles, etc. Now here is the real subtle point - for a given initial guess a word matches a constraint pattern if and only if it generates that pattern. That is, the counts vector is also the size of the pool after the first guess. Thus the average size of the pool after the first guess of “raise” is

julia> sum(abs2, counts)/sum(counts)
61.00086393088553

which corresponds to the earlier result.

10 Likes

Adding a function

function avgpool(guesses, words)
    counts = zeros(Int, 243)
    nwords = length(words)
    avgs = sizehint!(Float64[], length(guesses))
    for guess in guesses
        ccounts!(counts, guess, words)
        push!(avgs, sum(abs2, counts) / nwords)
    end
    return sort(DataFrame(initial_guess = String.(guesses), avg_pool = avgs), :avg_pool)
end

gives the desired result of < 1s execution time

julia> @btime avgpool(allowed_guesses, words)
  423.376 ms (27048 allocations: 2.39 MiB)
12972×2 DataFrame
   Row │ initial_guess  avg_pool 
       │ String         Float64  
───────┼─────────────────────────
     1 │ roate           60.4246
     2 │ raise           61.0009
     3 │ raile           61.3309
     4 │ soare           62.3011
     5 │ arise           63.7257
     6 │ irate           63.7793
     7 │ orate           63.8907
     8 │ ariel           65.2877
     9 │ arose           66.0212
    10 │ raine           67.0562
   ⋮   │       ⋮           ⋮
 12963 │ kudzu          871.555
 12964 │ zoppo          873.149
 12965 │ susus          873.97
 12966 │ yukky          880.128
 12967 │ fuffy          882.759
 12968 │ gyppy          887.443
 12969 │ jugum          900.946
 12970 │ jujus          903.489
 12971 │ qajaq          940.134
 12972 │ immix          969.972
               12952 rows omitted
47 Likes

Well done! I knew we could break 1 million times faster!

15 Likes