Trying to solve the problem d17 of this year’s AOC, I found that, among the proposed solutions, the most efficient seems to be the one that uses recursion.
for example, compared to an iterative one that works on the same principle (see below, under the title iterative solution), it is more than 3 times faster.
I then tried to simulate by hand what (I imagine) a recursive function does,
but I can’t get the same performance.
Compared to the initial version that had 15 us against 1.8 us of the recursive version, I obtained a clear improvement (reaching 2.2 us) “simply” declaring the type of a vector: astk.
However, I couldn’t do better.
I would expect that by doing things properly, the “ad hoc recursion” would be more efficient than the general one.
sol with recursion
function fss(sn, seq, n=0)
n == length(seq) && return sn
for a in 0:7
A = (sn << 3) + a
out = ((A % 8) ⊻ 1 ⊻ A÷(2^((A % 8) ⊻ 6))) % 8
if out== seq[n+1]
pA = fss(A, seq, n + 1)
!isnothing(pA) && return pA
end
end
end
Pr=[2,4,1,6,7,5,4,4,1,7,0,3,5,5,3,0]
using BenchmarkTools
@btime fss(0, reverse($Pr)) # 1.8us
sol iterative
Pr=[2,4,1,6,7,5,4,4,1,7,0,3,5,5,3,0]
function prevs((A,n),rPr)
res=Tuple{Int,Int}[]
for a in 0:7
Aa=(A << 3)+a
pA = ((Aa % 8) ⊻ 1 ⊻ Aa ÷ (2^((Aa % 8) ⊻ 6))) % 8
if pA == rPr[n+1]
push!(res,(Aa,n+1))
end
end
res
end
function rfss(rPr)
vpn=prevs((0,0),rPr)
for _ in 1:15
vpn=vcat(prevs.(vpn,[rPr])...)
end
vpn[1]
end
using BenchmarkTools
@btime rfss(reverse(Pr)) # 6us
hand-made recursion solution
ePr=[2,4,1,6,7,5,4,4,1,7,0,3,5,5,3,0]
function prev(rPr,(A,n),astk=Tuple{Tuple{Int,Int},Int}[],s=0)
n==length(rPr) && return (A,n)
res=(-1,0)
for a in s:7
Aa=(A << 3)+a
a8=(Aa % 8)
pA = (a8 ⊻ 1 ⊻ Aa ÷ (2^(a8 ⊻ 6))) % 8
if pA == rPr[n+1]
res = (Aa,n+1)
push!(astk,((A,n),a+1))
break
end
end
res, astk
end
function hr(rPr)
res, astk = prev(rPr,(0,0))
while res[2] != length(rPr)
if res[1] == -1
res,s = pop!(astk)
res, astk = prev(rPr,res,astk,s)
else
res, astk = prev(rPr,res,astk)
end
end
res
end
using BenchmarkTools
@btime hr(reverse(ePr)) #15us --> 2.2/3.2 us dopo aver esplicitato tipo del vettore astk
How can the performance of the hand-made recursion solution be further improved?