Original code:
using BenchmarkTools
function power_digit_sum(pow, n)
return sum(c^pow for c in reverse(digits(n)))
end
function Problem30()
ans = sum(i for i in 2:1_000_000 if i == power_digit_sum(5, i))
return ans
end
@show Problem30();
@btime Problem30();
Problem30() = 443839
128.256 ms (2000004 allocations: 243.83 MiB)
The easiest way to reduce the allocation in this code is to use digits!
:
module Rev1
function power_digit_sum!(pow, n, ws)
return sum(c^pow for c in digits!(ws, n))
end
function Problem30(N, m)
ws = Vector{Int}(undef, N)
ans = sum(i for i in 2:(10^N - 1) if i == power_digit_sum!(m, i, ws))
return ans
end
end
@show Rev1.Problem30(6, 5);
@btime Rev1.Problem30(6, 5);
Rev1.Problem30(6, 5) = 443839
54.770 ms (1 allocation: 128 bytes)
128.256 ms (2000004 allocations: 243.83 MiB)
↓
54.770 ms (1 allocation: 128 bytes)
The naive simplest way to solve Project Euler Problem 30 is to use a 6-fold for loop that runs from 0 to 9 for each of i_1, i_2, \ldots, i_6. Using the Base.Cartesian macros, we can simply write a 6-fold for loop in Julia.
function projecteuler30()
s = 0
x = 0
@nloops 6 i d -> 0:9 begin
y = 0
@nexprs 6 d -> y += (z = i_d^2; z^2*i_d)
s += (x == y) * x
x += 1
end
s - 1
end
@show projecteuler30();
@btime projecteuler30();
projecteuler30() = 443839
237.800 μs (0 allocations: 0 bytes)
128.256 ms (2000004 allocations: 243.83 MiB)
↓
54.770 ms (1 allocation: 128 bytes)
↓
237.800 μs (0 allocations: 0 bytes)
That’s over 500 times faster than the original code! It’s only using one CPU thread! The readability of the code has been reduced, but it is simple enough!
It can be generalized with only a 30% performance sacrifice by using a generated function as follows:
@generated function _projecteuler30(::Val{N}, ::Val{m}) where {N, m}
quote
p = @ntuple 10 i -> (i - 1) ^ $m
s = 0
x = 0
@inbounds @nloops $N i d -> 0:9 begin
y = 0
@nexprs $N d -> y += p[i_d+1]
s += (x == y) * x
x += 1
end
s - 1
end
end
projecteuler30(N, m) = _projecteuler30(Val(N), Val(m))
@show projecteuler30(6, 5);
@btime projecteuler30(6, 5);
projecteuler30(6, 5) = 443839
315.700 μs (0 allocations: 0 bytes)
I think one of the great joys of Julia is that calculations can be made hundreds of times faster with a little effort.
Jupyter notebook (I have left the trial and error part.)