This one runs 5x 8x faster then the original julia example posted here, and illustrates nicely how something like ComplexSIMD can be defined quite easily in Julia:
const xn = 960
const yn = 960
const xmin = -2.0
const xmax = 0.6
const ymin = -1.5
const ymax = 1.5
const MAX_ITERS = 200
struct ComplexSIMD{N,T}
re::NTuple{N,T}
im::NTuple{N,T}
end
Base.abs2(z::ComplexSIMD) = z.re .* z.re .+ z.im .* z.im
Base.:(*)(a::ComplexSIMD, b::ComplexSIMD) = ComplexSIMD(
a.re .* b.re .- a.im .* b.im,
a.re .* b.im .+ a.im .* b.re
)
Base.:(+)(a::ComplexSIMD, b::ComplexSIMD) = ComplexSIMD(a.re .+ b.re, a.im .+ b.im)
@inline function mandelbrot_kernel(c::ComplexSIMD{N,T}) where {N,T}
z = c
mask = ntuple(k -> true, N)
iters = ntuple(k -> T(0), N)
for i in 1:MAX_ITERS
!any(mask) && return iters
mask = abs2(z) .<= 4.0
iters = iters .+ (mask .* 1)
z = z * z + c
end
return iters
end
@inbounds function inner(result, x_range, y_range, y, ::Val{N}) where N
ci = y_range[y]
for x in 1:N:length(x_range)
# Calculate whether the (x:x+7)-th real coordinates with
# the y-th imaginary coordinate belong to the
# mandelbrot set.
c = ComplexSIMD(ntuple(k -> x_range[x+k-1], N), ntuple(k -> ci, N))
iters = mandelbrot_kernel(c)
foreach(enumerate(iters)) do (k, v)
# Write the result to the output array
result[y, x+k-1] = v
end
end
end
function mandelbrot(result, x_range, y_range, N)
@sync for y in eachindex(y_range)
# Threads.@spawn allows dynamic scheduling instead of static scheduling
# of Threads.@threads macro.
# See <https://github.com/JuliaLang/julia/issues/21017>.
Threads.@spawn @inbounds begin
inner(result, x_range, y_range, y, N)
end
end
return result
end
using BenchmarkTools
result = zeros(yn, xn)
x_range = range(xmin, xmax, xn)
y_range = range(ymin, ymax, xn)
@btime mandelbrot(result, x_range, y_range, Val(4));
Edit:
Also allocated outside the function like Mojo, now its even a bit faster!
(Adapted from mandelbrot Julia #7 program (Benchmarks Game))