Julia & Mojo Mandelbrot Benchmark

This one runs 5x 8x faster then the original julia example posted here, and illustrates nicely how something like ComplexSIMD can be defined quite easily in Julia:

const xn = 960
const yn = 960
const xmin = -2.0
const xmax = 0.6
const ymin = -1.5
const ymax = 1.5
const MAX_ITERS = 200

struct ComplexSIMD{N,T}
    re::NTuple{N,T}
    im::NTuple{N,T}
end

Base.abs2(z::ComplexSIMD) = z.re .* z.re .+ z.im .* z.im
Base.:(*)(a::ComplexSIMD, b::ComplexSIMD) = ComplexSIMD(
    a.re .* b.re .- a.im .* b.im,
    a.re .* b.im .+ a.im .* b.re
)
Base.:(+)(a::ComplexSIMD, b::ComplexSIMD) = ComplexSIMD(a.re .+ b.re, a.im .+ b.im)

@inline function mandelbrot_kernel(c::ComplexSIMD{N,T}) where {N,T}
    z = c
    mask = ntuple(k -> true, N)
    iters = ntuple(k -> T(0), N)
    for i in 1:MAX_ITERS
        !any(mask) && return iters
        mask = abs2(z) .<= 4.0
        iters = iters .+ (mask .* 1)
        z = z * z + c
    end
    return iters
end

@inbounds function inner(result, x_range, y_range, y, ::Val{N}) where N
    ci = y_range[y]
    for x in 1:N:length(x_range)
        # Calculate whether the (x:x+7)-th real coordinates with
        # the y-th imaginary coordinate belong to the
        # mandelbrot set.
        c = ComplexSIMD(ntuple(k -> x_range[x+k-1], N), ntuple(k -> ci, N))
        iters = mandelbrot_kernel(c)
        foreach(enumerate(iters)) do (k, v)
            # Write the result to the output array
            result[y, x+k-1] = v
        end
    end
end

function mandelbrot(result, x_range, y_range, N)
    @sync for y in eachindex(y_range)
        # Threads.@spawn allows dynamic scheduling instead of static scheduling
        # of Threads.@threads macro.
        # See <https://github.com/JuliaLang/julia/issues/21017>.
        Threads.@spawn @inbounds begin
            inner(result, x_range, y_range, y, N)
        end
    end
    return result
end

using BenchmarkTools
result = zeros(yn, xn)
x_range = range(xmin, xmax, xn)
y_range = range(ymin, ymax, xn)
@btime mandelbrot(result, x_range, y_range, Val(4));

Edit:
Also allocated outside the function like Mojo, now its even a bit faster!

(Adapted from mandelbrot Julia #7 program (Benchmarks Game))

56 Likes