Julia & Mojo Mandelbrot Benchmark

New best:

julia> @benchmark compute_mandelbrot_polyturbo!($result)
BenchmarkTools.Trial: 652 samples with 1 evaluation.
 Range (min … max):  1.494 ms …   4.784 ms  ┊ GC (min … max): 0.00% … 0.00%
 Time  (median):     1.520 ms               ┊ GC (median):    0.00%
 Time  (mean ± σ):   1.533 ms ± 147.023 μs  ┊ GC (mean ± σ):  0.00% ± 0.00%

  ▃▃        ▂▄▆██▆▃  ▁         ▂                               
  ██▇▄▁▁▆▄▁▇███████▇███▇▆▇███▆▆█████▇▅▇▅▁▇▄▅▇▆▅▁▄▅▁▁▁▁▁▁▁▁▁▁▄ █
  1.49 ms      Histogram: log(frequency) by time       1.6 ms <

 Memory estimate: 0 bytes, allocs estimate: 0.

code

using LoopVectorization, VectorizationBase, Polyester

# using Plots 

const xn = 960
const yn = 960
const xmin = -2.0
const xmax = 0.6
const ymin = -1.5
const ymax = 1.5
const MAX_ITERS = 200

@inline function mandelbrot_kernel(c)
  z = c
  for i = 1:MAX_ITERS
    z = z * z + c
    if abs2(z) > 4
      return i
    end
  end
  return MAX_ITERS
end
@inline function mandelbrot_kernel(r, i::Vec{W}) where {W}
  z = c = complex(r,i)
  inactive = VectorizationBase.Mask(VectorizationBase.zero_mask(Val(W)))
  imask = VectorizationBase.vbroadcast(Val(W), 0)
  for i = 1:MAX_ITERS
    z = muladd(z, z, c)
    imask = ifelse(inactive, imask, i)
    inactive |= abs2(z) > 4
    VectorizationBase.vall(inactive) && return imask
  end
  return imask
end
@inline function mandelbrot_kernel(r::VecUnroll, i::VecUnroll)
VectorizationBase.VecUnroll(VectorizationBase.fmap(mandelbrot_kernel, VectorizationBase.data(r),VectorizationBase.data(i)))
end
@inline function mandelbrot_kernel(r::Vec, i::VecUnroll)
VectorizationBase.VecUnroll(VectorizationBase.fmap(mandelbrot_kernel, r, VectorizationBase.data(i)))
end
@inline function mandelbrot_kernel(r::VecUnroll, i::Vec)
VectorizationBase.VecUnroll(VectorizationBase.fmap(mandelbrot_kernel, VectorizationBase.data(r),i))
end

function compute_mandelbrot!(result)

  x_range = range(xmin, xmax, xn)
  y_range = range(ymin, ymax, xn)

  Threads.@threads for j = 1:yn
    for i = 1:xn
      x = x_range[i]
      y = y_range[j]
      result[j, i] = mandelbrot_kernel(complex(x, y))
    end
  end
  return result
end
compute_mandelbrot() = compute_mandelbrot!(zeros(yn, xn))
function compute_mandelbrot_turbo!(result)
  x_range = range(xmin, xmax, xn)
  y_range = range(ymin, ymax, xn)
  
  @tturbo for j = 1:yn
    for i = 1:xn
      x = x_range[i]
      y = y_range[j]
      result[j, i] = mandelbrot_kernel(x, y)
    end
  end
  return result
end

function compute_mandelbrot_polyturbo!(result)
  x_range = range(xmin, xmax, xn)
  y_range = range(ymin, ymax, xn)
  
  @batch per=thread for i = 1:xn
    x = x_range[i]
    @turbo for j = 1:yn
      y = y_range[j]
      result[j, i] = mandelbrot_kernel(x, y)
    end
  end
  return result
end
compute_mandelbrot_turbo() = compute_mandelbrot_turbo!(zeros(yn, xn))
compute_mandelbrot_polyturbo() = compute_mandelbrot_polyturbo!(zeros(yn, xn))
result = zeros(yn, xn);

@benchmark compute_mandelbrot!($result)
@benchmark compute_mandelbrot_turbo!($result)
@benchmark compute_mandelbrot_polyturbo!($result)
4 Likes