Parallel quicksort openmp

I have made a few minor changes in your code here:


const SEQ_THRESH = 1 << 9

@inline function partition!(A, pivot, left,right)
    @inbounds while left <= right
      while A[left] < pivot
        left += 1
      end
      while A[right] > pivot
        right -= 1
      end
      if left <= right
        A[left], A[right] = A[right], A[left]
        left += 1
        right -= 1
      end
    end

  return (left,right)
end

function quicksort!(A, i=1, j=length(A))
  if j > i
    left, right = partition!(A, A[(j+i) >>> 1], i, j)
    quicksort!(A,i,right)
    quicksort!(A,left,j)
  end
  return
end

function parallel_quicksort!(A,i=1,j=length(A))
  if j-i <= SEQ_THRESH
    quicksort!(A, i, j)
    return
  end
  left, right = partition!(A, A[(j+i) >>> 1], i, j)
  t = Threads.@spawn parallel_quicksort!(A,i,right)
  parallel_quicksort!(A,left,j)
  wait(t)

  return
end

function test(n)
  b = rand(n)

  for i = 1:5
    print("Parallel  : ")
    a1 = copy(b); @time parallel_quicksort!(a1);
    print("Sequential: ")
    a2 = copy(b); @time quicksort!(a2);
    println("")
    @assert a1 == sort(a1)
    @assert a1 == a2
  end

end

test(2^20)

And I do get about 2.33x speedup on a 2-core 2013 MacBookPro with 16GB RAM.

~/p/quicksort.jl> julia -tauto --project=. test_qs.jl
Parallel  :   0.057559 seconds (14.31 k allocations: 1.012 MiB, 7.69% compilation time)
Sequential:   0.114027 seconds

Parallel  :   0.047487 seconds (13.52 k allocations: 992.812 KiB)
Sequential:   0.106023 seconds

Parallel  :   0.047373 seconds (13.52 k allocations: 992.906 KiB)
Sequential:   0.106490 seconds

Parallel  :   0.046449 seconds (13.48 k allocations: 991.656 KiB)
Sequential:   0.109551 seconds

Parallel  :   0.045878 seconds (13.47 k allocations: 991.375 KiB)
Sequential:   0.107051 seconds

Perhaps if you were to undo my changes one by one, it would be very interesting to find out which aspect of your initial code caused the slowdown, (probably some type instability). Somebody wiser to help here? @Elrod @stevengj perhaps.

CHANGES:

  1. parallel and sequential quicksort() return nothing instead of A
  2. abstracted out partition!() and made it inline
  3. asserted external while block as inbounds
  4. timing from a test function, not the REPL
5 Likes