See the following pieces of code for quicksort, adapted from GitHub - JuliaLang/Microbenchmarks: Microbenchmarks comparing the Julia Programming language with other languages. As you can see in the benchmark output, julia runs the sorting in about 62 ms while C++ does it in about 83 ms. So julia is about 25% faster than C++. How is this possible?

julia version is 1.9.2

I have a Windows10 machine and compiled c++ with VisualStudio2019 in release mode.

```
using BenchmarkTools
# quicksort version with tail recursion: https://stackoverflow.com/questions/19094283/quicksort-and-tail-recursive-optimization
function qsort!(a,lo,hi)
i, j = lo, hi
@inbounds while i < hi
pivot = a[(lo+hi)>>>1]
while i <= j
while a[i] < pivot; i += 1; end
while a[j] > pivot; j -= 1; end
if i <= j
a[i], a[j] = a[j], a[i]
i, j = i+1, j-1
end
end
if lo < j; qsort!(a,lo,j); end
lo, j = i, hi
end
return a
end
const N::Int32 = 1000000
@benchmark qsort!(a, 1, N) setup=(a = rand(Int32.(1:N), N))
```

Julia output:

```
BenchmarkTools.Trial: 69 samples with 1 evaluation.
Range (min β¦ max): 58.021 ms β¦ 72.308 ms β GC (min β¦ max): 0.00% β¦ 0.00%
Time (median): 62.288 ms β GC (median): 0.00%
Time (mean Β± Ο): 62.607 ms Β± 2.842 ms β GC (mean Β± Ο): 0.00% Β± 0.00%
β β β β ββ β
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ β
58 ms Histogram: frequency by time 69.6 ms <
Memory estimate: 208 bytes, allocs estimate: 13.
```

C++ code:

```
#include <chrono>
#include <vector>
#include <iostream>
#include <iomanip>
#include <random>
void quicksort(std::vector<int>& a, int lo, int hi) {
int i = lo;
int j = hi;
while (i < hi) {
double pivot = a[(lo + hi) / 2];
// Partition
while (i <= j) {
while (a[i] < pivot) {
i = i + 1;
}
while (a[j] > pivot) {
j = j - 1;
}
if (i <= j) {
double t = a[i];
a[i] = a[j];
a[j] = t;
i = i + 1;
j = j - 1;
}
}
// Recursion for quicksort
if (lo < j) {
quicksort(a, lo, j);
}
lo = i;
j = hi;
}
}
int main() {
int cnt = 60;
float duration = 0.0;
for (int i = 0; i < cnt; ++i) {
int N = 1000000;
std::vector<int> d(N);
// Seed with a real random value, if available
std::random_device r;
// Choose a random mean between 1 and 6
std::default_random_engine e1(r());
std::uniform_int_distribution<int> uniform_dist(1, N);
for (int i = 0; i < d.size(); i++)
{
d[i] = uniform_dist(e1);
}
auto start = std::chrono::high_resolution_clock::now();
quicksort(d, 0, N - 1);
auto end = std::chrono::high_resolution_clock::now();
double time_taken =
std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
duration += time_taken * 1e-06;
}
std::cout << "Average time taken by program is : " << std::fixed
<< duration/cnt << std::setprecision(3) << " ms" << std::endl;
}
```

C++ output:

```
Average time taken by program is : 83.069237 ms
```