Hi there,
I am in need to optimize as much as possible the function I will present here. I moved it from C to Julia for convenience since the operations around it become much easier but I am getting significantly worst performance in the performance critical function:
using SpecialFunctions
utilities = rand(500);
cutoffs = rand(500);
# JULIA BENCHMARK
cdf_eval(x, mean, stddev) = 0.5 * (1 + erf((x - mean) / (stddev * sqrt(2.0))))
function assignment_probabilities_sorted_gauss(utilities, cutoffs, mean, stddev)
last_prob = 1.0;
expectedU = 0.0;
sch_left = 500;
while sch_left > 0
max_util, ix = findmax(@view utilities[1:sch_left]);
if max_util > 0
this_prob = cdf_eval(cutoffs[ix], mean, stddev)
p = last_prob - this_prob;
expectedU += p * max_util;
last_prob = this_prob;
sch_left = ix - 1;
else
break
end
end
return expectedU
end
@benchmark assignment_probabilities_sorted_gauss!($(utilities), $(cutoffs), $0.5, $1.0)
# C BENCHMARK
C_code = """
#include <stddef.h>
#include <math.h>
double normcdf(double x, double mean, double stddev) {
double z = (x - mean) / stddev;
return 0.5 * (1.0 + erf(z / sqrt(2.0)));
}
double assignment_probabilities_sorted_gauss(double* utilities, double* cutoffs, size_t size) {
double last_prob = 1.0; // Start with full probability
double expectedU = 0.0; // Initialize expected utility
int ix;
double max_util, this_prob, p;
double mean = 0.5; // Extract mean from F
double stddev = 1.0; // Extract standard deviation from F
// While there are still utilities to process
while (size > 0) {
max_util = -INFINITY; // Reset max_util before each iteration
// Find the maximum utility and its index (zero-based indexing)
for (int i = 0; i < size; i++) {
if (utilities[i] > max_util) {
max_util = utilities[i];
ix = i; // Store the index of the maximum utility
}
}
// If the maximum utility is greater than 0, process it
if (max_util > 0) {
// Compute the probability using the normal CDF with the given mean and stddev
this_prob = normcdf(cutoffs[ix], mean, stddev); // Compute the normal CDF at the cutoff
// Calculate the probability for this utility
p = last_prob - this_prob;
// Update expected utility
expectedU += p * max_util;
// Remove the processed utility and all lower utilities (size is reduced)
size = ix; // Only keep utilities up to the maximum index (zero-indexed)
// Update last_prob for the next iteration
last_prob = this_prob;
} else {
// If the maximum utility is less or equal to zero, break the loop
break;
}
}
return expectedU;
}
"""
const Clib = tempname()
open(`gcc -fPIC -O3 -msse3 -xc -shared -o $(Clib * "." * Libdl.dlext) -`, "w") do f
print(f, C_code)
end
c_assignment_probabilities_sorted_gauss(utilities::Array{Float64}, cutoffs::Array{Float64}) = ccall(("assignment_probabilities_sorted_gauss", Clib3), Float64, (Ptr{Float64}, Ptr{Float64}, Csize_t),
utilities, cutoffs, length(utilities))
@benchmark c_assignment_probabilities_sorted_gauss($(utilities), $(cutoffs))
The benchmark output for Julia:
julia> @benchmark assignment_probabilities_sorted_gauss!($(utilities), $(cutoffs), $(p), $0.5, $1.0)
BenchmarkTools.Trial: 10000 samples with 6 evaluations.
Range (min … max): 4.993 μs … 21.042 μs ┊ GC (min … max): 0.00% … 0.00%
Time (median): 5.028 μs ┊ GC (median): 0.00%
Time (mean ± σ): 5.104 μs ± 585.617 ns ┊ GC (mean ± σ): 0.00% ± 0.00%
█▆ ▁▄▁ ▁ ▁
███████▇██▇▇█▆▅▅▄▃▅▆▆▅▄▅▄▄▁▄▁▃▄▃▄▃▄▃▃▅▁▁▁▅▄▃▄▄▁▄▁▃▁▄▃▄▁▄▁▄▅ █
4.99 μs Histogram: log(frequency) by time 7.4 μs <
Memory estimate: 0 bytes, allocs estimate: 0.
The benchmark output for C:
julia> @benchmark c_assignment_probabilities_sorted_gauss($(utilities), $(cutoffs))
BenchmarkTools.Trial: 10000 samples with 10 evaluations.
Range (min … max): 1.142 μs … 2.954 μs ┊ GC (min … max): 0.00% … 0.00%
Time (median): 1.150 μs ┊ GC (median): 0.00%
Time (mean ± σ): 1.157 μs ± 67.292 ns ┊ GC (mean ± σ): 0.00% ± 0.00%
█
▂▁▁▁▁▁▁▁▁▄▁▁▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▄▇▁▁▁▁▁▁▁▁▄▁▁▁▁▁▁▁▁▁▃▁▁▁▁▁▁▁▁▂ ▂
1.14 μs Histogram: frequency by time 1.17 μs <
Memory estimate: 0 bytes, allocs estimate: 0.
I would really appreciate if someone helped out in making these two comparable.
Thanks in advance.