I noticed that if I use Real instead of Float calculation is ~400 fold slower. Also there are large increase in allocations. But only difference in codes is that I use Real instead of Float (by default) in few Arrays.
33.443 ms (2354 allocations: 2.15 MiB)
13.397 s (616406763 allocations: 9.19 GiB)
also
julia> @btime z = zeros(Real,36,36);
7.131 ÎĽs (1 allocation: 10.25 KiB)
julia> @btime z = zeros(36,36);
1.613 ÎĽs (1 allocation: 10.25 KiB)
MWE of our’s model demonstrating Real vs Float benchmark:
using LinearAlgebra, BenchmarkTools
function MC36SM_Mindaugo_SS_Real(vj::Float64, par_tuple, p::Array{Real}, pc1c2::Float64, pc2c1::Float64)
PPP = zeros(Real, 36, 36);
p1 = 0;
p2 = 0;
p3 = 0;
p4 = 0;
pr = collect(par_tuple);
par = reshape(pr, 4, 8);
A = par[:,1];
v0 = par[:,2];
Go = par[:,3];
Gc = par[:,4];
Ro = par[:,5];
Rc = par[:,6];
Pt = par[:,7];
P = par[:,8];
K = Pt[1];
g = zeros(Real, 36, 4);
gg = zeros(Real, 36); #buffer for the sums
v = zeros(Real, 36, 4);
k = zeros(Real, 36, 4);
R = zeros(Real, 36, 4);
# //states conductances
for i1 = 1:2
for i2 = 1:3
for i3 = 1:3
for i4 = 1:2
if (i1 == 1)
g[(i1 - 1) * 18 + (i2 - 1) * 6 + (i3 - 1) * 2 + (i4 - 1) * 1 + 1,1] = Go[1];
R[(i1 - 1) * 18 + (i2 - 1) * 6 + (i3 - 1) * 2 + (i4 - 1) * 1 + 1,1] = Ro[1];
else
g[(i1 - 1) * 18 + (i2 - 1) * 6 + (i3 - 1) * 2 + (i4 - 1) * 1 + 1,1] = Gc[1];
R[(i1 - 1) * 18 + (i2 - 1) * 6 + (i3 - 1) * 2 + (i4 - 1) * 1 + 1,1] = Rc[1];
end
if (i2 == 1)
g[(i1 - 1) * 18 + (i2 - 1) * 6 + (i3 - 1) * 2 + (i4 - 1) * 1 + 1,2] = Go[2];
R[(i1 - 1) * 18 + (i2 - 1) * 6 + (i3 - 1) * 2 + (i4 - 1) * 1 + 1,2] = Ro[2];
else
g[(i1 - 1) * 18 + (i2 - 1) * 6 + (i3 - 1) * 2 + (i4 - 1) * 1 + 1,2] = Gc[2];
R[(i1 - 1) * 18 + (i2 - 1) * 6 + (i3 - 1) * 2 + (i4 - 1) * 1 + 1,2] = Rc[2];
end
if (i3 == 1)
g[(i1 - 1) * 18 + (i2 - 1) * 6 + (i3 - 1) * 2 + (i4 - 1) * 1 + 1,3] = Go[3];
R[(i1 - 1) * 18 + (i2 - 1) * 6 + (i3 - 1) * 2 + (i4 - 1) * 1 + 1,3] = Ro[3];
else
g[(i1 - 1) * 18 + (i2 - 1) * 6 + (i3 - 1) * 2 + (i4 - 1) * 1 + 1,3] = Gc[3];
R[(i1 - 1) * 18 + (i2 - 1) * 6 + (i3 - 1) * 2 + (i4 - 1) * 1 + 1,3] = Rc[3];
end
if (i4 == 1)
g[(i1 - 1) * 18 + (i2 - 1) * 6 + (i3 - 1) * 2 + (i4 - 1) * 1 + 1,4] = Go[4];
R[(i1 - 1) * 18 + (i2 - 1) * 6 + (i3 - 1) * 2 + (i4 - 1) * 1 + 1,4] = Ro[4];
else
g[(i1 - 1) * 18 + (i2 - 1) * 6 + (i3 - 1) * 2 + (i4 - 1) * 1 + 1,4] = Gc[4];
R[(i1 - 1) * 18 + (i2 - 1) * 6 + (i3 - 1) * 2 + (i4 - 1) * 1 + 1,4] = Rc[4];
end
end
end
end
end
for a = 1:4 #// voltages and rectification
# gg = 1 ./ sum( (1 ./ g), dims = 2 ); #sums rows
fill!(gg, 0.0)
row_sum!(gg, g) #happens twice so took it out into function
# v=vj*[gg gg gg gg]./g;
for j = 1:4, i = 1:36 #loop so no allocations
@inbounds v[i, j] = vj * gg[i] / g[i, j];
end
g = g .* exp.(v ./ R);
end
for i = 1:36
for j = 1:4
@inbounds k[i,j] = exp(A[j] * (v[i,j] * P[j] - v0[j]));
end
end
# //------------Matrica P-------------------------------------------------
for i1 = 1:2 for i2 = 1:3 for i3 = 1:3 for i4 = 1:2
for j1 = 1:2 for j2 = 1:3 for j3 = 1:3 for j4 = 1:2
i = (i1 - 1) * 18 + (i2 - 1) * 6 + (i3 - 1) * 2 + (i4 - 1) * 1 + 1;
j = (j1 - 1) * 18 + (j2 - 1) * 6 + (j3 - 1) * 2 + (j4 - 1) * 1 + 1;
# %-------be Pt---------------------
# % p1
if (i1 == 1)
if (j1 == 1)
p1 = (1 - K * k[i,1] / (1 + k[i,1]));
else
p1 = K * k[i,1] / (1 + k[i,1]);
end
else
if (j1 == 2)
p1 = (1 - K / (1 + k[i,1]));
else
p1 = K / (1 + k[i,1]);
end
end
# %------------------------------
# % p2
if (i2 == 1)
if (j2 == 1)
p2 = (1 - K * k[i,2] / (1 + k[i,2]));
end
if (j2 == 2)
p2 = K * k[i,2] / (1 + k[i,2]);
end
if (j2 == 3)
p2 = 0;
end
end
if (i2 == 2)
if (j2 == 1)
p2 = (K / (1 + k[i,2])) * (1 - pc1c2);
end
if (j2 == 2)
p2 = (1 - K / (1 + k[i,2])) * (1 - pc1c2);
end
if (j2 == 3)
p2 = pc1c2;
end
end
if (i2 == 3)
if (j2 == 1)
p2 = 0;
end
if (j2 == 2)
p2 = pc2c1;
end
if (j2 == 3)
p2 = 1 - pc2c1;
end
end
# %----------------------------------------
# % p3
if (i3 == 1)
if (j3 == 1)
p3 = (1 - K * k[i,3] / (1 + k[i,3]));
end
if (j3 == 2)
p3 = K * k[i,3] / (1 + k[i,3]);
end
if (j3 == 3)
p3 = 0;
end
end
if (i3 == 2)
if (j3 == 1)
p3 = (K / (1 + k[i,3])) * (1 - pc1c2);
end
if (j3 == 2)
p3 = (1 - K / (1 + k[i,3])) * (1 - pc1c2);
end
if (j3 == 3)
p3 = pc1c2;
end
end
if (i3 == 3)
if (j3 == 1)
p3 = 0;
end
if (j3 == 2)
p3 = pc2c1;
end
if (j3 == 3)
p3 = 1 - pc2c1;
end
end
# %----------------------------
# %p4
if (i4 == 1)
if (j4 == 1)
p4 = (1 - K * k[i,4] / (1 + k[i,4]));
else
p4 = K * k[i,4] / (1 + k[i,4]);
end
else
if (j4 == 2)
p4 = (1 - K / (1 + k[i,4]));
else
p4 = K / (1 + k[i,4]);
end
end
PPP[j,i] = p1 * p2 * p3 * p4;
end
end
end
end
end
end
end
end
d_g = 100000;
g_old = 100000;
g_final = 0;
i = 0;
tmp = similar(p);
# ggg = zeros(MVector{36,Float64});
# ggg = 1 ./ sum( (1 ./ g), dims=2 ); #%sums rows
fill!(gg, 0.0);
row_sum!(gg, g)
while (d_g / (g_old + g_final) > .001e-5)
i = i + 1;
# p=p*PPP;
mul!(tmp, PPP, p)
p, tmp = tmp, p;
g_final = dot(gg, p); # (p*ggg)[1];
d_g = abs(g_old - g_final);
g_old = g_final;
# print(" ",i,", ", d_g / (g_old + g_final), " ")
end
# print(i, " ");
# println(i);
return g_final, i;
end
function MC36SM_Mindaugo_SS(vj::Float64, par_tuple, p::Array{Float64}, pc1c2::Float64, pc2c1::Float64)
PPP = zeros(36, 36);
p1 = 0;
p2 = 0;
p3 = 0;
p4 = 0;
pr = collect(par_tuple);
par = reshape(pr, 4, 8); #[]... - Tuple to array, and reshape into dimensions
A = par[:,1];
v0 = par[:,2];
Go = par[:,3];
Gc = par[:,4];
Ro = par[:,5];
Rc = par[:,6];
Pt = par[:,7];
P = par[:,8];
K = Pt[1];
g = zeros(36, 4);
gg = zeros(36); #buffer for the sums
v = zeros(36, 4);
k = zeros(36, 4);
R = zeros(36, 4);
# //states conductances
for i1 = 1:2
for i2 = 1:3
for i3 = 1:3
for i4 = 1:2
if (i1 == 1)
g[(i1 - 1) * 18 + (i2 - 1) * 6 + (i3 - 1) * 2 + (i4 - 1) * 1 + 1,1] = Go[1];
R[(i1 - 1) * 18 + (i2 - 1) * 6 + (i3 - 1) * 2 + (i4 - 1) * 1 + 1,1] = Ro[1];
else
g[(i1 - 1) * 18 + (i2 - 1) * 6 + (i3 - 1) * 2 + (i4 - 1) * 1 + 1,1] = Gc[1];
R[(i1 - 1) * 18 + (i2 - 1) * 6 + (i3 - 1) * 2 + (i4 - 1) * 1 + 1,1] = Rc[1];
end
if (i2 == 1)
g[(i1 - 1) * 18 + (i2 - 1) * 6 + (i3 - 1) * 2 + (i4 - 1) * 1 + 1,2] = Go[2];
R[(i1 - 1) * 18 + (i2 - 1) * 6 + (i3 - 1) * 2 + (i4 - 1) * 1 + 1,2] = Ro[2];
else
g[(i1 - 1) * 18 + (i2 - 1) * 6 + (i3 - 1) * 2 + (i4 - 1) * 1 + 1,2] = Gc[2];
R[(i1 - 1) * 18 + (i2 - 1) * 6 + (i3 - 1) * 2 + (i4 - 1) * 1 + 1,2] = Rc[2];
end
if (i3 == 1)
g[(i1 - 1) * 18 + (i2 - 1) * 6 + (i3 - 1) * 2 + (i4 - 1) * 1 + 1,3] = Go[3];
R[(i1 - 1) * 18 + (i2 - 1) * 6 + (i3 - 1) * 2 + (i4 - 1) * 1 + 1,3] = Ro[3];
else
g[(i1 - 1) * 18 + (i2 - 1) * 6 + (i3 - 1) * 2 + (i4 - 1) * 1 + 1,3] = Gc[3];
R[(i1 - 1) * 18 + (i2 - 1) * 6 + (i3 - 1) * 2 + (i4 - 1) * 1 + 1,3] = Rc[3];
end
if (i4 == 1)
g[(i1 - 1) * 18 + (i2 - 1) * 6 + (i3 - 1) * 2 + (i4 - 1) * 1 + 1,4] = Go[4];
R[(i1 - 1) * 18 + (i2 - 1) * 6 + (i3 - 1) * 2 + (i4 - 1) * 1 + 1,4] = Ro[4];
else
g[(i1 - 1) * 18 + (i2 - 1) * 6 + (i3 - 1) * 2 + (i4 - 1) * 1 + 1,4] = Gc[4];
R[(i1 - 1) * 18 + (i2 - 1) * 6 + (i3 - 1) * 2 + (i4 - 1) * 1 + 1,4] = Rc[4];
end
end
end
end
end
for a = 1:4 #// voltages and rectification
# gg = 1 ./ sum( (1 ./ g), dims = 2 ); #sums rows
fill!(gg, 0.0)
row_sum!(gg, g) #happens twice so took it out into function
# v=vj*[gg gg gg gg]./g;
for j = 1:4, i = 1:36 #loop so no allocations
@inbounds v[i, j] = vj * gg[i] / g[i, j];
end
g = g .* exp.(v ./ R);
end
for i = 1:36
for j = 1:4
@inbounds k[i,j] = exp(A[j] * (v[i,j] * P[j] - v0[j]));
end
end
# //------------Matrica P-------------------------------------------------
for i1 = 1:2 for i2 = 1:3 for i3 = 1:3 for i4 = 1:2
for j1 = 1:2 for j2 = 1:3 for j3 = 1:3 for j4 = 1:2
i = (i1 - 1) * 18 + (i2 - 1) * 6 + (i3 - 1) * 2 + (i4 - 1) * 1 + 1;
j = (j1 - 1) * 18 + (j2 - 1) * 6 + (j3 - 1) * 2 + (j4 - 1) * 1 + 1;
# %-------be Pt---------------------
# % p1
if (i1 == 1)
if (j1 == 1)
p1 = (1 - K * k[i,1] / (1 + k[i,1]));
else
p1 = K * k[i,1] / (1 + k[i,1]);
end
else
if (j1 == 2)
p1 = (1 - K / (1 + k[i,1]));
else
p1 = K / (1 + k[i,1]);
end
end
# %------------------------------
# % p2
if (i2 == 1)
if (j2 == 1)
p2 = (1 - K * k[i,2] / (1 + k[i,2]));
end
if (j2 == 2)
p2 = K * k[i,2] / (1 + k[i,2]);
end
if (j2 == 3)
p2 = 0;
end
end
if (i2 == 2)
if (j2 == 1)
p2 = (K / (1 + k[i,2])) * (1 - pc1c2);
end
if (j2 == 2)
p2 = (1 - K / (1 + k[i,2])) * (1 - pc1c2);
end
if (j2 == 3)
p2 = pc1c2;
end
end
if (i2 == 3)
if (j2 == 1)
p2 = 0;
end
if (j2 == 2)
p2 = pc2c1;
end
if (j2 == 3)
p2 = 1 - pc2c1;
end
end
# %----------------------------------------
# % p3
if (i3 == 1)
if (j3 == 1)
p3 = (1 - K * k[i,3] / (1 + k[i,3]));
end
if (j3 == 2)
p3 = K * k[i,3] / (1 + k[i,3]);
end
if (j3 == 3)
p3 = 0;
end
end
if (i3 == 2)
if (j3 == 1)
p3 = (K / (1 + k[i,3])) * (1 - pc1c2);
end
if (j3 == 2)
p3 = (1 - K / (1 + k[i,3])) * (1 - pc1c2);
end
if (j3 == 3)
p3 = pc1c2;
end
end
if (i3 == 3)
if (j3 == 1)
p3 = 0;
end
if (j3 == 2)
p3 = pc2c1;
end
if (j3 == 3)
p3 = 1 - pc2c1;
end
end
# %----------------------------
# %p4
if (i4 == 1)
if (j4 == 1)
p4 = (1 - K * k[i,4] / (1 + k[i,4]));
else
p4 = K * k[i,4] / (1 + k[i,4]);
end
else
if (j4 == 2)
p4 = (1 - K / (1 + k[i,4]));
else
p4 = K / (1 + k[i,4]);
end
end
PPP[j,i] = p1 * p2 * p3 * p4;
end
end
end
end
end
end
end
end
d_g = 100000;
g_old = 100000;
g_final = 0;
i = 0;
tmp = similar(p);
# ggg = zeros(MVector{36,Float64});
# ggg = 1 ./ sum( (1 ./ g), dims=2 ); #%sums rows
fill!(gg, 0.0);
row_sum!(gg, g)
while (d_g / (g_old + g_final) > .001e-5)
i = i + 1;
# p=p*PPP;
mul!(tmp, PPP, p)
p, tmp = tmp, p;
g_final = dot(gg, p); # (p*ggg)[1];
d_g = abs(g_old - g_final);
g_old = g_final;
end
return g_final, i;
end
@inline function row_sum!(buffer, g) #fill the buffer in a loop -> no allocations
for j = 1:size(g, 2), i = 1:size(g, 1)
@inbounds buffer[i] += 1 / g[i, j];
end
buffer .= 1 ./ buffer;
end
# end
function getGj_Real(N, vj, par)
gj = zeros(Real, N);
pc1c2 = 0;
pc2c1 = 0;
pc1c2 = 0.01;
pc2c1 = 0.001;
ppp = zeros(Real, 36);
ppp[1] = 1;
iter = 0;
for i = 1:N
gj[i], .. = MC36SM_Mindaugo_SS_Real(vj[i], par, ppp, pc1c2, pc2c1);
end
return gj
end
function getGj_Float(N, vj, par)
gj = zeros(N);
pc1c2 = 0;
pc2c1 = 0;
pc1c2 = 0.01;
pc2c1 = 0.001;
ppp = zeros(36);
ppp[1] = 1;
iter = 0;
for i = 1:N
gj[i], .. = MC36SM_Mindaugo_SS(vj[i], par, ppp, pc1c2, pc2c1);
end
return gj
end
parAllFast = [ .15 20 20e-9 2e-9 1e10 1e10 .5 -1
.15 20 20e-9 2e-9 1e10 1e10 .5 1 ];
parAllSlow = [ .1 30 40e-9 1e-15 1e10 1e10 .5 -1
.1 30 40e-9 1e-15 1e10 1e10 .5 1 ];
par = [parAllFast; parAllSlow];
N = 100;
vj_e = collect(LinRange(0, 120, N));
@btime getGj_Float(N, vj_e, par);
@btime getGj_Real(N, vj_e, par);
Maybe someone could comment about such problem?