Thanks to all, now Julia code runs faster than C++ compiled with default flags, but slower than C++ with optimizations enabled.
C++ with default flags:
g++ testMC36SM.cpp -o runTest
./runTest
Done. 148956 iterations.
Time required for execution: 0.652836 seconds.
C++ with optimization flags enabled:
g++ testMC36SM.cpp -O3 -march=native -o runTest
./runTest
Done. 149301 iterations.
Time required for execution: 0.112877 seconds.
Julia:
@time vj,gj=run();
done. 149301 iterations.
0.352668 seconds (1.35 M allocations: 314.194 MiB, 8.52% gc time)
Results are quite good. But it would be interesting to see if it Julia code could be optimized further to reach closer to C++
Also it is interesting that iterations number slightly differs, despite same initial conditions for C++ and Julia codes. Results slightly differs, about 1%.
Julia code:
using LinearAlgebra
function run()
println("starting..")
vj = LinRange(0, 120, 100);
gj = zeros(length(vj))
parAllFast = [ .15 20 20e-9 2e-9 1e10 1e10 .5 -1;
.15 20 20e-9 2e-9 1e10 1e10 .5 1;]
parAllSlow = [ .1 30 40e-9 1e-15 1e10 1e10 .5 -1;
.1 30 40e-9 1e-15 1e10 1e10 .5 1];
par = [parAllFast; parAllSlow];
pc1c2 = 0;
pc2c1 = 0;
pc1c2 = 0.01;
pc2c1 = 0.001;
ppp = zeros(1, 36);
ppp[1] = 1;
iter = 0;
for i = 1:length(vj)
gj[i], ii = MC36SM_Mindaugo_SS(vj[i], par, ppp, pc1c2, pc2c1);
iter += ii;
end
println("done. ", iter)
return vj, gj
end
function MC36SM_Mindaugo_SS(vj::Float64, par::Array{Float64,2}, p::Array{Float64}, pc1c2::Float64, pc2c1::Float64)
PPP = zeros(36,36);
p1 = 0;
p2 = 0;
p3 = 0;
p4 = 0;
A = par[:,1]';
v0 = par[:,2]';
Go = par[:,3]';
Gc = par[:,4]';
Ro = par[:,5]';
Rc = par[:,6]';
Pt = par[:,7]';
P = par[:,8]';;
K = Pt[1];
g = zeros(36,4);
v = zeros(36,4);
k = zeros(36,4);
R = zeros(36,4);
# @bp
# //states conductances
for i1=1:2
for i2=1:3
for i3=1:3
for i4=1:2
if (i1==1)
g[(i1-1)*18+(i2-1)*6+(i3-1)*2+(i4-1)*1+1,1]=Go[1];
R[(i1-1)*18+(i2-1)*6+(i3-1)*2+(i4-1)*1+1,1]=Ro[1];
else
g[(i1-1)*18+(i2-1)*6+(i3-1)*2+(i4-1)*1+1,1]=Gc[1];
R[(i1-1)*18+(i2-1)*6+(i3-1)*2+(i4-1)*1+1,1]=Rc[1];
end
if (i2==1)
g[(i1-1)*18+(i2-1)*6+(i3-1)*2+(i4-1)*1+1,2]=Go[2];
R[(i1-1)*18+(i2-1)*6+(i3-1)*2+(i4-1)*1+1,2]=Ro[2];
else
g[(i1-1)*18+(i2-1)*6+(i3-1)*2+(i4-1)*1+1,2]=Gc[2];
R[(i1-1)*18+(i2-1)*6+(i3-1)*2+(i4-1)*1+1,2]=Rc[2];
end
if (i3==1)
g[(i1-1)*18+(i2-1)*6+(i3-1)*2+(i4-1)*1+1,3]=Go[3];
R[(i1-1)*18+(i2-1)*6+(i3-1)*2+(i4-1)*1+1,3]=Ro[3];
else
g[(i1-1)*18+(i2-1)*6+(i3-1)*2+(i4-1)*1+1,3]=Gc[3];
R[(i1-1)*18+(i2-1)*6+(i3-1)*2+(i4-1)*1+1,3]=Rc[3];
end
if (i4==1)
g[(i1-1)*18+(i2-1)*6+(i3-1)*2+(i4-1)*1+1,4]=Go[4];
R[(i1-1)*18+(i2-1)*6+(i3-1)*2+(i4-1)*1+1,4]=Ro[4];
else
g[(i1-1)*18+(i2-1)*6+(i3-1)*2+(i4-1)*1+1,4]=Gc[4];
R[(i1-1)*18+(i2-1)*6+(i3-1)*2+(i4-1)*1+1,4]=Rc[4];
end
end
end
end
end
for a = 1:4 #// voltages and rectification
@inbounds gg = 1 ./ sum( (1 ./ g), dims = 2 ); #sums rows
@inbounds v=vj*[gg gg gg gg]./g;
g=g .* exp.(v./R);
end
for i = 1:36
for j = 1:4
@inbounds k[i,j] = exp(A[j] * (v[i,j] * P[j] - v0[j]));
end
end
# //------------Matrica P-------------------------------------------------
for i1=1:2 for i2=1:3 for i3=1:3 for i4=1:2
for j1=1:2 for j2=1:3 for j3=1:3 for j4=1:2
i=(i1-1)*18+(i2-1)*6+(i3-1)*2+(i4-1)*1+1;
j=(j1-1)*18+(j2-1)*6+(j3-1)*2+(j4-1)*1+1;
# %-------be Pt---------------------
# % p1
if (i1==1)
if (j1==1)
p1=(1-K*k[i,1]/(1+k[i,1]));
else
p1=K*k[i,1]/(1+k[i,1]);
end
else
if (j1==2)
p1=(1-K/(1+k[i,1]));
else
p1=K/(1+k[i,1]);
end
end
# %------------------------------
# % p2
if (i2==1)
if (j2==1)
p2=(1-K*k[i,2]/(1+k[i,2]));
end
if (j2==2)
p2=K*k[i,2]/(1+k[i,2]);
end
if (j2==3)
p2=0;
end
end
if (i2==2)
if (j2==1)
p2=(K/(1+k[i,2]))*(1-pc1c2);
end
if (j2==2)
p2=(1-K/(1+k[i,2]))*(1-pc1c2);
end
if (j2==3)
p2=pc1c2;
end
end
if (i2==3)
if (j2==1)
p2=0;
end
if (j2==2)
p2=pc2c1;
end
if (j2==3)
p2=1-pc2c1;
end
end
# %----------------------------------------
# % p3
if (i3==1)
if (j3==1)
p3=(1-K*k[i,3]/(1+k[i,3]));
end
if (j3==2)
p3=K*k[i,3]/(1+k[i,3]);
end
if (j3==3)
p3=0;
end
end
if (i3==2)
if (j3==1)
p3=(K/(1+k[i,3]))*(1-pc1c2);
end
if (j3==2)
p3=(1-K/(1+k[i,3]))*(1-pc1c2);
end
if (j3==3)
p3=pc1c2;
end
end
if (i3==3)
if (j3==1)
p3=0;
end
if (j3==2)
p3=pc2c1;
end
if (j3==3)
p3=1-pc2c1;
end
end
# %----------------------------
# %p4
if (i4==1)
if (j4==1)
p4=(1-K*k[i,4]/(1+k[i,4]));
else
p4=K*k[i,4]/(1+k[i,4]);
end
else
if (j4==2)
p4=(1-K/(1+k[i,4]));
else
p4=K/(1+k[i,4]);
end
end
PPP[i,j]=p1*p2*p3*p4;
end
end
end
end
end
end
end
end
d_g = 100000;
g_old = 100000;
g_final = 0;
i = 0;
# @bp
tmp = similar(p);
while (d_g / (g_old + g_final) > .001e-5)
i = i + 1;
# p=p*PPP;
mul!(tmp, p, PPP)
p, tmp = tmp, p;
ggg = 1 ./ sum( (1 ./ g), dims=2 ); #%sums rows
g_final = dot(ggg, p); # (p*ggg)[1];
d_g = abs(g_old - g_final);
g_old = g_final;
# print(" ",i,", ", d_g / (g_old + g_final), " ")
end
# print(i, " ")
# println(i)
# @bp
return g_final, i;
end
# end
C++ code:
#include <math.h>
#include <iostream>
#include <time.h>
#include <fstream>
double MC36SM_Mindaugo_SS(double vj, double *par, double *p, double Pt, double pc1c2, double pc2c1, int &iter)
{
double *A = par;
double *v0 = par + 1 * 4;
double *Go = par + 2 * 4;
double *Gc = par + 3 * 4;
double *Ro = par + 4 * 4;
double *Rc = par + 5 * 4;
// double *Pt = par + 6 * 4; //not used
double *P = par + 7 * 4;
double g[36][4] = {};// initialize to 0
double v[36][4] = {};
double k[36][4] = {};
double R[36][4] = {};
//states conductances
for (int i1 = 0; i1 < 2; i1++)
for (int i2 = 0; i2 < 3; i2++)
for (int i3 = 0; i3 < 3; i3++)
for (int i4 = 0; i4 < 2; i4++)
{
if (i1 == 0)
{
g[i1 * 18 + i2 * 6 + i3 * 2 + i4 * 1][0] = Go[0];
R[i1 * 18 + i2 * 6 + i3 * 2 + i4 * 1][0] = Ro[0];
}
else
{
g[i1 * 18 + i2 * 6 + i3 * 2 + i4 * 1][0] = Gc[0];
R[i1 * 18 + i2 * 6 + i3 * 2 + i4 * 1][0] = Rc[0];
}
if (i2 == 0)
{
g[i1 * 18 + i2 * 6 + i3 * 2 + i4 * 1][1] = Go[1];
R[i1 * 18 + i2 * 6 + i3 * 2 + i4 * 1][1] = Ro[1];
}
else
{
g[i1 * 18 + i2 * 6 + i3 * 2 + i4 * 1][1] = Gc[1];
R[i1 * 18 + i2 * 6 + i3 * 2 + i4 * 1][1] = Rc[1];
}
if (i3 == 0)
{
g[i1 * 18 + i2 * 6 + i3 * 2 + i4 * 1][2] = Go[2];
R[i1 * 18 + i2 * 6 + i3 * 2 + i4 * 1][2] = Ro[2];
}
else
{
g[i1 * 18 + i2 * 6 + i3 * 2 + i4 * 1][2] = Gc[2];
R[i1 * 18 + i2 * 6 + i3 * 2 + i4 * 1][2] = Rc[2];
}
if (i4 == 0)
{
g[i1 * 18 + i2 * 6 + i3 * 2 + i4 * 1][3] = Go[3];
R[i1 * 18 + i2 * 6 + i3 * 2 + i4 * 1][3] = Ro[3];
}
else
{
g[i1 * 18 + i2 * 6 + i3 * 2 + i4 * 1][3] = Gc[3];
R[i1 * 18 + i2 * 6 + i3 * 2 + i4 * 1][3] = Rc[3];
}
}
double h[36][4] = {};
for (int i = 0; i < 4; i++) for (int j = 0; j < 36; j++) h[j][i] = g[j][i];
for (int a = 0; a < 4; a++) // voltages and rectification
{
double gg[36] = {};
for (int i = 0; i < 36; i++) //gg = 1. / sum(1. / g, 2); %sums rows
{
for (int j = 0; j < 4; j++)
gg[i] += 1 / g[i][j];
gg[i] = 1 / gg[i];
}
for (int i = 0; i < 4; i++) //v=vj*[gg gg gg gg]./g;
for (int j = 0; j < 36; j++)
v[j][i] = vj * gg[j] / g[j][i];
for (int i = 0; i < 4; i++) //g=g.*exp(v./R);
for (int j = 0; j < 36; j++)
g[j][i] = h[j][i] * exp(v[j][i] / R[j][i]);
}
for (int i = 0; i < 36; i++) //k=exp(repmat(A,16,1).*(v.*repmat(P,16,1)-repmat(v0,16,1)));
for (int j = 0; j < 4; j++)
k[i][j] = exp(A[j] * (v[i][j] * P[j] - v0[j]));
double K = Pt;
double PPP[36][36] = {};
//------------Matrica P-------------------------------------------------
double l = pc1c2 / pc2c1;
double p1, p2, p3, p4;
for (int i1 = 0; i1 < 2; i1++)
for (int i2 = 0; i2 < 3; i2++)
for (int i3 = 0; i3 < 3; i3++)
for (int i4 = 0; i4 < 2; i4++)
for (int j1 = 0; j1 < 2; j1++)
for (int j2 = 0; j2 < 3; j2++)
for (int j3 = 0; j3 < 3; j3++)
for (int j4 = 0; j4 < 2; j4++)
{
int i = i1 * 18 + i2 * 6 + i3 * 2 + i4 * 1;
int j = j1 * 18 + j2 * 6 + j3 * 2 + j4 * 1;
////////////// p1
if (i1 == 0)
{
if (j1 == 0)
p1 = (1 - K * k[i][0] / (1 + k[i][0]));
else
p1 = K * k[i][0] / (1 + k[i][0]);
}
else
{
if (j1 == 1)
p1 = (1 - K / (1 + k[i][0]));
else
p1 = K / (1 + k[i][0]);
}
///////////////p2
if (i2 == 0)
{
if (j2 == 0)
p2 = (1 - K * k[i][1] / (1 + k[i][1]));
if (j2 == 1)
p2 = K * k[i][1] / (1 + k[i][1]);
if (j2 == 2)
p2 = 0;
}
else if (i2 == 1)
{
if (j2 == 0)
p2 = (K / (1 + k[i][1]))*(1 - pc1c2);
if (j2 == 1)
p2 = (1 - K / (1 + k[i][1]))*(1 - pc1c2);
if (j2 == 2)
p2 = pc1c2;
}
else if (i2 == 2)
{
if (j2 == 0)
p2 = 0;
if (j2 == 1)
p2 = pc2c1;
if (j2 == 2)
p2 = 1 - pc2c1;
}
////////////////p3
if (i3 == 0)
{
if (j3 == 0)
p3 = (1 - K * k[i][2] / (1 + k[i][2]));
if (j3 == 1)
p3 = K * k[i][2] / (1 + k[i][2]);
if (j3 == 2)
p3 = 0;
}
else if (i3 == 1)
{
if (j3 == 0) //c1->o
p3 = (K / (1 + k[i][2]))*(1 - pc1c2);
if (j3 == 1)
p3 = (1 - K / (1 + k[i][2]))*(1 - pc1c2);
if (j3 == 2)
p3 = pc1c2;
}
else if (i3 == 2)
{
if (j3 == 0)
p3 = 0;
if (j3 == 1)
p3 = pc2c1;
if (j3 == 2)
p3 = 1 - pc2c1;
}
/////////////////p4
if (i4 == 0)
{
if (j4 == 0)
p4 = (1 - K * k[i][3] / (1 + k[i][3]));
else
p4 = K * k[i][3] / (1 + k[i][3]);
}
else
{
if (j4 == 1)
p4 = (1 - K / (1 + k[i][3]));
else
p4 = K / (1 + k[i][3]);
}
PPP[i][j] = p1 * p2*p3*p4;
}
// -------------- SS skaiciavimas
double d_g = 100000;
double g_old = 100000;
double g_final = 0;
int i = 0;
while (d_g / (g_old + g_final) > .001e-5)
{
i++;
double q[36] = {};
for (int j = 0; j < 36; j++) for (int i = 0; i < 36; i++) q[j] += p[i] * PPP[i][j]; // q=p*PPP;
double ggg[36] = {};
for (int i = 0; i < 36; i++) //ggg = 1. / sum(1. / g, 2); %sums rows
{
for (int j = 0; j < 4; j++) ggg[i] += 1 / g[i][j];
ggg[i] = 1 / ggg[i];
}
g_final = 0;
for (int j = 0; j < 36; j++) //g_final = q*ggg';
g_final += q[j] * ggg[j];
for (int i = 0; i < 36; i++) p[i] = q[i]; //grazina p
d_g = fabs(g_old - g_final);
g_old = g_final;
}
iter += i;
return g_final;
}
int main()
{
const int T = 100;
double gj[T] = {};
double vj[T] = {};
double par[4*8] = { .15, .15, .1, .1, 20, 20, 30, 30, 20e-9, 20e-9, 40e-9, 40e-9, 2e-9, 2e-9, 1e-15, 1e-15,
1e10, 1e10, 1e10, 1e10, 1e10, 1e10, 1e10, 1e10, 0.5, 0.5, 0.5, 0.5, -1, 1, -1, 1 };
// parAllFast = [ .15 20 20e-9 2e-9 1e-15 1e-15 .5 -1;
// .15 20 20e-9 2e-9 1e-15 1e-15 .5 1;]
// parAllSlow = [ .1 30 40e-9 1e-15 1e10 1e10 .5 -1;
// .1 30 40e-9 1e-15 1e10 1e10 .5 1];
// double par[4*8] = { .15, 28, 24e-9, 24e-9/10, 800, 50, .01, -1, .1, 36, 140e-9, 140e-9/10, 1e10, 1e10, .01, 1,
// .15, 28, 24e-9, 24e-9/10, 800, 50, .01, -1, .1, 36, 140e-9, 140e-9/10, 1e10, 1e10, .01, 1 };
double p[36] = {};
p[0] = 1;
std::cout << "Starting..\n";
for (int t = 0; t < T; t++)
{
vj[t] = (double) 120 * t / T;
}
clock_t t_start, t_end;
t_start = clock();
int iter = 0;
for (int t = 0; t < T; t++)
{
gj[t] = MC36SM_Mindaugo_SS(vj[t],par,p,0.5,0.01,0.001,iter);
}
t_end = clock();
std::cout << "Time required for execution: "
<< (double)(t_end - t_start) / CLOCKS_PER_SEC
<< " seconds." << "\n";
std::ofstream outfile("outfile.txt");
for (int t = 0; t < T; t++)
{
outfile << vj[t] << '\t' << gj[t] << std::endl;
}
std::cout << "Done. " << iter << "\n";
outfile.close();
return 0;
}