Thanks to all, now Julia code runs faster than C++ compiled with default flags, but slower than C++ with optimizations enabled.
C++ with default flags:
g++ testMC36SM.cpp -o runTest                   
./runTest                     
Done. 148956 iterations.
Time required for execution: 0.652836 seconds.
C++ with optimization flags enabled:
g++ testMC36SM.cpp -O3 -march=native -o runTest
./runTest
Done. 149301 iterations.
Time required for execution: 0.112877 seconds.
Julia:
@time vj,gj=run();
done. 149301 iterations.
  0.352668 seconds (1.35 M allocations: 314.194 MiB, 8.52% gc time)
Results are quite good. But it would be interesting to see if it Julia code could be optimized further to reach closer to C++ 
Also it is interesting that iterations number slightly differs, despite same initial conditions for C++ and Julia codes. Results slightly differs, about 1%.
Julia code:
using LinearAlgebra
function run()
    println("starting..")
    vj = LinRange(0, 120, 100);
    gj = zeros(length(vj))
    parAllFast = [ .15 20 20e-9 2e-9  1e10  1e10  .5 -1;
                   .15 20 20e-9 2e-9  1e10  1e10  .5  1;]
    parAllSlow = [ .1  30 40e-9 1e-15 1e10  1e10  .5 -1;
                   .1  30 40e-9 1e-15 1e10  1e10  .5  1];
    par = [parAllFast; parAllSlow];
    pc1c2 = 0;
    pc2c1 = 0;
    pc1c2 = 0.01;
    pc2c1 = 0.001;
    ppp = zeros(1, 36);
    ppp[1] = 1;
    iter = 0;
    for i = 1:length(vj)
        gj[i], ii = MC36SM_Mindaugo_SS(vj[i], par, ppp, pc1c2, pc2c1);
        iter += ii;
    end
    println("done. ", iter)
    return vj, gj
end
function MC36SM_Mindaugo_SS(vj::Float64, par::Array{Float64,2}, p::Array{Float64}, pc1c2::Float64, pc2c1::Float64)
    PPP = zeros(36,36);
    p1 = 0;
    p2 = 0;
    p3 = 0;
    p4 = 0;
    A = par[:,1]';
    v0 = par[:,2]';
    Go = par[:,3]';
    Gc = par[:,4]';
    Ro = par[:,5]';
    Rc = par[:,6]';
    Pt = par[:,7]';
    P = par[:,8]';;
   K = Pt[1];
   g = zeros(36,4);
   v = zeros(36,4);
   k = zeros(36,4);
   R = zeros(36,4);
#    @bp
   # //states conductances
   for i1=1:2
       for i2=1:3
           for i3=1:3
               for i4=1:2
                   if (i1==1)
                       g[(i1-1)*18+(i2-1)*6+(i3-1)*2+(i4-1)*1+1,1]=Go[1];
                       R[(i1-1)*18+(i2-1)*6+(i3-1)*2+(i4-1)*1+1,1]=Ro[1];
                   else
                       g[(i1-1)*18+(i2-1)*6+(i3-1)*2+(i4-1)*1+1,1]=Gc[1];
                       R[(i1-1)*18+(i2-1)*6+(i3-1)*2+(i4-1)*1+1,1]=Rc[1];
                   end
                   if (i2==1)
                       g[(i1-1)*18+(i2-1)*6+(i3-1)*2+(i4-1)*1+1,2]=Go[2];
                       R[(i1-1)*18+(i2-1)*6+(i3-1)*2+(i4-1)*1+1,2]=Ro[2];
                   else
                       g[(i1-1)*18+(i2-1)*6+(i3-1)*2+(i4-1)*1+1,2]=Gc[2];
                       R[(i1-1)*18+(i2-1)*6+(i3-1)*2+(i4-1)*1+1,2]=Rc[2];
                   end
                   if (i3==1)
                       g[(i1-1)*18+(i2-1)*6+(i3-1)*2+(i4-1)*1+1,3]=Go[3];
                       R[(i1-1)*18+(i2-1)*6+(i3-1)*2+(i4-1)*1+1,3]=Ro[3];
                   else
                       g[(i1-1)*18+(i2-1)*6+(i3-1)*2+(i4-1)*1+1,3]=Gc[3];
                       R[(i1-1)*18+(i2-1)*6+(i3-1)*2+(i4-1)*1+1,3]=Rc[3];
                   end
                   if (i4==1)
                       g[(i1-1)*18+(i2-1)*6+(i3-1)*2+(i4-1)*1+1,4]=Go[4];
                       R[(i1-1)*18+(i2-1)*6+(i3-1)*2+(i4-1)*1+1,4]=Ro[4];
                   else
                       g[(i1-1)*18+(i2-1)*6+(i3-1)*2+(i4-1)*1+1,4]=Gc[4];
                       R[(i1-1)*18+(i2-1)*6+(i3-1)*2+(i4-1)*1+1,4]=Rc[4];
                   end
               end
           end
       end
   end
   for a = 1:4 #// voltages and rectification
    @inbounds gg = 1 ./ sum( (1 ./ g), dims = 2 ); #sums rows
    @inbounds v=vj*[gg gg gg gg]./g;
       g=g .* exp.(v./R);
   end
   for i = 1:36
       for j = 1:4
           @inbounds k[i,j] = exp(A[j] * (v[i,j] * P[j] - v0[j]));
       end
   end
   # //------------Matrica P-------------------------------------------------
   for i1=1:2 for i2=1:3 for i3=1:3 for i4=1:2
    for j1=1:2 for j2=1:3 for j3=1:3 for j4=1:2
      i=(i1-1)*18+(i2-1)*6+(i3-1)*2+(i4-1)*1+1;
      j=(j1-1)*18+(j2-1)*6+(j3-1)*2+(j4-1)*1+1;
      # %-------be Pt---------------------
      # % p1
       if (i1==1)
           if (j1==1)
               p1=(1-K*k[i,1]/(1+k[i,1]));
           else
               p1=K*k[i,1]/(1+k[i,1]);
           end
       else
           if (j1==2)
               p1=(1-K/(1+k[i,1]));
           else
               p1=K/(1+k[i,1]);
           end
       end
   # %------------------------------
   #   % p2
       if (i2==1)
           if (j2==1)
               p2=(1-K*k[i,2]/(1+k[i,2]));
           end
           if (j2==2)
               p2=K*k[i,2]/(1+k[i,2]);
           end
           if (j2==3)
               p2=0;
           end
       end
       if (i2==2)
           if (j2==1)
               p2=(K/(1+k[i,2]))*(1-pc1c2);
           end
           if (j2==2)
               p2=(1-K/(1+k[i,2]))*(1-pc1c2);
           end
           if (j2==3)
               p2=pc1c2;
           end
       end
       if (i2==3)
           if (j2==1)
               p2=0;
           end
           if (j2==2)
               p2=pc2c1;
           end
           if (j2==3)
               p2=1-pc2c1;
           end
       end
     # %----------------------------------------
     # % p3
       if (i3==1)
           if (j3==1)
               p3=(1-K*k[i,3]/(1+k[i,3]));
           end
           if (j3==2)
               p3=K*k[i,3]/(1+k[i,3]);
           end
           if (j3==3)
               p3=0;
           end
       end
       if (i3==2)
           if (j3==1)
               p3=(K/(1+k[i,3]))*(1-pc1c2);
           end
           if (j3==2)
               p3=(1-K/(1+k[i,3]))*(1-pc1c2);
           end
           if (j3==3)
               p3=pc1c2;
           end
       end
       if (i3==3)
           if (j3==1)
               p3=0;
           end
           if (j3==2)
               p3=pc2c1;
           end
           if (j3==3)
               p3=1-pc2c1;
           end
       end
     # %----------------------------
     # %p4
       if (i4==1)
           if (j4==1)
               p4=(1-K*k[i,4]/(1+k[i,4]));
           else
               p4=K*k[i,4]/(1+k[i,4]);
           end
       else
           if (j4==2)
               p4=(1-K/(1+k[i,4]));
           else
               p4=K/(1+k[i,4]);
           end
       end
    PPP[i,j]=p1*p2*p3*p4;
   end
   end
   end
   end
   end
   end
   end
   end
   d_g = 100000;
   g_old = 100000;
   g_final = 0;
   i = 0;
#    @bp
   tmp = similar(p);
   while (d_g / (g_old + g_final) > .001e-5)
       i = i + 1;
    #    p=p*PPP;
       mul!(tmp, p, PPP) 
       p, tmp = tmp, p;
       ggg = 1 ./ sum( (1 ./ g), dims=2 ); #%sums rows
       g_final = dot(ggg, p); # (p*ggg)[1];
       d_g = abs(g_old - g_final);
       g_old = g_final;
    #    print(" ",i,", ", d_g / (g_old + g_final), " ")
   end
#    print(i, " ")
#    println(i)
#    @bp
   return g_final, i;
end
# end
C++ code:
#include <math.h>
#include <iostream>
#include <time.h>
#include <fstream>
double MC36SM_Mindaugo_SS(double vj, double *par, double *p, double Pt, double pc1c2, double pc2c1, int &iter)
{
	double *A = par;
	double *v0 = par + 1 * 4;
	double *Go = par + 2 * 4;
	double *Gc = par + 3 * 4;
	double *Ro = par + 4 * 4;
	double *Rc = par + 5 * 4;
	// double *Pt = par + 6 * 4; //not used
	double *P = par + 7 * 4;
	double g[36][4] = {};// initialize to 0
	double v[36][4] = {};
	double k[36][4] = {};
	double R[36][4] = {};
	//states conductances
	for (int i1 = 0; i1 < 2; i1++)
		for (int i2 = 0; i2 < 3; i2++)
			for (int i3 = 0; i3 < 3; i3++)
				for (int i4 = 0; i4 < 2; i4++)
				{
					if (i1 == 0)
					{
						g[i1 * 18 + i2 * 6 + i3 * 2 + i4 * 1][0] = Go[0];
						R[i1 * 18 + i2 * 6 + i3 * 2 + i4 * 1][0] = Ro[0];
					}
					else
					{
						g[i1 * 18 + i2 * 6 + i3 * 2 + i4 * 1][0] = Gc[0];
						R[i1 * 18 + i2 * 6 + i3 * 2 + i4 * 1][0] = Rc[0];
					}
					if (i2 == 0)
					{
						g[i1 * 18 + i2 * 6 + i3 * 2 + i4 * 1][1] = Go[1];
						R[i1 * 18 + i2 * 6 + i3 * 2 + i4 * 1][1] = Ro[1];
					}
					else
					{
						g[i1 * 18 + i2 * 6 + i3 * 2 + i4 * 1][1] = Gc[1];
						R[i1 * 18 + i2 * 6 + i3 * 2 + i4 * 1][1] = Rc[1];
					}
					if (i3 == 0)
					{
						g[i1 * 18 + i2 * 6 + i3 * 2 + i4 * 1][2] = Go[2];
						R[i1 * 18 + i2 * 6 + i3 * 2 + i4 * 1][2] = Ro[2];
					}
					else
					{
						g[i1 * 18 + i2 * 6 + i3 * 2 + i4 * 1][2] = Gc[2];
						R[i1 * 18 + i2 * 6 + i3 * 2 + i4 * 1][2] = Rc[2];
					}
					if (i4 == 0)
					{
						g[i1 * 18 + i2 * 6 + i3 * 2 + i4 * 1][3] = Go[3];
						R[i1 * 18 + i2 * 6 + i3 * 2 + i4 * 1][3] = Ro[3];
					}
					else
					{
						g[i1 * 18 + i2 * 6 + i3 * 2 + i4 * 1][3] = Gc[3];
						R[i1 * 18 + i2 * 6 + i3 * 2 + i4 * 1][3] = Rc[3];
					}
				}
	double h[36][4] = {};
	for (int i = 0; i < 4; i++) 	for (int j = 0; j < 36; j++)		h[j][i] = g[j][i];
	for (int a = 0; a < 4; a++) // voltages and rectification
	{
		double gg[36] = {};
		for (int i = 0; i < 36; i++) 		//gg = 1. / sum(1. / g, 2); %sums rows
		{
			for (int j = 0; j < 4; j++)
				gg[i] += 1 / g[i][j];
			gg[i] = 1 / gg[i];
		}
		for (int i = 0; i < 4; i++) //v=vj*[gg gg gg gg]./g;
			for (int j = 0; j < 36; j++)
				v[j][i] = vj * gg[j] / g[j][i];
		for (int i = 0; i < 4; i++) //g=g.*exp(v./R);
			for (int j = 0; j < 36; j++)
				g[j][i] = h[j][i] * exp(v[j][i] / R[j][i]);
	}
	for (int i = 0; i < 36; i++) //k=exp(repmat(A,16,1).*(v.*repmat(P,16,1)-repmat(v0,16,1)));
		for (int j = 0; j < 4; j++)
			k[i][j] = exp(A[j] * (v[i][j] * P[j] - v0[j]));
	double K = Pt;
	double PPP[36][36] = {};
	//------------Matrica P-------------------------------------------------
	double l = pc1c2 / pc2c1;
	double p1, p2, p3, p4;
	for (int i1 = 0; i1 < 2; i1++)
		for (int i2 = 0; i2 < 3; i2++)
			for (int i3 = 0; i3 < 3; i3++)
				for (int i4 = 0; i4 < 2; i4++)
					for (int j1 = 0; j1 < 2; j1++)
						for (int j2 = 0; j2 < 3; j2++)
							for (int j3 = 0; j3 < 3; j3++)
								for (int j4 = 0; j4 < 2; j4++)
								{
									int i = i1 * 18 + i2 * 6 + i3 * 2 + i4 * 1;
									int j = j1 * 18 + j2 * 6 + j3 * 2 + j4 * 1;
									////////////// p1
									if (i1 == 0)
									{
										if (j1 == 0)
											p1 = (1 - K * k[i][0] / (1 + k[i][0]));
										else
											p1 = K * k[i][0] / (1 + k[i][0]);
									}
									else
									{
										if (j1 == 1)
											p1 = (1 - K / (1 + k[i][0]));
										else
											p1 = K / (1 + k[i][0]);
									}
									///////////////p2
									if (i2 == 0)
									{
										if (j2 == 0)
											p2 = (1 - K * k[i][1] / (1 + k[i][1]));
										if (j2 == 1)
											p2 = K * k[i][1] / (1 + k[i][1]);
										if (j2 == 2)
											p2 = 0;
									}
									else if (i2 == 1)
									{
										if (j2 == 0)
											p2 = (K / (1 + k[i][1]))*(1 - pc1c2);
										if (j2 == 1)
											p2 = (1 - K / (1 + k[i][1]))*(1 - pc1c2);
										if (j2 == 2)
											p2 = pc1c2;
									}
									else if (i2 == 2)
									{
										if (j2 == 0)
											p2 = 0;
										if (j2 == 1)
											p2 = pc2c1;
										if (j2 == 2)
											p2 = 1 - pc2c1;
									}
									////////////////p3
									if (i3 == 0)
									{
										if (j3 == 0)
											p3 = (1 - K * k[i][2] / (1 + k[i][2]));
										if (j3 == 1)
											p3 = K * k[i][2] / (1 + k[i][2]);
										if (j3 == 2)
											p3 = 0;
									}
									else if (i3 == 1)
									{
										if (j3 == 0) //c1->o
											p3 = (K / (1 + k[i][2]))*(1 - pc1c2);
										if (j3 == 1)
											p3 = (1 - K / (1 + k[i][2]))*(1 - pc1c2);
										if (j3 == 2)
											p3 = pc1c2;
									}
									else if (i3 == 2)
									{
										if (j3 == 0)
											p3 = 0;
										if (j3 == 1)
											p3 = pc2c1;
										if (j3 == 2)
											p3 = 1 - pc2c1;
									}
									/////////////////p4
									if (i4 == 0)
									{
										if (j4 == 0)
											p4 = (1 - K * k[i][3] / (1 + k[i][3]));
										else
											p4 = K * k[i][3] / (1 + k[i][3]);
									}
									else
									{
										if (j4 == 1)
											p4 = (1 - K / (1 + k[i][3]));
										else
											p4 = K / (1 + k[i][3]);
									}
									PPP[i][j] = p1 * p2*p3*p4;
								}
	// -------------- SS skaiciavimas
	double d_g = 100000;
	double g_old = 100000;
	double g_final = 0;
	int i = 0;
	while (d_g / (g_old + g_final) > .001e-5)
	{
		i++;
		double q[36] = {};
		for (int j = 0; j < 36; j++) for (int i = 0; i < 36; i++)	 q[j] += p[i] * PPP[i][j]; // q=p*PPP;
		double ggg[36] = {};
		for (int i = 0; i < 36; i++) 		//ggg = 1. / sum(1. / g, 2); %sums rows
		{
			for (int j = 0; j < 4; j++)	ggg[i] += 1 / g[i][j];
			ggg[i] = 1 / ggg[i];
		}
		g_final = 0;
		for (int j = 0; j < 36; j++) //g_final = q*ggg';
			g_final += q[j] * ggg[j];
		for (int i = 0; i < 36; i++)	p[i] = q[i]; //grazina p
		d_g = fabs(g_old - g_final);
		g_old = g_final;
	}
	iter += i;
	return g_final;
}
int main()
{
    const int T = 100;
	double gj[T] = {};
	double vj[T] = {};
	double par[4*8] = { .15, .15, .1, .1,   20, 20, 30, 30,   20e-9, 20e-9, 40e-9, 40e-9,  2e-9, 2e-9, 1e-15, 1e-15,
						1e10, 1e10, 1e10, 1e10,    1e10, 1e10, 1e10, 1e10,    0.5, 0.5, 0.5, 0.5,  -1, 1, -1, 1  };
    // parAllFast = [ .15 20 20e-9 2e-9  1e-15 1e-15 .5 -1;
    //                .15 20 20e-9 2e-9  1e-15 1e-15 .5  1;]
    // parAllSlow = [ .1  30 40e-9 1e-15 1e10  1e10  .5 -1;
    //                .1  30 40e-9 1e-15 1e10  1e10  .5  1];						
    // double par[4*8] = { .15, 28, 24e-9, 24e-9/10, 800, 50, .01, -1,   .1, 36, 140e-9, 140e-9/10, 1e10, 1e10, .01, 1,
    //                     .15, 28, 24e-9, 24e-9/10, 800, 50, .01, -1,   .1, 36, 140e-9, 140e-9/10, 1e10, 1e10, .01, 1  };
    double p[36] = {};
    p[0] = 1;
	std::cout << "Starting..\n";
	for (int t = 0; t < T; t++)
	{
			vj[t] = (double) 120 * t / T;
	}
	clock_t t_start, t_end;
	t_start = clock();
	int iter = 0;
	for (int t = 0; t < T; t++)
	{
		gj[t] = MC36SM_Mindaugo_SS(vj[t],par,p,0.5,0.01,0.001,iter);
	}
	t_end = clock();
	std::cout << "Time required for execution: "
		<< (double)(t_end - t_start) / CLOCKS_PER_SEC
		<< " seconds." << "\n";
	std::ofstream outfile("outfile.txt");
	for (int t = 0; t < T; t++)
	{
		outfile << vj[t] << '\t' << gj[t] << std::endl;
	}
	std::cout << "Done. " << iter << "\n";
	outfile.close();
	return 0;
}