# Benchmark inputs, created as (non-const) globals.
w = rand(300, 300);      # 300x300 matrix
x = rand(300, 300);      # 300x300 matrix
r = vec(rand(300, 1));   # 300x1 matrix flattened with `vec`
b = rand(300, 1);        # 300x1 matrix
j = 12;                  # column index used by the benchmarks
N = 300;                 # row count passed to the loop-based helper
# Baseline benchmark: evaluates the vectorised expression
#   sum(w[:,j] .* (r + b[j]*x[:,j]))
# one million times and returns the accumulated total.
# Reads the globals `w`, `r`, `b`, `x` and `j` directly.
function orig()
    total = 0.0
    for iter in 1:1000000
        total = total + sum(w[:,j].*(r + b[j]*x[:,j]))
    end
    return total
end

So I wrote a helper function to speed it up:

# Loop-based benchmark: computes
#   s = sum over i in 1:N of w[i,j] * (r[i] + b[j]*x[i,j])
# one million times and returns the accumulated total.
#
# Arguments:
#   w, x - matrices indexed as [i, j]
#   r    - vector indexed as [i]
#   b    - array indexed as b[j]
#   j    - column index
#   N    - number of rows to sum over
#
# Throws a BoundsError if rows 1:N or column j fall outside `w`, `r` or `x`
# (or if `j` is outside `b`). The check runs once, before the `@inbounds`
# loop, so a bad `N`/`j` cannot cause silent out-of-bounds reads.
function w_r_b_x_n(w,r,b,x,j,N)
    # Validate every index the @inbounds loop will touch.
    checkbounds(w, 1:N, j)
    checkbounds(x, 1:N, j)
    checkbounds(r, 1:N)
    bj = b[j]  # loop-invariant; bounds-checked access

    sAll = 0.0
    for k in 1:1000000
        s = 0.0
        for i in 1:N
            @inbounds s += (w[i,j]*(r[i] + bj*x[i,j]))
        end
        sAll = sAll + s
    end
    return sAll
end

I wrote a bunch of similar magical helper functions. Can the compiler automatically optimize this code?

# Benchmark inputs, created as (non-const) globals.
w = rand(300, 300);      # 300x300 matrix
x = rand(300, 300);      # 300x300 matrix
r = vec(rand(300, 1));   # 300x1 matrix flattened with `vec`
b = rand(300, 1);        # 300x1 matrix
j = 12;                  # column index used by the benchmarks
N = 300;                 # row count passed to the loop-based helper
# Baseline benchmark: evaluates the vectorised expression
#   sum(w[:,j] .* (r + b[j]*x[:,j]))
# one million times and returns the accumulated total.
# Reads the globals `w`, `r`, `b`, `x` and `j` directly.
function orig()
    total = 0.0
    for iter in 1:1000000
        total = total + sum(w[:,j].*(r + b[j]*x[:,j]))
    end
    return total
end
# Loop-based benchmark: computes
#   s = sum over i in 1:N of w[i,j] * (r[i] + b[j]*x[i,j])
# one million times and returns the accumulated total.
#
# Arguments:
#   w, x - matrices indexed as [i, j]
#   r    - vector indexed as [i]
#   b    - array indexed as b[j]
#   j    - column index
#   N    - number of rows to sum over
#
# Throws a BoundsError if rows 1:N or column j fall outside `w`, `r` or `x`
# (or if `j` is outside `b`). The check runs once, before the `@inbounds`
# loop, so a bad `N`/`j` cannot cause silent out-of-bounds reads.
function w_r_b_x_n(w,r,b,x,j,N)
    # Validate every index the @inbounds loop will touch.
    checkbounds(w, 1:N, j)
    checkbounds(x, 1:N, j)
    checkbounds(r, 1:N)
    bj = b[j]  # loop-invariant; bounds-checked access

    sAll = 0.0
    for k in 1:1000000
        s = 0.0
        for i in 1:N
            @inbounds s += (w[i,j]*(r[i] + bj*x[i,j]))
        end
        sAll = sAll + s
    end
    return sAll
end
# Time both variants with `@time`, which prints elapsed time and allocation totals.
# NOTE(review): the first call of each function presumably includes JIT
# compilation time; run each line twice to compare steady-state timings.
@time orig()
@time w_r_b_x_n(w,r,b,x,j,N)

Please add three backticks like this ``` before and after your code.

You don't need to write all your code as explicit loops to get good performance. You can use the built-in `sum` function, which is very fast and more accurate than a naive accumulation loop; just take care to avoid unnecessary allocations. Write the sum like this:

# Fused broadcast (`@.`) over column views (`@views`) of `w` and `x`, reduced with `sum`.
sum( @. @views w[:,j] * (r + b[j]*x[:,j]) )

and it will be almost as fast as the loop.

Currently, @views still allocates, but this may change in the future and you will not need to write loops for performance.