# Does eigen( ) get stuck in processing of the results?

I wrote the below code (originally in MATLAB, ported to Julia). Unfortunately, the code runs at pretty much the same speed as the MATLAB original.

``````using LinearAlgebra
using StaticArrays
using DelimitedFiles
using BenchmarkTools
using Profile
using Plots

# --- Bond transform matrix
function BondMatrix2D(d)
b = [cosd(d)     cosd(90 - d);
cosd(90 + d) cosd(d)     ];
M1 = b.^2;
M2 = 2 * b[:, 2] .* b[:, 1];
M4 = b[1,1]*b[2,2] + b[1,2]*b[2,1]
M = [M1    M2;
-M2'/2 M4]
return M
end

# --- Decompose the C matrix ; as this is required repeatedly in AFmatrix and ds3ds1
function CMatrixDecomp(C)
DD = @views (C[2,3]^2-C[2,2]*C[3,3]);
CzziCzx =(@views[-(C[1,3]*C[2,2]-C[1,2]*C[2,3])/DD 1;
(C[1,3]*C[2,3]-C[1,2]*C[3,3])/DD 0]);
CxzCzzi = (@views [-(C[1,3]*C[2,2]-C[1,2]*C[2,3])/DD (C[1,3]*C[2,3]-C[1,2]*C[3,3])/DD;
1                              0]);

CxxmCxzCzziCzx = (@views [(C[1,3]^2*C[2,2]-2*C[1,2]*C[1,3]*C[2,3]+C[1,1]*C[2,3]^2+C[1,2]^2*C[3,3]-C[1,1]*C[2,2]*C[3,3])/DD 0;
0                    0]);
Czzi = @views inv([C[3,3] C[2,3];
C[2,3] C[2,2]]);

return CzziCzx,CxzCzzi,CxxmCxzCzziCzx,Czzi
end

# --- Creation of A (system) matrix
function AFmatrix(C,s1,rho,f,h,CzziCzx,CxzCzzi,CxxmCxzCzziCzx,Czzi)
A1 = -s1 * CzziCzx;
A2 = Czzi;
A3 = rho*I - s1^2*CxxmCxzCzziCzx;
A4 = -s1 * CxzCzzi;
A = [ A1 A2           ;
A3 A4];
F = @views [ CzziCzx*h[:,1]+h[:,2] ; f + s1 * CxxmCxzCzziCzx * h[:,1] ];
return A,F
end

# --- Computation of d(s3)/d(s1) derivative
function ds3ds1(C,D0,s1,N1,N2,CzziCzx,CxzCzzi,CxxmCxzCzziCzx,Czzi)
# --- The differentiated system matrix A
-2*s1*CxxmCxzCzziCzx    -CxzCzzi];
# ---
ds3 = @views D0 \ (dAds * dropdims(D0[:,[N1 N2]], dims = (findall(size(D0[:,[N1 N2]]) .== 1)...,))); # Drop singleton dimensions, otherwise we get a [4x1x2] matrix
ds31 = @views ds3[N1,1];
ds32 = @views ds3[N2,2];
return ds31,ds32
end

# --- pairwise minimum
function pairwisemin(s3,l3)
mins,idx = findmin(abs2.( repeat( l3 , 1, 2  ) - repeat(transpose(s3),2,1) ) , dims=1 );
idx = [idx[1][1] idx[2][1]];
return idx;
end

function AssignEigenvaluesEigenvectors(C0,s1,rho0,f,h,s30e,s30,CzziCzx,CxzCzzi,CxxmCxzCzziCzx,Czzi)
A0,F0   = AFmatrix(C0,s1,rho0,f,h,CzziCzx,CxzCzzi,CxxmCxzCzziCzx,Czzi);
L0,D0   = eigen(A0);
posneg0 = sign.(imag( L0./s1 ));
negidx  = @views pairwisemin( s30e[1:2], L0[ posneg0.==+1 ] );
posidx  = @views pairwisemin( s30e[3:4], L0[ posneg0.==-1 ] );
posneg0[posneg0.==-1]=2 .+ vec(posidx);
posneg0[posneg0.==+1]=     vec(negidx);
posneg0I= round.(Int, posneg0);
s30[ posneg0I] = (L0);
D0[:,posneg0I] = (D0);
return s30,D0,F0
end

function CdH()

theta = 0;
M = BondMatrix2D(22.5)
C0 = [64 18 -4;
18 46 -2;
-4 -2 7 ]*3.1e8;
C0 = M * C0 * M';
rho0 = 2000;
C1 = 4.5*C0;
rho1 = 1.5*rho0;
# --- Decompose the matrix
CzziCzx0,CxzCzzi0,CxxmCxzCzziCzx0,Czzi0 = CMatrixDecomp(C0);
CzziCzx1,CxzCzzi1,CxxmCxzCzziCzx1,Czzi1 = CMatrixDecomp(C1);

# -------------------- Setup source details
f = [1;0]
h = [0 0; 0 0];
h0 = 200;
h1 = -300;
x  = 10;
N0 =  [3 3 4 4]; # P/ P/ S/ S/
N1 =  [1 2 1 2]; # P\ S\ P\ S\
# -------------------- Setup temporal vector
ts = collect(0:2e-5:0.8);
solution1 = ts.*0;
# -------------------- Setup initial iteration details
A0,F0 = AFmatrix(C0,0,rho0,f,h,CzziCzx0,CxzCzzi0,CxxmCxzCzziCzx0,Czzi0)
L0,D0 = eigen(A0);
L0init  = [-sort( -L0[ L0.<0 ] ); sort( L0[ L0.>0 ])]; # >> Sort as [-a1, -a2, a1, a2], with a1<a2.

A1,F1 = AFmatrix(C1,0,rho1,f,h,CzziCzx1,CxzCzzi1,CxxmCxzCzziCzx1,Czzi1)
L1,D1 = eigen(A1);
L1init  = [-sort( -L1[ L1.<0 ] ); sort( L1[ L1.>0 ])]; # >> Sort as [-a1, -a2, a1, a2], with a1<a2.

# --- Julia-specific initializing of variables, which have to survive in global scope
Fp = 1+im;

# -------------------- Start iterating
for N = 1:4
println(N)
s1new= complex(0);
s1   = complex(0);
s30c = complex(L0init);
s31c = complex(L1init);
s30  = complex(L0init);
s31  = complex(L1init);
for ii=1:length(ts)
t=ts[ii];
if t< ( L0init[N0[N]]*h0+L0init[N1[N]]*h1 ) # = where s_1 = 0.
continue;
end
s30h = complex(s30c);
s31h = complex(s31c);
s30c = complex(s30);
s31c = complex(s31);
s30e = complex(2*s30c - s30h);    # Expected s3 (eigen)values (medium 1)
s31e = complex(2*s31c - s31h);    # Expected s3 (eigen)values (medium 2)
jj=0;
while (abs(s1*x+s30[N0[N]]*h0+s30[N1[N]]*h1-t )>1e-10 || jj==0) && (jj<500)
s1              = real(s1new) + im*(abs(imag(s1new)));
# --- Assign eigenvalues and eigenvectors to medium 1
s30,D0,F0       = AssignEigenvaluesEigenvectors(C0,s1+im*eps(),rho0,f,h,s30e,s30e,CzziCzx0,CxzCzzi0,CxxmCxzCzziCzx0,Czzi0);
# --- Newton-Raphson iteration
ds3ds11,ds3ds12 = ds3ds1(C0,D0,s1,N0[N],N1[N],CzziCzx0,CxzCzzi0,CxxmCxzCzziCzx0,Czzi0);
Fp              = ( x + ds3ds11 * h0 + ds3ds12 * h1 );
s1new           = s1 - ( s1*x + s30[N0[N]]*h0 + s30[N1[N]]*h1-t ) / Fp;
jj+=1;
end
s31,D1,F1 = AssignEigenvaluesEigenvectors(C1,s1,rho1,f,h,s31e,s31e,CzziCzx1,CxzCzzi1,CxxmCxzCzziCzx1,Czzi1);
# --- Scattering matrix
Q   = D1 \ D0;
R   = @views Q[1:2,1:2] \ Q[1:2,3:4];
R   = R[N1[N],N0[N]-2]

DF = D0 \ F0;
DF = DF[N0[N]];
BJN = D0[:,N1[N]] * DF * R
# BJN = D0[:,N1[N]] .* ( transpose(D0I[N0[N],:]) * F0 ) * R ;
dsdt = 1/Fp;

sol = 1/pi * imag( BJN * dsdt );
solution1[ii] += cos(theta)*sol[1] - sin(theta)*sol[2];
end
display(plot(ts,solution1));
end

return solution1

end

CdH(); # >> Actually running the program.
``````

The program supplied here takes about 35 seconds to run on my MacBook, where I use the JuliaPro with the long-term support (v1.0.5-2) version.

When I check the program with the profiler, it seems that the main bottleneck of the program is the `L0,D0 = eigen(A0);` operation, which takes about 25% of the run-time. Here, `A0` is always a 4x4 complex-valued matrix. Unfortunately, the `StaticArrays` package doesnâ€™t allow for this format (or at least, it gives errors for me when I then use it with the `eigen` function), so I donâ€™t know if I could make this call more efficient by somehow telling Julia about what format to expectâ€¦ However, I wondered if there were other ways of increasing the speed of my code. So, I went another few levels down in the profiler (please can someone confirm that Iâ€™m reading this correctly?), and see that it spends the most time computing the eigenvectors/eigenvalues in `getindex(t::Tuple, r::AbstractArray{<:Any,1}) = ([t[ri] for ri in r]...,)` which lives in `tuple.jl`. This leads to my question: is the Julia call to Eigen somehow getting stuck in collecting the results from the call to BLAS into a tuple, rather than spending time in the BLAS function itself? Does someone have a suggestion about how I can speed up my code if that is the case?

Can you try the latest stable version (1.5)?

-viral

1 Like

Since you already narrowed down the problem, it would make sense to post a minimal working example on your eigenvalue problem. This makes it easier to reproduce your problem and provide help.

Okay, this is a simpler version that exhibits the same run-time properties.

``````using Profile
using LinearAlgebra
function test_program()
for i = 1:100
A0 = rand(4,4) * (1+im)*i/1000
L0,D0   = eigen(A0);
end
end

Profile.clear()
@profile test_program()
Juno.profiler()
``````

(run it a few times, to get rid of compile-time performance issues).

1 Like

If the bottleneck is eigen, then it is almost certain that Matlab and Julia run the same code: Lapack + BLAS. No wonder then if they are the same speed.

4 Likes

Profiling confirms that the Lapack function `geevx!` is called at some point. The same holds true if you use `StaticArrays`, which is why there is no speed up in this case. So unless you have a specific algorithm that outperforms Lapck for complex 4x4 matrices Julia and Matlab will perform similarly.

Thanks for all your responses. I totally understand that Julia and MATLAB use the same underlying Lapack/BLAS function calls. But my question has to do with the fact that the profiler shows that the majority of time is not spent with Lapack/BLAS. I have screenshotted and annotated the profiling result for the test_program I posted above:

My understanding is that the horizontal extent of a block indicates the time spent in a particular function, and calls to sub-programs are layered vertically. Right?

What you can see is that there are some calls to lapack.jl, from which Lapack/BLAS is called. But also, you can see that the longest time is spent in tuple.jl (and calls to array.jl). To me, it gives the suggestion that my simple code spends a lot of time doing work that is NOT related to computing eigenvalues/eigenvectors, but is doing work related to input/output operationsâ€¦ Right? What is it doing there, because it looks like itâ€™s spending more time on I/O than on actual eigendecompositions?

What I wonder about is whether this tuple.jl part can be kept to a minimum, when I repeat this step hundreds of times for identically sized matrices with an identical type. I have no hope of beating Lapack/BLAS with my own code, of course, but I would like my program to spend the majority of its time running lapack.jl â€“ is there a way to make it so? I hope itâ€™s clear what Iâ€™m trying to doâ€¦

1 Like

I tried your code and a variant and could not observe your profile issue

``````function test_program2()
A0 = rand(4,4) * (1+im) * 12/1000
for i = 1:1000
L0,D0   = eigen(A0);
end
end
``````

See the following output generated with StatProfilerHTML.

I also tried it with much larger matrices (`rand(50, 50)`) and the time spent in Lapack was then 98%.

Cheers,

Felix

Is this with static arrays?

Nope, just the code given, with JuliaPro v.1.0.5-2 (long-term support).

Since you probably have the profiling output, could you provide the trace that leads to the â€śexpensiveâ€ť call in tuple.jl?