I wrote the below code (originally in MATLAB, ported to Julia). Unfortunately, the code runs at pretty much the same speed as the MATLAB original.

```
using LinearAlgebra
using StaticArrays
using DelimitedFiles
using BenchmarkTools
using Profile
using Plots
# --- Bond transform matrix
function BondMatrix2D(d)
b = [cosd(d) cosd(90 - d);
cosd(90 + d) cosd(d) ];
M1 = b.^2;
M2 = 2 * b[:, 2] .* b[:, 1];
M4 = b[1,1]*b[2,2] + b[1,2]*b[2,1]
M = [M1 M2;
-M2'/2 M4]
return M
end
# --- Decompose the C matrix ; as this is required repeatedly in AFmatrix and ds3ds1
function CMatrixDecomp(C)
DD = @views (C[2,3]^2-C[2,2]*C[3,3]);
CzziCzx =(@views[-(C[1,3]*C[2,2]-C[1,2]*C[2,3])/DD 1;
(C[1,3]*C[2,3]-C[1,2]*C[3,3])/DD 0]);
CxzCzzi = (@views [-(C[1,3]*C[2,2]-C[1,2]*C[2,3])/DD (C[1,3]*C[2,3]-C[1,2]*C[3,3])/DD;
1 0]);
CxxmCxzCzziCzx = (@views [(C[1,3]^2*C[2,2]-2*C[1,2]*C[1,3]*C[2,3]+C[1,1]*C[2,3]^2+C[1,2]^2*C[3,3]-C[1,1]*C[2,2]*C[3,3])/DD 0;
0 0]);
Czzi = @views inv([C[3,3] C[2,3];
C[2,3] C[2,2]]);
return CzziCzx,CxzCzzi,CxxmCxzCzziCzx,Czzi
end
# --- Creation of A (system) matrix
function AFmatrix(C,s1,rho,f,h,CzziCzx,CxzCzzi,CxxmCxzCzziCzx,Czzi)
A1 = -s1 * CzziCzx;
A2 = Czzi;
A3 = rho*I - s1^2*CxxmCxzCzziCzx;
A4 = -s1 * CxzCzzi;
A = [ A1 A2 ;
A3 A4];
F = @views [ CzziCzx*h[:,1]+h[:,2] ; f + s1 * CxxmCxzCzziCzx * h[:,1] ];
return A,F
end
# --- Computation of d(s3)/d(s1) derivative
function ds3ds1(C,D0,s1,N1,N2,CzziCzx,CxzCzzi,CxxmCxzCzziCzx,Czzi)
# --- The differentiated system matrix A
dAds = [-CzziCzx zeros(2,2);
-2*s1*CxxmCxzCzziCzx -CxzCzzi];
# ---
ds3 = @views D0 \ (dAds * dropdims(D0[:,[N1 N2]], dims = (findall(size(D0[:,[N1 N2]]) .== 1)...,))); # Drop singleton dimensions, otherwise we get a [4x1x2] matrix
ds31 = @views ds3[N1,1];
ds32 = @views ds3[N2,2];
return ds31,ds32
end
# --- pairwise minimum
function pairwisemin(s3,l3)
mins,idx = findmin(abs2.( repeat( l3 , 1, 2 ) - repeat(transpose(s3),2,1) ) , dims=1 );
idx = [idx[1][1] idx[2][1]];
return idx;
end
function AssignEigenvaluesEigenvectors(C0,s1,rho0,f,h,s30e,s30,CzziCzx,CxzCzzi,CxxmCxzCzziCzx,Czzi)
A0,F0 = AFmatrix(C0,s1,rho0,f,h,CzziCzx,CxzCzzi,CxxmCxzCzziCzx,Czzi);
L0,D0 = eigen(A0);
posneg0 = sign.(imag( L0./s1 ));
negidx = @views pairwisemin( s30e[1:2], L0[ posneg0.==+1 ] );
posidx = @views pairwisemin( s30e[3:4], L0[ posneg0.==-1 ] );
posneg0[posneg0.==-1]=2 .+ vec(posidx);
posneg0[posneg0.==+1]= vec(negidx);
posneg0I= round.(Int, posneg0);
s30[ posneg0I] = (L0);
D0[:,posneg0I] = (D0);
return s30,D0,F0
end
function CdH()
theta = 0;
M = BondMatrix2D(22.5)
C0 = [64 18 -4;
18 46 -2;
-4 -2 7 ]*3.1e8;
C0 = M * C0 * M';
rho0 = 2000;
C1 = 4.5*C0;
rho1 = 1.5*rho0;
# --- Decompose the matrix
CzziCzx0,CxzCzzi0,CxxmCxzCzziCzx0,Czzi0 = CMatrixDecomp(C0);
CzziCzx1,CxzCzzi1,CxxmCxzCzziCzx1,Czzi1 = CMatrixDecomp(C1);
# -------------------- Setup source details
f = [1;0]
h = [0 0; 0 0];
# -------------------- Setup receiver details
h0 = 200;
h1 = -300;
x = 10;
N0 = [3 3 4 4]; # P/ P/ S/ S/
N1 = [1 2 1 2]; # P\ S\ P\ S\
# -------------------- Setup temporal vector
ts = collect(0:2e-5:0.8);
solution1 = ts.*0;
# -------------------- Setup initial iteration details
A0,F0 = AFmatrix(C0,0,rho0,f,h,CzziCzx0,CxzCzzi0,CxxmCxzCzziCzx0,Czzi0)
L0,D0 = eigen(A0);
L0init = [-sort( -L0[ L0.<0 ] ); sort( L0[ L0.>0 ])]; # >> Sort as [-a1, -a2, a1, a2], with a1<a2.
A1,F1 = AFmatrix(C1,0,rho1,f,h,CzziCzx1,CxzCzzi1,CxxmCxzCzziCzx1,Czzi1)
L1,D1 = eigen(A1);
L1init = [-sort( -L1[ L1.<0 ] ); sort( L1[ L1.>0 ])]; # >> Sort as [-a1, -a2, a1, a2], with a1<a2.
# --- Julia-specific initializing of variables, which have to survive in global scope
Fp = 1+im;
# -------------------- Start iterating
for N = 1:4
println(N)
s1new= complex(0);
s1 = complex(0);
s30c = complex(L0init);
s31c = complex(L1init);
s30 = complex(L0init);
s31 = complex(L1init);
for ii=1:length(ts)
t=ts[ii];
if t< ( L0init[N0[N]]*h0+L0init[N1[N]]*h1 ) # = where s_1 = 0.
continue;
end
s30h = complex(s30c);
s31h = complex(s31c);
s30c = complex(s30);
s31c = complex(s31);
s30e = complex(2*s30c - s30h); # Expected s3 (eigen)values (medium 1)
s31e = complex(2*s31c - s31h); # Expected s3 (eigen)values (medium 2)
jj=0;
while (abs(s1*x+s30[N0[N]]*h0+s30[N1[N]]*h1-t )>1e-10 || jj==0) && (jj<500)
s1 = real(s1new) + im*(abs(imag(s1new)));
# --- Assign eigenvalues and eigenvectors to medium 1
s30,D0,F0 = AssignEigenvaluesEigenvectors(C0,s1+im*eps(),rho0,f,h,s30e,s30e,CzziCzx0,CxzCzzi0,CxxmCxzCzziCzx0,Czzi0);
# --- Newton-Raphson iteration
ds3ds11,ds3ds12 = ds3ds1(C0,D0,s1,N0[N],N1[N],CzziCzx0,CxzCzzi0,CxxmCxzCzziCzx0,Czzi0);
Fp = ( x + ds3ds11 * h0 + ds3ds12 * h1 );
s1new = s1 - ( s1*x + s30[N0[N]]*h0 + s30[N1[N]]*h1-t ) / Fp;
jj+=1;
end
s31,D1,F1 = AssignEigenvaluesEigenvectors(C1,s1,rho1,f,h,s31e,s31e,CzziCzx1,CxzCzzi1,CxxmCxzCzziCzx1,Czzi1);
# --- Scattering matrix
Q = D1 \ D0;
R = @views Q[1:2,1:2] \ Q[1:2,3:4];
R = R[N1[N],N0[N]-2]
DF = D0 \ F0;
DF = DF[N0[N]];
BJN = D0[:,N1[N]] * DF * R
# BJN = D0[:,N1[N]] .* ( transpose(D0I[N0[N],:]) * F0 ) * R ;
dsdt = 1/Fp;
sol = 1/pi * imag( BJN * dsdt );
solution1[ii] += cos(theta)*sol[1] - sin(theta)*sol[2];
end
display(plot(ts,solution1));
end
return solution1
end
CdH(); # >> Actually running the program.
```

The program supplied here takes about 35 seconds to run on my MacBook, where I use the JuliaPro with the long-term support (v1.0.5-2) version.

When I check the program with the profiler, it seems that the main bottleneck of the program is the `L0,D0 = eigen(A0);`

operation, which takes about 25% of the run-time. Here, `A0`

is always a 4x4 complex-valued matrix. Unfortunately, the `StaticArrays`

package doesnâ€™t allow for this format (or at least, it gives errors for me when I then use it with the `eigen`

function), so I donâ€™t know if I could make this call more efficient by somehow telling Julia about what format to expectâ€¦ However, I wondered if there were other ways of increasing the speed of my code. So, I went another few levels down in the profiler (please can someone confirm that Iâ€™m reading this correctly?), and see that it spends the most time computing the eigenvectors/eigenvalues in `getindex(t::Tuple, r::AbstractArray{<:Any,1}) = ([t[ri] for ri in r]...,)`

which lives in `tuple.jl`

. This leads to my question: is the Julia call to Eigen somehow getting stuck in collecting the results from the call to BLAS into a tuple, rather than spending time in the BLAS function itself? Does someone have a suggestion about how I can speed up my code if that is the case?

Many thanks in advance!