Below I will copy in some code from a larger application; in this part of the code the primary goal is to set up several somewhat large matrices used for some subsequent finite element operations. There are a number of `repmat`, `reshape`, and `zeros()` calls, for example.
When executing this with `@benchmark` (detailed step list below), it runs about 20% slower than the original Matlab code - it also has a large % assigned to GC:
BenchmarkTools.Trial:
memory estimate: 457.35 MiB
allocs estimate: 114800
--------------
minimum time: 556.528 ms (67.65% GC)
median time: 568.348 ms (66.76% GC)
mean time: 578.302 ms (66.46% GC)
maximum time: 642.323 ms (68.25% GC)
--------------
samples: 9
evals/sample: 1
The questions are: what can I do to make it faster? I tried allocating arrays in advance, redoing functions to operate on variables in place (e.g., function name with !, getIxIy!() ).
Is it correct for me to assume that I am losing a lot of time in GC?
One issue is that the matrix stiffCoeffXX has complex values for matrix elements within 10 locations from its edges, but the interior is entirely real - I don't know how Matlab does this, but maybe it can do real arithmetic in the middle automatically?
Any ideas would be appreciated.
To run the code saved within a file named `test.jl`, I do:
include("test.jl")
@benchmark assembleMatrix(3,stiffCoeffXX,elementStiffMatrixXX)
(the latter two arguments are set up within the program).
Here is the complete code in the file test.jl:
# Dimensions of the coefficient matrix `stiffCoeffXX` built below.
nX=120
nY=120
# 16x16 Float64 matrix literal. Its nonzero entries sit in four 4x4 blocks along
# the diagonal; every other entry is an explicit 0.0.
elementStiffMatrixXX = [0.361111 -0.406535 0.0593126 -0.0138889 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; -0.406535 0.694444 -0.347222 0.0593126 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0593126 -0.347222 0.694444 -0.406535 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; -0.0138889 0.0593126 -0.406535 0.361111 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 1.80556 -2.03267 0.296563 -0.0694444 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 -2.03267 3.47222 -1.73611 0.296563 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.296563 -1.73611 3.47222 -2.03267 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 -0.0694444 0.296563 -2.03267 1.80556 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.80556 -2.03267 0.296563 -0.0694444 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -2.03267 3.47222 -1.73611 0.296563 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.296563 -1.73611 3.47222 -2.03267 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -0.0694444 0.296563 -2.03267 1.80556 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.361111 -0.406535 0.0593126 -0.0138889; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -0.406535 0.694444 -0.347222 0.0593126; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0593126 -0.347222 0.694444 -0.406535; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -0.0138889 0.0593126 -0.406535 0.361111];
# nX-by-nY complex matrix: uniform random real part plus uniform random imaginary part.
stiffCoeffXX = rand(nX,nY)+rand(nX,nY)*im
"""
    getIxIy(basisOrder, nx0, ny0) -> (iX, iY)

Return the two index matrices used to assemble per-element matrices into a
global sparse matrix.

Let `L = getDOFInElement(basisOrder, nx0, ny0)` (one column of node numbers per
element) and `d = (basisOrder + 1)^2`. Both results are `d^2 × size(L, 2)`:

- `iX[(q-1)*d + p, e] == L[p, e]` — column `e` of `L` tiled `d` times;
- `iY[(q-1)*d + p, e] == L[q, e]` — each entry of column `e` repeated `d`
  times in a row.

So entry `(q-1)*d + p` of column `e` addresses the `(p, q)` entry of a `d × d`
element matrix stored column-major.
"""
function getIxIy(basisOrder::Int64,nx0::Int64,ny0::Int64)
    localElementNode = getDOFInElement(basisOrder, nx0, ny0)::Array{Int64,2}
    totalDeg = (basisOrder + 1) * (basisOrder + 1)
    # `repeat` replaces `repmat` (removed in Julia 1.0) and builds each result
    # directly, without the repmat/adjoint/copy/reshape chain of temporaries.
    iX = repeat(localElementNode, outer=(totalDeg, 1))
    iY = repeat(localElementNode, inner=(totalDeg, 1))
    return (iX, iY)
end
"""
    getDOFInElement(basisOrder, nx, ny) -> Array{Int64,2}

Return the global node numbers belonging to each element of an `nx × ny`
element grid.

Nodes are numbered column-major on a grid with `ny*basisOrder + 1` rows and
`nx*basisOrder + 1` columns. Element `(i, j)` (`i in 1:nx`, `j in 1:ny`) is
stored in column `(j-1)*nx + i` of the result and covers grid rows
`(j-1)*basisOrder+1 : j*basisOrder+1` and grid columns
`(i-1)*basisOrder+1 : i*basisOrder+1`. Within a column the `(basisOrder+1)^2`
nodes are listed with the grid-column offset varying fastest.

The result has size `((basisOrder+1)^2, nx*ny)`.
"""
function getDOFInElement(basisOrder::Int64,nx::Int64,ny::Int64)::Array{Int64,2}
    nLocal = basisOrder + 1
    # Rows of the node grid follow `j` (1:ny) and columns follow `i` (1:nx), so
    # the grid must be (ny*basisOrder+1) tall. Sizing it from `nx` instead
    # indexed out of bounds whenever nx != ny; square grids are unaffected.
    nGridRows = ny * basisOrder + 1
    nodes = zeros(Int64, nLocal^2, nx * ny)
    for j in 1:ny, i in 1:nx
        elem = (j - 1) * nx + i
        rowOffset = (j - 1) * basisOrder
        colOffset = (i - 1) * basisOrder
        k = 0
        # Column-major node number of grid entry (r, c) is (c-1)*nGridRows + r;
        # computing it directly avoids a slice/adjoint/copy per element.
        for a in 1:nLocal, c in 1:nLocal
            k += 1
            nodes[k, elem] = (colOffset + c - 1) * nGridRows + rowOffset + a
        end
    end
    return nodes
end
"""
    assembleMatrix(basisOrder, coeff, localElementMatrix)

Assemble a global sparse matrix from one local element matrix scaled by a
per-element coefficient.

# Arguments
- `basisOrder`: passed to `getIxIy` to build the index arrays.
- `coeff`: complex coefficient matrix; `size(coeff, 1)` and `size(coeff, 2)`
  are passed to `getIxIy` as `nx0` and `ny0`. Coefficients are consumed in
  row-major order of `coeff` (column index varying fastest).
- `localElementMatrix`: real element matrix; its column-major entries are
  multiplied by each element's coefficient.

# Returns
The matrix `sparse(iX, iY, values)`. Entries that share an `(iX, iY)` pair are
combined by `sparse`.

`sparse` must be in scope; on Julia ≥ 0.7 it is provided by the `SparseArrays`
standard library.
"""
function assembleMatrix(basisOrder::Int64,coeff::Array{Complex{Float64},2},
                        localElementMatrix::Array{Float64,2})
    iX, iY = getIxIy(basisOrder, size(coeff, 1), size(coeff, 2))
    # Row-major flattening of `coeff`. A fresh local is used rather than
    # rebinding the `coeff` argument to a value of a different type.
    coeffByRow = vec(permutedims(coeff, (2, 1)))
    # Outer product: column e holds the local matrix scaled by coefficient e.
    # `transpose` (not `'`) keeps the complex coefficients unconjugated.
    entries = vec(localElementMatrix) * transpose(coeffByRow)
    # `vec` reshapes without copying, unlike `[:]`.
    # NOTE(review): explicit zeros in `localElementMatrix` are passed through as
    # triplets; whether they should be filtered out first is left to the caller.
    return sparse(vec(iX), vec(iY), vec(entries))
end