Sparse matrix assembly with bad timings

Hi,

I took the 2016 codes from Julia iFEM 2: Optimizing Stiffness Matrix Assembly and made some cosmetic changes to run them with Julia 1.7.

Chris’ old timings were:

tic; assemblingstandard(node,elem); toc;
tic; assemblingsparse(node,elem); toc;
tic; assembling(node,elem); toc;
 
Elapsed time is 0.874919 seconds.
Elapsed time is 0.698763 seconds.
Elapsed time is 0.020453 seconds.

mine:

  7.117206 seconds (1.17 M allocations: 880.707 MiB, 8.60% gc time, 18.66% compilation time)
  0.872631 seconds (1.09 M allocations: 90.582 MiB, 23.57% compilation 
time)
  1.388404 seconds (271.57 k allocations: 41.201 MiB, 93.56% compilation time)

Where did the speed of assembling() go? Where is the error?
Thx

UPDATE, using BenchmarkTools:

 4.168 s (1020002 allocations: 872.65 MiB)
 982.999 ms (1020033 allocations: 86.91 MiB)
 86.067 ms (259 allocations: 29.39 MiB)
using LinearAlgebra
using SparseArrays

# http://www.stochasticlifestyle.com/julia-ifem-1-mesh-generation-and-julias-type-system/
function squaremesh(square,h)
    #square = [x0 x1 y0 y1], h is stepsize
    x0 = square[1]; x1= square[2]
    y0 = square[3]; y1= square[4]
    x,y = meshgrid(x0:h:x1,y0:h:y1)
    node = [x[:] y[:]];
    ni = size(x,1); # number of rows
    N = size(node,1);
    t2nidxMap = 1:N-ni;
    topNode = ni:ni:N-ni;
    t2nidxMap = deleteat!(collect(t2nidxMap),collect(topNode))
    k = t2nidxMap;
    elem = [k.+ni k.+(ni+1) k ; k.+1 k k.+(ni+1)]
    return node,elem
  end

# http://www.stochasticlifestyle.com/julia-ifem2/
function localstiffness(p)
    At = Array{Float64}(undef, 3,3)
    B = [p[1,:]-p[3,:]; p[2,:]-p[3,:]];
    B = reshape(B,2,2)       # line added
    G = [1 0 ; 0 1 ; -1 -1]';
    area = 0.5*abs(det(B));
    for i = 1:3
      for j = 1:3
        At[i,j] = area*dot(B\G[:,i], B\G[:,j]); 
      end
    end
    return(At,area)
  end

function assemblingstandard(node,elem)
    N=size(node,1); NT=size(elem,1);
    A=zeros(N,N);
    for t=1:NT
      At,=localstiffness(node[vec(elem[t,:]),:]);
      for i=1:3
        @simd for j=1:3
          @inbounds A[elem[t,i],elem[t,j]]=A[elem[t,i],elem[t,j]]+At[i,j];
        end
      end
    end
    return(A)
  end

  function assemblingsparse(node,elem)
    N = size(node,1); 
    NT = size(elem,1);
    i = Array{Int64}(undef, NT,3,3);
    j = Array{Int64}(undef, NT,3,3);
    s = zeros(NT,3,3);
    index = 0;
    for t = 1:NT
      At, = localstiffness(node[vec(elem[t,:]),:]);
      for ti = 1:3, tj = 1:3
          i[t,ti,tj] = elem[t,ti];
          j[t,ti,tj] = elem[t,tj];
          s[t,ti,tj] = At[ti,tj];
      end
    end
    return(sparse(vec(i), vec(j), vec(s)));
  end

  function assembling(node,elem)
    N = size(node,1); NT = size(elem,1);
    ii = Vector{Int64}(undef, 9*NT);
    jj = Vector{Int64}(undef, 9*NT);
    sA = Vector{Float64}(undef, 9*NT);
    ve = Array{Float64}(undef, NT,2,3)
    ve[:,:,3] = node[vec(elem[:,2]),:]-node[vec(elem[:,1]),:];
    ve[:,:,1] = node[vec(elem[:,3]),:]-node[vec(elem[:,2]),:];
    ve[:,:,2] = node[vec(elem[:,1]),:]-node[vec(elem[:,3]),:];
    area = 0.5*abs.(-ve[:,1,3].*ve[:,2,2]+ve[:,2,3].*ve[:,1,2]);
    index = 0;
    for i = 1:3, j = 1:3
        @inbounds begin
            ii[index+1:index+NT] = elem[:,i]
            jj[index+1:index+NT] = elem[:,j]
            sA[index+1:index+NT] = sum(ve[:,:,i].* ve[:,:,j], dims=2) ./(4*area) # Replacing dot(ve[:,:,i],ve[:,:,j],2)
            index = index + NT;
        end
    end
    return(sparse(ii,jj,sA))
  end

node,elem = squaremesh([0 1 0 1], 0.01)
@time assemblingstandard(node,elem);
@time assemblingsparse(node,elem);
@time assembling(node,elem);
 


? You might try to run the timing twice, first for compilation and then for run time.

@goerch I thought I did re-run, but with BenchmarkTools you are on the safer side.
Thanks a lot!

1 Like