Speeding up array multiplication

Hi Here is the updated version, and the code I acutally used in the code. My code calls this function Construct_projector:

function Construct_projector(k_set::Vector{Vector{Float64}},ϵr::Float64,
  density_matrix::Array{ComplexF64},single_matrix::Array{ComplexF64},
  energy_input::Float64,BG_density_matrix::Array{ComplexF64},fcmatrix::Array{Matrix{Float64}},Area::Float64,dimension::Int,
  target_density::Float64,temp::Float64,itcount::Int)
 
  

 Fock_matrix=zeros(ComplexF64,dimension,dimension,length(k_set))


 Threads.@threads for ja in 1:dimension
  for jb in 1:(ja - 1)
      mul!(
          @view(Fock_matrix[ja, jb, :]),
          @view(fcmatrix[ja, jb][:, :]),
          @view(density_matrix[ja, jb, :]),
          1,
          1,
      )
  end
 end
 

 Threads.@threads for ja in eachindex(k_set)
  axpy!(1, @view(Fock_matrix[:, :, ja])', @view(Fock_matrix[:, :, ja]))
 end


 Threads.@threads for ja in 1:dimension
  mul!(
      @view(Fock_matrix[ja, ja, :]),
      @view(fcmatrix[ja, ja][:, :]),
      @view(density_matrix[ja, ja, :]),
      1,
      1,
  )
 end


SOME MORE OPERATIONS...
end

As you can see, this is a rather large function that takes a lot of arguments. I am only putting here the part that’s taking the most of time in this function. Some more operations takes at most 2 to 3 seconds

As I recently run this code on the computing nodes when requesting 8 cpus, and dimension is exactly 80, and k_set is of length 961, each call of this function takes around 17-20 seconds.

fcmatrix is now a matrix of matrix instead of a 4D array. It is only defined for fcmatrix[ja,jb] for ja>=jb. All other entries are undef. It’s constructed once and never changed. However, the density_matrix will change iteratively (it will actually be produced as an ouput of this Construct_projector function), so I will need to call this Construct_projector function many times.

The overall structure of my code is:


function main(initial_density_matrix+ Many other arguments)

    fcmatrix=Matrix{Matrix{Float64}}(undef,dimension,dimension)

   CODE THAT CONSTRUCTS fcmatrix
    
      input_density_matrix=initial_density_matrix
while condition
      output_density_matrix=Construct_Projector(input_density_matrix+ Other arguments)
      input_density_matrix=output_density_matrix
end




return input_density_matrix

end