Performance of computing the diagonal of V' * A * V

As @stevengj suggests, there may be a better method, but one thing you might try with the current approach is

"""
    diagonal_elements(n, A, V, temp)

Return the first `n` diagonal entries of `V' * A * V` without forming the full product.

Entry `i` is computed as `dot(V[:, i], A * V[:, i])`, using `temp` as scratch space for
the matrix-vector product.

# Arguments
- `n::Int`: number of diagonal entries to compute; must not exceed `size(V, 2)`.
- `A::AbstractMatrix{T}`: the inner matrix.
- `V::AbstractMatrix{T}`: matrix whose columns are used one at a time.
- `temp::Vector{T}`: work buffer that receives `A * V[:, i]`; it is overwritten on
  every iteration.

# Returns
A freshly allocated `Vector{T}` of length `n`.

# Throws
- `DimensionMismatch` if `n` is larger than the number of columns of `V`.
"""
function diagonal_elements(n::Int, A::AbstractMatrix{T}, V::AbstractMatrix{T}, temp::Vector{T}) where {T <: AbstractFloat}
    # The loop below runs under `@inbounds`, so `n` has to be validated up front.
    n <= size(V, 2) || throw(DimensionMismatch(
        "n = $n exceeds the number of columns of V ($(size(V, 2)))"))

    # Concrete element type: `Vector{AbstractFloat}` would box every entry.
    diag_elem = Vector{T}(undef, n)

    @inbounds for i = 1:n
        vi = view(V, :, i)       # column i of V, no copy
        mul!(temp, A, vi)        # temp = A * vi, written in place
        diag_elem[i] .= dot(vi, temp)
    end
    return diag_elem
end

Here, even though the array allocation is part of the function, I still get a 25% speed-up on my computer by doing the diagonal element assignment in place with diag_elem[i] .= dot(vi, temp).

(edit: note the name change since this is no longer a mutating function)