As @stevengj said, there may be a better method, but one thing you might try with the current approach is:
"""
    diagonal_elements(n, A, V, temp)

Return a `Vector{T}` whose `i`-th entry is `dot(vi, A * vi)`, where `vi` is the
`i`-th column of `V`, for `i = 1:n`.

# Arguments
- `n::Int`: number of leading columns of `V` to process.
- `A::AbstractMatrix{T}`: matrix applied to each column of `V`.
- `V::AbstractMatrix{T}`: matrix whose columns are used.
- `temp::Vector{T}`: scratch buffer; overwritten with `A * vi` on every iteration.

# Throws
- `ArgumentError` if `n` is negative or larger than the number of columns of `V`.
"""
function diagonal_elements(
    n::Int, A::AbstractMatrix{T}, V::AbstractMatrix{T}, temp::Vector{T}
) where {T<:AbstractFloat}
    # Validate `n` up front instead of relying on `@inbounds` with an unchecked count.
    0 <= n <= size(V, 2) || throw(
        ArgumentError("n must be between 0 and size(V, 2) = $(size(V, 2)), got $n"),
    )
    # Concrete element type: `Vector{AbstractFloat}` would box every entry.
    diag_elem = Vector{T}(undef, n)
    for i in 1:n
        vi = view(V, :, i)
        mul!(temp, A, vi)
        # Broadcast-assigns the scalar into slot `i`; same effect as plain `=` here.
        diag_elem[i] .= dot(vi, temp)
    end
    return diag_elem
end
Here, even though the array allocation is part of the function, I still get a 25% speed-up on my computer by doing the diagonal element assignment in place with diag_elem[i] .= dot(vi, temp).
(edit: note the name change since this is no longer a mutating function)