Hello everyone,

I need some help on optimization of a function, which looks like this

```
# re-edit
SpMatrixListUnion = Union{Vector{SparseMatrixCSC{Float64,Int64}},Vector{SparseMatrixCSC{Complex128,Int64}}}
function avg_V_on_ψ!(
res::Array{Float64,2}, # result to write
vec_list::Vector{Matrix{Float64}}, # a list of V's,
OP_list::SpMatrixListUnion # a list of O's
) # the computation is V' * O * V,
#result is written into Array "res" in a consistent manner
vl = length(vec_list)
opl = length(OP_list)
for i=1:vl
I,J,V = findnz(OP_list[i]) # helpful when OP_list[i] is a very sparse matrix
res[:,i] = real.(sum((vec_list[i][I,:]).*broadcast(*,V,vec_list[i][J,:]),1)) # V' * O * V for sparse matrix O
end
return nothing
end
```

For each matrix `O=OP_list[i]`

in OP_list and each vector set `V=vec_list[i] in vec_list`

, compute the average `V'*O*V`

and write result to res. I am working with sparse matrices for O in OP_list, so I have done a first step optimization by using findnz. For really sparse matrices, the result is good but not satisfactory.

It seems to me that there cannot be further reduction on the number of arithmatic operations. So I hope to ask some experienced users for help, to make such function even faster, because it is the only bottleneck in my program …

Thanks in advance in any suggestions!

BTW you can verify the code by running the following test:

```
# avg_V_on_ψ0 is a slow but guaranteed version for the task
avg_V_on_ψ0(vecs,OP) = [real(dot(vecs[:,k],OP*vecs[:,k])) for k=1:size(vecs)[2]]
# the test
vlist = [rand(400,400) for k=1:300];
oplist = [1000.0sprand(400,400,0.0001) for k=1:300];
res2 = zeros(Float64,400,300);
gc()
@time res1 = [avg_V_on_ψ0(vlist[i],oplist[i]) for i=1:300];
@time avg_V_on_ψ!(res2,vlist,oplist)
# test : norm should be equal ~
norm(hcat(res1...)-res2)
```