Here’s a sample function for use with a differential equation solver to solve a system of matrices equations, d M_i / dt =f(M_{1...n})
. The matrix M_i is represented by m[:,:,i]
, and its time derivative is stored in dm. [random number just used as example, not my actual equation but just similar mathematical form (involving matrix-matrix production and matrix-scalar production); and the size can be much bigger than 21 2-by-2 matrices]
m = rand(2,2,21)
dm = zeros(2,2,21)
b = rand(2,2)
kf(t) = sin(t)*b
comu(a,b) = a*b-b*a
function du(dm,m_in,t)
c1 = 1
c2 = 2
m = [view(m_in,:,:,i) for i in 1:21]
for k = 1:20
dm[:,:,k] = c1*m[k]+c2*comu(m[k],m[k+1])
for i = 1:k
dm[:,:,k] -= comu(kf(t)*m[i],m[k-i+1])
end
end
end
and @time du(dm,m,1.0)
returns:
0.000174 seconds (2.25 k allocations: 185.516 KiB)
for the 2nd run (after the jit compile is done).
Is there a way to cut down the number of allocations? I already used view()
for the array slice and dm is already pre-allocated. Since this gets called on the order of 1e6 times for the (needed to call du for each time-step, and to get error estimation, etc), reducing the allocation here may be helpful/practical.
Changing to broadcast with
function du(dm,m_in,t)
c1 = 1
c2 = 2
m = [view(m_in,:,:,i) for i in 1:21]
for k = 1:20
dm[:,:,k] = c1*m[k] .+ c2*comu(m[k],m[k+1])
for i = 1:k
dm[:,:,k] .-= comu(kf(t)*m[i],m[k-i+1])
end
end
end
gives similar results,
0.000264 seconds (2.25 k allocations: 178.953 KiB)
[slower, lower memory but about the same amount of allocations]
I can cut down to within 1k allocation with:
m = [rand(2,2) for i in 1:21]
dm = [zeros(2,2) for i in 1:21]
function comu(a,b,x_,y_)
mul!(x_,a,b)
mul!(y_,b,a)
x_ - y_
end
function du(dm,m,t)
c1 = 1
c2 = 2
x = similar(m[1])
y = similar(m[1])
for k = 1:20
dm[k] = c1*m[k] .+ c2*comu(m[k],m[k+1],x,y)
for i = 1:k
dm[k] .-= comu(kf(t)*m[i],m[k-i+1],x,y)
end
end
end
result:
0.000106 seconds (926 allocations: 81.313 KiB)
Is there anything further that can be done that still preserve readability of the code? Thanks!