I can’t figure out why allocations are happening when that should simply destructively update zsum.
Here’s the relevant portions of the allocated trace:
0 i = j
0 while i <= nsync
185614464 zsum = zsum + (buffer[i] * sync[i])
0 i = i + 1
- end
-
- # and then the most recent values
0 if j > 1
0 i = 1
0 while i <= j-1
172792704 zsum = zsum + (buffer[i] * sync[i])
0 i = i + 1
- end
- end
The other thing I noticed, which is very suprising, is that replacing the while loops with, e.g.
for i=j:nsync
...
end
allocates memory (!) but the WHILE loop does not, hence the reason I am using them. I had figured that for a simple integer sequence the FOR loop would be re-written as a WHILE but apparently that is not the case.
Does this mean I should be writing all my integer counting loops using WHILE loops instead of FOR loops ?!
Also I would appreciate any comments on better ways to implement was is very much a producer-consumer model. This is being done to implement a discrete simulation, i.e. a process which consumes a sample at a time.
thanks !
complete original code…
#
#
#
const nsync = 28
sync = randn(nsync)
buffer = zeros(Float64, nsync)
function process1(cout)
println("process1")
while true
# doing this instead of x=randn(), y = randn(), z=complex(x,y)
# cuts way down on GC allocations.
for i=1:1000
z = randn()
put!(cout, z)
end
end
end
function process2(cin::Channel{Float64}, cout::Channel{Float64})
j = 1
zsum = complex(0.0, 0.0)
println("process2")
while true
buffer[j] = take!(cin)
# j will now point to the _oldest_ value
# the current value is at j-1
j = (j % nsync) + 1
zsum = 0.0
# process the oldest values
i = j
while i <= nsync
zsum = zsum + (buffer[i] * sync[i])
i = i + 1
end
# and then the most recent values
if j > 1
i = 1
while i <= j-1
zsum = zsum + (buffer[i] * sync[i])
i = i + 1
end
end
put!(cout, zsum)
end
end
function process3()
channel1 = Channel{Float64}(1000)
channel2 = Channel{Float64}(1)
t1 = Task(()->process1(channel1))
t2 = Task(()->process2(channel1, channel2))
println("starting")
schedule(t1)
schedule(t2)
yield()
z = 0.0
for i=1:100000
z = take!(channel2)
# println(i, " z=", z)
end
println("done")
end
#println(code_llvm(process2, (Channel{Complex{Float64}}, Channel{Complex{Float64}})))
@time process3()
process3()
#Profile.print()