Nice to see some interest in adding this! Yes, it’s possible to do quite a lot better, although how much of this is Julia and how much is just “cleanup” is debatable. (I do think that Julia’s “mindset” helps with this “cleanup.”)
- First, you don’t need any of those temporary arrays—this is the “vectorize everything” mindset of Matlab
- Once you notice that the values in the computation aren’t arbitrary entries extracted from some matrix, but instead change in predictable ways, you notice opportunities for speedup
- You realize that you can pre-calculate the range of `l` that will be in-bounds (sketched below)
- That allows you to get rid of all the branches in the inner loop
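To make the branch-removal point concrete, here's a toy sketch of the transformation (nothing radon-specific, just the pattern; the names are made up):

```julia
# Toy illustration: accumulate v[i] along i = i0 + k*stp for k = 0:kmax,
# handling out-of-bounds i.
function sum_branchy(v, i0, stp, kmax)
    s = 0.0
    for k in 0:kmax
        i = i0 + k*stp
        if 1 <= i <= length(v)          # bounds test inside the hot loop
            s += v[i]
        end
    end
    return s
end

function sum_hoisted(v, i0, stp, kmax)  # assumes stp > 0
    # precompute the range of k that stays in bounds, then loop branch-free
    klo = max(0, cld(1 - i0, stp))
    khi = min(kmax, fld(length(v) - i0, stp))
    s = 0.0
    @inbounds for k in klo:khi
        s += v[i0 + k*stp]
    end
    return s
end
```

For example, `v = rand(100); sum_branchy(v, -5, 3, 50) ≈ sum_hoisted(v, -5, 3, 50)`. The real code does the same thing, except the in-bounds window comes from the image rectangle (see `interior` below).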
Here's my code for `radon` (I didn't tackle `iradon`):
```julia
function radon(A::AbstractMatrix, angles)
    w, h = size(A)
    d = floor(Int, sqrt(w*w+h*h)/2)  # half-length of the diagonal
    Nr = 2d+1
    R = linspace(-Nr/2, Nr/2, Nr)
    sinogram = zeros(Nr, length(angles))
    for j = 1:length(angles)
        a = angles[j]
        θ = a + pi/2
        sinθ, cosθ = sin(θ), cos(θ)
        for k = 1:length(R)
            rk = R[k]
            x0, y0 = rk*cosθ + w/2, rk*sinθ + h/2
            # for a line perpendicular to this displacement from the center,
            # determine the length in either direction within the boundary of the image
            lfirst, llast = interior(x0, y0, sinθ, cosθ, w, h, R)
            tmp = 0.0
            @inbounds for l in lfirst:llast
                rl = R[l]
                x, y = x0 - rl*sinθ, y0 + rl*cosθ
                ix, iy = trunc(Int, x), trunc(Int, y)
                fx, fy = x-ix, y-iy
                # bilinear interpolation of A at the non-integer point (x, y)
                tmp += (1-fx)*((1-fy)*A[ix,iy] + fy*A[ix,iy+1]) +
                           fx*((1-fy)*A[ix+1,iy] + fy*A[ix+1,iy+1])
            end
            sinogram[k,j] = tmp
        end
    end
    return sinogram
end
```
```julia
function interior(x0, y0, sinθ, cosθ, w, h, R::Range)
    rx1, rxw = (x0-1)/sinθ, (x0-w)/sinθ
    ry1, ryh = (1-y0)/cosθ, (h-y0)/cosθ
    rxlo, rxhi = minmax(rx1, rxw)
    rylo, ryhi = minmax(ry1, ryh)
    rfirst = max(minimum(R), ceil(Int, max(rxlo, rylo)))
    rlast = min(maximum(R), floor(Int, min(rxhi, ryhi)))
    Rstart, Rstep = first(R), step(R)
    lfirst, llast = ceil(Int, (rfirst-Rstart)/Rstep), floor(Int, (rlast-Rstart)/Rstep)
    if lfirst == 0
        lfirst = length(R)+1
    end
    # Because of roundoff error, we still can't trust that the forward
    # calculation will be correct. Make adjustments as needed.
    while lfirst <= llast
        rl = R[lfirst]
        x, y = x0 - rl*sinθ, y0 + rl*cosθ
        if (1 <= x < w) && (1 <= y < h)
            break
        end
        lfirst += 1
    end
    while lfirst <= llast
        rl = R[llast]
        x, y = x0 - rl*sinθ, y0 + rl*cosθ
        if (1 <= x < w) && (1 <= y < h)
            break
        end
        llast -= 1
    end
    lfirst, llast
end
```
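In case the logic of `interior` isn't obvious at a glance: it solves the in-bounds constraints `1 <= x0 - r*sinθ <= w` and `1 <= y0 + r*cosθ <= h` for the line parameter `r` (my reading of the code above, not something spelled out separately). `minmax` takes care of the sign of `sinθ`/`cosθ`, the two `r`-intervals are intersected and clipped to the extent of `R`, and the trailing `while` loops nudge the endpoints inward when roundoff leaves them marginally out of bounds.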
That gives
```julia
julia> @time radon0(z, angles);
 10.549118 seconds (14 allocations: 11.665 MB)

julia> @time radon(z, angles);
  6.498923 seconds (6 allocations: 3.887 MB)
```
where `radon0` is your original code (with the type-instability fixed).
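(The benchmark inputs aren't shown here; a hypothetical setup of roughly this shape would let you reproduce the comparison, with the image size and angle count being guesses:)

```julia
# Hypothetical inputs for the timings above; the actual size of `z` and the
# number of angles used are not given in the thread.
z = rand(512, 512)
angles = linspace(0, pi, 180)   # 0.5/0.6-era syntax, matching the code above
sino = radon(z, angles)         # Nr × length(angles) sinogram
```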
As Simon noted, I can make it faster with threading. It should merely be a matter of starting julia with `JULIA_NUM_THREADS=2` (or however many physical cores you have or want to use) and adding `Threads.@threads` in front of the `j` loop, except for the infamous [performance of captured variables in closures · Issue #15276 · JuliaLang/julia · GitHub](https://github.com/JuliaLang/julia/issues/15276). You therefore have to use a "function barrier":
```julia
function radon(A::AbstractMatrix, angles)
    w, h = size(A)
    w2, h2 = w/2, h/2
    d = floor(Int, sqrt(w*w+h*h)/2)  # half-length of the diagonal
    Nr = 2d+1
    R = linspace(-Nr/2, Nr/2, Nr)
    sinogram = zeros(Nr, length(angles))
    radon_threads!(sinogram, A, angles, R, w, h)
end

function radon_threads!(sinogram, A, angles, R, w, h)
    Threads.@threads for j = 1:length(angles)
        # the rest is the same
```
With that, plus `-O3` as a command-line option, I get
```julia
julia> @time radon(z, angles);
  3.349954 seconds (8 allocations: 3.887 MB)
```
which is approximately three-fold faster than your original. If you have more cores, of course it’s even better.
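For completeness, the run above assumes Julia was launched with the thread count and optimization level set; a sketch of the invocation (the `2` is just an example):

```julia
# Launched from the shell as (assumed, matching the description above):
#   JULIA_NUM_THREADS=2 julia -O3
# Inside the session you can confirm the threads were picked up:
Threads.nthreads()   # 2, if the environment variable took effect
```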
Finally, I think you have a bug in your interpolation formula: I think `f21` and `f12` are swapped. Also, do you really mean `R = linspace(-Nr/2, Nr/2, Nr)` rather than `R = linspace(-(Nr-1)/2, (Nr-1)/2, Nr)`? If I just use `R = -d:d` in my version, it's even faster.
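To spell out the difference (a small worked example with a made-up `d = 3`, so `Nr = 7`):

```julia
Nr, d = 7, 3
linspace(-Nr/2, Nr/2, Nr)           # step Nr/(Nr-1) ≈ 1.167; endpoints at ±3.5
linspace(-(Nr-1)/2, (Nr-1)/2, Nr)   # step exactly 1; the same samples as -d:d
-d:d                                # -3, -2, ..., 3; an integer range, cheapest to index
```

The first form samples at a spacing slightly larger than one pixel, which is presumably not what was intended; the last two give the same sample positions, but the plain integer range avoids floating-point index arithmetic, which is likely why it times faster.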
Last but not least, there might be if you do `Pkg.update()`. Good timing on your part: the tag was merged yesterday.