In order to save memory in my computations I created a module with array wrappers (named TransformableFields) and in-place rfft! and irfft!.
The module can be found here InPlaceRealFFT.jl. There is no dependencies, so It can be just copied and one is able to use it.
I am using fft’s to compute derivates (among other things), but the inplace version is given much slower results. Here is a sample code
using InPlaceRealFFT
function rfftfreq(n::I,s::R) where {I<:Integer,R<:Real}
d = 2π*s/n
[(n/2 - i)/(d*n) for i = n/2:-1:0]
end
function dx(field::A,len::Float64,n=1::Int) where {T<:Union{Float32,Float64},A<:AbstractArray{T,3}}
nx,ny,nz = size(field)
fieldhat = rfft(field,1)
kim = (rfftfreq(nx,len) .* im) .^ n
Threads.@threads for l = 1:nz
for j = 1:ny
for i = 1:length(kim)
@inbounds fieldhat[i,j,l] = fieldhat[i,j,l]*kim[i]
end
end
end
return irfft(fieldhat,nx,1)
end
function dx!(field::TransformableField{T,3},len::T,n=1::Int) where T<:Union{Float64,Float32}
nx,ny,nz = size(field.r)
rfft!(field,1)
kim = (rfftfreq(nx,len) .* im) .^ n
fieldhat = field.c
nx,ny,nz = size(field)
Threads.@threads for l = 1:nz
for j = 1:ny
for i = 1:nx
@inbounds fieldhat[i,j,l] = fieldhat[i,j,l]*kim[i]
end
end
end
return irfft!(field,1)
end
lx=1.0;
a = randt(1024,512,512); # randomly generated TransformableField
b = copy(a.r); # b Simple Array
@time c = dx(b,lx);
@time dx!(a,lx);
If I run this script I have the following output:
21.337080 seconds (1.60 M allocations: 4.088 GiB, 2.14% gc time)
93.240138 seconds (777.40 M allocations: 27.121 GiB, 3.63% gc time)
It seems to me that dx!
is spending most of the time in the for loop. I can’t figure out why that is allocating so much.
Probably both functions can be optimised, but I expected that they would give similar results the way they are written now.
This was run using Julia 0.6rc1.