Hi, I’ve been trying to perform reductions over Arrays that may contain missing values, using the prototype function below:
function nanop(f::Function, data::Array; dim=1)
    ndim = ndims(data)
    dsize = size(data)
    # Move the reduction dimension `dim` to the last position and record
    # the size of the remaining dimensions for the output array.
    if ndim == 2
        if dim == 1
            newdim = [2, 1]
            data = permutedims(data, newdim)
            nsize = dsize[2]
        else
            nsize = dsize[1:end-1]    # dim == 2: already last, no permutation
        end
    elseif ndim > 2
        if dim == 1
            newdim = vcat(collect(2:ndim), 1)
            data = permutedims(data, newdim)
            nsize = dsize[2:end]
        elseif dim < ndim
            newdim1 = collect(1:dim-1)
            newdim2 = collect(dim+1:ndim)
            newdim = vcat(newdim1, newdim2, dim)
            data = permutedims(data, newdim)
            nsize = tuple(dsize[newdim1]..., dsize[newdim2]...)
        else
            nsize = dsize[1:end-1]    # dim == ndim: already last
        end
    else
        nsize = dsize[1:end-1]
    end
    # Flatten all other dimensions into rows, so each row is one slice
    # along `dim`, then apply f to each row with missings skipped.
    data = reshape(data, :, dsize[dim])
    l = size(data, 1)
    out = zeros(l)
    for ii = 1:l
        dataii = data[ii, :]
        out[ii] = f(skipmissing(dataii))
    end
    return reshape(out, nsize)
end
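To show what I mean, the idea is that f gets applied along dim with missing values skipped; a minimal usage example (small made-up data, just for illustration):

using Statistics
A = [1.0 missing; 2.0 3.0]
nanop(mean, A, dim=1)    # mean down each column, ignoring missing -> [1.5, 3.0]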
However, benchmarking with @btime shows that something is seriously slowing down performance for large arrays:
using BenchmarkTools, Statistics
@btime nanop(mean,rand(480,241,365),dim=3);
@btime nanop(maximum,rand(480,241,365),dim=3);
@btime nanop(minimum,rand(480,241,365),dim=3);
@btime nanop(std,rand(480,241,365),dim=3);
gives me
619.931 ms (231370 allocations: 663.69 MiB)
618.972 ms (115690 allocations: 661.93 MiB)
619.175 ms (115690 allocations: 661.93 MiB)
829.410 ms (231370 allocations: 663.69 MiB)
Is there any way to reduce the memory allocation? I’m eventually going to be doing operations like
@btime nanop(mean,rand(480,241,365,4),dim=4);
@btime nanop(maximum,rand(480,241,365,4),dim=4);
@btime nanop(minimum,rand(480,241,365,4),dim=4);
@btime nanop(std,rand(480,241,365,4),dim=4);
which frankly scares the crap out of me, since the for-loop will have to run far more iterations than in my first few test cases.
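One idea I had (just a sketch, I haven’t benchmarked it) is to take views instead of slices inside the loop, since data[ii,:] copies a full row on every iteration:

# untested sketch: use views so each row is not copied before skipmissing
@views for ii = 1:l
    out[ii] = f(skipmissing(data[ii, :]))
end

Is that the right direction, or should the whole reduction be structured differently to begin with?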