Hi, I’ve been trying to perform operations on Arrays that may have missing values, using the prototype function below:
function nanop(f::Function, data::Array; dim=1)
    ndim  = ndims(data)
    dsize = size(data)
    # Permute the array so that the dimension being reduced over comes last,
    # and record the size of the output (every other dimension, in order).
    if ndim == 2
        if dim == 1
            data  = permutedims(data, [2, 1])
            nsize = (dsize[2],)
        else
            nsize = (dsize[1],)
        end
    elseif ndim > 2
        if dim == 1
            data  = permutedims(data, vcat(collect(2:ndim), 1))
            nsize = dsize[2:end]
        elseif dim < ndim
            newdim1 = collect(1:dim-1)
            newdim2 = collect(dim+1:ndim)
            data    = permutedims(data, vcat(newdim1, newdim2, dim))
            nsize   = (dsize[newdim1]..., dsize[newdim2]...)
        else
            nsize = dsize[1:end-1]
        end
    else
        nsize = dsize[1:end-1]
    end
    # Collapse all the non-reduced dimensions into rows: each row holds one
    # slice along `dim`, which is reduced by `f` after dropping missings.
    data = reshape(data, :, dsize[dim])
    l   = size(data, 1)
    out = zeros(l)
    for ii = 1:l
        out[ii] = f(skipmissing(data[ii, :]))
    end
    return reshape(out, nsize)
end
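For reference, this is the kind of call I have in mind on data that actually contains missings (the array below is just a made-up illustration, not my real data):

A = convert(Array{Union{Float64,Missing}}, rand(4, 3, 5))
A[1, 1, 2] = missing
nanop(maximum, A, dim=3)   # 4×3 Array; the (1,1) entry ignores the missing value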
However, benchmarking with @btime shows that something is seriously hurting performance on large arrays:
using BenchmarkTools, Statistics

@btime nanop(mean,rand(480,241,365),dim=3);
@btime nanop(maximum,rand(480,241,365),dim=3);
@btime nanop(minimum,rand(480,241,365),dim=3);
@btime nanop(std,rand(480,241,365),dim=3);
gives me
  619.931 ms (231370 allocations: 663.69 MiB)
  618.972 ms (115690 allocations: 661.93 MiB)
  619.175 ms (115690 allocations: 661.93 MiB)
  829.410 ms (231370 allocations: 663.69 MiB)
Is there any way to reduce the memory allocation? I’m going to be doing operations like
@btime nanop(mean,rand(480,241,365,4),dim=4);
@btime nanop(maximum,rand(480,241,365,4),dim=4);
@btime nanop(minimum,rand(480,241,365,4),dim=4);
@btime nanop(std,rand(480,241,365,4),dim=4);
which frankly scares the crap out of me, since I’ll have to run that for-loop many more times than in my first few test cases.
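For what it’s worth, the only idea I’ve come up with so far (completely untested, and reduce_rows! is just a name for the sketch, not something I actually have) is to slice each row with a view so that data[ii, :] doesn’t allocate a fresh copy every iteration, roughly like this:

# Sketch only: same inner loop as in nanop, but using @view to avoid
# copying each row before reducing it with f.
function reduce_rows!(out, f, data)
    for ii in 1:size(data, 1)
        out[ii] = f(skipmissing(@view data[ii, :]))
    end
    return out
end

But I don’t know whether that’s the right direction, or whether the full copy made by permutedims is the bigger problem, so any pointers would be appreciated.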