Sometimes we need to shuffle a list of arrays along a given dimension while keeping the arrays aligned with each other along that dimension (e.g., in machine learning, when the feature and label matrices must be shuffled together along the dimension that indexes the observations).
I therefore wrote the following function to do that. Do you have any comments or ideas on how to improve it?
import Random
import Random.shuffle
import Random: AbstractRNG
"""
    shuffle(data; dims=1, rng=Random.GLOBAL_RNG)

Shuffle every array in `data` along dimension `dims` using one shared random
permutation, so that slices at the same position stay aligned across the arrays.

# Arguments
- `data`: The vector of arrays to shuffle.

# Keywords
- `dims`: The dimension along which to shuffle [def: `1`].
- `rng`: The `AbstractRNG` used to draw the permutation [def: `Random.GLOBAL_RNG`].

# Returns
A container created with `similar(data)` holding new, permuted arrays. The input
arrays are not modified.

# Throws
- `DimensionMismatch` if the arrays do not all have the same size along `dims`.

# Notes
- All the arrays must have the same size along the dimension to shuffle.
- An empty `data` yields an empty result.
- An array with fewer than `dims` dimensions has size 1 along `dims`; it is copied
  unchanged.
- NOTE(review): this adds a method to `Random.shuffle` for argument types this file
  does not own, so for a vector of arrays it takes precedence over the stock method
  that shuffles the vector's elements. Consider a dedicated function name.

# Example
```julia
julia> a = [1 2 30; 10 20 30]; b = [100 200 300];

julia> (aShuffled, bShuffled) = shuffle([a, b], dims=2)  # one possible outcome
2-element Vector{Matrix{Int64}}:
 [1 30 2; 10 30 20]
 [100 300 200]
```
"""
function shuffle(data::AbstractArray{T,1}; dims=1, rng=Random.GLOBAL_RNG) where T <: AbstractArray
    # Nothing to permute; also avoids indexing into an empty collection below.
    isempty(data) && return similar(data)

    n = size(first(data), dims)
    # Throw instead of logging: continuing with mismatched sizes would either
    # silently drop slices or fail later with an unrelated indexing error.
    if any(m -> size(m, dims) != n, data)
        throw(DimensionMismatch(
            "In `shuffle(arrays)` all individual arrays need to have the same size " *
            "on the dimension specified",
        ))
    end

    # A single permutation shared by all arrays keeps them aligned with each other.
    perm = Random.shuffle(rng, 1:n)

    out = similar(data)
    for i in eachindex(data)
        out[i] = _reorder_along(data[i], dims, perm)
    end
    return out
end

# Return a copy of `a` whose slices along dimension `dims` are reordered by `perm`;
# every other dimension is taken whole.
function _reorder_along(a::AbstractArray{<:Any,N}, dims, perm) where {N}
    # When `dims > N` no entry is replaced, so the array is simply copied.
    idx = ntuple(d -> d == dims ? perm : Colon(), Val(N))
    return a[idx...]
end
# Positional-RNG form: accepts the generator as the first argument and forwards
# to the keyword-based method.
function shuffle(rng::AbstractRNG, data::AbstractVector{<:AbstractArray}; dims=1)
    return shuffle(data; rng=rng, dims=dims)
end