I am performing array operations, where I retrieve elements of an array (this array is nested). The functions below are minimal working examples of what I am trying to do.
using TimerOutputs
using StaticArrays
using StatsBase
const track = TimerOutput()
########## normal array:
function nestedNormalArray(input)
output = zeros(20)
for i in 1:20
output[i] = input[I][rand(1:5)]
end
return output
end
@timeit track "arrayNestedInput" for j in 1:10000000
temp = Vector{Any}(undef,20)
for k in 1:20
random = digits(rand(1:((2^5)-1)), base=2, pad=5)
temp[k] = random
end
nestedNormalArray(temp)
end
########## static array:
function nestedStaticArray(input)
output = zeros(MVector{20,Int32})
for i in 1:20
output[i] = input[I][rand(1:5)]
end
return output
end
@timeit track "staticArrayNestedInput" for j in 1:10000000
temp = Vector{Any}(undef,20)
for k in 1:20
random = digits(rand(1:((2^5)-1)), base=2, pad=5)
temp[k] = MVector{5}(random)
end
inside = SVector{20}(temp)
nestedStaticArray(inside)
end
I used the package TimerOutputs to get the allocated memory and the time both functions needed. Surprisingly the version with the static arrays needed a lot more allocations and time.
You are benchmarking on global variables, and using Vector{Any}. Thatβs basically the two worst things you can do performance-wise
Make sure to put your code in functions, and then benchmark it using the BenchmarkTools package. And avoid Vector{Any} at all cost, that will kill performance completely.
I put my code into functions, so that temp and inside are no longer global, and I changed Vector{Any} into Vector{Vector{Int64}} .
using TimerOutputs
using StaticArrays
const track = TimerOutput()
########## normal array:
function nestedNormalArray(input)
output = zeros(20)
for i in 1:20
output[i] = input[i][sample(1:5)]
end
return output
end
function makeNormalNestedArray()
temp = Vector{Vector{Int64}}(undef,20)
for k in 1:20
random = digits(rand(1:((2^5)-1)), base=2, pad=5)
temp[k] = random
end
return temp
end
@timeit track "arrayNestedInput" for j in 1:10000000
nestedNormalArray(makeNormalNestedArray())
end
########## static array:
function nestedStaticArray(input)
output = zeros(MVector{20,Int32})
for i in 1:20
output[i] = input[i][sample(1:5)]
end
return output
end
function makeNestedStaticArray()
temp = Vector{Vector{Int64}}(undef,20)
for k in 1:20
random = digits(rand(1:((2^5)-1)), base=2, pad=5)
temp[k] = MVector{5}(random)
end
inside = SVector{20}(temp)
return inside
end
@timeit track "staticArrayNestedInput" for j in 1:10000000
nestedStaticArray(makeNestedStaticArray())
end
Not sure I understand. Does this make a difference? rand(input[i]) will select a random element of input[i], is that not what is wanted? Maybe you can add some more detail to your MWE?
I will try to give my MWE more detail. In general, my goal is to give my function nestedStaticArray(input, idx) a boolean vector input and a nested vector idx which has (inside each nested vector) the indexes which I want to retrieve from input with a for loop.
using TimerOutputs
using StaticArrays
const track = TimerOutput()
function nestedStaticArray(input, idx, reference)
output = zeros(MVector{20,Int32})
for i in 1:20
tempi = input[idx[i]]
t = 0
for u in 0:(length(tempi)-1)
t = t+ 2^u * tempi[u+1]
end
output[i] = reference[t+1]
end
return output
end
function makeNestedStaticArray()
temp = Vector{Vector{Int64}}(undef,20)
for k in 1:20
random = rand(1:5, 5)
temp[k] = MVector{5}(random)
end
inside = SVector{20}(temp)
return inside
end
function makeReferenceStatic()
output = zeros(MVector{2^5,Int32})
for i in 1:(2^5)
if i % 2 == 0
output[i] = 1
else
output[i] = 0
end
end
return output
end
@timeit track "staticArrayNestedInput" for j in 1:10000000
nestedStaticArray(digits(rand(1:((2^20)-1)), base=2, pad=20),makeNestedStaticArray(), makeReferenceStatic() )
end
I am giving my function nestedStaticArray a input vector of length 20 (boolean), in the ith iteration of the for loop I want to create a smaller boolean vector tempi consisting only of a subarray of input, namely input at the indexes of idx[i], the boolean vector tempi is then turned into a number of base 10 and output now gets the value of reference at this computed index. (The function makeReferenceStatic() is normally more complicated, but for the sake of this MWE I constructed it this way, and I also donβt do this: digits(rand(1:((2^20)-1)), base=2, pad=20), but I give my function a different boolean vector of length 20 each time I call the function)
Not sure if this does the same as yours, but itβs faster at least:
makeref() = iseven.(SOneTo(32))
rand5() = SVector{5}(rand(1:5) for _ in 1:5)
makenested() = SVector{20}(rand5() for _ in 1:20)
function nestedsarray(input, ind, ref)
output = MVector{20, Bool}(undef)
for i in eachindex(ind)
tempi = input[ind[i]]
t, u2 = 0, 1
for u in eachindex(tempi)
t += u2 * tempi[u]
u2 *= 2
end
output[i] = ref[t+1]
end
return SVector{20,Bool}(output)
end
nestedsarray(rand(SVector{20,Bool}), makenested(), makeref())
I recommend using BenchmarkTools instead of running your code 10 million times: