Hi everyone,
I am quite new to the language, and was wondering if people could help me optimising my piece of code. The idea for this array type is to reduce the total Base.summarysize for a vector of vector as much as possible. I did this by creating a type that has a “pointer” to the actual vector, like StridedArrays, but more minimal. The size of the array in StridedArrays does not really change.
struct FMat{N} <: AbstractArray{Int,1}
data::Vector{<:Integer}
@inline function FMat(vals::Vector{Vector{T}}) where {T}
N = size(vals, 1)
flattened = reduce(vcat, vals)
A = Array{T,1}(undef, N + 1 + size(flattened, 1))
A[1] = 1
A[N+2:end] = flattened
A[2:N+1] = accumulate(+, size(i, 1) for i in vals) .+ 1
new{N}(A)
end
@inline function FMat{N}(vals::Vector{<:Int}) where {N}
new{N}(vals)
end
end
Base.length(::FMat{N}) where {N} = N
Base.size(::FMat{N}) where {N} = (N,)
@inline function Base.getindex(A::FMat{N}, i::Int) where {N}
return A.data[A.data[i]+N+1:A.data[i+1]+N]
end
@inline function Base.getindex(A::FMat{N}, u::UnitRange) where {N}
i_start = u.start
i_stop = u.stop
@views indices = A.data[i_start:i_stop+1] .- A.data[i_start] .+ 1
a = vcat(indices, A.data[N+A.data[i_start]+1:N+A.data[i_stop+1]])
return FMat{length(u)}(a)
end
Base.setindex!(A::FMat{N}, ::Int, ::Int) where {N} = A
IndexStyle(::Type{<:FMat}) = IndexLinear()
The code is able to halve the total memory size of the vector of vectors, which I was quite happy with. But then I started to check the actual performance of the indexing, it turned out the indexing is ~5x slower.
running
elements = Vector{Vector{Int}}()
for i in 1:7
nrows = rand(UInt16) + 1
for _ in 1:nrows
push!(elements, abs.(rand(Int16, i)))
end
end
fm = FMat(elements)
julia> @btime fm[end]
190.954 ns (9 allocations: 240 bytes)
julia> @btime elements[end]
30.271 ns (1 allocation: 16 bytes)
if i instead change it to
julia> @btime @view fm[end]
28.810 ns (1 allocation: 48 bytes)
This of course is a big improvement, but I don’t want to keep writing @view everytime I want to get the element.
julia> @btime fm[end-5:end]
2.708 μs (29 allocations: 1.77 KiB)
julia> @btime elements[end-5:end]
118.097 ns (6 allocations: 192 bytes)
julia> @btime @view fm[end-5:end]
80.627 ns (3 allocations: 96 bytes)
julia> @btime @view elements[end-5:end]
94.805 ns (5 allocations: 128 bytes)
which in fact is a lot better. Any help would be greatly appreciated!