Hello, I define the following custom type which basically is a wrapper but with the size of the array as a type parameter
module test
using BenchmarkTools
struct Lattice{S, T, N} <: AbstractArray{T, N}
lattice::AbstractArray{T, N}
function Lattice(λ::AbstractArray{T,N}) where {T,N}
new{Tuple{size(λ)...}, T, N}(λ)
end
end
Base.length(Λ::Lattice) = length(Λ.lattice)
Base.eltype(Λ::Lattice{S, T, N}) where {S, T, N} = T
Base.ndims(Λ::Lattice{S, T, N}) where {S, T, N} = N
Base.size(Λ::Lattice) = size(Λ.lattice)
Base.size(Λ::Lattice, i::Integer) = size(Λ.lattice, i)
Base.getindex(Λ::Lattice, i...) = Λ.lattice[i...]
Base.setindex!(Λ::Lattice, value, i...) = Λ.lattice[i...] = value
Base.view(Λ::Lattice, i...) = view(Λ.lattice, i...)
const λ = rand([-1,1,0], 4, 4)
const Λ = Lattice(λ)
## testing performance
summation(A) = sum(A)
@btime summation(λ)
@btime summation(Λ)
@code_llvm sum(λ)
@code_llvm sum(Λ)
end
The output is:
WARNING: replacing module test
56.554 ns (0 allocations: 0 bytes)
37.156 μs (33 allocations: 1.14 KiB)
define i64 @julia_sum_64840(i8** dereferenceable(40)) #0 !dbg !5{
top:
%1 = call i64 @julia__mapreduce_64637(i8** nonnull %0)
ret i64 %1
}
define i8** @julia_sum_64842(i8** dereferenceable(8)) #0 !dbg !5{
top:
%1 = call i8** @julia_mapfoldl_64828(i8** inttoptr (i64 139717740967168 to i8**), i8** nonnull %0)
ret i8** %1
}
So I see that something is different for my type but I cannot understand what makes it work 1000 times slower… Could someone help me to get the bottleneck of this definition?
Edit: when adding mutable ot hte struct definition I get 30x speed up but still much slower:
WARNING: replacing module test
56.555 ns (0 allocations: 0 bytes)
3.667 μs (17 allocations: 576 bytes)
define i64 @julia_sum_65143(i8** dereferenceable(40)) #0 !dbg !5{
top:
%1 = call i64 @julia__mapreduce_64637(i8** nonnull %0)
ret i64 %1
}
define i8** @julia_sum_65145(i8** dereferenceable(8)) #0 !dbg !5{
top:
%1 = call i8** @julia_mapfoldl_65131(i8** inttoptr (i64 139717740967168 to i8**), i8** nonnull %0)
ret i8** %1
}