import Base.size
import Base.IndexStyle
import Base.getindex
import Base.setindex!
using LinearAlgebra
using BenchmarkTools
struct Dummy{T} <: AbstractVector{T}
coeffs::Vector{T}
end
############### Base ###############################
size(c::Dummy{T}) where T = size(c.coeffs)
IndexStyle(::Type{<:Dummy}) = IndexLinear()
getindex(c::Dummy{T}, I::Int) where T = c.coeffs[I]
setindex!(c::Dummy{T}, v, I::Int) where T = setindex!(c.coeffs, v, I)
### Experiments
coeff = randn(Float32,10^9);
x = Dummy{Float32}(coeff);
@benchmark norm(x)
@benchmark norm(x.coeffs)
my example is here. I created an abstractvector called Dummy with its attribute coeffs. And then I set getindex/setindex to act directly on the coeffs attributed of Dummy. Now I found huge performance loss on norm of this AbstractVector Dummy, as
julia> @benchmark norm(x)
BenchmarkTools.Trial: 2 samples with 1 evaluation.
Range (min β¦ max): 2.956 s β¦ 2.959 s β GC (min β¦ max): 0.00% β¦ 0.00%
Time (median): 2.958 s β GC (median): 0.00%
Time (mean Β± Ο): 2.958 s Β± 1.809 ms β GC (mean Β± Ο): 0.00% Β± 0.00%
β β
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ β
2.96 s Histogram: frequency by time 2.96 s <
Memory estimate: 16 bytes, allocs estimate: 1.
julia> @benchmark norm(x.coeffs)
BenchmarkTools.Trial: 17 samples with 1 evaluation.
Range (min β¦ max): 302.239 ms β¦ 303.657 ms β GC (min β¦ max): 0.00% β¦ 0.00%
Time (median): 302.967 ms β GC (median): 0.00%
Time (mean Β± Ο): 302.929 ms Β± 426.119 ΞΌs β GC (mean Β± Ο): 0.00% Β± 0.00%
β β β β β ββ β ββ ββ β β β
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ β
302 ms Histogram: frequency by time 304 ms <
Memory estimate: 32 bytes, allocs estimate: 2.
May I get any suggestion on how to make the norm of this AbstractVector as fast as evaluating the norm of its coeffs? An overload of the norm would definitely work but it sounds hacky to me. Thanks much for your input!
I think you just want to define norm(x::Dummy, p) = norm(x.coeffs,p). The problem is that your version is falling back to the generic calculation rather than using BLAS.
Thanks for your quick reply. Then following your suggestion, I should overload every operation, e.g. dot, norm which BLAS has its own ways to make it faster, right? That is still a lot of hacky work and I really wonder if there is any easier way to deal with these things for AbstractArrays in Julia.
Sure, but what has that got to do with anything? Do you want norm to access these other fields? Then clearly you must write a method. But if all you want is for an existing norm method to be used, then one way to do that is by fitting into its signature.
Other fields have nothing to do with norm. So I did
import Base.size
import Base.IndexStyle
import Base.getindex
import Base.setindex!
import Base.pointer
using LinearAlgebra
using BenchmarkTools
struct Dummy{T} <: DenseVector{T}
coeffs::Vector{T}
end
############### Base ###############################
size(c::Dummy{T}) where T = size(c.coeffs)
IndexStyle(::Type{<:Dummy}) = IndexLinear()
getindex(c::Dummy{T}, I::Int) where T = c.coeffs[I]
setindex!(c::Dummy{T}, v, I::Int) where T = setindex!(c.coeffs, v, I)
pointer(c::Dummy{T}) where T = pointer(c.coeffs) # removing this line doesn't work either
### Experiments
coeff = randn(Float32,10^4);
x = Dummy{Float32}(coeff);
@benchmark norm(x)
@benchmark norm(x.coeffs)
and got this error for norm(x)
julia> norm(x)
ERROR: conversion to pointer not defined for Dummy{Float32}
Stacktrace:
[1] error(s::String)
@ Base ./error.jl:33
[2] unsafe_convert(#unused#::Type{Ptr{Float32}}, a::Dummy{Float32})
@ Base ./pointer.jl:67
[3] nrm2(n::Int64, X::Dummy{Float32}, incx::Int64)
@ LinearAlgebra.BLAS /Users/julia/buildbot/worker/package_macos64/build/usr/share/julia/stdlib/v1.6/LinearAlgebra/src/blas.jl:487
[4] nrm2(x::Dummy{Float32})
@ LinearAlgebra.BLAS /Users/julia/buildbot/worker/package_macos64/build/usr/share/julia/stdlib/v1.6/LinearAlgebra/src/blas.jl:493
[5] norm2
@ /Users/julia/buildbot/worker/package_macos64/build/usr/share/julia/stdlib/v1.6/LinearAlgebra/src/dense.jl:105 [inlined]
[6] norm(itr::Dummy{Float32}, p::Int64)
@ LinearAlgebra /Users/julia/buildbot/worker/package_macos64/build/usr/share/julia/stdlib/v1.6/LinearAlgebra/src/generic.jl:627
[7] norm(itr::Dummy{Float32})
@ LinearAlgebra /Users/julia/buildbot/worker/package_macos64/build/usr/share/julia/stdlib/v1.6/LinearAlgebra/src/generic.jl:625
[8] top-level scope
@ REPL[19]:1
What do I do? Sorry didnβt find any online resource about this
julia> Base.unsafe_convert(::Type{Ptr{T}}, A::Dummy{T}) where {T} =
Base.unsafe_convert(Ptr{T}, A.coeffs)
julia> @btime norm($x) # was 15.625 ΞΌs
min 12.416 ΞΌs, mean 12.647 ΞΌs (0 allocations)
99.28318f0
julia> @btime norm($(x.coeffs))
min 12.416 ΞΌs, mean 12.562 ΞΌs (0 allocations)
99.28318f0
julia> @which pointer(x) # no method added here
pointer(x::AbstractArray{T}) where T in Base at abstractarray.jl:1170
julia> @btime $(rand(10^4, 10^4)) * $x; # was 23.167 ms without BLAS
min 13.536 ms, mean 14.144 ms (4 allocations, 156.34 KiB)