Strided array from array of struct


#1

I’ve got a Vector{Foo}, where Foo is some struct with fields a::Int, b::Float64, c::Symbol. How can I get a view corresponding to map(f->f.b, myvec)? In other words, how can I build a strided array that starts at &myvec+sizeof(Int) with stride sizeof(Foo)? Do I have to manually build a SubArray object?

Also, all I want is to tack on a DataFrame-like interface on top of Vector{Foo}. Is there any package that does that?


#2

Maybe not exactly what you want (it’s not a view), but what about getfield.(v, :b), where v is your Vector{Foo}?

UPDATE:

julia> @time map(f->f.b, v)
  0.078088 seconds (9.46 k allocations: 539.537 KiB)
3-element Array{Float64,1}:
 1.0
 2.0
 3.0

julia> @time getfield.(v, :b)
  0.000219 seconds (57 allocations: 1.891 KiB)
3-element Array{Float64,1}:
 1.0
 2.0
 3.0

#3

getfield.(v, :b) is very elegant, but it ought to be equivalent to map performance-wise, or else something is very wrong. You’re most likely measuring compilation time.


#4

Ignoring the “don’t benchmark in global scope” rule:

julia> mutable struct Foo
           # Example element type for the timings: one Int, one Float64, one Symbol.
           a::Int
           b::Float64
           c::Symbol
       end

julia> # `undef` requests uninitialised storage; the bare-length form
       # `Vector{Foo}(3)` was removed in Julia 1.0.
       v = Vector{Foo}(undef, 3);

julia> v[1] = Foo(1,1.0,:a);

julia> v[2] = Foo(2,2.0,:a);

julia> v[3] = Foo(3,3.0,:a);

julia> @time map(f->f.b, v);
  0.098778 seconds (42.10 k allocations: 2.451 MiB)

julia> @time map(f->f.b, v);
  0.067279 seconds (9.47 k allocations: 514.928 KiB)

julia> @time getfield.(v, :b);
  0.706285 seconds (189.01 k allocations: 10.101 MiB, 11.72% gc time)

julia> @time getfield.(v, :b);
  0.000211 seconds (57 allocations: 1.891 KiB)

#5

Wasn’t the point of this thread to get a view, rather than to find the fastest way to create a new vector of the b fields?


#6

Yes, I would like to get a view.


#7

I know this was posted a while ago, but I tried to do what the OP asked for as an exercise.
I don’t know how efficient it is, but here it is:

julia> struct Foo
        # Immutable example record with the three fields described in the original
        # question; used below as the element type for the StructView demo.
        a::Int
        b::Float64
        c::Symbol
      end

julia> struct StructView{TA,TS,T,N,S} <: AbstractArray{T,N}
           # Non-copying array wrapper that exposes field `S` of every element of `data`.
           # Type parameters: TA = type of the wrapped array, TS = its element (struct)
           # type, T = element type of the view, N = dimensionality, S = field name.
           data::TA
           # Wrap `data` so that it presents field `S` of each element.
           # Throws ArgumentError when `TS` has no field named `S`.
           function StructView(data::AbstractArray{TS,N}, S::Symbol) where {TS,N}
               S in fieldnames(TS) ||
                   throw(ArgumentError("Struct $(TS) has no field named $S"))
               # Take the element type from the struct declaration instead of from
               # `data[1]`: this works for empty arrays and arrays whose first index
               # is not 1, and stays correct when the field is abstractly typed and
               # the elements hold values of different concrete types.
               T = fieldtype(TS, S)
               TA = typeof(data)
               return new{TA,TS,T,N,S}(data)
           end
       end

julia> @inline function Base.size(a::StructView)
           # The view has exactly the shape of the array it wraps.
           return size(a.data)
       end

julia> @inline function Base.getindex(a::StructView{TA,TS,T,N,S}, i::Int) where {TA,TS,T,N,S}
           # Linear indexing: fetch the i-th wrapped element, then project out field S.
           element = a.data[i]
           return getfield(element, S)
       end

julia> @inline function Base.getindex(a::StructView{TA,TS,T,N,S}, I::Vararg{Int,N}) where {TA,TS,T,N,S}
           # N-dimensional indexing: forward all N indices to the wrapped array,
           # then project out field S of the element found there.
           element = a.data[I...]
           return getfield(element, S)
       end

julia> function Base.IndexStyle(::Type{StructView{TA,TS,T,N,S}}) where {TA,TS,T,N,S}
           # Report the same index style as the wrapped array type TA.
           return IndexStyle(TA)
       end

julia> @inline function Base.parent(a::StructView)
           # Hand back the wrapped array itself (no copy is made).
           return a.data
       end

julia> a = Foo(0,1.,:S)
Foo(0, 1.0, :S)

julia> va = fill(a,10)
10-element Array{Foo,1}:
 Foo(0, 1.0, :S)
 Foo(0, 1.0, :S)
 Foo(0, 1.0, :S)
 Foo(0, 1.0, :S)
 Foo(0, 1.0, :S)
 Foo(0, 1.0, :S)
 Foo(0, 1.0, :S)
 Foo(0, 1.0, :S)
 Foo(0, 1.0, :S)
 Foo(0, 1.0, :S)

julia> viewa = StructView(va,:a)
10-element StructView{Array{Foo,1},Foo,Int64,1,:a}:
 0
 0
 0
 0
 0
 0
 0
 0
 0
 0

julia> IndexStyle(typeof(viewa))
IndexLinear()

julia> viewb = StructView(va,:b)
10-element StructView{Array{Foo,1},Foo,Float64,1,:b}:
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0

julia> viewc = StructView(va,:c)
10-element StructView{Array{Foo,1},Foo,Symbol,1,:c}:
 :S
 :S
 :S
 :S
 :S
 :S
 :S
 :S
 :S
 :S

julia> va[1] = Foo(22,22.,:"22")
Foo(22, 22.0, Symbol("22"))

julia> viewa
10-element StructView{Array{Foo,1},Foo,Int64,1,:a}:
 22
  0
  0
  0
  0
  0
  0
  0
  0
  0

julia> viewb
10-element StructView{Array{Foo,1},Foo,Float64,1,:b}:
 22.0
  1.0
  1.0
  1.0
  1.0
  1.0
  1.0
  1.0
  1.0
  1.0

julia> viewc
10-element StructView{Array{Foo,1},Foo,Symbol,1,:c}:
 Symbol("22")
 :S
 :S
 :S
 :S
 :S
 :S
 :S
 :S
 :S 

#8

Another exercise:

julia> struct T
           # Example record with an Int and a Float64 field; strided_getindex below
           # reads these fields directly out of an `Array{T}`'s memory.
           a::Int
           b::Float64
       end

julia> function strided_getindex(a::Array{T}, el_ind::Int, ::Val{f_ind}) where {f_ind}
           # Read field number `f_ind` of element `el_ind` straight from the array's
           # memory, without first loading the whole element.
           # Throws BoundsError when `el_ind` is not a valid linear index of `a`.
           @boundscheck checkbounds(a, el_ind)
           # Public reflection API instead of the internal `T.types` field.
           FT = fieldtype(T, f_ind)
           # Keep `a` rooted for as long as a raw pointer into its buffer is in use.
           value = GC.@preserve a begin
               # `pointer(a, el_ind)` is the start of the element; Ptr arithmetic is
               # in bytes, so adding the field offset lands on the requested field.
               p = reinterpret(Ptr{FT}, pointer(a, el_ind)) + fieldoffset(T, f_ind)
               # unsafe_load replaces the 0-dim `unsafe_wrap(...)[]`, whose positional
               # `own` argument is no longer accepted (a keyword since Julia 1.0).
               unsafe_load(p)
           end
           return value
       end;

julia> a = [T(1, 2.), T(2, 3.), T(3, 4.)]
3-element Array{T,1}:
 T(1, 2.0)
 T(2, 3.0)
 T(3, 4.0)

julia> strided_getindex(a, 1, Val{1}())
1

julia> strided_getindex(a, 1, Val{2}())
2.0

julia> strided_getindex(a, 2, Val{1}())
2

julia> strided_getindex(a, 2, Val{2}())
3.0