Hello, here’s something I don’t understand about Query.jl: after running a few query commands and calling `collect`, I was expecting the result to be of type `Array{MyObs{Float64}, 1}`, but got `Array{MyObs,1}` instead. When I checked the type of each element of the array, each one is indeed of type `MyObs{Float64}`. So why is `collect` not returning `Array{MyObs{Float64}, 1}`?
Reproducible example:
using LinearAlgebra, Random, StatsModels, Query, DataFrames
"""
    MyObs{T<:LinearAlgebra.BlasReal}

Container for one group's data: a response vector `y`, two matrices `X` and
`Z`, and two vectors `xty` and `zty`. The three-argument outer constructor in
this file fills `xty` and `zty` with `transpose(X) * y` and `transpose(Z) * y`;
the struct itself does not enforce that relationship.
"""
struct MyObs{T<:LinearAlgebra.BlasReal}
    y::Vector{T}    # response vector
    X::Matrix{T}    # first matrix paired with `y`
    Z::Matrix{T}    # second matrix paired with `y`
    xty::Vector{T}  # slot for the cross-product of `X` with `y`
    zty::Vector{T}  # slot for the cross-product of `Z` with `y`
end
"""
    MyObs(y, X, Z)

Build a `MyObs{T}` from a response vector `y` and matrices `X` and `Z`,
computing the cross-products `transpose(X) * y` and `transpose(Z) * y` once so
they are stored alongside the raw data.
"""
function MyObs(
    y::Vector{T},
    X::Matrix{T},
    Z::Matrix{T},
) where {T<:LinearAlgebra.BlasReal}
    # Positional order matches the struct fields: y, X, Z, xty, zty.
    return MyObs{T}(y, X, Z, transpose(X) * y, transpose(Z) * y)
end
"""
    myobs(data_obs, feformula::FormulaTerm, reformula::FormulaTerm)

Build a `MyObs` from the table `data_obs`. The response and the first matrix
come from `StatsModels.modelcols(feformula, data_obs)`; the second matrix comes
from `StatsModels.modelmatrix(reformula, data_obs)`.
"""
function myobs(data_obs, feformula::FormulaTerm, reformula::FormulaTerm)
    # `modelcols` on a formula yields a (left-hand side, right-hand side) pair.
    response, design = StatsModels.modelcols(feformula, data_obs)
    grouping = StatsModels.modelmatrix(reformula, data_obs)
    return MyObs(response, design, grouping)
end
# Simulate a grouped data set. The statement order matters: every draw below
# consumes the global RNG seeded here, so reordering changes the data.
Random.seed!(1)
# reps rows per group, N groups, p predictor columns.
# `q` is assigned but not used anywhere else in the visible code.
reps = 20; N = 10; p = 5; q = 2
# (N*reps) x p matrix, filled in place with standard-normal draws.
X = Matrix{Float64}(undef, N*reps, p)
randn!(X)
# One value per row; each block of `reps` consecutive rows shares a single draw.
rand_intercept = zeros(N*reps)
for j in 1:N
# `Random.randn(1)` is a 1-element vector; `.=` broadcasts it over the slice.
rand_intercept[(reps * (j-1) + 1) : reps * j] .= Random.randn(1)
end
# Response: row sums of X (all coefficients are 1) + group intercept + noise.
y = X * ones(p) + rand_intercept + Random.randn(N*reps)
# Group label for every row: 1,...,1, 2,...,2, ..., N,...,N (reps copies each).
id = repeat(1:N, inner = reps)
# Put the id column next to the y and X columns, then name all seven columns.
# NOTE(review): newer DataFrames.jl releases may require column names or `:auto`
# when constructing a DataFrame from a matrix - confirm against the version used.
dat = hcat(rename!(DataFrame(hcat(id)), [:id]), DataFrame(hcat(y, X)))
rename!(dat, Symbol.(["id", "y", "x1", "x2", "x3", "x4", "x5"]))
"""
    test_id(subset_id::Vector{T}, x::T, k::Int) where T

Return `true` if `x` is one of the first `k` elements of `subset_id`, and
`false` otherwise.

`subset_id` must be sorted in ascending order, because the lookup uses
`searchsortedfirst`. A `k` larger than `length(subset_id)` is treated as the
full length.
"""
function test_id(subset_id::Vector{T}, x::T, k::Int) where T
    # Position of the first element >= x (length + 1 if there is none).
    idx = searchsortedfirst(subset_id, x)
    # The position alone only shows x <= subset_id[idx]; membership also needs
    # the element found there to equal x. Clamping to the vector length keeps
    # the index valid when k exceeds it.
    return idx <= min(k, length(subset_id)) && subset_id[idx] == x
end
# Group ids to keep, and how many leading entries of that list count.
k = 5
subset_id = collect(1:5)

# Two model formulas on the same response (the names suggest fixed-effects and
# random-effects parts): one with an intercept plus x1..x5, one intercept-only.
feformula = @formula(y ~ 1 + x1 + x2 + x3 + x4 + x5)
reformula = @formula(y ~ 1)

# Resolve each formula against the schema inferred from `dat`.
feformula = apply_schema(feformula, schema(feformula, dat))
reformula = apply_schema(reformula, schema(reformula, dat))
Then, running the following:
# Group rows by id, keep the groups selected by `test_id`, turn each remaining
# group into a `MyObs`, and materialize the results as an array.
obsvec = dat |>
    @groupby(_.id) |>
    @filter(test_id(subset_id, key(_), k)) |>
    @map(myobs(_, feformula, reformula)) |>
    collect
typeof(obsvec)
We get:
`Array{MyObs,1}`