How do I obtain the `T` values in a vector `Vector{Union{Missing, T}}`?

Say I have a vector a billion elements long and I want to retrieve the values of type `T`; wherever an element is `missing`, I want to replace it with ANY value of type `T`. E.g.

x = [1, missing, 2]

getT(x)
# should return
# [1, T, 2]
# where T stands for any value of type `Int64`

What’s a fast way to do that? I think Missings.replace |> collect will be slow due to the need to replace.

coalesce maybe?

2 Likes

I think the implementation will do some ifs. Best to just extract the underlying array of bytes… Hmm

Can it be done in place, or do you need a new vector?

Need a new vector.

The fastest way that I found was to create a default type `<: Real` and overload the `*` operator so that, when it encounters a `missing`, it returns the default value. My code is the following:


# Number of elements in the benchmark vector.
nn = 10_000_000

# Wrapper around a fallback value `val`. It subtypes `Real` so that the `*` methods
# below can use multiplication as a "replace missing" operation:
#   DefaultNumber * missing  -> the wrapped fallback value
#   DefaultNumber * real     -> the real operand, unchanged
# NOTE(review): `DefaultNumber * DefaultNumber` matches both Real-pairing methods
# below and neither is more specific, so that combination is not supported.
struct DefaultNumber{T<:Real} <: Real
    val::T
end

# Wrap `x` in a `DefaultNumber` parameterized by the concrete type of `x`.
default_number(x::T) where {T<:Real} = DefaultNumber{T}(x)

# A DefaultNumber on the left of an ordinary real: the real passes through untouched.
function Base.:*(default::DefaultNumber, other::Real)
    return other
end

# A DefaultNumber on the right: swap the operands and reuse the method above.
function Base.:*(other::Real, default::DefaultNumber)
    return default * other
end

# Meeting `missing` on either side yields the wrapped fallback value.
function Base.:*(default::DefaultNumber, ::Missing)
    return default.val
end

function Base.:*(::Missing, default::DefaultNumber)
    return default.val
end

# Create the benchmark vector: each slot independently receives a random Int64 with
# probability 0.8 and otherwise stays `missing`.
# The array is built with the explicit `missing` initializer because the contents of
# an `undef`-constructed array are not guaranteed to be `missing`.
x_old = Vector{Union{Int64,Missing}}(missing, nn)
for i in eachindex(x_old)
    # Indexed assignment mutates the existing array, so no `global` is required.
    rand() < 0.8 && (x_old[i] = rand(Int64))
end

# Replace every `missing` in `x_old` with `val` by broadcasting a `DefaultNumber`
# wrapper of `val` over the vector with `*`.
#
# Arguments:
#   x_old - vector whose element type is `Union{T2,Missing}`
#   val   - replacement value of type `T2`; it is passed to `default_number`
# Returns the array produced by the broadcast.
function replace_missing(x_old::T, val::T2) where {T2,T<:AbstractVector{Union{T2,Missing}}}
    wrapped = default_number(val)
    return wrapped .* x_old
end

# Replace every `missing` in `x_old` with `val` using an explicit loop that multiplies
# each element by a `DefaultNumber` wrapper of `val`.
#
# Arguments:
#   x_old - vector whose element type is `Union{T2,Missing}`
#   val   - replacement value of type `T2`; it is passed to `default_number`
# Returns a freshly allocated `Vector{T2}` with the same length as `x_old`.
function replace_missing2(x_old::T,val::T2) where T<:AbstractVector{Union{T2,Missing}} where T2
    default_val = default_number(val)
    x_new = Vector{T2}(undef, length(x_old))
    # `x_old` may be any AbstractVector, so its first index need not be 1. Translate
    # from x_new's 1-based positions rather than assuming `1:length(x_old)` is valid;
    # this keeps the `@inbounds` accesses within both arrays.
    shift = firstindex(x_old) - 1
    @inbounds @simd for i in eachindex(x_new)
        x_new[i] = default_val * x_old[i + shift]
    end
    return x_new
end

# Replace every `missing` in `x_old` with `val` by materializing the result of
# `Missings.replace` with `collect`.
#
# Arguments:
#   x_old - vector whose element type is `Union{T2,Missing}`
#   val   - replacement value of type `T2`
# Returns whatever `collect` produces from `Missings.replace(x_old, val)`.
function replace_missing3(x_old::T, val::T2) where {T2,T<:AbstractVector{Union{T2,Missing}}}
    replaced = Missings.replace(x_old, val)
    return collect(replaced)
end

# Replace every `missing` in `x_old` with `val` using an explicit if/else loop.
#
# Arguments:
#   x_old - vector whose element type is `Union{T2,Missing}`
#   val   - replacement of type `T2`, written wherever `x_old` holds `missing`
# Returns a freshly allocated `Vector{T2}` with the same length as `x_old`.
function replace_missing4(x_old::T,val::T2) where T<:AbstractVector{Union{T2,Missing}} where T2
    x_new = Vector{T2}(undef, length(x_old))
    # `x_old` may be any AbstractVector, so its first index need not be 1. Translate
    # from x_new's 1-based positions rather than assuming `1:length(x_old)` is valid;
    # this keeps the `@inbounds` accesses within both arrays.
    shift = firstindex(x_old) - 1
    @inbounds @simd for i in eachindex(x_new)
        # Read the element once so the missing-check and the store use the same value.
        xi = x_old[i + shift]
        if ismissing(xi)
            x_new[i] = val
        else
            x_new[i] = xi
        end
    end
    return x_new
end
julia> @btime replace_missing(x_old,2) #broadcasting default number
  39.520 ms (2 allocations: 76.29 MiB)

julia> @btime replace_missing2(x_old,2) #for loop with default number
  38.559 ms (2 allocations: 76.29 MiB)

julia> @btime replace_missing3(x_old,2) #using collect(Missings.replace(...))
  51.644 ms (3 allocations: 76.29 MiB)

julia> @btime replace_missing4(x_old,2) #for loop with if-else
  45.405 ms (2 allocations: 76.29 MiB)

julia> @btime coalesce.(x_old,2) #as suggested by tbeason
  39.303 ms (4 allocations: 76.29 MiB)

The problem with that is that `Union{Missing,T}` is not an isbits type.

Does `replace!` work for you?

julia> replace!([1, missing, 2, 3, missing], missing => 0)
5-element Array{Union{Missing, Int64},1}:
 1
 0
 2
 3
 0

I still don’t see why coalesce.(x,0) isn’t what you want…?

4 Likes

Yeah, it is pretty fast too.

1 Like