Say I have a vector with a billion elements and I want to retrieve the values of type T; wherever an element is missing, I want to replace it with ANY value of type T. E.g.
x = [1, missing, 2]
getT(x)
# should return
# [1, T, 2]
# where T can be any value such that `typeof(T) <: Int64`
What’s a fast way to do that? I think Missings.replace |> collect will be slow due to the need to replace.
The fastest way that I found was to create a default type `<: Real` and overload the `*` operator so that, when it encounters a `missing`, it returns the default value. My code is the following:
nn = 10_000_000  # number of elements in the benchmark vector
"""
    DefaultNumber{T<:Real} <: Real

Wrapper around a fallback value `val`. Multiplying a `DefaultNumber` by `missing`
yields `val`; multiplying it by any other `Real` yields that other operand unchanged.
"""
struct DefaultNumber{T<:Real} <: Real
    val::T
end

"""
    default_number(x::Real)

Wrap `x` in a `DefaultNumber` parameterized on `typeof(x)`.
"""
default_number(x::Real) = DefaultNumber{typeof(x)}(x)

# A non-missing operand passes through untouched, whichever side it is on.
Base.:*(a1::DefaultNumber, a2::Real) = a2
Base.:*(a1::Real, a2::DefaultNumber) = a2 * a1
# Without this method a product of two `DefaultNumber`s matches both methods above
# equally well and raises an ambiguity `MethodError`. The right operand is returned,
# which is what the `(DefaultNumber, Real)` method does for every other `Real`.
Base.:*(a1::DefaultNumber, a2::DefaultNumber) = a2
# A missing operand is replaced by the wrapped fallback value.
Base.:*(a1::DefaultNumber, a2::Missing) = a1.val
Base.:*(a1::Missing, a2::DefaultNumber) = a2.val
# Create the benchmark vector: each slot receives a random Int64 with probability
# 0.8 and stays `missing` otherwise. The array is initialized explicitly with
# `missing` rather than relying on whatever an `undef` array happens to contain.
x_old = Vector{Union{Int64,Missing}}(missing, nn)
for i in eachindex(x_old)
    # Indexed assignment mutates the array without rebinding `x_old`, so no
    # `global` declaration is needed.
    rand() < 0.8 && (x_old[i] = rand(Int64))
end
"""
    replace_missing(x_old, val)

Return a copy of `x_old` in which every `missing` entry is replaced by `val`,
computed by broadcasting `*` between a `default_number(val)` wrapper and `x_old`.
"""
function replace_missing(
    x_old::T, val::T2
) where T<:AbstractVector{Union{T2,Missing}} where T2
    return broadcast(*, default_number(val), x_old)
end
"""
    replace_missing2(x_old, val)

Return a new `Vector{T2}` holding the elements of `x_old` with every `missing`
entry replaced by `val`. Each element is multiplied by a `default_number(val)`
wrapper inside an explicit loop.
"""
function replace_missing2(
    x_old::T, val::T2
) where T<:AbstractVector{Union{T2,Missing}} where T2
    default_val = default_number(val)
    x_new = Vector{T2}(undef, length(x_old))
    # `x_new` is 1-based but an `AbstractVector` argument need not be. Iterating the
    # output's own indices and shifting into `x_old` keeps `@inbounds` valid for any
    # index range; for a plain `Vector` the offset is zero.
    offset = firstindex(x_old) - firstindex(x_new)
    @inbounds @simd for i in eachindex(x_new)
        x_new[i] = default_val * x_old[i + offset]
    end
    return x_new
end
"""
    replace_missing3(x_old, val)

Substitute `val` for the missing entries of `x_old` via `Missings.replace` and
`collect` the result.
"""
function replace_missing3(
    x_old::T, val::T2
) where T<:AbstractVector{Union{T2,Missing}} where T2
    filled = Missings.replace(x_old, val)
    return collect(filled)
end
"""
    replace_missing4(x_old, val)

Return a new `Vector{T2}` holding the elements of `x_old` with every `missing`
entry replaced by `val`, using an explicit loop with a per-element `ismissing` check.
"""
function replace_missing4(
    x_old::T, val::T2
) where T<:AbstractVector{Union{T2,Missing}} where T2
    x_new = Vector{T2}(undef, length(x_old))
    # `x_new` is 1-based but an `AbstractVector` argument need not be. Iterating the
    # output's own indices and shifting into `x_old` keeps `@inbounds` valid for any
    # index range; for a plain `Vector` the offset is zero.
    offset = firstindex(x_old) - firstindex(x_new)
    @inbounds @simd for i in eachindex(x_new)
        x = x_old[i + offset]  # read once, then branch on the local value
        x_new[i] = ismissing(x) ? val : x
    end
    return x_new
end
julia> @btime replace_missing(x_old,2) #broadcasting default number
39.520 ms (2 allocations: 76.29 MiB)
julia> @btime replace_missing2(x_old,2) #for loop with default number
38.559 ms (2 allocations: 76.29 MiB)
julia> @btime replace_missing3(x_old,2) #using collect(Missings.replace(...))
51.644 ms (3 allocations: 76.29 MiB)
julia> @btime replace_missing4(x_old,2) #for loop with if-else
45.405 ms (2 allocations: 76.29 MiB)
julia> @btime coalesce.(x_old,2) #as suggested by tbeason
39.303 ms (4 allocations: 76.29 MiB)