As far as the function itself is concerned, I do not know whether you can get anything faster than a simple loop:
"""
    nothing_to_missing!(x)

Replace every `nothing` element of `x` with `missing`, in place, and return `x`.

The element type of `x` must be able to hold `missing` (for example `Any` or
`Union{T,Nothing,Missing}`); otherwise the assignment raises an error as soon as a
`nothing` element is encountered.
"""
function nothing_to_missing!(x)
    for i in eachindex(x)
        if isnothing(x[i])
            x[i] = missing
        end
    end
    # Return the mutated collection, as Base's `!` functions (`replace!`, `fill!`, ...) do.
    return x
end
However, the performance depends strongly on the element type of the vector:
# Vector{Any}
julia> @btime nothing_to_missing!(x) setup=(x=Any[isodd(i) ? nothing : 1 for i in 1:1000 ]) evals=1
4.905 μs (0 allocations: 0 bytes)
# Vector{Union{Int,Nothing,Missing}}
julia> @btime nothing_to_missing!(x) setup=(x=Union{Int,Nothing,Missing}[isodd(i) ? nothing : 1 for i in 1:1000 ]) evals=1
1.343 μs (0 allocations: 0 bytes)
This is for plain arrays. I do not know whether DataFrames have any specific behavior concerning these values.
Edit: With a DataFrame it seems to be much slower. However, I am not sure that this benchmark is meaningful, as I am not a regular user of data frames:
"""
    nothing_to_missing!(df, col)

Replace every `nothing` in column `col` of `df` with `missing`, in place, and return `df`.

`df` may be any container that supports `df[!, col]` and returns the stored column
vector (as a DataFrames.jl `DataFrame` does). The column's element type must be able to
hold `missing`.
"""
function nothing_to_missing!(df, col)
    # Fetch the column once. `df[!, col]` is the current DataFrames.jl spelling for
    # "the stored column, without copying"; the older `df[col]` form is no longer accepted.
    column = df[!, col]
    # The column type cannot be inferred from `df`, so do the element-wise work inside a
    # separate call that is compiled for the concrete vector type. Timings taken with
    # per-element `df[col][i]` access do not carry over to this version.
    replace!(column, nothing => missing)
    return df
end
julia> @btime nothing_to_missing!(df,1) setup=(df=DataFrame([Any[isodd(i) ? nothing : 1 for i in 1:1000 ]],[:x])) evals=1
145.723 μs (1978 allocations: 46.52 KiB)
julia> @btime nothing_to_missing!(df,1) setup=(df=DataFrame([Union{Int,Missing,Nothing}[isodd(i) ? nothing : 1 for i in 1:1000 ]],[:x])) evals=1
127.832 μs (1978 allocations: 46.52 KiB)