I am having a hard time finding the “minimal” type necessary for a vector and converting to it. By “minimal” I mean the type that can still hold all of the values without being too general. For example:
String > Float64 > Int
since a `String` can represent any `Float64`, which in turn can represent any `Int`. I understand that not every `Int` can be expressed exactly as a `Float64`, but Julia's own promotion makes the same choice:
julia> promote_type(Int, Float64)
Float64
My attempt so far is the following:
using Parsers
using Random
"""
    ConvType(T, needsmissing)

Outcome of a type guess: `T` is the guessed element type and `needsmissing` says
whether the target vector also has to be able to hold `missing`.
"""
struct ConvType
    T::DataType
    needsmissing::Bool
end
"""
    guesstype(v; n = 10000)

Guess the narrowest of `Int`, a float type, or `String` that can hold the values of `v`,
and report whether `missing` was seen.

# Keywords
- `n`: maximum number of elements to inspect. When `v` is longer than `n`, a random
  subset of `n` elements (drawn without replacement) is inspected instead of all of `v`,
  so the guess only reflects the inspected elements.

# Returns
A `ConvType` holding the guessed type and the `missing` flag.
"""
function guesstype(v; n = 10000)
    len = length(v)
    # `randperm` (stdlib Random) gives `n` distinct indices without a third-party sampler.
    inspected = n >= len ? v : v[randperm(len)[1:n]]
    needsmissing = any(ismissing, inspected)
    min_T = Int
    for val in skipmissing(inspected)
        new_T = _promote(val)
        if new_T == String
            # Nothing is wider than String, so the scan can stop here.
            min_T = String
            break
        elseif new_T <: AbstractFloat
            # Widen only: a later, narrower float must not replace a wider one.
            min_T = promote_type(min_T, new_T)
        end
    end
    return ConvType(min_T, needsmissing)
end
"""
    _promote(s::String)

Classify a string: `String` when the stripped text does not parse as a `Float64`,
otherwise whatever `_promote` reports for the parsed number.
"""
function _promote(s::String)
    parsed = Parsers.tryparse(Float64, strip(s))
    parsed === nothing && return String
    return _promote(parsed)
end
"""
    _promote(n::AbstractFloat)

Return `Int` when `n` is a whole number that fits in an `Int`, otherwise the float's own
type.
"""
function _promote(n::T) where {T<:AbstractFloat}
    # `isinteger` is false for NaN and ±Inf (unlike `round(n) == n`, which is true for ±Inf).
    # The range check rejects integral values such as 1e300 that cannot convert to `Int`.
    # `-T(typemin(Int))` is 2^63 in `T`; the upper bound is strict because 2^63 overflows.
    fits = isinteger(n) && typemin(Int) <= n < -T(typemin(Int))
    return fits ? Int : T
end
# Fallback: any other value is classified as its own concrete type.
_promote(::T) where {T} = T
"""
    convone(s::String, ::Type{T}) where {T<:Integer}

Parse `s` as an integer of type `T`.

Accepts plain integers ("12"), integers written with an all-zero fractional part
("12.00") and other float notations of whole numbers ("1e3"). Throws (via `T(x)`) when
the text denotes a non-integral number instead of silently dropping the fraction.
"""
function convone(s::String, ::Type{T}) where {T<:Integer}
    str = strip(s)
    direct = Parsers.tryparse(T, str)
    direct === nothing || return direct
    # "12.000" -> "12": parse the integer digits directly so large values stay exact.
    trimmed = Parsers.tryparse(T, replace(str, r"\.0*$" => ""))
    trimmed === nothing || return trimmed
    # Last resort for notations like "1e3"; `T(x)` rejects non-integral values.
    return T(Parsers.parse(Float64, str))
end
# Parse `s` as a number of type `T`. The text is stripped first so that input accepted by
# the type-guessing step (which strips before parsing) is not rejected here because of
# surrounding whitespace such as a trailing newline.
convone(s::String, ::Type{T}) where {T<:Number} = Parsers.parse(T, strip(s))
# Number -> String: use the standard textual representation of the number.
convone(n::T, ::Type{String}) where {T<:Number} = string(n)
# Identity case: the value already has the requested type, so hand it back unchanged.
function convone(a::T, ::Type{T}) where {T}
    return a
end
# Number -> Number: delegate to the target type's constructor.
convone(a::T, ::Type{S}) where {T<:Number,S<:Number} = S(a)
# `missing` stays `missing` whatever the target type. The `where {T}` clause is required:
# without it `T` is looked up as a global when the method is defined and the definition
# throws an `UndefVarError`.
convone(::Missing, ::Type{T}) where {T} = missing
"""
    conveach(v)

Convert every element of `v` to the type guessed by `guesstype(v)` and return the result
as a new vector; its element type is widened with `Missing` when the guess reports
missing values.

The input is left untouched. Writing converted values back into `v` would fail whenever
`eltype(v)` cannot hold the target type (e.g. a `Vector{String}` whose guess is `Int`).
"""
function conveach(v)
    guess = guesstype(v)
    OT = guess.needsmissing ? Union{guess.T,Missing} : guess.T
    return OT[convone(val, guess.T) for val in v]
end
Here is some example output:
julia> conveach(Any[1, "123", 1., "1.00"])
4-element Array{Int64,1}:
1
123
1
1
julia> conveach(["1.1", "2", 1, 2])
4-element Array{Float64,1}:
1.1
2.0
1.0
2.0
julia> conveach(["a", "1", 1, 1.1])
4-element Array{String,1}:
"a"
"1"
"1"
"1.1"
I understand that Julia’s type system/hierarchy is extremely complicated, but this code seems quite involved just to promote between `String`, `Float64` and `Int`. Any suggestions on making this easier?