Pandas.series.str.extract equivalent?

dataframes

#1

How do I do the following pandas manipulation on a Julia DataFrame column:

>>> s = Series(['a1', 'b2', 'c3'])
>>> s.str.extract('(?P<letter>[ab])(?P<digit>\d)')
  letter digit
0      a     1
1      b     2
2    NaN   NaN

#2

I am not aware of a built-in method, but here is something to get you started.
Julia supports regexes, so the first part would be:

# Sample input: the same three strings as in the pandas example.
data = ["a1", "b2", "c3"]
# Named capture groups `letter` and `digit`, mirroring the pandas pattern.
regex =  r"(?P<letter>[ab])(?P<digit>\d)"
# Broadcasting applies `match` to every element of `data`; an element that
# does not match the regex yields `nothing` in the result.
result = match.(regex, data) # notice the "dot" after match
3-element Array{Any,1}:
 RegexMatch("a1", letter="a", digit="1")
 RegexMatch("b2", letter="b", digit="2")
 nothing                                

So now you have to convert the result array into a DataFrame, which is a bit
annoying; the code below is not the most efficient way of doing it.

# Return the names of the named capture groups of the regex that produced `m`,
# ordered by capture-group index.
function getnames(m::RegexMatch)
    # `capture_names` maps group index => group name; walk the indices in sorted
    # order because iteration order of the mapping itself is not guaranteed.
    groupnames = Base.PCRE.capture_names(m.regex.regex)
    return String[groupnames[i] for i in sort(collect(keys(groupnames)))]
end
# A failed match (`nothing`) contributes no capture-group names.
# `typeof(nothing)` is `Void` on Julia 0.6 and `Nothing` on later versions.
getnames(m::typeof(nothing)) = String[]

# Column names: every named capture group seen in any match, as Symbols.
columns = Symbol.(unique(reduce(vcat, getnames.(result))))
# Start with an empty DataFrame that has one String column per capture group.
df = DataFrame(fill(String, length(columns)), columns, 0)
for rm in result
    if rm === nothing
        # The string did not match at all: the whole row is missing.
        push!(df, fill(NA, length(columns)))
        continue
    end
    # Group names are Strings while `columns` holds Symbols, so compare as strings.
    present = getnames(rm)
    row = Any[]
    for column in columns
        # A group that exists in the regex but did not take part in this match
        # is reported as `nothing`; record it as NA as well.
        if string(column) in present && rm[column] !== nothing
            push!(row, rm[column])
        else
            push!(row, NA)
        end
    end
    push!(df, row)
end

#3

@vchuravy looks rather tedious; thanks for the details nevertheless.