Dear all,
I want to reshape the wide data to panel data, here is an example.
julia> df = DataFrame(id = [1, 2, 3], gdp1999 = [2, 3, 4], gdp2000 = [3, 4, 5],
                      gdp2001 = [6, 7, 8], emp1999 = [3, 2, 1], emp2000 = [4, 7, 9],
                      emp2001 = [5, 4, 2])
3×7 DataFrame
 Row │ id     gdp1999  gdp2000  gdp2001  emp1999  emp2000  emp2001
     │ Int64  Int64    Int64    Int64    Int64    Int64    Int64
─────┼─────────────────────────────────────────────────────────────
   1 │     1        2        3        6        3        4        5
   2 │     2        3        4        7        2        7        4
   3 │     3        4        5        8        1        9        2
what I want is
 Row │ id     year       gdp     emp
     │ Int64  SubStrin…  Int64?  Int64?
─────┼──────────────────────────────────
   1 │     1  1999            2       3
   2 │     1  2000            3       4
   3 │     1  2001            6       5
   4 │     2  1999            3       2
   5 │     2  2000            4       7
   6 │     2  2001            7       4
   7 │     3  1999            4       1
   8 │     3  2000            5       9
   9 │     3  2001            8       2
 
            
              
            
           
          
            
            
              I try like this, But I don’t think it is a good way. Anyone has better idea to do this?
julia> df = DataFrame(id = [1, 2, 3], gdp1999 = [2, 3, 4], gdp2000 = [3, 4, 5],
                             gdp2001 = [6, 7, 8], emp1999 = [3, 2, 1], emp2000 = [4, 7, 9],
                             emp2001 = [5, 4, 2])
3×7 DataFrame
 Row │ id     gdp1999  gdp2000  gdp2001  emp1999  emp2000  emp2001
     │ Int64  Int64    Int64    Int64    Int64    Int64    Int64
─────┼─────────────────────────────────────────────────────────────
   1 │     1        2        3        6        3        4        5
   2 │     2        3        4        7        2        7        4
   3 │     3        4        5        8        1        9        2
julia> df1 = stack(df, Between(:gdp1999, :gdp2001), [:id], variable_name = :year, value_name = "gdp")
9×3 DataFrame
 Row │ id     year     gdp
     │ Int64  String   Int64
─────┼───────────────────────
   1 │     1  gdp1999      2
   2 │     2  gdp1999      3
   3 │     3  gdp1999      4
   4 │     1  gdp2000      3
   5 │     2  gdp2000      4
   6 │     3  gdp2000      5
   7 │     1  gdp2001      6
   8 │     2  gdp2001      7
   9 │     3  gdp2001      8
julia> @rtransform! df1 :year = SubString(:year, 4, 7)
9×3 DataFrame
 Row │ id     year       gdp
     │ Int64  SubStrin…  Int64
─────┼─────────────────────────
   1 │     1  1999           2
   2 │     2  1999           3
   3 │     3  1999           4
   4 │     1  2000           3
   5 │     2  2000           4
   6 │     3  2000           5
   7 │     1  2001           6
   8 │     2  2001           7
   9 │     3  2001           8
julia> df2 = stack(df, Between(:emp1999, :emp2001), [:id], variable_name = :year, value_name = :emp)
9×3 DataFrame
 Row │ id     year     emp
     │ Int64  String   Int64
─────┼───────────────────────
   1 │     1  emp1999      3
   2 │     2  emp1999      2
   3 │     3  emp1999      1
   4 │     1  emp2000      4
   5 │     2  emp2000      7
   6 │     3  emp2000      9
   7 │     1  emp2001      5
   8 │     2  emp2001      4
   9 │     3  emp2001      2
julia> @rtransform! df2 :year = SubString(:year, 4, 7)
9×3 DataFrame
 Row │ id     year       emp
     │ Int64  SubStrin…  Int64
─────┼─────────────────────────
   1 │     1  1999           3
   2 │     2  1999           2
   3 │     3  1999           1
   4 │     1  2000           4
   5 │     2  2000           7
   6 │     3  2000           9
   7 │     1  2001           5
   8 │     2  2001           4
   9 │     3  2001           2
julia> df = outerjoin(df1, df2, on = [:id, :year])
9×4 DataFrame
 Row │ id     year       gdp     emp
     │ Int64  SubStrin…  Int64?  Int64?
─────┼──────────────────────────────────
   1 │     1  1999            2       3
   2 │     2  1999            3       2
   3 │     3  1999            4       1
   4 │     1  2000            3       4
   5 │     2  2000            4       7
   6 │     3  2000            5       9
   7 │     1  2001            6       5
   8 │     2  2001            7       4
   9 │     3  2001            8       2
julia> sort!(df, :id)
9×4 DataFrame
 Row │ id     year       gdp     emp
     │ Int64  SubStrin…  Int64?  Int64?
─────┼──────────────────────────────────
   1 │     1  1999            2       3
   2 │     1  2000            3       4
   3 │     1  2001            6       5
   4 │     2  1999            3       2
   5 │     2  2000            4       7
   6 │     2  2001            7       4
   7 │     3  1999            4       1
   8 │     3  2000            5       9
   9 │     3  2001            8       2
 
            
              
            
           
          
            
            
              it is not exactly the most intuitive and direct procedure in the world, but so be it
dfp=permutedims(df,1,strict=false)
dfp1=select(dfp,2:4,:id=>ByRow(x->(x[1:3], x[4:7]))=>[:t,:year])
st=stack(dfp1,1:3)
unstack(st,[:variable,:year],:t,:value)
 
            
              
            
           
          
            
            
              an other way
dfp=permutedims(df,1,strict=false)
dfp2=select(dfp,2:4=>ByRow(Base.vect)=>:val,:id=>ByRow(x->(x[1:3], x[4:7]))=>[:t,:year])
flatten(unstack(dfp2,:year,:t,:val),[:gdp,:emp])
 
            
              
            
           
          
            
            
              julia> dfp=permutedims(df,1,strict=false)
ERROR: MethodError: no method matching permutedims(::DataFrame, ::Int64; strict=false)
Closest candidates are:
on DataFrames@1.3.3. Besides, permutedims needs the first column to by either symbols or strings, no? (edit : that is what the not yet officially released strict keyword is about)
             
            
              
            
           
          
            
            
              The kwarg strict set to false could manage that:
•  strict : if true (the default), an error will be raised if
julia> dfp=permutedims(df,1,strict=false)
6×4 DataFrame
 Row │ id       1      2      3     
     │ String   Int64  Int64  Int64
─────┼──────────────────────────────
   1 │ gdp1999      2      3      4
   2 │ gdp2000      3      4      5
   3 │ gdp2001      6      7      8
   4 │ emp1999      3      2      1
   5 │ emp2000      4      7      9
   6 │ emp2001      5      4      2
julia> dfp=permutedims(df,1)
ERROR: ArgumentError: all elements of src_namescol must support conversion to String
this the status
(v1.7) pkg> st DataFrames
      Status `C:\Users\sprmn\.julia\v1.7\Project.toml`
  [a93c6f00] DataFrames v1.4.0 `https://github.com/JuliaData/DataFrames.jl.git#main`
 
            
              1 Like 
            
            
           
          
            
            
              Right. That’s handy of course.
But it’s a yet unreleased feature. You are on the main branch, not a release 
             
            
              
            
           
          
            
            
              try this
dfs=stack(df,2:7)
dfp1=select(dfs,[1,3],:variable=>ByRow(x->(x[1:3], x[4:7]))=>[:t,:year])
unstack(dfp1,[:id,:year],:t,:value)
PS
             
            
              2 Likes 
            
            
           
          
            
            
              
 Raymond:
 
julia> df = DataFrame(id = [1, 2, 3], gdp1999 = [2, 3, 4], gdp2000 = [3, 4, 5],
                      gdp2001 = [6, 7, 8], emp1999 = [3, 2, 1], emp2000 = [4, 7, 9],
                      emp2001 = [5, 4, 2])
 
 
Thanks very much. it works well.
             
            
              
            
           
          
            
            
              
using DataFrames, DataFramesMeta
df = DataFrame(id=[1, 2, 3],
    gdp1999=[2, 3, 4], gdp2000=[3, 4, 5], gdp2001=[6, 7, 8],
    emp1999=[3, 2, 1], emp2000=[4, 7, 9], emp2001=[5, 4, 2],
    infla1999=[2, 1, 4], infla2000=[3, 4, 5], infla2001=[5, 6, 7])
df1 = stack(df, Not(:id))
@rselect! df1 :id :value :t = :variable[1:end-4] :year = :variable[end-3:end]
unstack(df1, [:id, :year], :t, :value)
Thanks so much.
             
            
              
            
           
          
            
            
              one more small generalization beyond yours, which works with version 1.4.0 # main.
julia> df = DataFrame(id = [1, 2, 3], gdp1999 = [2, 3, 4], gdp2000 = [3, 4, 5], gdp2001 = [6, 7, 8], 
                      emp1999 = [3, 2, 1], emp2000 = [4, 7, 9], emp2001 = [5, 4, 2],
                      infla1999=[2, 1, 4], infla2002=[3, 4, 5], infla2001=[5, 6, 7])
julia> unstack(dfp1,[:id,:year],:t,:value)
12×5 DataFrame
 Row │ id     year    gdp      emp      infla
     │ Int64  String  Int64?   Int64?   Int64?
─────┼──────────────────────────────────────────
   1 │     1  1999          2        3        2
   2 │     2  1999          3        2        1
   3 │     3  1999          4        1        4
   4 │     1  2000          3        4  missing
   5 │     2  2000          4        7  missing
   6 │     3  2000          5        9  missing
   7 │     1  2001          6        5        5
   8 │     2  2001          7        4        6
   9 │     3  2001          8        2        7
  10 │     1  2002    missing  missing        3
  11 │     2  2002    missing  missing        4
  12 │     3  2002    missing  missing        5
julia> unstack(dfp1,[:id,:year],:t,:value, fill="\U1F64A")
12×5 DataFrame
 Row │ id     year    gdp  emp  infla 
     │ Int64  String  Any  Any  Any   
─────┼────────────────────────────────
   1 │     1  1999    2    3    2
   2 │     2  1999    3    2    1
   3 │     3  1999    4    1    4
   4 │     1  2000    3    4    🙊
   5 │     2  2000    4    7    🙊
   6 │     3  2000    5    9    🙊
   7 │     1  2001    6    5    5
   8 │     2  2001    7    4    6
   9 │     3  2001    8    2    7
  10 │     1  2002    🙊   🙊   3
  11 │     2  2002    🙊   🙊   4
  12 │     3  2002    🙊   🙊   5
 
            
              
            
           
          
            
              
                pmarg  
              
                  
                    June 24, 2022, 10:43am
                   
                  12 
               
             
            
              I have written a function that uses column names instead of numbers and can reshape multiple groups at the same time:
function pivot_longer(df::AbstractDataFrame,bycol::Symbol,cols::Vector{Symbol};period = :Period)
    # temp = nothing
     df_longer = nothing
     m = nothing
    for col ∈ cols
        temp = select(df,Regex(String(col)))
        for colname ∈ names(temp)
            m = match(Regex("($(col))(?<Value>\\d+)"),String(colname))
        if m == nothing
            println("Column $(col) has no regex match")
        else
            rename!(temp,colname=>Symbol(m[2]))
        end
        end
        temp[!,bycol] = df[!,bycol]
        temp = stack(temp,Not(bycol),variable_eltype=String)
        rename!(temp,:variable => period, :value => Symbol(m[1]))
        if df_longer == nothing
            df_longer = temp
        else
            df_longer = outerjoin(df_longer, temp, on = [bycol,period],makeunique=true)
        end
    end
    return df_longer
end
Results to:
julia> pivot_longer(df,:id,[:gdp, :emp],period=:year)
9×4 DataFrame
 Row │ id     year    gdp     emp
     │ Int64  String  Int64?  Int64?
─────┼───────────────────────────────
   1 │     1  1999         2       3
   2 │     2  1999         3       2
   3 │     3  1999         4       1
   4 │     1  2000         3       4
   5 │     2  2000         4       7
   6 │     3  2000         5       9
   7 │     1  2001         6       5
   8 │     2  2001         7       4
   9 │     3  2001         8       2
 
            
              1 Like 
            
            
           
          
            
            
              
 rocco_sprmnt21:
 
 df = DataFrame(id = [1, 2, 3], gdp1999 = [2, 3, 4], gdp2000 = [3, 4, 5], gdp2001 = [6, 7, 8], 
                      emp1999 = [3, 2, 1], emp2000 = [4, 7, 9], emp2001 = [5, 4, 2],
                      infla1999=[2, 1, 4], infla2002=[3, 4, 5], infla2001=[5, 6, 7])
 
 
This function is very useful. Thanks.
             
            
              
            
           
          
            
            
              This function is very useful. Thanks.