I would not use a `DataFrame` for this task. (Your DataFrames.jl code could be made faster, but if you need performance it is better not to use a data frame here at all.)
Instead, I recommend building a `Matrix` directly:
"""
    fun(t1)

Build an integer count matrix from the collections stored as the values of `t1`.

Row `i` corresponds to the `i`-th collection yielded by `values(t1)`. Each column
corresponds to one distinct element of `union(values(t1)...)`, in the order in
which that union iterates. Entry `[i, j]` is the number of times the element of
column `j` occurs in collection `i`.

An empty `t1` yields a `0×0` matrix.
"""
function fun(t1)
    groups = values(t1)
    # `union` needs at least one argument, so splatting an empty collection
    # into it would throw a `MethodError`; handle that case up front.
    isempty(groups) && return zeros(Int, 0, 0)
    # Map every distinct element to its column index.
    colmap = Dict(x => i for (i, x) in enumerate(union(groups...)))
    mat = zeros(Int, length(t1), length(colmap))
    for (i, group) in enumerate(groups)
        for v in group
            mat[i, colmap[v]] += 1
        end
    end
    return mat
end
fun (generic function with 1 method)
julia> @time fun(t1)
0.115027 seconds (169.71 k allocations: 9.692 MiB, 99.98% compilation time)
3×6 Matrix{Int64}:
1 0 0 0 1 0
0 1 1 1 1 1
1 1 0 0 1 1
julia> @time fun(t1)
0.000015 seconds (17 allocations: 1.266 KiB)
3×6 Matrix{Int64}:
1 0 0 0 1 0
0 1 1 1 1 1
1 1 0 0 1 1