# Load the "TestData" worksheet of the assignment workbook into a DataFrame.
df1 = DataFrame(
    XLSX.readtable("C:/Users/Brett/Documents/Soc332Assignment3.xlsx", "TestData")...
)
# Rebuild every column through `identity.` so broadcasting re-infers the column's
# element type from its actual values. Note: this does NOT force Float64; it only
# tightens the element type of each column.
foreach(names(df1)) do col
    df1[!, col] = identity.(df1[:, col])
end
"""
    new_vector_correct_type(x)

Return a new collection with the elements of `x` converted to their common promoted
type.

The common type is found by folding `promote_type` over the element types. When the
elements share no common type (the promotion yields `Any`), or when `x` is empty, the
elements are copied unchanged via `identity.(x)`.
"""
function new_vector_correct_type(x)
    # Reducing over an empty collection throws, and there is nothing to promote anyway.
    isempty(x) && return identity.(x)
    T = mapreduce(typeof, promote_type, x)
    if T === Any
        # No common type: let broadcasting choose the element type.
        return identity.(x)
    end
    # Use `convert` instead of calling `T` as a constructor: promoted types such as
    # `Union{Missing, Int64}` or abstract types are not callable, but convert works.
    return convert.(T, x)
end
# Convert the data frame df1 into a plain matrix (this is the input to make_pivottable).
Test_matrix= Matrix(df1)
"""
    make_pivottable(Test_matrix)

Count how often each distinct value occurs in `Test_matrix` and return the result as
a `DataFrame` with two columns: `:key` (the value rendered as a string) and `:counts`
(its number of occurrences), sorted by descending count.
"""
function make_pivottable(Test_matrix)
    # Flatten before counting: passing a 2-D array straight to `countmap` is what
    # raised `UndefKeywordError: keyword argument dims not assigned` for integer data.
    cmap = countmap(vec(Test_matrix))
    labels = string.(collect(keys(cmap)))
    tallies = collect(values(cmap))
    # Build the columns directly; `[labels tallies]` would hcat strings and integers
    # into a `Matrix{Any}` and lose the element type of both columns.
    ptab = DataFrame(key = labels, counts = tallies)
    sort!(ptab, :counts, rev=true)
    return ptab
end
# Frequency table of the values in Test_matrix, as built by make_pivottable.
Test_pivot= make_pivottable(Test_matrix)
When I try to run this, I get the following error:
UndefKeywordError: keyword argument dims not assigned
(::Base.var"#kw##sort")(::NamedTuple{(:alg,),Tuple{SortingAlgorithms.RadixSortAlg}}, ::typeof(sort), ::Array{Int64,2}) at none:0
addcounts_radixsort!(::Dict{Int64,Int64}, ::Array{Int64,2}) at counts.jl:348
#addcounts!#70 at counts.jl:262 [inlined]
#addcounts! at none:0 [inlined]
#countmap#73 at counts.jl:389 [inlined]
countmap at counts.jl:389 [inlined]
make_pivottable(::Array{Int64,2}) at Soc332Assignment3.jl:58
top-level scope at Soc332Assignment3.jl:67
That seems to have fixed that issue. Now it’s saying the columns aren’t the same length, but I’m not sure why, since the original columns were the same length.
I’m not sure what’s going on. The code I posted above seems to work. Without a full MWE (see how I created the DataFrame above), it’s tough to help you.
# Load the "TestData" worksheet of the assignment workbook into a DataFrame.
# The result must be bound to `df1` (not `f1`), because the loop below and the rest
# of the script operate on `df1`.
df1 = DataFrame(XLSX.readtable("C:/Users/Brett/Documents/Soc332Assignment3.xlsx","TestData")...)
# Rebuild every column through `identity.` so broadcasting re-infers the column's
# element type from its actual values. Note: this does NOT force Float64; it only
# tightens the element type of each column.
for name in names(df1)
    df1[!, name] = identity.(df1[:, name])
end
"""
    new_vector_correct_type(x)

Return a new collection with the elements of `x` converted to their common promoted
type.

The common type is found by folding `promote_type` over the element types. When the
elements share no common type (the promotion yields `Any`), or when `x` is empty, the
elements are copied unchanged via `identity.(x)`.
"""
function new_vector_correct_type(x)
    # Reducing over an empty collection throws, and there is nothing to promote anyway.
    isempty(x) && return identity.(x)
    T = mapreduce(typeof, promote_type, x)
    if T === Any
        # No common type: let broadcasting choose the element type.
        return identity.(x)
    end
    # Use `convert` instead of calling `T` as a constructor: promoted types such as
    # `Union{Missing, Int64}` or abstract types are not callable, but convert works.
    return convert.(T, x)
end
# Plain-matrix version of the data frame df1.
Test_matrix = Matrix(df1)
# Cross-tabulate all columns of df1. The table must be built from `df1`; no
# variable named `df` is defined in this script.
Test_contingency = freqtable(df1, names(df1)...)
#change this over for my data
# NOTE(review): presumably the sample size; get_stats computes its own local N from
# the table, so this global is not read by it. TODO confirm it is still needed.
N = 608
"""
    get_stats(df1, consume, marijuana)

Cross-tabulate the columns `consume` and `marijuana` of `df1` and return the result
as a new `DataFrame`.

The returned table has one row per distinct value of `consume`. Its first column is
named after `consume` and holds those values. For every distinct value `v` of
`marijuana` it has a column `v_N` with the cell counts and a column `v_pct` with the
cell counts divided by the grand total.
"""
function get_stats(df1, consume, marijuana)
    Ns = freqtable(df1, consume, marijuana)
    percents = Ns ./ sum(Ns)
    # Dimension 1 of the table is `consume`, dimension 2 is `marijuana`.
    row_labels, col_labels = names(Ns)
    out = DataFrame()
    # The row labels are the levels of `consume`, so name the column accordingly.
    out[!, consume] = row_labels
    # Index the table by position, not by label: an integer index into a named array
    # is positional, so a level coded as e.g. 9 in a 7-level variable would raise a
    # BoundsError if it were used as the index.
    for (j, label) in enumerate(col_labels)
        out[!, Symbol(label, :_N)] = Ns[:, j]
        out[!, Symbol(label, :_pct)] = percents[:, j]
    end
    # Return the table built here, not the input data frame.
    return out
end
# Cross-tabulation of :consume by :marijuana. The data frame loaded by this script is
# `df1`; no variable named `df` is defined here.
t = freqtable(df1, :consume, :marijuana)
get_stats(df1, :consume, :marijuana)
PS. It would be really helpful if you could build your skills in generating DataFrames randomly to produce MWEs. I think it would make getting help a lot easier when people have to only copy and paste a single block of code.
Do you see how I’ve been generating random vectors to create my MWEs though? It would be really helpful if you did what I did above when asking for help.
I think you’re right. The problem is that at this stage I’m getting a bounds error. The values in my dataset are [1,2,3,4,5,6,9], and I think the error occurs because 9 is not sequential. I’m not sure if it would work better with 9 as a string or NaN, since 9 represents a missing value.
I don’t understand what you mean by this. I don’t understand what a vector being “sequential” has to do with your problem. I would bet that if you isolated the problem to an MWE you would find that the code works.
I see. I should not have used df as both the name of the input data frame and the name of the output data frame in my function. I can see how that can result in errors. My apologies for writing sloppy code!
You did the correct thing by naming the input data frame df1 in your function.