Oh, sorry, I thought those snippets would be enough. Thank you for the for loop; it looked like what I wanted, but after modifying it (basically working around extract_name) it hasn’t worked for me and gives blank outputs:
The modified for loop:
# Overlay one histogram of `f_circ` per image on a single figure.
p = plot()
for sdf in groupby(df, :img_name)
    # Draw into `p` explicitly; label each series with its image name and make
    # it semi-transparent so overlapping histograms stay visible.
    histogram!(p, sdf.f_circ; label = first(sdf.img_name), alpha = 0.5)
end
# A `for` loop evaluates to `nothing`, so nothing is shown automatically when
# this runs as a script; `p.show()` is not Plots.jl API. Display the figure
# explicitly instead.
display(p)
the whole code:
using CSV, DataFrames, Statistics, RCall, Plots; pyplot();
# Path of the CSV file to load (placeholder; point it at the real file).
folder_path = ".../file.csv"

# Read the CSV without a header row, then give the columns meaningful names.
df = CSV.read(folder_path, copycols = true, header = 0)
hd = Dict("Col1" => "slice", "Col2" => "count_",
          "Col3" => "img_name", "Col4" => "circ")
rename!(df, [Symbol("Col$i") for i in 1:size(df, 2)])
rename!(df, hd)

# TIDYING DATA:
# 1. Shorten the redundant image names with a lookup Dict.
# `df.img_name` is the column itself, not a copy, so the mutating `unique!`
# would shrink the DataFrame's column in place and corrupt the table.
# Use the non-mutating `unique` to only inspect the distinct names.
unique_names = unique(df.img_name)
sort(unique_names)  # only useful at the REPL, to inspect/copy the names

# Maps the verbose "Labeling[...]" strings for ImageA0 … ImageA9 to short names.
name_dict = Dict(
    "Labeling[\nname=ImageA$(i).tif;\nsource=;\ndimensions=512,512 (X,Y)]" => "ImageA$(i)"
    for i in 0:9
)
replace!(df[!, :img_name], name_dict...)

# Strip the noisy fragments from the `slice` descriptions.
df.slice = replace.(df[!, :slice], "\nsource=;" => "")
df.slice = replace.(df[!, :slice], "\ndimensions" => "dim")
df.slice = replace.(df[!, :slice], "\npixel" => "")
println("\t **after** renaming and before parsing: ", first(df, 5))

# 2. Preparing for parsing: `circ` uses a decimal comma ("0,733056435"), so
# swap it for a dot. One broadcast handles every row, whatever the leading digit.
df.f_circ = replace.(df.circ, "," => ".")

# Investigation
# Parse as Float64: Float16 keeps only ~3 significant digits, which would
# distort the values before they are binned or summarised.
df[!, :f_circ] = parse.(Float64, df[!, :f_circ])
println("\t **after** renaming and parsing: ", first(df, 5))
#groupby for loop **here**
Output of the last investigation println
(this shows what the data in df looks like):
**after** renaming and parsing: 5×5 DataFrame
│ Row │ slice │ count_ │ img_name │ circ │ f_circ │
│ │ String │ Int64 │ String │ String │ Float16 │
├─────┼──────────────────────────────────────────────────────────────┼────────┼──────────┼─────────────┼─────────┤
│ 1 │ Image[\nname=1;dim=14,14 (X,Y);\nmin=325,99; type=BitType)] │ 1 │ ImageA7 │ 0,733056435 │ 0.733 │
│ 2 │ Image[\nname=2;dim=19,20 (X,Y);\nmin=196,123; type=BitType)] │ 2 │ ImageA6 │ 0,787814224 │ 0.7876 │
│ 3 │ Image[\nname=3;dim=14,12 (X,Y);\nmin=50,138; type=BitType)] │ 3 │ ImageA4 │ 0,749961791 │ 0.75 │
│ 4 │ Image[\nname=4;dim=13,14 (X,Y);\nmin=339,141; type=BitType)] │ 4 │ ImageA5 │ 0,715588033 │ 0.716 │
│ 5 │ Image[\nname=5;dim=1,1 (X,Y);\nmin=474,143; type=BitType)] │ 5 │ ImageA1 │ 0 │ 0.0 │
Sorry for the generally messy thread, and thanks for looking and for the replies.