Can you try this:
"""
    createDataset(path)

Load every image stored under `path/<label>/<file>` into one array.

Each entry of `readdir(path)` is treated as a label directory; its name is
parsed as a `Float32` and used as the label of every file inside it.

# Returns
- `X`: `28×28×N` `Float32` array, one slice `X[:, :, i]` per image.
- `y`: length-`N` `Float32` vector with the label of image `i`.

Images are stored in `readdir` order (labels first, then files). `load` and
`channelview` are expected to be in scope at the call site (presumably from
FileIO / ImageCore; confirm against the surrounding file).
"""
function createDataset(path)
    # List the tree once so the array size and the loading loop always agree,
    # and so an empty `path` yields empty outputs instead of an error.
    samples = Tuple{String,Float32}[]
    for label in readdir(path)
        for file in readdir(joinpath(path, label))
            push!(samples, (joinpath(path, label, file), parse(Float32, label)))
        end
    end

    N = length(samples)
    X = zeros(Float32, 28, 28, N)
    y = zeros(Float32, N)
    for (i, (file, value)) in enumerate(samples)
        chim = channelview(load(file))
        # Write into the preallocated `X`; the slice is a view, so no copy of
        # the destination is made.
        @views X[:, :, i] .= chim
        y[i] = value
    end
    return X, y
end
How many images do you have?
It is very slow indeed. Is it `load` that's so slow?
I can cheat to get down to 1 s (on my computer) for 60K images:
"""
    createDataset(path)

Load every image stored under `path/<label>/<file>` into one array, reading
the files in parallel with `Threads.@threads`.

Each entry of `readdir(path)` is treated as a label directory; its name is
parsed as a `Float32` and used as the label of every file inside it. The
number of images found is reported with `@info` before loading starts.

# Returns
- `XX`: `28×28×N` `Float32` array, one slice `XX[:, :, k]` per image.
- `yy`: length-`N` `Float32` vector with the label of image `k`.

Images are stored in `readdir` order (labels first, then files), whatever the
number of threads. `load` and `channelview` are expected to be in scope at the
call site (presumably from FileIO / ImageCore; confirm against the
surrounding file).
"""
function createDataset(path)
    # List the tree once, sequentially: this fixes the output order and the
    # array sizes before any thread starts.
    files = String[]
    yy = Float32[]
    for label in readdir(path)
        for file in readdir(joinpath(path, label))
            push!(files, joinpath(path, label, file))
            push!(yy, parse(Float32, label))
        end
    end

    N = length(files)
    @info "$N Images found"

    XX = zeros(Float32, 28, 28, N)
    # Every iteration writes only its own slice `XX[:, :, k]`, so iterations
    # are independent. No buffers indexed by `Threads.threadid()` are needed;
    # that id can exceed `Threads.nthreads()` when interactive threads exist.
    # NOTE(review): assumes `load` may be called concurrently; confirm.
    Threads.@threads for k in eachindex(files)
        @views XX[:, :, k] .= channelview(load(files[k]))
    end
    return XX, yy
end