Hi all,
I am currently trying to build a recurrent neural network using Flux, and during training Julia stops mid-way without throwing an error. I am using JuliaPro with the atom ide.
julia> versioninfo()
Julia Version 1.4.2
Commit 44fa15b150* (2020-05-23 18:35 UTC)
Platform Info:
OS: macOS (x86_64-apple-darwin18.7.0)
CPU: Intel(R) Core(TM) i7-7567U CPU @ 3.50GHz
WORD_SIZE: 64
LIBM: libopenlibm
LLVM: libLLVM-8.0.1 (ORCJIT, skylake)
Environment:
JULIA_EDITOR = atom -a
JULIA_NUM_THREADS = 2
JULIA_PKG_SERVER = pkg.juliacomputing.com
JULIA_DEPOT_PATH = /Users/x/.juliapro/JuliaPro_v1.4.2-1:/Applications/JuliaPro-1.4.2-1.app/Contents/Resources/julia/Contents/Resources/julia/local/share/julia:/Applications/JuliaPro-1.4.2-1.app/Contents/Resources/julia/Contents/Resources/julia/share/julia
I also tried running the same script in a jupyter notebook, and set it to verbose and got this error:
[I 10:47:01.385 NotebookApp] KernelRestarter: restarting kernel (1/5), keep random ports
kernel 1762e7d4-0bc8-4d92-b15e-b018c3536f5c restarted
[E 10:47:01.418 NotebookApp] KernelRestarter: restart callback <bound method ZMQChannelsHandler.on_kernel_restarted of ZMQChannelsHandler(1762e7d4-0bc8-4d92-b15e-b018c3536f5c)> failed
Traceback (most recent call last):
File "/Users/mousaghannnam/opt/anaconda3/lib/python3.7/site-packages/jupyter_client/restarter.py", line 86, in _fire_callbacks
callback()
File "/Users/mousaghannnam/opt/anaconda3/lib/python3.7/site-packages/notebook/services/kernels/handlers.py", line 476, in on_kernel_restarted
self._send_status_message('restarting')
File "/Users/mousaghannnam/opt/anaconda3/lib/python3.7/site-packages/notebook/services/kernels/handlers.py", line 472, in _send_status_message
self.write_message(json.dumps(msg, default=date_default))
File "/Users/mousaghannnam/opt/anaconda3/lib/python3.7/site-packages/tornado/websocket.py", line 339, in write_message
raise WebSocketClosedError()
tornado.websocket.WebSocketClosedError
kernel 1762e7d4-0bc8-4d92-b15e-b018c3536f5c restarted
Have any of you seen anything like this? All of the code up to the training works fine, so perhaps it has something to do with the increased amount of processing needed. I appreciate any and all advice you can offer. Thank you!
(Edit): Here is the code:
using CSV #Add CSV package
using Flux
using Flux: onehot, chunk, batchseq, throttle, logitcrossentropy
using DataFrames
using Parameters: @with_kw
using Lathe.preprocess: TrainTestSplit
#Set WD
cd("/.../")
# Hyperparameter arguments
@with_kw mutable struct Args
lr::Float64 = 1e-2 # Learning rate
seqlen::Int = 50 # Length of batchseqences
nbatch::Int = 50 # number of batches text is divided into
throttle::Int = 30 # Throttle timeout
end
function getData()
#Load Data
myData = CSV.read("./some_data.csv") |> DataFrame!
#Function for creating dictionary of word frequencies
function Counter(d::DataFrame)
outp = Dict{String, Int64}()
for i = 1:size(myData,1)
for j in split(myData[i,1])
if haskey(outp, j)
outp[j] += 1
else
outp[j] = 1
end
end
end
return outp
end
lexiconFreq = Counter(myData) #Table of word frequencies
#Obtain the unique words
uniqueWords = collect(keys(lexiconFreq))
N = length(uniqueWords)
#Create oneHotVectors from these words
oneHotWords = map(word -> Flux.onehot(word, uniqueWords), uniqueWords)
#Create a dictionary of words to oneHotVectors
oneHotDict = Dict(uniqueWords .=> oneHotWords)
oneHotDict["1"] #Test
#Create a vector of one hot vectors, for each sentence
function getData(myData)
x = Tuple{Flux.OneHotMatrix,Int64}[]
for i = 1:size(myData, 1)
tempSentence = split(myData[i, 1])
tempMatrix = Flux.onehotbatch(tempSentence, uniqueWords)
tempTup = (tempMatrix, myData[i, 3])
push!(x, tempTup)
end
return x
end
#Create our input vector
inputData = getData(myData)
#Split up training and test data
#Creating df with just 1 and 3rd row, for binary task
trainD,testD = TrainTestSplit(inputData, 0.9)
trainData = inputData[trainD]
testData = inputData[testD]
return trainData, testData, N
end
function build_model(N)
return Chain(
LSTM(N, 128),
LSTM(128, 128),
Dense(128, N))
end
# Function to construct model
function train(; kws...)
# Initialize the parameters
args = Args(; kws...)
# Get Data
trainData, testData, N = getData()
# Constructing Model
m = build_model(N)
function loss(xs, ys)
l = sum(logitcrossentropy.(m.(xs), ys))
return l
end
## Training
opt = ADAM(args.lr)
tx, ty = testData[5]
evalcb = () -> @show loss(tx, ty)
Flux.train!(loss, params(m), trainData, opt, cb = throttle(evalcb, 15))
return m
end
m = train()