Hi SimpleChains.jl people!
I am trying to build a contrastive classifier on low-dimensional data. I built a custom loss function, following the example in the SimpleChains docs. `valgrad!`
doesn’t error (EDIT: but it also doesn’t touch the gradients I feed it). I also hit a MethodError in `train_unbatched!`:
ERROR: MethodError: no method matching StrideArraysCore.PtrArray(::Nothing)
Closest candidates are:
StrideArraysCore.PtrArray(::Union{StrideArray, StrideArraysCore.StrideBitArray}) at ~/.julia/packages/StrideArraysCore/VQxXL/src/stridearray.jl:189
StrideArraysCore.PtrArray(::Ptr{T}, ::Tuple{Vararg{Union{Int128, Int16, Int32, Int64, Int8, UInt128, UInt16, UInt32, UInt64, UInt8, Static.StaticInt}, N}}) where {T, N} at ~/.julia/packages/StrideArraysCore/VQxXL/src/ptr_array.jl:178
StrideArraysCore.PtrArray(::Ptr, ::Tuple{Vararg{Union{Int128, Int16, Int32, Int64, Int8, UInt128, UInt16, UInt32, UInt64, UInt8, Static.StaticInt}}}, ::Static.StaticInt{1}) at ~/.julia/packages/StrideArraysCore/VQxXL/src/ptr_array.jl:184
...
Stacktrace:
[1] update!(g::StrideArray{Tuple{Static.StaticInt{707}, Static.StaticInt{4}}, (true, false), Float32, 2, 1, 0, (1, 2), Tuple{Static.StaticInt{4}, Int64}, Tuple{Static.StaticInt{1}, Static.StaticInt{1}}, Vector{Float32}}, opt::SimpleChains.ADAM, Xp::StrideArraysCore.PtrArray{Tuple{Static.StaticInt{4}, Int64}, (true, true), Float64, 2, 1, 0, (1, 2), Tuple{Static.StaticInt{8}, Static.StaticInt{32}}, Tuple{Static.StaticInt{1}, Static.StaticInt{1}}}, layers::Tuple{TurboDense{true, Static.StaticInt{32}, typeof(tanh)}, TurboDense{true, Static.StaticInt{16}, typeof(tanh)}, TurboDense{true, Static.StaticInt{1}, typeof(identity)}, TurboDense{true, Static.StaticInt{1}, typeof(identity)}, ContrastiveCrossEntropyLoss{SampleContrast, Vector{SampleContrast}}}, pen::NoPenalty{SimpleChain{Tuple{Static.StaticInt{4}}, Tuple{TurboDense{true, Static.StaticInt{32}, typeof(tanh)}, TurboDense{true, Static.StaticInt{16}, typeof(tanh)}, TurboDense{true, Static.StaticInt{1}, typeof(identity)}, TurboDense{true, Static.StaticInt{1}, typeof(identity)}, ContrastiveCrossEntropyLoss{SampleContrast, Vector{SampleContrast}}}}}, sx::Tuple{Static.StaticInt{4}, Int64}, p::StrideArraysCore.StaticStrideArray{Tuple{Static.StaticInt{707}}, (true,), Float32, 1, 1, 0, (1,), Tuple{Static.StaticInt{4}}, Tuple{Static.StaticInt{1}}, 707}, pm::Ptr{UInt8}, optbuffer::Tuple{StrideArraysCore.PtrArray{Tuple{Static.StaticInt{707}}, (true,), Float32, 1, 1, 0, (1,), Tuple{Static.StaticInt{4}}, Tuple{Static.StaticInt{1}}}, StrideArraysCore.PtrArray{Tuple{Static.StaticInt{707}}, (true,), Float32, 1, 1, 0, (1,), Tuple{Static.StaticInt{4}}, Tuple{Static.StaticInt{1}}}, StrideArraysCore.PtrArray{Tuple{Static.StaticInt{2}}, (true,), Float64, 1, 1, 0, (1,), Tuple{Static.StaticInt{8}}, Tuple{Static.StaticInt{1}}}}, mpt::Int64)
@ SimpleChains ~/.julia/packages/SimpleChains/fifFm/src/optimize.jl:124
[2] train_unbatched_core!(c::SimpleChain{Tuple{Static.StaticInt{4}}, Tuple{TurboDense{true, Static.StaticInt{32}, typeof(tanh)}, TurboDense{true, Static.StaticInt{16}, typeof(tanh)}, TurboDense{true, Static.StaticInt{1}, typeof(identity)}, TurboDense{true, Static.StaticInt{1}, typeof(identity)}, ContrastiveCrossEntropyLoss{SampleContrast, Vector{SampleContrast}}}}, pu::Ptr{UInt8}, g::StrideArray{Tuple{Static.StaticInt{707}, Static.StaticInt{4}}, (true, false), Float32, 2, 1, 0, (1, 2), Tuple{Static.StaticInt{4}, Int64}, Tuple{Static.StaticInt{1}, Static.StaticInt{1}}, Vector{Float32}}, pX::StrideArraysCore.PtrArray{Tuple{Static.StaticInt{4}, Int64}, (true, true), Float64, 2, 1, 0, (1, 2), Tuple{Static.StaticInt{8}, Static.StaticInt{32}}, Tuple{Static.StaticInt{1}, Static.StaticInt{1}}}, p::StrideArraysCore.StaticStrideArray{Tuple{Static.StaticInt{707}}, (true,), Float32, 1, 1, 0, (1,), Tuple{Static.StaticInt{4}}, Tuple{Static.StaticInt{1}}, 707}, opt::SimpleChains.ADAM, iters::Int64, mpt::Int64)
@ SimpleChains ~/.julia/packages/SimpleChains/fifFm/src/optimize.jl:342
[3] with_heap_memory
@ ~/.julia/packages/SimpleChains/fifFm/src/memory.jl:36 [inlined]
[4] with_memory
@ ~/.julia/packages/SimpleChains/fifFm/src/memory.jl:47 [inlined]
[5] train_unbatched!(g::StrideArray{Tuple{Static.StaticInt{707}, Static.StaticInt{4}}, (true, false), Float32, 2, 1, 0, (1, 2), Tuple{Static.StaticInt{4}, Int64}, Tuple{Static.StaticInt{1}, Static.StaticInt{1}}, Vector{Float32}}, p::StrideArraysCore.StaticStrideArray{Tuple{Static.StaticInt{707}}, (true,), Float32, 1, 1, 0, (1,), Tuple{Static.StaticInt{4}}, Tuple{Static.StaticInt{1}}, 707}, _chn::SimpleChain{Tuple{Static.StaticInt{4}}, Tuple{TurboDense{true, Static.StaticInt{32}, typeof(tanh)}, TurboDense{true, Static.StaticInt{16}, typeof(tanh)}, TurboDense{true, Static.StaticInt{1}, typeof(identity)}, TurboDense{true, Static.StaticInt{1}, typeof(identity)}, ContrastiveCrossEntropyLoss{SampleContrast, Vector{SampleContrast}}}}, X::Matrix{Float64}, opt::SimpleChains.ADAM, t::Int64)
@ SimpleChains ~/.julia/packages/SimpleChains/fifFm/src/optimize.jl:399
I am new to machine learning and SIMD, and this is beyond my pay grade.
EDIT: Note that my targets, `y`, are half the length of the number of outputs because of this whole weird contrast loss I’m trying out: there is one target for each pair of inputs. Maybe I have to let SimpleChains know about this, considering the code in src\loss
for the regular cross-entropy loss:
# Excerpt quoted from the SimpleChains loss source for the regular cross-entropy loss.
# The output-size query for `LogitCrossEntropyLoss` forwards `Val{T}()`, the loss
# layer `sl`, and the size tuple `s` unchanged to the helper
# `_layer_output_size_needs_temp_of_equal_len_as_target`.
function layer_output_size(::Val{T}, sl::LogitCrossEntropyLoss, s::Tuple) where {T}
_layer_output_size_needs_temp_of_equal_len_as_target(Val{T}(), sl, s)
end
# Excerpt quoted from the SimpleChains loss source for the regular cross-entropy loss.
# The forward-pass size query for `LogitCrossEntropyLoss` delegates to the same helper
# as `layer_output_size`, with the same three arguments; here `s` is not restricted
# to a `Tuple`.
function forward_layer_output_size(::Val{T}, sl::LogitCrossEntropyLoss, s) where {T}
_layer_output_size_needs_temp_of_equal_len_as_target(Val{T}(), sl, s)
end
I don’t really know what the loss layer is doing or what needs to be defined.
In case some code context is needed, my custom loss and the surrounding code are below. I am trying to use this for reinforcement learning. It is mostly copied and pasted from the docs.
"""
    ChainActor{C,P,G,F}

Mutable bundle of a network, its parameters, a gradient buffer, and an input
preprocessing function, together with the scalar `β`.

# Fields
- `β::Float64`: a component of the actor; appended to the input when the actor is called.
- `chain::C`: the network that is evaluated.
- `params::P`: parameters handed to `chain` on every evaluation.
- `grads::G`: gradient storage handed to the training routines.
- `prefun::F`: function applied to the raw input before it reaches `chain`.
"""
mutable struct ChainActor{C,P,G,F}
    β::Float64
    chain::C
    params::P
    grads::G
    prefun::F
end
"""
    train!(ca::ChainActor, memory; grad_steps = length(memory))

Build inputs and targets from `memory` with `make_xy` (using `ca.prefun`), append a
`ContrastiveCrossEntropyLoss` over those targets to `ca.chain`, and call
`SimpleChains.valgrad!` with `ca.grads` and `ca.params`. The result of `valgrad!` is
returned.

`grad_steps` is only referenced by the commented-out `train_unbatched!` call.
"""
function train!(ca::ChainActor, memory; grad_steps = length(memory))
    inputs, targets = make_xy(memory; prefun = ca.prefun)
    lossy_chain = SimpleChains.add_loss(ca.chain, ContrastiveCrossEntropyLoss(targets))
    # Switching to unbatched training here, even with just a single grad step, raises
    # the MethodError:
    # SimpleChains.train_unbatched!(
    #     ca.grads, ca.params, lossy_chain, inputs, SimpleChains.ADAM(), grad_steps
    # )
    return SimpleChains.valgrad!(ca.grads, lossy_chain, inputs, ca.params)
end
# Evaluate the actor on a state/action pair: concatenate `state`, `action`, and the
# actor's `β`, preprocess the result with `a.prefun`, run the chain with the actor's
# parameters, and return the first element of the chain's output.
function (a::ChainActor)(state, action)
    raw_input = vcat(state, action, a.β)
    features = a.prefun(raw_input) # remove time in pendulum example
    output = a.chain(features, a.params)
    return output[1]
end
"""
    init_chain_actor(arch...; prefun = identity, ndims = 1)

Create a `ChainActor` with `β = 1.0`. The chain takes `static(ndims)` as its input
dimension, followed by the layers in `arch` and a final `TurboDense(identity, 1)`
layer. Parameters come from `SimpleChains.init_params` and the gradient buffer from
`SimpleChains.alloc_threaded_grad`.
"""
function init_chain_actor(arch...; prefun = identity, ndims = 1)
    input_dim = static(ndims)
    # The chain must terminate with a single dimension, so a one-unit identity layer
    # is always appended after the caller-supplied layers.
    output_layer = TurboDense(identity, 1)
    network = SimpleChain(input_dim, arch..., output_layer)
    weights = SimpleChains.init_params(network)
    gradient_buffer = SimpleChains.alloc_threaded_grad(network)
    return ChainActor(1.0, network, weights, gradient_buffer, prefun)
end
"""
    ContrastiveCrossEntropyLoss(targets)

Loss layer for a `SimpleChain` that scores consecutive PAIRS of network outputs.
`targets` holds one entry per pair, so it is half as long as the number of outputs
the loss is evaluated on.

NOTE(review): training code that re-targets the loss on slices of the data presumably
expects one target per input column; here there is one target per pair of columns.
Confirm that batched or threaded training splits inputs and targets consistently.
"""
struct ContrastiveCrossEntropyLoss{T,Y<:AbstractVector{T}} <: SimpleChains.AbstractLoss{T}
    targets::Y
end

# Expose the stored targets to SimpleChains. The custom-loss example in the
# SimpleChains docs defines this method. Without it the training path (`update!` in
# the posted stack trace) ends up calling `PtrArray(nothing)` and throws the
# MethodError.
SimpleChains.target(loss::ContrastiveCrossEntropyLoss) = loss.targets

# Re-target the loss: calling an existing loss with a new target array builds a fresh
# loss around that array. This mirrors the one-argument call defined in the
# SimpleChains custom-loss example. It is distinct from the three-argument forward
# call and does not conflict with it.
function (::ContrastiveCrossEntropyLoss)(targets::AbstractArray)
    return ContrastiveCrossEntropyLoss(targets)
end
"""
    calculate_loss(loss::ContrastiveCrossEntropyLoss, logits)

Return the summed contrastive loss as a running total that starts from `zero(Float64)`.

`logits` holds an even number of network outputs. For the `k`-th target, the
difference `logits[2k - 1] - logits[2k]` is passed to `contrast_loss` together with
that target, and the per-pair losses are accumulated left to right.
"""
function calculate_loss(loss::ContrastiveCrossEntropyLoss, logits)
    targets = loss.targets
    return mapfoldl(+, eachindex(targets); init = zero(Float64)) do k
        gap = logits[2k - 1] - logits[2k]
        contrast_loss(targets[k], gap)
    end
end
# Output-size query for the contrastive loss layer. It forwards the element-type tag,
# the layer, and the size tuple to SimpleChains' `_layer_output_size_no_temp` helper.
function SimpleChains.layer_output_size(
    ::Val{T}, sl::ContrastiveCrossEntropyLoss, s::Tuple
) where {T}
    eltype_tag = Val{T}()
    return SimpleChains._layer_output_size_no_temp(eltype_tag, sl, s)
end
# Forward-pass size query for the contrastive loss layer. It uses the same
# `_layer_output_size_no_temp` helper as `layer_output_size`, with `s` passed through
# as given.
function SimpleChains.forward_layer_output_size(
    ::Val{T}, sl::ContrastiveCrossEntropyLoss, s
) where {T}
    eltype_tag = Val{T}()
    return SimpleChains._layer_output_size_no_temp(eltype_tag, sl, s)
end
# Forward evaluation of the loss layer: compute the total contrastive loss of the
# previous layer's output and hand back `p` and `pu` unchanged alongside it.
function (loss::ContrastiveCrossEntropyLoss)(
    previous_layer_output::AbstractArray, p::Ptr, pu
)
    return calculate_loss(loss, previous_layer_output), p, pu
end
# Value-and-gradient step for a chain whose only remaining layer is the contrastive
# loss.
#
# Returns `(total_loss, previous_layer_output, pu)`. On return,
# `previous_layer_output` no longer holds the logits: for each target `i`, entries
# `2i - 1` and `2i` are overwritten with `+g` and `-g`, where
# `g = contrast_grad(y[i], logit[2i - 1] - logit[2i])`.
function SimpleChains.chain_valgrad!(
    __,
    previous_layer_output::AbstractArray{T},
    layers::Tuple{ContrastiveCrossEntropyLoss},
    _::Ptr,
    pu::Ptr{UInt8},
) where {T}
    loss = getfield(layers, 1)
    # The loss has to be computed before the loop, because the loop overwrites the
    # logits it is computed from.
    total_loss = calculate_loss(loss, previous_layer_output)
    y = loss.targets
    # Store the backpropagated gradient in the previous_layer_output array.
    for i in eachindex(y)
        # Read both logits of the pair before either slot is overwritten.
        e1 = previous_layer_output[2i - 1]
        e2 = previous_layer_output[2i]
        # Convert to the array's own element type `T` rather than a hard-coded
        # Float32, so a Float64 output array does not get its gradient rounded to
        # single precision. For Float32 arrays this is the same conversion as before.
        sign_arg = convert(T, contrast_grad(y[i], e1 - e2))
        if isnan(sign_arg)
            println("badloss")
        end
        previous_layer_output[2i - 1] = sign_arg
        previous_layer_output[2i] = -sign_arg
    end
    return total_loss, previous_layer_output, pu
end
thanks for the help