Types and gradients, including ForwardDiff.gradient

That looks promising. Here’s a working example of the lasso, i.e. least squares with an L1 penalty lambda * ‖w‖₁, fit by proximal gradient descent:

using ReverseDiff: GradientTape, compile, gradient!

# simulated data: only the first 5 of 5000 predictors affect the response
nind = 1000
nvar = 5000
x = randn(nind, nvar)'                 # predictors, nvar × nind
y = sum(x[1:5, :], dims=1) .+ randn(nind)' * 0.1
oneind = ones(nind)
x = vcat(oneind', x)                   # prepend an intercept (all-ones) row
p = size(x, 1)
n = size(x, 2)
w = 0.0001 * randn(1, p)               # initial weights, 1 × p
output = similar(w)                    # preallocated gradient buffer

# squared error loss function
loss(w) = sum(abs2.(y - w * x)) / size(y, 2)

# record the loss on a gradient tape once, then compile it for fast reuse
const loss_tape = compile(GradientTape(loss, randn(1, p)))
loss∇!(output, w) = gradient!(output, loss_tape, w)
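
If you want to check the compiled tape against the ForwardDiff.gradient mentioned above, a one-off comparison works (optional; forward mode is slow at p = 5001, so keep it out of the training loop):

using ForwardDiff

loss∇!(output, w)
@assert output ≈ ForwardDiff.gradient(loss, w)  # both ADs should agree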

# proximal gradient descent step: take a gradient step, then zero out
# weights that land inside the threshold band
function train(w, output, x, y, lambda; lr=0.1)
    loss∇!(output, w)                  # output now holds ∇loss(w)
    w -= lr * output                   # gradient step
    w[abs.(w) .< lr * lambda] .= 0     # kill the small weights
    return w
end
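
Strictly speaking, the proximal operator of the L1 penalty is soft thresholding, which also shrinks the surviving weights by lr * lambda; the step above only zeroes the small ones. A minimal sketch of the full prox (soft_threshold is a helper name introduced here, not part of ReverseDiff):

# soft threshold: sign(w) * max(|w| - t, 0), applied elementwise
soft_threshold(w, t) = sign.(w) .* max.(abs.(w) .- t, 0)

Replacing the thresholding line in train with w = soft_threshold(w, lr * lambda) gives the textbook ISTA update.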

# run the proximal gradient iterations, printing the loss each step
niter = 25
lambda = 1.0
for i in 1:niter
    global w = train(w, output, x, y, lambda)
    println(loss(w))
end
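
Since the data were simulated so that only the first five predictors (plus the intercept) matter, a quick check of the recovered support (assuming the layout above, where weight 1 is the intercept and weights 2:6 are the true predictors):

# indices of the nonzero coefficients; ideally just 1 and 2:6
println(findall(!iszero, vec(w)))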