Thatās really cool!
Iāve been wanting to experiment more with autodiff, so thereās an excuse.
Hereās a Julia translation of Bob Carpenterās opening post:
import FunctionWrappers: FunctionWrapper
const adj_t = Vector{Float64}
const chain_t = FunctionWrapper{Cvoid,Tuple{adj_t}}
const stack_ = chain_t[]
const next_idx_ = Ref{Int}(0)
@inline next_idx() = next_idx_[] += 1
struct var{T,int}
val::T
idx::int
end
var(v::var) = var(v.val, v.idx)
var(val) = var(val, next_idx())
var() = var(NaN, next_idx())
@inline function Base.:+(x1::var, x2::var)
y = var(x1.val + x2.val)
let x1i = x1.idx, x2i = x2.idx, yid = y.idx
push!(stack_, chain_t(adj -> begin
adj[x1i] += adj[yid]
adj[x2i] += adj[yid]
nothing
end))
end
y
end
@inline function Base.:+(x1::var, x2)
y = var(x1.val + x2)
let x1i = x1.idx, yid = y.idx
push!(stack_, chain_t(adj -> begin
adj[x1i] += adj[yid]
nothing
end))
end
y
end
@inline function Base.:+(x1, x2::var)
y = var(x1 + x2.val)
let x2i = x2.idx, yid = y.idx
push!(stack_, chain_t(adj -> begin
adj[x2i] += adj[yid]
nothing
end))
end
y
end
@inline function Base.:*(x1::var, x2::var)
y = var(x1.val * x2.val)
let x1v = x1.val, x1i = x1.idx, x2v = x2.val, x2i = x2.idx, yid = y.idx
push!(stack_, chain_t(adj -> begin
adj[x1i] += x2v * adj[yid]
adj[x2i] += x1v * adj[yid]
nothing
end))
end
y
end
@inline function Base.:*(x1::var, x2)
y = var(x1.val * x2)
let x1i = x1.idx, x2v = x2, yid = y.idx
push!(stack_, chain_t(adj -> begin
adj[x1i] += x2v * adj[yid]
nothing
end))
end
y
end
@inline function Base.:*(x1, x2::var)
y = var(x1 * x2.val)
let x1v = x1, x2i = x2.idx, yid = y.idx
push!(stack_, chain_t(adj -> begin
adj[x2i] += x1v * adj[yid]
nothing
end))
end
y
end
@inline function Base.exp(x::var)
y = var(exp(x.val))
let xid = x.idx, yval = y.val, yid = y.idx
push!(stack_, chain_t(adj -> begin
adj[xid] += yval * adj[yid]
nothing
end))
end
y
end
function grad(y::var, x::Vector{<:var})
adj = zeros(y.idx)
adj[y.idx] = 1
for i ā length(stack_):-1:1
stack_[i](adj)
end
dy_dx = Vector{Float64}(undef, length(x))
for i ā eachindex(x)
dy_dx[i] = adj[x[i].idx]
end
dy_dx
end
I wasnāt sure whether I should go with push!
and looping over stack_
backwards, or insert(stack_, 1, ...)
and then a simple fun! āstack!
. I ended up going with the former.
This yields:
julia> resize!(stack_, 0);
julia> next_idx_[] = 0;
julia> x1 = var(10.3);
julia> x2 = var(-1.1);
julia> x = [x1, x2];
julia> y = x1 * x2 * 2 + 7
(x1.idx, x2.idx, y.idx) = (1, 2, 3)
(x1.idx, y.idx) = (3, 4)
(x1.idx, y.idx) = (4, 5)
var{Float64,Int64}(-15.660000000000004, 5)
julia> dy_dx = grad(y, x)
adj = [-2.2, 20.6, 2.0, 1.0, 1.0]
2-element Array{Float64,1}:
-2.2
20.6
I assume (like the C++ version) that this would be very slow. The functions wont inline, vectorize, etc, so there will be a lot of overhead on repeated little calls.
Anyway, I saw that Cassette now has version 1.1 and documentation:
https://github.com/jrevels/Cassette.jl
So definitely looking forward to Capstan!
Any updates?
Also saw that Jarrett Revels is listed among the JuliaCon speakers.
Iām guessing weāll see that talk on YouTube in the next few days?