Automatic differentiation and missing values

I am trying to calculate the Jacobian matrix of a function that is robust with missing values and want to use one of the great Julia packages that do automatic differentiation and have hit some kind of a roadblock:

using ForwardDiff
using Random
using Statistics

function dumb_jacobian(f::Function, x::Vector; δ = 0.01)

    y = f(x)

    J = Matrix{Union{eltype(y), Missing}}(undef, length(y), length(x))
    for i in 1:length(x)
        if ismissing(x[i])
            J[:, i] .= missing
        else
            dx = copy(x)
            dx[i] += δ
            J[:, i] .= (y .- f(dx)) ./ δ
        end
    end

    return J
end

# These are required to make ForwardDiff.jl work with missing values
Base.:(*)(x, ::Missing) = missing
Base.:(*)(::Missing, y) = missing
Base.inv(::Missing) = missing
ForwardDiff.can_dual(::Type{Union{Float64, Missing}}) = true
ForwardDiff.can_dual(::Type{Missing}) = true
ForwardDiff.can_dual(Union{Float64, Missing})


Random.seed!(1)
A = convert(Array{Union{Missing, Float64}}, randn(4, 4))

# Robust with missing values
f(x) = exp.( [mean(skipmissing(A[i, :] .* x)) for i in 1:size(A, 1)] )

# not robust
f2(x) = exp.( A * x )

x = convert(Array{Union{Missing, Float64}}, randn(4))
x2 = copy(x)
x2[1] = missing

julia> f(x)
4-element Array{Float64,1}:
 1.2835393318564443 
 0.9118157450546384 
 0.37761940204728817
 2.8456972590876477 

julia> f(x2)
4-element Array{Float64,1}:
 1.494163897760937  
 0.9659212155251251 
 0.23771683985407535
 4.0228704861827    

julia> f2(x)
4-element Array{Float64,1}:
  2.7141679988294563  
  0.6912391841944968  
  0.020333736944155658
 65.57748885262805    

julia> f2(x2)
4-element Array{Missing,1}:
 missing
 missing
 missing
 missing

julia> ForwardDiff.jacobian(f, x)
4×4 Array{Float64,2}:
  0.0953952  -0.269231   0.170058    0.147219 
  0.0871687   0.0709191  0.0983442  -0.119069 
 -0.0564196   0.216667   0.0551049   0.0385545
 -0.007431   -1.61286    0.685295   -0.0358922

julia> dumb_jacobian(f, x)
4×4 Array{Union{Missing, Float64},2}:
  0.0954307  -0.268949   0.170171    0.147304 
  0.0872103   0.0709466  0.0983973  -0.118991 
 -0.0563775   0.21729    0.0551451   0.0385742
 -0.0074309  -1.6083     0.686121   -0.03589  

julia> ForwardDiff.jacobian(f, x2)
# ERROR: BoundsError: attempt to access (missing,)


julia> dumb_jacobian(f, x2)
4×4 Array{Union{Missing, Float64},2}:
 missing  -0.417297  0.264185    0.228678 
 missing   0.100222  0.139006   -0.168032 
 missing   0.182558  0.0462975   0.0323829
 missing  -3.02861   1.29378    -0.0676472

julia> ForwardDiff.jacobian(f2, x)
4×4 Array{Float64,2}:
  0.80689      -2.27726     1.43842    1.24524   
  0.264327      0.215052    0.298215  -0.36106   
 -0.0121521     0.0466677   0.011869   0.00830421
 -0.684973   -148.67       63.1689    -3.30846   

julia> dumb_jacobian(f2, x)
4×4 Array{Union{Missing, Float64},2}:
  0.80809      -2.26773     1.44223     1.2481    
  0.264833      0.215387    0.29886    -0.360118  
 -0.0121159     0.0472074   0.0119037   0.00832119
 -0.684937   -146.997      63.4742     -3.30763   

julia> ForwardDiff.jacobian(f2, x2)
# ERROR: BoundsError: attempt to access (missing,)
 
julia> dumb_jacobian(f2, x2)
4×4 Array{Missing,2}:
 missing  missing  missing  missing
 missing  missing  missing  missing
 missing  missing  missing  missing
 missing  missing  missing  missing

I am actually surprised that I got this far with ForwardDiff.jl. Would it be possible to get this to work in ForwardDiff.jl or any similar package in Julia?

A quick fix could be just replacing the missings in the input with something else (eg NaN), and ignore that in the output.

I am not aware of a way of extending AD to non-numeric types in a way that is composable and generic.

NaNs work pretty well, as long as your function can deal with them.