So, I ended up trying all the solvers in NLopt.jl, then various robust solvers in Optimization.jl, and none of them worked reliably (that is, on more than 90% of the random test problems). Then I remembered that, from the setup of this problem, it is “almost separable”: coordinate j mostly affects residual j. So I ended up coding a simple coordinatewise solver (a toy demonstration follows the code):
"""
$(SIGNATURES)
Solve ``f(x) ≈ 0`` using `f!(y, x)`, which writes ``y = f(x)`` into its first argument.
Stops after `iter_max` iterations, when the smallest coordinatewise update falls below
`xabs`, or when the maximum absolute residual is at most `ytol`.
Uses a coordinatewise univariate method. For smooth problems, this is likely to be
inferior to almost all solvers under the sun, but for nondifferentiable residuals which
are monotonic in the corresponding input, it can work as a last resort.
"""
function coordinatewise_solver(f!, x0, lb, ub;
                               iter_max = 100, xabs = √eps(eltype(x0)), ytol = xabs,
                               method = Roots.Brent())
    x = collect(x0)             # copy into a mutable vector
    N = length(x)
    z = similar(x)              # buffer for coordinatewise evaluations
    y = similar(x)              # residuals
    Δmin = oftype(xabs, Inf)    # smallest coordinate update in the last sweep
    result(status, i) = (; x, y, Δmin, status, iterations = i)
    # the j-th residual as a univariate function of coordinate j,
    # holding all other coordinates fixed at the current x
    function f_j(j)
        z .= x
        function(xj)
            z[j] = xj
            f!(y, z)
            y[j]
        end
    end
    for i in 1:iter_max
        Δmin = oftype(xabs, Inf) # reset for this sweep
        for j in 1:N
            fj = f_j(j)
            if abs(fj(x[j])) > ytol
                # safety depends on Roots not doing threading, since `fj`
                # mutates the shared buffers `z` and `y`
                xj = Roots.find_zero(fj, (lb[j], ub[j]), method)
                Δmin = min(abs(xj - x[j]), Δmin)
                x[j] = xj
            end
        end
        f!(y, x)
        maximum(abs, y) ≤ ytol && return result(:converged, i)
        Δmin ≤ xabs && return result(:small_moves, i) # stalled: a coordinate barely moved
    end
    result(:iter_max, iter_max)
end
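To make “almost separable” concrete, here is a minimal toy demonstration. Everything in it is my own illustration, not the actual problem: the system `f_toy!`, the `0.01` coupling, the bounds, and the dimension are all made up. Each residual is piecewise linear, so it has a kink at zero, is strictly increasing in its own coordinate, and is only weakly coupled to the others.

# A made-up “almost separable” system: residual j is kinked at zero,
# strictly increasing in x[j], and depends on the other coordinates
# only through the weak 0.01 * sum(x) coupling term.
function f_toy!(y, x)
    s = sum(x)
    for j in eachindex(x)
        y[j] = x[j] + 0.5 * abs(x[j]) - j + 0.01 * s
    end
    y
end

x0 = zeros(5)
lb = fill(-20.0, 5)         # the bounds must bracket each coordinate's root
ub = fill(20.0, 5)
res = coordinatewise_solver(f_toy!, x0, lb, ub)
res.status                  # expect :converged after a handful of sweeps
maximum(abs, res.y)         # ≤ ytol at the returned point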
The coordinatewise solver works on 100% of the random tests. It is also the method I use as a silly strawman when I teach numerical methods in economics, so this is kind of karmic.