Incorrect Infeasibility Status While Using MIPVerify

Hello Everyone,

I am trying to use the MIPVerify library for neural network verification against adversarial attacks. My goal is to replace the ReLU function with a trainable mask that depends on the input image. For layer i, instead of max(0, w_i^\top \cdot x_i), the activation is computed as \mathbb{I}_{(w_m^\top \cdot x_0) > 0} \cdot (w_i^\top \cdot x_i), where w_m is the weight of an independent masking network m trained to predict the behavior of the ReLU at layer i, and x_0 is the original input.
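
For concreteness, here is a minimal plain-Julia sketch of that masked activation for a single dense layer. The names (masked_activation, w_i, w_m, x0) are illustrative only, not taken from my actual code:

# Sketch of the masked activation for one dense layer (illustrative, not my actual code).
# x0  : original input image (flattened),  x_i : input to layer i
# w_i : weights of layer i,                w_m : weights of the masking network for layer i
function masked_activation(w_i::AbstractMatrix, x_i::AbstractVector,
                           w_m::AbstractMatrix, x0::AbstractVector)
    pre = w_i' * x_i          # pre-activation of layer i
    mask = (w_m' * x0) .> 0   # indicator predicted from the original input x0
    return mask .* pre        # keep each unit only where the mask is on, otherwise 0
end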

function p_relu(x::T, x_s::T)::T where {T<:Real}
    if x_s > 0
        return x
    end
    return zero(T)
end

function p_relu(x::AbstractArray{T}, x_s::AbstractArray{T}) where {T<:Real}
    return p_relu.(x, x_s)
end
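
As a quick sanity check of this real-valued base case (plain numbers, no JuMP involved):

# The value x is kept only when the masking pre-activation x_s is strictly positive.
p_relu(2.0, 1.5)    # returns 2.0   (x_s > 0, mask on)
p_relu(2.0, -0.5)   # returns 0.0   (x_s <= 0, mask off)
p_relu(-3.0, 1.5)   # returns -3.0  (negative values pass through when the mask is on)

# Broadcasting over arrays:
p_relu([1.0, -2.0, 3.0], [0.5, 0.5, -1.0])   # returns [1.0, -2.0, 0.0]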

function p_relu(x::T, l_n::Real, u_n::Real, x_s::T, l_s::Real, u_s::Real)::JuMP.AffExpr where {T<:JuMPLinearType}
    if u_s < l_s
        # TODO (vtjeng): This check is in place in case of numerical error in the calculation of bounds.
        # See sample number 4872 (1-indexed) when verified on the lp0.4 network.
        Memento.warn(
            MIPVerify.LOGGER,
            "Inconsistent upper and lower bounds: u-l = $(u - l) is negative. Attempting to use interval arithmetic bounds instead ...",
        )
        u_s = upper_bound(x_s)
        l_s = lower_bound(x_s)
    end

    if u_s < l_s
        error(
            MIPVerify.LOGGER,
            "Inconsistent upper and lower bounds even after using only interval arithmetic: u_s - l_s = $(u_s - l_s) is negative",
        )
    elseif u_s <= 0
        # rectified value is always 0
        return zero(T)
    elseif l_s >= 0
        # mask is always 1, so the output is just x
        return x
    else
        # since l_s < 0 < u_s, the mask is not fixed; x is assumed to have an associated model
        model = owner_model(x)
        x_rect = @variable(model)
        a = @variable(model, binary = true)

        # refined big-M formulation that takes advantage of the knowledge
        # that the lower and upper bounds are different.

        @constraint(model, x_s <= u_s * a)
        @constraint(model, x_s >= l_s * (1-a))

        @constraint(model, x_rect == x * a)

        # Manually set the bounds for x_rect so they can be used by downstream operations.

        if l_n > 0
            set_lower_bound(x_rect, 0)
        else
            set_lower_bound(x_rect, l_n)
        end
        if u_n > 0
            set_upper_bound(x_rect, u_n)
        else
            set_upper_bound(x_rect, 0)
        end
        return x_rect
    end
end
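
To make explicit what the two linking constraints are doing, here is a solver-free sanity check with illustrative bounds (l_s = -2.0, u_s = 3.0, not taken from a real network):

# Plain-arithmetic check of the linking constraints
#   x_s <= u_s * a    and    x_s >= l_s * (1 - a)
l_s, u_s = -2.0, 3.0
for a in (0, 1), x_s in (-1.5, 0.0, 2.5)
    feasible = (x_s <= u_s * a) && (x_s >= l_s * (1 - a))
    println("a = $a, x_s = $x_s -> feasible: $feasible")
end
# With a = 1 only x_s >= 0 is feasible; with a = 0 only x_s <= 0 is feasible,
# so a acts as the indicator of the sign of the masking pre-activation x_s.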

function p_relu(x::JuMPLinearType, x_s::JuMPLinearType)::JuMP.AffExpr
    u_s = tight_upperbound(x_s, cutoff = 0)
    l_s = lazy_tight_lowerbound(x_s, u_s, cutoff = 0)

    u = tight_upperbound(x, cutoff = 0)
    l = lazy_tight_lowerbound(x, u, cutoff = 0)
    p_relu(x, l, u, x_s, l_s, u_s)
end

function p_relu(
    x::AbstractArray{T},
    x_s::AbstractArray{T};
    nta::Union{TighteningAlgorithm,Nothing} = nothing,
)::Array{JuMP.AffExpr} where {T<:JuMPLinearType}
    show_progress_bar::Bool =
        MIPVerify.LOGGER.levels[MIPVerify.LOGGER.level] > MIPVerify.LOGGER.levels["debug"]
    if !show_progress_bar
        u_s = tight_upperbound.(x_s, nta = nta, cutoff = 0)
        l_s = lazy_tight_lowerbound.(x_s, u_s, nta = nta, cutoff = 0)

        u = tight_upperbound.(x, nta = nta, cutoff = 0)
        l = lazy_tight_lowerbound.(x, u, nta = nta, cutoff = 0)
        return p_relu.(x, l, u, x_s, l_s, u_s)
    else
        p1_s = Progress(length(x_s), desc = "  Calculating upper bounds shallow: ")
        u_s = map(x_i -> (next!(p1_s); tight_upperbound(x_i, nta = nta, cutoff = 0)), x_s)
        p2_s = Progress(length(x_s), desc = "  Calculating lower bounds shallow: ")
        l_s = map(v -> (next!(p2_s); lazy_tight_lowerbound(v..., nta = nta, cutoff = 0)), zip(x_s, u_s))
        reluinfo = ReLUInfo(l_s, u_s)
        Memento.info(MIPVerify.LOGGER, "$reluinfo")


        p1 = Progress(length(x), desc = "  Calculating upper bounds: ")
        u = map(x_i -> (next!(p1); tight_upperbound(x_i, nta = nta, cutoff = 0)), x)
        p2 = Progress(length(x), desc = "  Calculating lower bounds: ")
        l = map(v -> (next!(p2); lazy_tight_lowerbound(v..., nta = nta, cutoff = 0)), zip(x, u))

        # reluinfo = ReLUInfo(l, u)
        # Memento.info(MIPVerify.LOGGER, "$reluinfo")

        p3 = Progress(length(x), desc = "  Imposing p_relu constraint: ")
        return map(v -> (next!(p3); p_relu(v...)), zip(x, l, u, x_s, l_s, u_s))
    end
end

I followed the same approach as the existing ReLU implementation, changing only the base case and the constraints.
Furthermore, I used two different methods to train the masking network, one with a ReLU activation and one with a sigmoid, implemented via custom gradients:

import tensorflow as tf

@tf.custom_gradient
def relu(ys):
    # Forward pass: hard 0/1 mask indicating where the pre-activation is positive.
    a = tf.cast(ys > 0, tf.float32)
    def grad(dy):
        # Backward pass: pass the gradient through where ys > 0 (ReLU-style gradient).
        return dy * a
    return a, grad

@tf.custom_gradient
def sigmoid(ys):
    # Forward pass: same hard 0/1 mask as above.
    a = tf.cast(ys > 0, tf.float32)
    def grad(dy):
        # Backward pass: use the derivative of the sigmoid as the gradient.
        sigmoid_x = tf.math.sigmoid(ys)
        return dy * sigmoid_x * (1 - sigmoid_x)
    return a, grad

However, for the models trained with the ReLU activation, the solver always reports INFEASIBLE when I try to find adversarial examples under the L_\infty norm, even with large \varepsilon values such as 0.5 or even 0.9. The models trained with the sigmoid activation do not exhibit this behavior, and using PGD I am able to find adversarial examples. I would like to know whether my constraint formulation is correct for my goal, and what could explain this behavior.
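
For context, this is roughly the kind of call where the INFEASIBLE status shows up. It is only a minimal sketch: the network, image, target label, solver (Gurobi here), and options are placeholders rather than my exact setup:

using MIPVerify, Gurobi

# Placeholder network and sample; my actual model is the masked network described above.
nn = MIPVerify.get_example_network_params("MNIST.n1")
mnist = MIPVerify.read_datasets("MNIST")
sample_image = MIPVerify.get_image(mnist.test.images, 1)

d = MIPVerify.find_adversarial_example(
    nn,
    sample_image,
    10,                      # target label (1-indexed)
    Gurobi.Optimizer,
    Dict();                  # solver options (empty for this sketch)
    pp = MIPVerify.LInfNormBoundedPerturbationFamily(0.5),  # L_inf ball with eps = 0.5
    norm_order = Inf,
)
println(d[:SolveStatus])     # this is where I see INFEASIBLE for the ReLU-trained masks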

@vtjeng ?

I have already contacted @vtjeng about this issue and he gave me his thoughts. However, I can’t find a way to remove or edit the post anymore, so I will mark this as the solution.