Best way to train and evaluate SDDP with constraints including a product of control and state variables?

Hello!

I’m using the SDDP algorithm to solve a problem that is similar to the milk producer example.
This question is also related to my first question about SDDP and out-of-sample solutions.
Here is my asset management problem formulation. Given:

  • starting amount of cash
  • position in the asset
  • scenario lattice of asset price with transition probabilities. It has a single root node and then each stage has N nodes.

Optimize over trade (the control variable: how much of the asset to buy or sell at each stage t) to maximize a risk measure of wealth at the end of the planning horizon T (the EAVaR measure in my case).
Then, after calibrating the model, I need to get the decisions for out-of-sample historical prices for each stage to evaluate the model.
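
(For reference: as far as I understand from the SDDP.jl docs, SDDP.EAVaR(; lambda, beta) is the convex combination lambda * E[x] + (1 - lambda) * AV@R(beta)[x] of the expectation and the average value-at-risk at quantile beta, so lambda = 0.5, beta = 0.95 below weights the two equally.)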

The problem I encounter is that the cash-balance constraint is quadratic, since it includes the product of the current price and the trade (even though I JuMP.fix the price in each subproblem):
@constraint(sp, cash.out == cash.in - trade * price - commission_cost).
Because of this I can’t solve the problem with a linear optimizer (I tried Clp and Cardinal Optimizer (COPT); the latter reports that the problem is non-convex). So I use the Ipopt optimizer instead, but I wonder: will I get the same optimal solution as for a linear formulation of the problem where the prices are constant? (See the second version of my code below.)
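
Here is a minimal toy illustration of the issue (not my actual code):

    using JuMP, Ipopt

    m = Model(Ipopt.Optimizer)
    @variable(m, x)
    @variable(m, y)
    JuMP.fix(y, 2.0)
    # Even though `y` is fixed to a constant, JuMP still stores `x * y` as a
    # quadratic expression, so linear solvers reject this constraint:
    @constraint(m, c, x * y <= 1.0)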
Here is my code for this solution, where the price is parametrized:

    model = SDDP.PolicyGraph(
        SDDP.LinearGraph(T),
        sense=:Max,
        upper_bound=1e6,
        optimizer=Ipopt.Optimizer
    ) do sp, t
        JuMP.set_optimizer_attribute(sp, "Logging", 1)
        JuMP.set_optimizer_attribute(sp, "TimeLimit", 60)

        commission_rate = 0.0009  # 0.09%

        # Model states
        @variable(sp, cash >= 0, SDDP.State,
            initial_value = (t == 1 ? final_cash : 0.0))
        @variable(sp, position, SDDP.State, initial_value = final_position)

        # Control variable
        @variable(sp, -trade_limit <= trade <= trade_limit)
        @variable(sp, trade_abs >= 0)

        # Price variable
        @variable(sp, price)

        # Position bounds
        @constraint(sp, position.out >= position_bounds[1])
        @constraint(sp, position.out <= position_bounds[2])

        # Position balance
        @constraint(sp, position.out == position.in + trade)

        # Together these enforce trade_abs >= |trade|
        @constraint(sp, trade_abs >= trade)
        @constraint(sp, trade_abs >= -trade)

        # Outgoing cash with commission; this is the non-linear constraint
        @expression(sp, commission_cost, trade_abs * price * commission_rate)
        @constraint(sp, cash.out == cash.in - trade * price - commission_cost)


        # Parametrize price for each node
        if t < T
            SDDP.parameterize(sp, 1:(t == 1 ? 1 : N)) do n
                JuMP.fix(price, price_tree[t][n])
                if t == 1
                    probs = transition_probs_julia[t][1, :]
                else
                    probs = transition_probs_julia[t][n, :]
                end
                return probs
            end
        else
            SDDP.parameterize(sp, 1:N) do n
                JuMP.fix(price, price_tree[t][n])
                return [1.0]
            end
        end

        # save variables
        sp[:trade] = trade
        sp[:price] = price
        sp[:cash] = cash.out
        sp[:position] = position.out
        sp[:commission] = commission_cost  

        # Objective
        if t == T
            @stageobjective(sp, cash.out + position.out * price)
        else
            @stageobjective(sp, 0.0)
        end
    end

    
    start_time = time()
    SDDP.train(model, iteration_limit=50, print_level=1, risk_measure=SDDP.EAVaR(; lambda=0.5, beta=0.95))
    end_time = time()

And here is the evaluation function I use to get the decision for a real historical price that is out-of-sample for the given scenario lattice (price_tree):

function get_decision(model::SDDP.PolicyGraph, t::Int, cash::Float64, position::Float64, price::Float64)
    subproblem = model[t].subproblem
    JuMP.fix(subproblem[:price], price)
    subproblem[:cash] = cash
    subproblem[:position] = position
    JuMP.optimize!(subproblem)

    return (
        trade=JuMP.value(subproblem[:trade]),
        cash=JuMP.value(subproblem[:cash]),
        position=JuMP.value(subproblem[:position]),
        commission=JuMP.value(subproblem[:commission])
    )
end

It works fine, but I’m worried because I’m solving a problem with non-linear constraints, so I’m not sure whether I will get the optimal solution (also, please let me know where I can read more on how the cuts are constructed for non-linearly constrained problems and their dual problems).

And here is another way I’ve tried to train and evaluate my model: using SDDP.MarkovianPolicyGraph with the given transition_matrices. Note that here I’m using the COPT optimizer, since in this case the price is fixed and the problem is linear.

    model = SDDP.MarkovianPolicyGraph(;
        sense=:Max,
        transition_matrices=transition_matrices,
        upper_bound=1e6,
        optimizer=COPT.Optimizer
    ) do sp, node
        t, i = node  # t is the stage, i is the node index

        JuMP.set_optimizer_attribute(sp, "LogLevel", 1)
        JuMP.set_optimizer_attribute(sp, "PresolveType", 1)
        JuMP.set_optimizer_attribute(sp, "TimeLimit", 60)

        # Commission
        commission_rate = 0.0009

        # States
        @variable(sp, cash >= 0, SDDP.State, initial_value = (t == 1 ? final_cash : 0.0))
        @variable(sp, position, SDDP.State, initial_value = final_position)

        @variable(sp, trade_abs >= 0)

        # Control variable
        @variable(sp, -trade_limit <= trade <= trade_limit)

        # Position bounds
        @constraint(sp, position.out >= position_bounds[1])
        @constraint(sp, position.out <= position_bounds[2])
        @constraint(sp, position.out == position.in + trade)

        @constraint(sp, trade_abs >= trade)
        @constraint(sp, trade_abs >= -trade)

        # Outgoing cash with commission
        @expression(sp, commission_cost, trade_abs * price_tree[t][i] * commission_rate)
        @constraint(sp, cash.out == cash.in - trade * price_tree[t][i] - commission_cost)

        # Saving results
        sp[:trade] = trade
        sp[:cash] = cash.out
        sp[:position] = position.out

        # Objective
        if t == T
            @stageobjective(sp, cash.out + position.out * price_tree[t][i])
        else
            @stageobjective(sp, 0.0)
        end
    end

    start_time = time()
    SDDP.train(model, iteration_limit=100, print_level=1, risk_measure=SDDP.EAVaR(; lambda=0.5, beta=0.95))
    end_time = time()

But in that case I can’t evaluate a solution for an out-of-sample price, since the price is not even a variable in the problem. I can get the decision for any given node of the lattice, but is there a way to use the nearest-neighbour node’s cuts to solve the problem for an out-of-sample state (price) of the lattice?

function get_decision(model::SDDP.PolicyGraph, t::Int, i::Int, cash::Float64, position::Float64)
    println(cash)
    println(position)
    incoming_state = Dict(
        :cash => cash,
        :position => position
    )

    decision = SDDP.DecisionRule(model; node = (t, i))
    solution = SDDP.evaluate(
        decision;
        incoming_state,
        controls_to_record=[:trade]
    )

    println(solution)
    return solution[:controls][:trade]
end

So my questions are:

  • What is the best way to solve my problem? Is it the first or the second version of my code, or maybe something else?
  • Do I get the same optimal solution if I solve the non-linear problem with Ipopt? (For the first version of my code.)
  • Is there a way to get a solution for an out-of-sample state of the lattice using nearest-neighbour cuts? How do I plug the state into the problem in that case, if it is not a variable? (Related to the second version of my code.)
  • Where can I learn more about how the cuts are constructed for non-linearly constrained problems and their dual problems?

Hi @petr-a,

In your first model, there are a couple of issues:

  1. you can use JuMP.set_normalized_coefficient instead of fixing a variable.
    JuMP does not have the concept of multiplicative parameters, so writing
    x * y creates a non-convex quadratic term, even if x or y is fixed to a
    constant. See Add noise in the constraint matrix · SDDP.jl. (There is a
    small stand-alone example after this list.)
  2. SDDP.parameterize should not return the vector of probabilities. Instead,
    pass them as the third positional argument:
    SDDP.parameterize(sp, samples, probabilities) do w
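
To illustrate point 1, here is a minimal stand-alone sketch of what
set_normalized_coefficient does (the model and names here are hypothetical):

using JuMP
demo = Model()
@variable(demo, x)
@variable(demo, y)
@constraint(demo, c, 2.0 * x + 3.0 * y == 1.0)
# Replace the coefficient of `x` in `c`, so the constraint becomes
# 4x + 3y == 1:
set_normalized_coefficient(c, x, 4.0)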

Note that in this model the prices are independent in each stage. You cannot
have transition probabilities between (stage, price) pairs. I don’t have all
your data so I can’t test, but I would write your model like this:

using SDDP, Gurobi

model = SDDP.LinearPolicyGraph(;
    stages = T,
    sense = :Max,
    upper_bound = 1e6,
    optimizer = Gurobi.Optimizer
) do sp, t
    commission_rate = 0.0009  # 0.09%
    p_l, p_u = position_bounds
    @variables(sp, begin
        x_cash >= 0, SDDP.State, (initial_value = final_cash)
        p_l <= x_position <= p_u, SDDP.State, (initial_value = final_position)
        -trade_limit <= u_trade <= trade_limit
        u_trade_abs >= 0
        u_commission_cost
    end)
    price = 1.0  # placeholder
    @constraints(sp, begin
        # Note how I have normalized the two constraints by moving variables to
        # the left-hand side and putting `constant * variable`. This helps
        # reduce mistakes when using `set_normalized_coefficient`.
        c_commission,
            u_commission_cost - price * commission_rate * u_trade_abs == 0
        c_cash_balance,
            x_cash.out + price * u_trade + u_commission_cost - x_cash.in == 0
        x_position.out == x_position.in + u_trade
        u_trade_abs >= u_trade
        u_trade_abs >= -u_trade
    end)
    if t == T
        SDDP.parameterize(sp, price_tree[t], transition_probs_julia[t]) do price
            set_normalized_coefficient(
                c_commission,
                u_trade_abs,
                # Note how this is the full coefficient, not just `price`
                -price * commission_rate,
            )
            set_normalized_coefficient(c_cash_balance, u_trade, price)
            # The terminal objective depends on `price`, so set it inside
            # `SDDP.parameterize`:
            @stageobjective(sp, x_cash.out + price * x_position.out)
        end
    else
        SDDP.parameterize(sp, price_tree[t], transition_probs_julia[t]) do price
            set_normalized_coefficient(
                c_commission,
                u_trade_abs,
                # Note how this is the full coefficient, not just `price`
                -price * commission_rate,
            )
            set_normalized_coefficient(c_cash_balance, u_trade, price)
        end
        @stageobjective(sp, 0.0)
    end
end

SDDP.train(
    model;
    iteration_limit = 50,
    print_level = 1,
    risk_measure = SDDP.EAVaR(; lambda = 0.5, beta = 0.95),
)

price_sequence = rand(T)
scenario = [(t, price_sequence[t]) for t in 1:T]
simulations = SDDP.simulate(model, 1; sampling_scheme = SDDP.Historical(scenario))

Note how you can still simulate a price scenario that has some temporal
dependence. The assumption that the prices are independent is just an assumption
during training.
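
If you also want the stage decisions along that scenario, you can ask
SDDP.simulate to record variables by name (a sketch using the variable names
from the model above):

simulations = SDDP.simulate(
    model,
    1,
    [:u_trade];  # record the trade decision at each stage
    sampling_scheme = SDDP.Historical(scenario),
)
# `simulations[1]` is a vector with one Dict per stage:
trades = [stage[:u_trade] for stage in simulations[1]]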

For the Markovian model, you’re on the right track. Create the Markovian graph,
but the trick is that we still use SDDP.parameterize to decouple the price
from the node. This allows us to later simulate a different price at a node. I’d
write your model something like:

model = SDDP.MarkovianPolicyGraph(;
    transition_matrices = transition_matrices,
    sense = :Max,
    upper_bound = 1e6,
    optimizer = Gurobi.Optimizer
) do sp, node
    # Unpack the node
    t, price_index = node
    commission_rate = 0.0009  # 0.09%
    p_l, p_u = position_bounds
    @variables(sp, begin
        x_cash >= 0, SDDP.State, (initial_value = final_cash)
        p_l <= x_position <= p_u, SDDP.State, (initial_value = final_position)
        -trade_limit <= u_trade <= trade_limit
        u_trade_abs >= 0
        u_commission_cost
    end)
    price = 1.0  # placeholder
    @constraints(sp, begin
        c_commission,
            u_commission_cost - price * commission_rate * u_trade_abs == 0
        c_cash_balance,
            x_cash.out + price * u_trade + u_commission_cost - x_cash.in == 0
        x_position.out == x_position.in + u_trade
        u_trade_abs >= u_trade
        u_trade_abs >= -u_trade
    end)
    # Create a sample space with one element
    Ω = [price_tree[t][price_index]]
    if t == T
        SDDP.parameterize(sp, Ω, [1.0]) do price
            set_normalized_coefficient(
                c_commission,
                u_trade_abs,
                -price * commission_rate,
            )
            set_normalized_coefficient(c_cash_balance, u_trade, price)
            @stageobjective(sp, x_cash.out + price * x_position.out)
        end
    else
        SDDP.parameterize(sp, Ω, [1.0]) do price
            set_normalized_coefficient(
                c_commission,
                u_trade_abs,
                -price * commission_rate,
            )
            set_normalized_coefficient(c_cash_balance, u_trade, price)
        end
        @stageobjective(sp, 0.0)
    end
end

price_sequence = rand(T)
# Do something better to map `price_sequence` onto `price_index`;
# see the nearest-neighbour sketch below.
nearest_neighbours = rand(1:3, T)
scenario = Tuple{Tuple{Int,Int},Float64}[
    ((t, nearest_neighbours[t]), price_sequence[t]) for t in 1:T
]
simulations = SDDP.simulate(model, 1; sampling_scheme = SDDP.Historical(scenario))
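
For the nearest-neighbour mapping itself, a minimal sketch (assuming
price_tree[t] is a vector of the node prices at stage t, ordered consistently
with the Markov state indices) is:

# At each stage, pick the lattice node whose price is closest to the
# observed out-of-sample price:
nearest_neighbours = [
    argmin(abs.(price_tree[t] .- price_sequence[t])) for t in 1:T
]

SDDP.Historical then simulates at node (t, nearest_neighbours[t]), so the cuts
of the nearest lattice node are used, while the actual out-of-sample price is
still passed to SDDP.parameterize and fixed in the subproblem.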

I haven’t run your code, so there might be issues. If you need more help,
please provide some sample inputs for the various transition_matrices etc.,
and I can put together a runnable example.