# Copyright 2017, Iain Dunning, Joey Huchette, Miles Lubin, and contributors #src
# This Source Code Form is subject to the terms of the Mozilla Public License #src
# v.2.0. If a copy of the MPL was not distributed with this file, You can #src
# obtain one at https://mozilla.org/MPL/2.0/. #src
# # The network multi-commodity flow problem
# This tutorial is a variation of [The multi-commodity flow problem](@ref) where
# the graph is a network instead of a bipartite graph.
# The purpose of this tutorial is to demonstrate a style of modeling that
# uses relational algebra.
# ## Required packages
# This tutorial uses the following packages:
using JuMP
import DataFrames
import HiGHS
import SQLite
import SQLite.DBInterface
import Test #src
# ## Formulation
# The network multi-commondity flow problem is an extension of the
# [The multi-commodity flow problem](@ref), where instead of having a bipartite
# graph of supply and demand nodes, the graph can contains a set of nodes,
# $i \in \mathcal{N}$ , which each have a (potentially zero) supply capacity,
# $u^s_{i,p}$, and (potentially zero) a demand, $d_{i,p}$ for each commodity
# $p \in P$. The nodes are connected by a set of edges $(i, j) \in \mathcal{E}$,
# which have a shipment cost $c^x_{i,j,p}$ and a total flow capacity of
# $u^x_{i,j}$.
# Our take is to choose an optimal supply for each node $s_{i,p}$, as well as
# the optimal transshipment $x_{i,j,p}$ that minimizes the total cost.
# The mathematical formulation is:
# ```math
# \begin{aligned}
# \min \;\; & \sum_{(i,j)\in\mathcal{E}, p \in P} c^x_{i,j,p} x_{i,j,p} + \sum_{i\in\mathcal{N}, p \in P} c^s_{i,p} s_{i,p} \\
# s.t. \;\; & s_{i,p} + \sum_{(j, i) \in \mathcal{E}} x_{j,i,p} - \sum_{(i,j) \in \mathcal{E}} x_{i,j,p} = d_{i,p} & \forall i \in \mathcal{N}, p \in P \\
# & x_{i,j,p} \ge 0 & \forall (i, j) \in \mathcal{E}, p \in P \\
# & \sum_{p \in P} x_{i,j,p} \le u^x_{i,j} & \forall (i, j) \in \mathcal{E} \\
# & 0 \le s_{i,p} \le u^s_{i,p} & \forall i \in \mathcal{N}, p \in P.
# \end{aligned}
# ```
#
# The purpose of this tutorial is to demonstrate how this model can be built
# using relational algebra instead of a direct math-to-code translation of the
# summations.
# ## Data
# For the purpose of this tutorial, the JuMP repository contains an example
# database called `commodity_nz.db`:
filename = joinpath(@__DIR__, "commodity_nz.db");
# To run locally, download [`commodity_nz.db`](commodity_nz.db) and update
# `filename` appropriately.
# Load the database using `SQLite.DB`:
db = SQLite.DB(filename)
# A quick way to see the schema of the database is via `SQLite.tables`:
SQLite.tables(db)
# We interact with the database by executing queries and then loading the
# results into a DataFrame:
function get_table(db, table)
query = DBInterface.execute(db, "SELECT * FROM $table")
return DataFrames.DataFrame(query)
end
# The `shipping` table contains the set of arcs and their distances:
df_shipping = get_table(db, "shipping")
# The `products` table contains the shipping cost per kilometer of each product:
df_products = get_table(db, "products")
# The `supply` table contains the supply capacity of each node, as well as the
# cost:
df_supply = get_table(db, "supply")
# The `demand` table contains the demand of each node:
df_demand = get_table(db, "demand")
# ## JuMP formulation
# We start by creating a model and our decision variables:
model = Model(HiGHS.Optimizer)
set_silent(model)
# For the shipping decisions, we create a new column in `df_shipping` called
# `x_flow`, which has one non-negative decision variable for each row:
df_shipping.x_flow = @variable(model, x[1:size(df_shipping, 1)] >= 0)
df_shipping
# For the supply, we add a variable to each row, and then set the upper bound to
# the capacity of each supply node:
df_supply.x_supply = @variable(model, s[1:size(df_supply, 1)] >= 0)
set_upper_bound.(df_supply.x_supply, df_supply.capacity)
df_supply
# Our objective is to minimize the shipping cost plus the supply cost. To
# compute the flow cost, we need to join the shipping table, which contains
# `distance_km` with the products table, which contains `cost_per_km`:
df_cost = DataFrames.leftjoin(df_shipping, df_products; on = [:product])
df_cost.flow_cost = df_cost.cost_per_km .* df_cost.distance_km
df_cost
# Then we can use linear algebra to compute the inner product between two
# columns:
@objective(
model,
Min,
df_cost.flow_cost' * df_shipping.x_flow +
df_supply.cost' * df_supply.x_supply
);
# For the flow capacities on each arc, we use `DataFrames.groupby` to partition
# the flow variables based on `:origin` and `:destination`, and then we
# constrain their sum to be less than a fixed capacity.
capacity = 30
for df in DataFrames.groupby(df_shipping, [:origin, :destination])
@constraint(model, sum(df.x_flow) <= capacity)
end
# For each node in the graph, we need to compute a mass balance constraint
# which says that for each product, the supply, plus the flow into the node, and
# less the flow out of the node is equal to the demand.
# We can compute an expression for the flow out of each node using
# `DataFrames.groupby` on the `origin` and `product` columns of the
# `df_shipping` table:
df_flow_out = DataFrames.DataFrame(
(node = i.origin, product = i.product, x_flow_out = sum(df.x_flow)) for
(i, df) in pairs(DataFrames.groupby(df_shipping, [:origin, :product]))
)
# We can compute an expression for the flow into each node using
# `DataFrames.groupby` on the `destination` and `product` columns of the
# `df_shipping` table:
df_flow_in = DataFrames.DataFrame(
(node = i.destination, product = i.product, x_flow_in = sum(df.x_flow))
for (i, df) in
pairs(DataFrames.groupby(df_shipping, [:destination, :product]))
)
# We can join the two tables together using `DataFrames.outerjoin`. We need to
# use `outerjoin` here because there might be missing rows.
df = DataFrames.outerjoin(df_flow_in, df_flow_out; on = [:node, :product])
# Next, we need to join the supply column:
df = DataFrames.leftjoin(
df,
DataFrames.select(df_supply, [:origin, :product, :x_supply]);
on = [:node => :origin, :product],
)
# and then the demand column
df = DataFrames.leftjoin(
df,
DataFrames.select(df_demand, [:destination, :product, :demand]);
on = [:node => :destination, :product],
)
# Now we're ready to add our mass balance constraint. Because some rows contain
# `missing` values, we need to use `coalesce` to convert any `missing` into a
# numeric value:
@constraint(
model,
[r in eachrow(df)],
coalesce(r.x_supply, 0.0) + coalesce(r.x_flow_in, 0.0) -
coalesce(r.x_flow_out, 0.0) == coalesce(r.demand, 0.0),
);
# ## Solution
# Finally, we can optimize the model:
optimize!(model)
Test.@test termination_status(model) == OPTIMAL #src
Test.@test primal_status(model) == FEASIBLE_POINT #src
solution_summary(model)
# update the solution in the DataFrames:
df_shipping.x_flow = value.(df_shipping.x_flow)
df_supply.x_supply = value.(df_supply.x_supply);
# and display the optimal solution for flows:
DataFrames.select(
filter!(row -> row.x_flow > 0.0, df_shipping),
[:origin, :destination, :product, :x_flow],
)