# Copyright 2017, Iain Dunning, Joey Huchette, Miles Lubin, and contributors    #src
# This Source Code Form is subject to the terms of the Mozilla Public License   #src
# v.2.0. If a copy of the MPL was not distributed with this file, You can       #src
# obtain one at https://mozilla.org/MPL/2.0/.                                   #src

# # Simple semidefinite programming examples

# This tutorial is a collection of examples of small conic programs from the field of
# [semidefinite programming](https://en.wikipedia.org/wiki/Semidefinite_programming) (SDP).
#
# This tutorial makes use of the following packages:

using JuMP
import LinearAlgebra
import Plots
import Random
import SCS
import Test

# ## Maximum cut via SDP

# The [maximum cut problem](https://en.wikipedia.org/wiki/Maximum_cut) is a
# classical example in graph theory, where we seek to partition a graph into
# two complementary sets, such that the weight of edges between the two sets is
# maximized. This problem is NP-hard, but it is possible to obtain an
# approximate solution using the semidefinite programming relaxation:
# ```math
#     \text{max}  \quad   0.25 L•X \\
#     \text{    s.t.} \quad   \mathrm{diag}(X) = e \\
#                  \qquad X \succeq 0
# ```
# where ``L`` is the weighted graph Laplacian and ``e`` is a vector of ones.
# For more details, see [Goemans1995](@cite).

"""
    svd_cholesky(X::AbstractMatrix, rtol)

Return the matrix `U` of the Cholesky decomposition of `X` as `U' * U`.
Note that we do not use the `LinearAlgebra.cholesky` function because it
requires the matrix to be positive definite while `X` may be only
positive *semi*definite.

We use the convention `U' * U` instead of `U * U'` to be consistent with
`LinearAlgebra.cholesky`.
"""
function svd_cholesky(X::AbstractMatrix)
    F = LinearAlgebra.svd(X)
    ## We now have `X ≈ `F.U * D² * F.U'` where:
    D = LinearAlgebra.Diagonal(sqrt.(F.S))
    ## So `X ≈ U' * U` where `U` is:
    return (F.U * D)'
end

function solve_max_cut_sdp(weights)
    N = size(weights, 1)
    ## Calculate the (weighted) Laplacian of the graph: L = D - W.
    L = LinearAlgebra.diagm(0 => weights * ones(N)) - weights
    model = Model(SCS.Optimizer)
    set_silent(model)
    @variable(model, X[1:N, 1:N], PSD)
    for i in 1:N
        set_start_value(X[i, i], 1.0)
    end
    @objective(model, Max, 0.25 * LinearAlgebra.dot(L, X))
    @constraint(model, LinearAlgebra.diag(X) .== 1)
    optimize!(model)
    @assert is_solved_and_feasible(model)
    V = svd_cholesky(value(X))
    Random.seed!(N)
    r = rand(N)
    r /= LinearAlgebra.norm(r)
    cut = [LinearAlgebra.dot(r, V[:, i]) > 0 for i in 1:N]
    S = findall(cut)
    T = findall(.!cut)
    println("Solution:")
    println(" (S, T) = ({", join(S, ", "), "}, {", join(T, ", "), "})")
    return S, T
end

# Given the graph
# ```raw
# [1] --- 5 --- [2]
# ```
# The solution is `(S, T)  = ({1}, {2})`

S, T = solve_max_cut_sdp([0 5; 5 0])

# Given the graph
# ```raw
# [1] --- 5 --- [2]
#  |  \          |
#  |    \        |
#  7      6      1
#  |        \    |
#  |          \  |
# [3] --- 1 --- [4]
# ```
# The solution is `(S, T)  = ({1}, {2, 3, 4})`

S, T = solve_max_cut_sdp([0 5 7 6; 5 0 0 1; 7 0 0 1; 6 1 1 0])

# Given the graph
# ```raw
# [1] --- 1 --- [2]
#  |             |
#  |             |
#  5             9
#  |             |
#  |             |
# [3] --- 2 --- [4]
# ```
# The solution is `(S, T)  = ({1, 4}, {2, 3})`

S, T = solve_max_cut_sdp([0 1 5 0; 1 0 0 9; 5 0 0 2; 0 9 2 0])

# ## K-means clustering via SDP

# Given a set of points ``a_1, \ldots, a_m``  in ``\mathbb{R}^n``, allocate them to ``k`` clusters.
#
# For more details, see [Peng2007](@cite).

function example_k_means_clustering()
    a = [[2.0, 2.0], [2.5, 2.1], [7.0, 7.0], [2.2, 2.3], [6.8, 7.0], [7.2, 7.5]]
    m = length(a)
    num_clusters = 2
    W = zeros(m, m)
    for i in 1:m, j in i+1:m
        W[i, j] = W[j, i] = exp(-LinearAlgebra.norm(a[i] - a[j]) / 1.0)
    end
    model = Model(SCS.Optimizer)
    set_silent(model)
    @variable(model, Z[1:m, 1:m] >= 0, PSD)
    @objective(model, Min, LinearAlgebra.tr(W * (LinearAlgebra.I - Z)))
    @constraint(model, [i = 1:m], sum(Z[i, :]) .== 1)
    @constraint(model, LinearAlgebra.tr(Z) == num_clusters)
    optimize!(model)
    @assert is_solved_and_feasible(model)
    Z_val = value.(Z)
    current_cluster, visited = 0, Set{Int}()
    solution = [1, 1, 2, 1, 2, 2]  #src
    for i in 1:m
        if !(i in visited)
            current_cluster += 1
            println("Cluster $current_cluster")
            for j in i:m
                if isapprox(Z_val[i, i], Z_val[i, j]; atol = 1e-3)
                    println(a[j])
                    push!(visited, j)
                    Test.@test solution[j] == current_cluster  #src
                end
            end
        end
    end
    return
end

example_k_means_clustering()

# ## The correlation problem

# Given three random variables ``A``, ``B``, and ``C``, and given bounds on two of the three
# correlation coefficients:
# ```math
#     -0.2 \leq ρ_{AB} \leq -0.1 \\
#     0.4  \leq ρ_{BC} \leq  0.5
# ```
# our problem is to determine upper and lower bounds on other correlation coefficient ``ρ_{AC}``.
#
# We solve an SDP to make use of the following positive semidefinite property
# of the correlation matrix:
# ```math
# \begin{bmatrix}
#       1   &  ρ_{AB} &  ρ_{AC} \\
#      ρ_{AB} &  1    &  ρ_{BC}  \\
#      ρ_{AC} &  ρ_{BC} &  1
# \end{bmatrix} \succeq 0
# ```

function example_correlation_problem()
    model = Model(SCS.Optimizer)
    set_silent(model)
    @variable(model, X[1:3, 1:3], PSD)
    S = ["A", "B", "C"]
    ρ = Containers.DenseAxisArray(X, S, S)
    @constraint(model, [i in S], ρ[i, i] == 1)
    @constraint(model, -0.2 <= ρ["A", "B"] <= -0.1)
    @constraint(model, 0.4 <= ρ["B", "C"] <= 0.5)
    @objective(model, Max, ρ["A", "C"])
    optimize!(model)
    @assert is_solved_and_feasible(model)
    println("An upper bound for ρ_AC is $(value(ρ["A", "C"]))")
    Test.@test value(ρ["A", "C"]) ≈ 0.87195 atol = 1e-4  #src
    @objective(model, Min, ρ["A", "C"])
    optimize!(model)
    @assert is_solved_and_feasible(model)
    println("A lower bound for ρ_AC is $(value(ρ["A", "C"]))")
    Test.@test value(ρ["A", "C"]) ≈ -0.978 atol = 1e-3  #src
    return
end

example_correlation_problem()

# ## The minimum distortion problem

# This example arises from computational geometry, in particular the problem of
# embedding a general finite metric space into a Euclidean space.
#
# It is known that the 4-point metric space defined by the star graph
# ```raw
#   [1]
#     \
#      1
#       \
#       [0] —- 1 -- [2]
#       /
#      1
#     /
#   [3]
# ```
# cannot be exactly embedded into a Euclidean space of any dimension,
# where distances are computed by length of the shortest path between vertices.
# A distance-preserving embedding would require the three leaf nodes to form
# an equilateral triangle of side length 2, with the centre node (0) mapped to
# an equidistant point at distance 1; this is impossible since the
# [triangle inequality](https://en.wikipedia.org/wiki/Triangle_inequality)
# in Euclidean space implies all points would need to be simultaneously
# [collinear](https://en.wikipedia.org/wiki/Collinearity).
#
# Here we will formulate and solve an SDP to compute the best possible embedding,
# that is, the embedding ``f`` assigning each vertex ``v`` to a vector ``f(v)``
# that minimizes the distortion ``c`` such that
# ```math
#     D[a, b] \leq ||f(a) - f(b)|| \leq c \; D[a, b]
# ```
# for all edges ``(a, b)`` in the graph, where ``D[a, b]`` is the distance in the graph metric space.
#
# Any embedding ``f`` can be characterized by a Gram matrix ``Q``, which is PSD and
# such that
# ```math
#     ||f(a) - f(b)||^2 = Q[a, a] + Q[b, b] - 2 Q[a, b]
# ```
# The matrix entry ``Q[a,b]`` represents the inner product of ``f(a)`` with ``f(b)``.
#
# We therefore impose the constraint
# ```math
#     D[a, b]^2 \leq Q[a, a] + Q[b, b] - 2 Q[a, b] \leq c^2 \; D[a, b]^2
# ```
# for all edges ``(a, b)`` in the graph and minimize ``c^2``,
# which gives us the SDP formulation below.
# Since we may choose any point to be the origin, we fix the first vertex at 0.
#
# For more details, see [Matousek2013,Linial2002](@cite).

function example_minimum_distortion()
    model = Model(SCS.Optimizer)
    set_silent(model)
    D = [
        0.0 1.0 1.0 1.0
        1.0 0.0 2.0 2.0
        1.0 2.0 0.0 2.0
        1.0 2.0 2.0 0.0
    ]
    @variable(model, c² >= 1.0)
    @variable(model, Q[1:4, 1:4], PSD)
    for i in 1:4, j in (i+1):4
        @constraint(model, D[i, j]^2 <= Q[i, i] + Q[j, j] - 2 * Q[i, j])
        @constraint(model, Q[i, i] + Q[j, j] - 2 * Q[i, j] <= c² * D[i, j]^2)
    end
    fix(Q[1, 1], 0)
    @objective(model, Min, c²)
    optimize!(model)
    Test.@test is_solved_and_feasible(model)
    Test.@test objective_value(model) ≈ 4 / 3 atol = 1e-4
    ## Recover the minimal distorted embedding:
    X = [zeros(3) sqrt(value.(Q)[2:end, 2:end])]
    return Plots.plot(
        X[1, :],
        X[2, :],
        X[3, :];
        seriestype = :mesh3d,
        connections = ([0, 0, 0, 1], [1, 2, 3, 2], [2, 3, 1, 3]),
        legend = false,
        fillalpha = 0.1,
        lw = 3,
        ratio = :equal,
        xlim = (-1.1, 1.1),
        ylim = (-1.1, 1.1),
        zlim = (-1.5, 1.0),
        zticks = -1:1,
        camera = (60, 30),
    )
end

example_minimum_distortion()

# ## Lovász numbers

# The [Lovász number](https://en.wikipedia.org/wiki/Lov%C3%A1sz_number)
# of a graph, also known as Lovász's theta-function, is a number that
# lies between two important and related numbers that are computationally
# hard to determine, namely the chromatic and clique numbers of the graph.
# It is possible however to efficient compute the Lovász number as the
# optimal value of a semidefinite program.

# Consider the pentagon graph:
# ```raw
#      [5]
#     /   \
#    /     \
#  [1]     [4]
#   |       |
#   |       |
#  [2] --- [3]
# ```
# with five vertices and edges. Its Lovász number is known to be precisely
# ``\sqrt{5} \approx 2.236``, lying between 2 (the largest clique size) and
# 3 (the smallest number needed for a vertex coloring).

# Let ``i, \, j`` be integers such that ``1 \leq i < j \leq 5``.
# We define ``A^{ij}`` to be the ``5 \times 5`` symmetric
# matrix with entries ``(i,j)`` and ``(j,i)`` equal to 1, with all other entries 0.
# Let  ``E`` be the graph's edge set; in this example, ``E`` contains
# (1,2), (2,3), (3,4), (4,5), (5,1)
# and their transposes. The Lovász number can be computed from the program
# ```math
# \begin{align}
#     \text{max}      & \quad    J • X & \\
#     \text{    s.t.} & \quad A^{ij} • X = 0 \text{ for all } (i,j) \notin E   \\
#                     & \quad I • X = 1 \\
#                     & \quad X \succeq 0
# \end{align}
# ```
# where ``J`` is the matrix filled with ones, and ``I`` is the identity matrix.
#
# For more details, see [Barvinok2002,Knuth1994](@cite).

function example_theta_problem()
    model = Model(SCS.Optimizer)
    set_silent(model)
    E = [(1, 2), (2, 3), (3, 4), (4, 5), (5, 1)]
    @variable(model, X[1:5, 1:5], PSD)
    for i in 1:5
        for j in (i+1):5
            if !((i, j) in E || (j, i) in E)
                A = zeros(Int, 5, 5)
                A[i, j] = 1
                A[j, i] = 1
                @constraint(model, LinearAlgebra.dot(A, X) == 0)
            end
        end
    end
    @constraint(model, LinearAlgebra.tr(LinearAlgebra.I * X) == 1)
    J = ones(Int, 5, 5)
    @objective(model, Max, LinearAlgebra.dot(J, X))
    optimize!(model)
    Test.@test is_solved_and_feasible(model)
    Test.@test objective_value(model) ≈ sqrt(5) rtol = 1e-4
    println("The Lovász number is: $(objective_value(model))")
    return
end

example_theta_problem()

# ## Robust uncertainty sets

# This example computes the Value at Risk for a data-driven uncertainty set.
# Closed-form expressions for the optimal value are available.
# For more details, see [Bertsimas2018](@cite).

function example_robust_uncertainty_sets()
    R, d, 𝛿, ɛ = 1, 3, 0.05, 0.05
    N = ceil((2 + 2 * log(2 / 𝛿))^2) + 1
    c, μhat, M = randn(d), rand(d), rand(d, d)
    Σhat = 1 / (d - 1) * (M - ones(d) * μhat')' * (M - ones(d) * μhat')
    Γ1(𝛿, N) = R / sqrt(N) * (2 + sqrt(2 * log(1 / 𝛿)))
    Γ2(𝛿, N) = 2 * R^2 / sqrt(N) * (2 + sqrt(2 * log(2 / 𝛿)))
    model = Model(SCS.Optimizer)
    set_silent(model)
    @variable(model, Σ[1:d, 1:d], PSD)
    @variable(model, u[1:d])
    @variable(model, μ[1:d])
    @constraint(model, [Γ1(𝛿 / 2, N); μ - μhat] in SecondOrderCone())
    @constraint(model, [Γ2(𝛿 / 2, N); vec(Σ - Σhat)] in SecondOrderCone())
    @constraint(model, [((1-ɛ)/ɛ) (u - μ)'; (u-μ) Σ] >= 0, PSDCone())
    @objective(model, Max, c' * u)
    optimize!(model)
    @assert is_solved_and_feasible(model)
    exact =
        μhat' * c +
        Γ1(𝛿 / 2, N) * LinearAlgebra.norm(c) +
        sqrt((1 - ɛ) / ɛ) *
        sqrt(c' * (Σhat + Γ2(𝛿 / 2, N) * LinearAlgebra.I) * c)
    Test.@test objective_value(model) ≈ exact atol = 1e-2
    return
end

example_robust_uncertainty_sets()