# K-means clustering via SDP

From "Approximating K-means-type clustering via semidefinite programming" By Jiming Peng and Yu Wei.

Given a set of points $a_1, \ldots, a_m$ in $R_n$, allocate them to k clusters.

using JuMP
import LinearAlgebra
import SCS

function example_cluster(; verbose = true)
# Data points
n = 2
m = 6
a = Any[
[2.0, 2.0], [2.5, 2.1], [7.0, 7.0], [2.2, 2.3], [6.8, 7.0], [7.2, 7.5]
]
k = 2
# Weight matrix
W = zeros(m, m)
for i in 1:m
for j in i + 1:m
W[i, j] = W[j, i] = exp(-LinearAlgebra.norm(a[i] - a[j]) / 1.0)
end
end
model = Model(SCS.Optimizer)
set_silent(model)
# Z >= 0, PSD
@variable(model, Z[1:m, 1:m], PSD)
@constraint(model, Z .>= 0)
# min Tr(W(I-Z))
I = Matrix(1.0 * LinearAlgebra.I, m, m)
@objective(model, Min, LinearAlgebra.tr(W * (I - Z)))
# Z e = e
@constraint(model, Z * ones(m) .== ones(m))
# Tr(Z) = k
@constraint(model, LinearAlgebra.tr(Z) == k)
optimize!(model)
Z_val = value.(Z)
# A simple rounding scheme
which_cluster = zeros(Int, m)
num_clusters = 0
for i in 1:m
Z_val[i, i] <= 1e-6 && continue
if which_cluster[i] == 0
num_clusters += 1
which_cluster[i] = num_clusters
for j in i + 1:m
if LinearAlgebra.norm(Z_val[i, j] - Z_val[i, i]) <= 1e-6
which_cluster[j] = num_clusters
end
end
end
end
if verbose
# Print results
for cluster in 1:k
println("Cluster \$cluster")
for i in 1:m
if which_cluster[i] == cluster
println(a[i])
end
end
end
end
return
end

example_cluster()
Cluster 1
[2.0, 2.0]
[2.5, 2.1]
[2.2, 2.3]
Cluster 2
[7.0, 7.0]
[6.8, 7.0]
[7.2, 7.5]