基于密度的聚类算法OPTICS最关键的一步是得到样本点的聚类顺序图(可达距离图)。相比于另一个密度聚类算法DBSCAN,OPTICS对输入参数eps和MinPts不敏感。参数eps和MinPts分别代表邻域半径和最小样本数:若某个样本点在半径eps范围内的邻居点数量大于等于MinPts,则称该样本点为核心点。本文默认eps为正无穷,且MinPts不超过总样本数,因此任何一个样本点都可充当核心点。下面基于此情形,采用Python实现OPTICS并绘制其密度聚类顺序图。
import numpy as np
from math import sqrt
import matplotlib.pyplot as plt
# Synthetic data set: six 2-D Gaussian blobs of equal size, stacked row-wise
# into X.  The seed is fixed so every run draws the same samples.
np.random.seed(0)
n_points_per_cluster = 250

# Each entry is (centre, scale); blobs are drawn in this order, which must
# not change because the draws consume the shared random stream.
C1, C2, C3, C4, C5, C6 = [
    center + spread * np.random.randn(n_points_per_cluster, 2)
    for center, spread in (
        ([-5, -2], .8),
        ([4, -1], .1),
        ([1, -2], .2),
        ([-2, 3], .3),
        ([3, -2], 1.6),
        ([5, 6], 2),
    )
]
X = np.vstack((C1, C2, C3, C4, C5, C6))
class Object:
    """A 2-D sample point plus the bookkeeping fields used by the ordering code."""

    def __init__(self, x, y):
        """Create an unprocessed point at coordinates (x, y)."""
        self.x = x
        self.y = y
        # Flipped to True once the point has been placed in the ordering.
        self.Processed = False
        # Both distances hold the string placeholder 'UNDEFINED' until a
        # numeric value is assigned.
        self.reachability_distance = 'UNDEFINED'
        self.core_distance = 'UNDEFINED'

    def dist(self, obj):
        """Return the Euclidean distance between this point and ``obj``."""
        return sqrt((self.x - obj.x) ** 2 + (self.y - obj.y) ** 2)

    def setCoreDistance(self, MinPts, objects=None):
        """Return the ``MinPts``-th smallest distance from this point to ``objects``.

        Despite the name, this method does not assign ``self.core_distance``;
        it only returns the value and the caller stores it.

        Args:
            MinPts: 1-based rank of the distance to return.  If this point is
                itself in ``objects`` it counts as rank 1 (distance 0).
            objects: Iterable of points to measure against.  Defaults to the
                module-level ``SetOfObjects``.

        Returns:
            The distance at rank ``MinPts`` in ascending order.

        Raises:
            ValueError: If ``MinPts`` is smaller than 1 or larger than the
                number of points in ``objects``.
        """
        if objects is None:
            objects = SetOfObjects
        distances = sorted(self.dist(other) for other in objects)
        # Without this check a non-positive MinPts would index from the end
        # of the list and silently return an unrelated distance.
        if not 1 <= MinPts <= len(distances):
            raise ValueError(
                "MinPts must be between 1 and the number of objects "
                "(%d), got %r" % (len(distances), MinPts)
            )
        return distances[MinPts - 1]
# Module-level working state shared by the ordering functions.
OrderedFile = []  # points in the order they are emitted
OrderSeeds = []   # pending points, re-sorted by reachability after each update
# One Object per row of X: column 0 becomes x, column 1 becomes y.
SetOfObjects = [Object(row[0], row[1]) for row in X]
def OrderSeedsUpdate(CenterObject):
    """Refresh reachability distances of unprocessed points via ``CenterObject``.

    For every point in ``SetOfObjects`` that is not yet processed, the
    candidate reachability is the larger of ``CenterObject``'s core distance
    and its distance to that point.  A point still holding the 'UNDEFINED'
    placeholder takes the candidate value and is added to ``OrderSeeds``;
    otherwise its value is lowered only when the candidate is smaller.
    ``OrderSeeds`` is then re-sorted in ascending order of reachability.
    """
    core = CenterObject.core_distance
    for candidate in SetOfObjects:
        if candidate.Processed:
            continue
        reach = max(core, CenterObject.dist(candidate))
        known = candidate.reachability_distance
        if known == 'UNDEFINED':
            # First time this point is reached: record it and queue it.
            candidate.reachability_distance = reach
            OrderSeeds.append(candidate)
        elif reach < known:
            candidate.reachability_distance = reach
    OrderSeeds.sort(key=lambda seed: seed.reachability_distance)
def ExpandClusterOrder(obj, MinPts):
    """Emit ``obj`` and then every queued seed, smallest reachability first.

    Each emitted point is marked processed, given its core distance for
    ``MinPts``, appended to ``OrderedFile`` and used to update ``OrderSeeds``.
    The loop ends when ``OrderSeeds`` is empty.
    """
    def emit(point):
        # The same four steps apply to the start point and to every seed.
        point.Processed = True
        point.core_distance = point.setCoreDistance(MinPts)
        OrderedFile.append(point)
        OrderSeedsUpdate(point)

    emit(obj)
    while OrderSeeds:
        # OrderSeeds is kept sorted, so the head has the smallest reachability.
        emit(OrderSeeds.pop(0))
def OPTICS(MinPts):
    """Build the ordering by expanding from every point not yet processed.

    Points are visited in ``SetOfObjects`` order; those already marked
    processed by an earlier expansion are skipped.
    """
    for start in SetOfObjects:
        if start.Processed:
            continue
        ExpandClusterOrder(start, MinPts)
# Run the ordering with MinPts = 50; the result accumulates in OrderedFile.
OPTICS(50)
r_dis_list = [point.reachability_distance for point in OrderedFile]

# The first ordered point never receives a numeric reachability (it keeps the
# 'UNDEFINED' placeholder), so it is left out of the plot.
reachability = r_dis_list[1:]
positions = list(range(len(reachability)))

plt.figure(figsize=(15,8))
# Trailing ';' kept from the original - presumably there to silence the
# return-value echo when run in a notebook.
plt.scatter(positions, reachability);
|