三维点云课程—Special Clustering代码
from sklearn import datasets
import numpy as np
from sklearn.cluster import KMeans
from matplotlib import pyplot as plt
from itertools import cycle, islice
def distance(x1, x2):
res = np.sqrt(np.sum((x1 - x2) ** 2))
return res
def distanceMatrix(X):
M = np.array(X)
Z = np.zeros((len(M), len(M)))
for i in range(len(M)):
for j in range(i + 1, len(M)):
Z[i][j] = 1.0 * distance(X[i], X[j])
Z[j][i] = Z[i][j]
return Z
def adjacencyMatrix(Z, k, sigma=1.0):
N = len(Z)
A = np.zeros((N, N))
for i in range(N):
"""
>>>a = [1,2,3]
>>> b = [4,5,6]
>>> c = [4,5,6,7,8]
>>> zipped = zip(a,b) # 打包为元组的列表
[(1, 4), (2, 5), (3, 6)]
>>> zip(a,c) # 元素个数与最短的列表一致
[(1, 4), (2, 5), (3, 6)]
>>> zip(*zipped) # 与 zip 相反,*zipped 可理解为解压,返回二维矩阵式
[(1, 2, 3), (4, 5, 6)]
"""
dist = zip(Z[i], range(N))
dist_index = sorted(dist, key=lambda x: x[0])
neibours_id = [dist_index[m][1] for m in range(k + 1)]
sigma=np.var(Z)/4
for index in neibours_id:
A[i][index] = np.exp(-(Z[i][index]**2) / (2 * sigma * sigma))
A[index][i] = A[i][index]
return A
def laplacianMatrix(A,k):
D = np.sum(A, axis=1)
L = np.diag(D) - A
squareD = np.diag(1.0 / (D ** (0.5)))
standardization_L = np.dot(np.dot(squareD, L), squareD)
x, V = np.linalg.eig(standardization_L)
V=V.astype(float)
idx_k_smallest = np.where(x < np.partition(x, k)[k])
H = np.hstack([V[:, i] for i in idx_k_smallest])
return H
def read_txt(path):
filename = path
pos = []
Efield = []
with open(filename, 'r') as file_to_read:
while True:
lines = file_to_read.readline()
if not lines:
break
pass
p_tmp, E_tmp = [float(i) for i in lines.split(",")]
pos.append(p_tmp)
Efield.append(E_tmp)
pass
pos = np.array(pos)
Efield = np.array(Efield)
x=np.vstack(pos)
y=np.vstack(Efield)
X=np.concatenate((x,y),axis=1)
return X
fig, ax = plt.subplots(4, 2)
def show_result(x,i,j,n_labels):
ax[i][j].scatter(x[:, 0], x[:, 1], s=20, c="b", marker='o')
ax[i][j].set_xlabel('x')
ax[i][j].set_ylabel('y')
list_max = max(n_labels)
if list_max == 1:
for idx, value in enumerate(n_labels):
if value == 0:
ax[i][j+1].scatter(x[idx, 0], x[idx, 1], s=20, c="r", marker='*')
elif value == 1:
ax[i][j+1].scatter(x[idx, 0], x[idx, 1], s=20, c="g", marker='+')
elif list_max == 2:
for idx, value in enumerate(n_labels):
if value == 0:
ax[i][j+1].scatter(x[idx, 0], x[idx, 1], s=20, c="r", marker='*')
elif value == 1:
ax[i][j+1].scatter(x[idx, 0], x[idx, 1], s=20, c="g", marker='+')
elif value == 2:
ax[i][j+1].scatter(x[idx, 0], x[idx, 1], s=20, c="y", marker='^')
ax[i][j+1].set_xlabel('x')
ax[i][j+1].set_ylabel('y')
if __name__ == '__main__':
data_circle=read_txt("circle.txt")
Z_circle = distanceMatrix(data_circle)
M_cicle = adjacencyMatrix(Z_circle, 5)
H_cicle = laplacianMatrix(M_cicle,2)
sp_circle = KMeans(n_clusters=2).fit(H_cicle)
show_result(data_circle,0,0,sp_circle.labels_)
print("the first cluster successful")
data_moons = read_txt("moons.txt")
Z_moons = distanceMatrix(data_moons)
M_moons = adjacencyMatrix(Z_moons, 6)
H_moons = laplacianMatrix(M_moons,2)
sp_moons = KMeans(n_clusters=2).fit(H_moons)
show_result(data_moons, 1, 0, sp_moons.labels_)
print("the second cluster successful")
data_varied = read_txt("varied.txt")
Z_varied = distanceMatrix(data_varied)
M_varied = adjacencyMatrix(Z_varied, 30)
H_varied = laplacianMatrix(M_varied,3)
sp_varied = KMeans(init='k-means++',n_clusters=3,tol=1e-6).fit(H_varied)
show_result(data_varied, 2, 0, sp_varied.labels_)
print("the third cluster successful")
data_aniso = read_txt("aniso.txt")
Z_aniso = distanceMatrix(data_aniso)
M_aniso = adjacencyMatrix(Z_aniso, 20)
H_aniso = laplacianMatrix(M_aniso,3)
sp_aniso = KMeans(init="k-means++",n_clusters=3,tol=1e-6).fit(H_aniso)
show_result(data_aniso, 3, 0, sp_aniso.labels_)
print("the fourth cluster successful")
plt.show()
仿真结果 代码比较清晰,数据集参考KMeans的文章,自行下载
|