首先导入包:
import matplotlib.pyplot as plt
import numpy as np
import pylab as mpl
from sklearn.datasets import make_blobs
设置 matplotlib 的默认字体，使中文和负号能够正常显示:
# Plot defaults, applied in one call:
#  - 'font.sans-serif': put FangSong first in the sans-serif font list
#    (presumably so Chinese text in figures renders -- confirm the font is
#    installed on the target machine).
#  - 'axes.unicode_minus': False draws negative tick labels with an ASCII
#    hyphen instead of the Unicode minus sign.
mpl.rcParams.update({
    'font.sans-serif': ['FangSong'],
    'axes.unicode_minus': False,
})
设置参数并生成样本数据:
# Experiment parameters.
n_samples = 1500     # total number of points requested from make_blobs
random_state = 170   # seed handed to make_blobs for reproducible data
k = 3                # number of clusters K-means should find
np.random.seed(26)   # seed NumPy's global RNG for reproducible runs

X, y = make_blobs(n_samples=n_samples, random_state=random_state)

# Regroup the samples so that rows with the same label are contiguous
# (label 0 first, then 1, ...), keeping the original order inside each
# label.  A stable argsort does this for any n_samples / number of labels;
# the previous hard-coded "[:500]" slices and "[0] * 500" labels left
# `ages` and `y` with mismatched lengths as soon as n_samples changed.
order = np.argsort(y, kind='stable')
ages = X[order]
y = y[order]
初始化聚类中心和距离矩阵:
# Centers from the previous iteration.  Zeros are only a placeholder; the
# main loop overwrites them on its first pass.  The shape follows k and the
# data dimensionality instead of the hard-coded (3, 2).
centers = np.zeros((k, ages.shape[1]))

# Pick k *distinct* samples as the initial centers.  Sampling with
# replacement (the np.random.choice default) can select the same point
# twice, which leaves one cluster empty and produces a NaN center.
centers_random = np.random.choice(len(ages), k, replace=False)

# Fancy indexing returns a copy, so updating centers_new never touches ages.
centers_new = ages[centers_random]

# dis_to_cent[i, j] will hold the distance from sample j to center i.
dis_to_cent = np.zeros((k, len(ages)))
迭代更新聚类中心，直到中心不再变化（收敛）:
# K-means main loop: alternate the assignment and update steps until the
# centers are identical to those of the previous iteration.
while not (centers_new == centers).all():
    # Remember the current centers so the loop condition can compare them
    # with the updated ones.
    centers = centers_new.copy()

    # Assignment step: Euclidean distance from every sample to each center,
    # then label each sample with the index of its nearest center.
    for ii in range(k):
        dis_to_cent[ii] = np.linalg.norm(ages - centers[ii], axis=1)
    clusters = dis_to_cent.argmin(axis=0)

    # Update step: move each center to the mean of its assigned samples.
    for ii in range(k):
        cluster = ages[clusters == ii]
        # The mean of an empty cluster is NaN, and NaN never compares equal
        # to itself, so the loop would never terminate.  Keep the previous
        # center for an empty cluster instead.
        if len(cluster) > 0:
            centers_new[ii] = cluster.mean(0)

    # NOTE(review): the original file lost its indentation; this progress
    # print is assumed to run once per iteration -- confirm.
    print(centers, centers_new)

# Final centers and a confirmation that the loop ended by convergence.
print(centers_new)
print('centers_new==centers?', (centers_new == centers).all())
|