主成分分析(PCA)
import numpy as np
import matplotlib.pyplot as plt
# Principal component analysis (PCA): project 2-D samples onto the single
# direction along which they vary the most.

# Each row is one observation and each column one variable.  A plain ndarray
# is used rather than np.matrix, which NumPy discourages for new code.
data = np.array([
    [2.5, 2.4], [0.5, 0.7], [2.2, 2.9], [1.9, 2.2], [3.1, 3.0],
    [2.3, 2.7], [2.0, 1.6], [1.0, 1.1], [1.5, 1.6], [1.1, 0.9],
])

# Centre every column on zero.  Broadcasting subtracts the per-column mean
# from each row, so no sample count or feature count is hard-coded.
average = np.mean(data, axis=0)
data_adjust = data - average

# rowvar=False tells np.cov that columns are variables and rows observations.
cov = np.cov(data_adjust, rowvar=False)

# Column j of `eigenvectors` is the eigenvector paired with eigenvalues[j].
eigenvalues, eigenvectors = np.linalg.eig(cov)

# The principal axis is the eigenvector with the largest eigenvalue.
# np.argmax returns the first maximum, matching a strict "<" linear scan.
principal_index = int(np.argmax(eigenvalues))
principal_axis = eigenvectors[:, principal_index].reshape(-1, 1)

# Coordinate of each centred sample along the principal axis; one column,
# one row per sample.
final_data = np.dot(data_adjust, principal_axis)
print(final_data)
实验结果:
[[-0.82797019]
[ 1.77758033]
[-0.99219749]
[-0.27421042]
[-1.67580142]
[-0.9129491 ]
[ 0.09910944]
[ 1.14457216]
[ 0.43804614]
[ 1.22382056]]
|