import numpy as np
import pandas as pd
import os
# Switch to the folder the relative workbook path is resolved against, load
# the car data set, and display its first six rows.
os.chdir("C:\\Users\\Administrator\\Desktop")
data = pd.read_excel("car.xlsx")
datah = data.iloc[:6]
print(datah)
a1 a2 a3 a4 a5 a6 d
0 4 4 2 2 3 2 3
1 4 4 2 2 3 3 3
2 4 4 2 2 3 1 3
3 4 4 2 2 2 2 3
4 4 4 2 2 2 3 3
5 4 4 2 2 2 1 3
# Train an RBF-kernel SVM on the leading rows of the car data and measure its
# accuracy on the remaining held-out rows.
from sklearn import svm

# Columns 0-5 are passed to the classifier as features, column 6 as the label.
# The split is contiguous: rows [0, n_train) train and rows [n_train, end)
# test. iloc slices exclude their stop index, so the test slice must start at
# n_train itself; starting one later would leave row n_train in neither set.
n_train = 1690
x = data.iloc[:n_train, :6].values
y = data.iloc[:n_train, 6].values
x1 = data.iloc[n_train:, :6].values
y1 = data.iloc[n_train:, 6].values

clf = svm.SVC(kernel='rbf')
clf.fit(x, y)
rv = clf.score(x, y)  # accuracy on the training rows themselves

R = clf.predict(x1)
Z = R - y1  # zero wherever the predicted label equals the true label
Rs = float(np.mean(Z == 0))  # share of held-out rows predicted correctly
print('预测结果为:', R)
print('预测准确率为:', Rs)
预测结果为: [4 3 1 1 3 1 4 3 1 4 3 3 3 3 3 3 3 3 3 3 3 1 3 1 4 3 1 4 3 3 1 3 1 4 3 1 4]
预测准确率为: 0.7027027027027027
def K_mean(data, knum):
    """Cluster the rows of *data* into *knum* groups with plain k-means.

    The first ``knum`` rows seed the cluster centres. Each pass assigns every
    row to its nearest centre (Euclidean distance, lowest index wins a tie)
    and then moves each centre to the mean of the rows assigned to it.
    Iteration stops when a pass leaves every centre exactly where it was.

    Args:
        data: 2-D array-like of finite numbers, one observation per row and
            one feature per column.
        knum: Number of clusters; must satisfy ``1 <= knum <= len(data)``.

    Returns:
        A float array of shape ``(n_rows, n_features + 1)``: the input with
        one extra trailing column holding each row's cluster index
        (``0 .. knum - 1``).

    Raises:
        ValueError: If *data* is not a non-empty 2-D array of finite values,
            or *knum* is outside ``1 .. len(data)``.
    """
    import numpy as np

    points = np.asarray(data, dtype=float)
    if points.ndim != 2 or points.size == 0:
        raise ValueError("data must be a non-empty 2-D array")
    if not np.isfinite(points).all():
        # NaN/inf would propagate into the centres and the exact-equality
        # convergence test below could then never succeed.
        raise ValueError("data must contain only finite values")
    n_rows = points.shape[0]
    if not 1 <= knum <= n_rows:
        raise ValueError("knum must be between 1 and the number of rows")

    centers = points[:knum].copy()
    while True:
        # Squared distances, shape (n_rows, knum). The square root is
        # monotonic, so skipping it does not change which centre is nearest.
        diff = points[:, np.newaxis, :] - centers[np.newaxis, :, :]
        labels = np.argmin(np.sum(diff ** 2, axis=2), axis=1)

        new_centers = centers.copy()
        for k in range(knum):
            members = points[labels == k]
            # An empty cluster keeps its previous centre: averaging zero rows
            # yields NaN, and a NaN centre never compares equal to itself, so
            # the loop would otherwise run forever.
            if len(members):
                new_centers[k] = members.mean(axis=0)

        if np.array_equal(new_centers, centers):
            return np.hstack((points, labels.reshape(-1, 1).astype(float)))
        centers = new_centers
# Read the header-less sheet D.xlsx, split its rows into two clusters with
# K_mean, and draw the clusters with different markers.
import matplotlib.pyplot as plt

D = pd.read_excel('D.xlsx', header=None).values
r = K_mean(D, 2)

# Column 2 of the result is read as the cluster label, columns 0 and 1 as the
# plot coordinates (assumes D has exactly two columns — TODO confirm).
in_cluster0 = r[:, 2] == 0
in_cluster1 = r[:, 2] == 1
x0, y0 = r[in_cluster0, 0], r[in_cluster0, 1]
x1, y1 = r[in_cluster1, 0], r[in_cluster1, 1]

plt.plot(x0, y0, 'r*')  # cluster 0: red stars
plt.plot(x1, y1, 'bo')  # cluster 1: blue circles
[<matplotlib.lines.Line2D at 0x23d6cf71760>]
![png](output_5_1.png)
# Standardise every column after the first, group the rows into four clusters
# with scikit-learn's KMeans, and print the cluster label of each region
# ordered by label.
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

data = pd.read_excel('农村居民人均可支配收入来源2016.xlsx')

# Fit the scaler and transform the feature columns in a single step
scaler = StandardScaler()
X = scaler.fit_transform(data.iloc[:, 1:])

# fit() returns the estimator itself, so construction and fitting can be chained
model = KMeans(n_clusters=4, random_state=0, max_iter=500).fit(X)
c = model.labels_

# Label each row by its '地区' value so the printout is readable by region
Fs = pd.Series(c, index=data['地区']).sort_values(ascending=True)
print(Fs)
地区
新疆 0
云南 0
山东 0
西藏 0
福建 0
黑龙江 0
海南 0
辽宁 0
内蒙古 0
河北 0
吉林 0
浙江 1
天津 1
江苏 1
甘肃 2
重庆 2
陕西 2
青海 2
贵州 2
四川 2
河南 2
广东 2
湖南 2
湖北 2
宁夏 2
江西 2
安徽 2
山西 2
广西 2
上海 3
北京 3
dtype: int32
|