用 k-近邻算法识别数字
k-近邻算法
现有一个已经分类好的训练集,以及一些需要分类的数据(暂且称为"目标")。要对目标分类,只需算出目标与训练集中每个样本的距离,再找出距离最近的 k 个训练样本;这 k 个样本中哪个类别出现得最多,就把目标判定为该类别。
?
数字样式如下:我们有数百个记录数字形状的 txt 文档,每个文档是一个 32 行、每行 32 个数字字符的点阵,文件名的第一个字符就是它所表示的数字,样式分别是 1~9。
运用这些文档对计算机进行训练,使之以后可以自动判断此类文档表示什么数字
下面是我的代码,目前判断准确率约为 90%,仍有待提高。
?
import os
import numpy as np
# File names (no directory part) of the samples that are later loaded as the
# training set by read_allfile().
list_path = os.listdir('D:\\aaa编程之pycharm\\要来了吗,机器学习!\\k-近邻\\testDigits')
# File names (no directory part) of the samples that are later loaded as the
# evaluation set by read_allfile2().
check_path = os.listdir('D:\\aaa编程之pycharm\\要来了吗,机器学习!\\k-近邻\\测试集')
# Debug output: show which evaluation files were found.
print(check_path)
# Default sample directories, unchanged from the original script.
_TRAIN_DIR = 'D:\\aaa编程之pycharm\\要来了吗,机器学习!\\k-近邻\\testDigits'
_CHECK_DIR = 'D:\\aaa编程之pycharm\\要来了吗,机器学习!\\k-近邻\\测试集'
# Every sample file is read as a _SIDE x _SIDE grid of single-digit characters.
_SIDE = 32


def _read_digit_files(dir_path, file_names):
    """Load the named sample files from *dir_path* into a matrix plus labels.

    Each file contributes one row of ``_SIDE * _SIDE`` values: the first
    ``_SIDE`` characters of each of its first ``_SIDE`` lines, converted with
    ``int``. The label of a sample is the first character of its file name.
    """
    data = np.zeros((len(file_names), _SIDE * _SIDE))
    labels = []
    for row, file_name in enumerate(file_names):
        # ``with`` guarantees the handle is closed even if parsing fails;
        # the original code leaked one open file per sample.
        with open(os.path.join(dir_path, file_name)) as fr:
            labels.append(file_name[0])
            for i in range(_SIDE):
                line = fr.readline()
                # Indexing (rather than slicing) keeps the IndexError on
                # truncated lines instead of silently reading short rows.
                data[row, _SIDE * i:_SIDE * (i + 1)] = [
                    int(line[j]) for j in range(_SIDE)
                ]
    return data, labels


def read_allfile(list_path, dir_path=_TRAIN_DIR):
    """Read the training samples.

    Args:
        list_path: sequence of sample file names (without directory).
        dir_path: directory containing the files; defaults to the training
            directory hard-coded by the original script.

    Returns:
        ``(data, labels)`` where ``data`` is a ``(len(list_path), 1024)``
        float array and ``labels`` is a list holding the first character of
        each file name.

    Raises:
        OSError: if a file cannot be opened.
        IndexError: if a file has fewer than 32 lines or a line is shorter
            than 32 characters.
        ValueError: if a character is not a decimal digit.
    """
    return _read_digit_files(dir_path, list_path)


def read_allfile2(list_path, dir_path=_CHECK_DIR):
    """Read the evaluation samples.

    Same contract as :func:`read_allfile`; only the default directory
    differs (the evaluation directory hard-coded by the original script).
    """
    return _read_digit_files(dir_path, list_path)
# Load the feature matrix and labels for the training files and for the
# evaluation files listed above.
train_Data,train_label = read_allfile(list_path)
check_Data,check_label = read_allfile2(check_path)
# Debug output: dump the evaluation feature matrix and its expected labels.
print(check_Data)
print(check_label)
def classify(train_data,check_data,train_labels,k):
    """Label every row of *check_data* by majority vote of its k nearest rows.

    Distance is the squared Euclidean distance; the square root is skipped
    because it does not change the ordering of neighbours.

    Args:
        train_data: 2-D array, one training sample per row.
        check_data: 2-D array, one sample to classify per row (same width).
        train_labels: sequence of labels, ``train_labels[i]`` belongs to
            ``train_data[i]``.
        k: number of neighbours that vote. Values larger than the number of
            training samples are clamped to that number.

    Returns:
        A list with one predicted label per row of *check_data*. When several
        labels tie on votes, the one whose closest neighbour is nearest wins.

    Raises:
        ValueError: if *train_data* has no rows or *k* is smaller than 1.
    """
    train_number = train_data.shape[0]
    if train_number == 0:
        raise ValueError("train_data must contain at least one sample")
    if k < 1:
        raise ValueError("k must be a positive integer")
    # Asking for more neighbours than exist used to raise IndexError.
    neighbours = min(k, train_number)

    gass_label = []
    for line in check_data:
        # Broadcasting subtracts ``line`` from every training row, so no
        # tiled copy of the sample is needed.
        distance = ((train_data - line) ** 2).sum(axis=1)
        nearest = distance.argsort()[:neighbours]
        votes = {}
        for idx in nearest:
            label = train_labels[idx]
            votes[label] = votes.get(label, 0) + 1
        # dicts keep insertion order and max() returns the first maximum, so
        # a tie goes to the label that was seen first, i.e. the nearest one.
        gass_label.append(max(votes, key=votes.get))
    return gass_label
# Predict a label for every evaluation sample using the 50 nearest training
# samples, then print the predictions.
answer = classify(train_Data,check_Data,train_label,50)
print(answer)
def check(true,gass):
    """Return the fraction of positions where *gass* equals *true*.

    Only the first ``len(true)`` entries of *gass* are compared. The result
    is the number of matching positions divided by ``len(true)``.
    """
    total = len(true)
    hits = sum(1 for pos in range(total) if true[pos] == gass[pos])
    return hits / total
# Compare the predictions with the expected labels and print the fraction
# that matched.
how = check(check_label,answer)
print(how)
|