1、使用决策树预测隐形眼镜类型。隐形眼镜数据集(lenses.csv)是非常著名的数据集,它包含很多患者眼部状况的观察条件以及医生推荐的隐形眼镜类型。隐形眼镜类型包括硬材质、软材质以及不适合佩戴隐形眼镜。要求:读取lenses.csv中的隐形眼镜数据集,构造决策树并画出决策树(采用CART算法构造决策树),预测['young','hyper','no','normal']适合戴哪种隐形眼镜。
from sklearn import tree
import csv
from sklearn.feature_extraction import DictVectorizer
from sklearn import preprocessing
# Task 1: fit a CART (Gini impurity) decision tree on the contact-lens data,
# render it with Graphviz, and predict the lens type for one patient.
featureList = []
labels = []
# Use a context manager so the CSV file is closed once it has been read;
# newline='' is the mode the csv module documents for reader input.
with open('lenses.csv', 'r', newline='') as csv_file:
    reader = csv.DictReader(csv_file)
    for row in reader:
        # The four categorical observations form the features ...
        featureList.append({
            "age": row["age"],
            "prescript": row["prescript"],
            "astigmatic": row["astigmatic"],
            "tearRate": row["tearRate"],
        })
        # ... and the "class" column is the target label.
        labels.append(row["class"])

# One-hot encode the string-valued features into a dense numeric matrix.
vec = DictVectorizer()
featureData = vec.fit_transform(featureList).toarray()

# get_feature_names() was removed in scikit-learn 1.2; use its replacement
# when it exists and fall back to the old method on older installations.
if hasattr(vec, "get_feature_names_out"):
    feature_names = vec.get_feature_names_out().tolist()
else:
    feature_names = vec.get_feature_names()
print('特征名称:', feature_names)

model = tree.DecisionTreeClassifier(criterion='gini', max_depth=3)
model.fit(featureData, labels)

import graphviz

dot_data = tree.export_graphviz(
    model,
    out_file=None,
    feature_names=feature_names,
    # Take the class names from the fitted model so they are guaranteed to be
    # in the same order as the classes the tree actually learned.
    class_names=[str(name) for name in model.classes_],
    filled=True,
    rounded=True,
    special_characters=False,
)
graph = graphviz.Source(dot_data)
# Bare expression: shows the tree when it is the last statement of a notebook
# cell; it has no visible effect when run as a plain script.
graph

# Encode the query with the fitted vectorizer instead of hand-writing a
# one-hot vector, so it cannot drift from the learned column order.
sample = {"age": "young", "prescript": "hyper", "astigmatic": "no", "tearRate": "normal"}
prediction = model.predict(vec.transform([sample]).toarray())
print(prediction)
['soft']
2、读取play.csv文件的内容(outlook(天气)、TEMPERATURE(温度)、HUMIDITY(湿度)、WINDY(风)代表四个特征,最后一列(PLAY)代表类别,即是否出去打球),根据play.csv的数据构造决策树并训练模型,画出决策树。
from sklearn import tree
import csv
from sklearn.feature_extraction import DictVectorizer
from sklearn import preprocessing
import numpy as np
# Task 2: fit a CART (Gini impurity) decision tree on the play dataset, which
# mixes categorical columns (outlook, WINDY) with numeric ones (TEMPERATURE,
# HUMIDITY), and render the tree with Graphviz.
featureList = []
labels = []
numeric_rows = []
# Use a context manager so the CSV file is closed once it has been read;
# newline='' is the mode the csv module documents for reader input.
with open('play.csv', 'rt', newline='') as csv_file:
    reader = csv.DictReader(csv_file)
    for row in reader:
        # Categorical columns are one-hot encoded below.
        featureList.append({"outlook": row["outlook"], "WINDY": row["WINDY"]})
        labels.append(row["PLAY"])
        # Numeric columns are collected separately and kept as numbers.
        numeric_rows.append([float(row['TEMPERATURE']), float(row['HUMIDITY'])])

# Size the numeric matrix from the data instead of assuming exactly 14 rows;
# reshape keeps it two-dimensional even if the file has no data rows.
dataset0 = np.array(numeric_rows, dtype=float).reshape(-1, 2)
print('labels:', labels)
print(dataset0)

vec = DictVectorizer()
featureData = vec.fit_transform(featureList).toarray()

# get_feature_names() was removed in scikit-learn 1.2; use its replacement
# when it exists and fall back to the old method on older installations.
if hasattr(vec, "get_feature_names_out"):
    categorical_names = vec.get_feature_names_out().tolist()
else:
    categorical_names = vec.get_feature_names()
print('特征名称:', categorical_names)

# Build the full column-name list explicitly. Appending to the list returned by
# the vectorizer only changes a temporary copy, which would leave the names
# out of step with the columns of x_data.
feature_names = list(categorical_names) + ['TEMPERATURE', 'HUMIDITY']
print(feature_names)

# Column order: one-hot categorical columns first, then the two numeric ones,
# matching the order of feature_names above.
x_data = np.append(featureData, dataset0, axis=1)
print(x_data)

lb = preprocessing.LabelBinarizer()
y_data = lb.fit_transform(labels)
print(lb.classes_)

model = tree.DecisionTreeClassifier(criterion='gini')
model.fit(x_data, y_data)

import graphviz

dot_data = tree.export_graphviz(
    model,
    out_file=None,
    feature_names=feature_names,
    class_names=lb.classes_,
    filled=True,
    rounded=True,
    special_characters=False,
)
graph = graphviz.Source(dot_data)
# Bare expression: shows the tree when it is the last statement of a notebook
# cell; it has no visible effect when run as a plain script.
graph
|