# 2.7 本章实训 (Section 2.7: chapter hands-on exercise)
# Read the white wine data set.
import csv

# Load every CSV row into memory as a list of string lists.
# - The raw string keeps the backslashes of the Windows path literal without
#   relying on unrecognised escape sequences (\p, \w), which newer Python
#   versions warn about.
# - newline="" lets the csv module handle line endings itself.
# - The `with` block closes the file even if parsing raises.
with open(r"D:\python test\white_wine.csv", "r", newline="") as f:
    reader = csv.reader(f)
    content = list(reader)

# Preview up to the first five rows; slicing (unlike indexing 0..4) does not
# raise IndexError when the file holds fewer than five rows.
for row in content[:5]:
    print(row)
# Find out how many distinct quality grades the white wines are split into.
# The grade is read from the last column of each row; the first row is
# skipped (presumably the CSV header -- confirm against the data file).
quality_list = [int(record[-1]) for record in content[1:]]
quality_count = set(quality_list)  # distinct grades, despite the name
print("白葡萄酒共有%d种等级,分别是:%r" % (len(quality_count), quality_count))
# Split the data set into one subset per quality grade and report how many
# rows each grade has.
# content_dict maps the grade (int, taken from the last column) to the list
# of rows carrying that grade; the first row of `content` is skipped.
content_dict = {}
for row in content[1:]:
    quality = int(row[-1])
    # setdefault creates the grade's list the first time the grade is seen,
    # replacing the explicit "not in content_dict.keys()" branch.
    content_dict.setdefault(quality, []).append(row)
for key, rows in content_dict.items():
    print(key, ":", len(rows))
# Mean of "fixed acidity" for every quality subset, kept as a list of
# (quality, mean) tuples.
mean_list = []
for key, value in content_dict.items():
    # Accumulate in `total` rather than `sum`: rebinding `sum` at module
    # level would shadow the builtin for all code that runs afterwards.
    total = 0
    for row in value:
        total += float(row[0])  # fixed acidity is the first column
    mean_list.append((key, total / len(value)))
for key, mean_value in mean_list:
    print(key, ":", mean_value)
# Mean of "fixed acidity" for every quality subset, kept as a dict that maps
# quality -> mean.
mean_dict = {}
for key, value in content_dict.items():
    # Use a dedicated accumulator name instead of `sum` so the builtin is
    # not shadowed by this block.
    total = 0
    for row in value:
        total += float(row[0])  # first column of each row
    mean_dict[key] = total / len(value)
for key, mean in mean_dict.items():
    print(key, ":", mean)
# Variance of the first column ("fixed acidity") for every quality subset,
# kept as a list of (quality, variance) tuples. This is the population
# variance: the squared deviations are divided by the subset size N.
var_list = []
for key, value in content_dict.items():
    mean = mean_dict[key]
    # Named accumulator instead of `sum`, so the builtin is not shadowed.
    squared_dev = 0
    for row in value:
        squared_dev += (float(row[0]) - mean) ** 2
    var_list.append((key, squared_dev / len(value)))
for key, variance in var_list:
    print(key, ":", variance)
|