```python
import openpyxl
import numpy as np
import matplotlib.pyplot as plt
import math
import random
import sympy
import os
def array_frequent(lst):
from collections import Counter
HF = Counter(lst).most_common(1)
return HF[0][0]
#数据打开文件路径
Tain_set = openpyxl.load_workbook(os.path.abspath('C:\\Users\\admin\\Desktop\\data.xlsx'))
#输入表格名file_data
# 要读该列的行数从1到row_end
def ReadInCol(file_data,ClassNum,row_end):
BoyData = []
GirData = []
ClassNum = ClassNum+1
for i in range(2, row_end):
Gender = file_data.cell(i,2).value # 读性别
if Gender :
BoyData.append(file_data.cell(i,ClassNum).value)
else:
GirData.append(file_data.cell(i,ClassNum).value)
return BoyData,GirData
#男女50米跑直方图显示
# sheet 数据字典
def Meter50_HistShow(sheet):
# 1性别 2籍贯 3身高 4体重 5鞋码 6(50米成绩) 7肺活量 8喜欢颜色 9喜欢运动 10喜欢文学
Boy50, Gir50 = ReadInCol(Tain_sheet,7,350) # 提取男女50米成绩数组
Boy50_Max = max(Boy50)
Boy50_Min = min(Boy50)
Gir50_Max = max(Gir50)
Gir50_Min = min(Gir50)
DataMax = max(Boy50_Max, Gir50_Max)
DataMin = min(Boy50_Min, Gir50_Min)
X_Show = np.linspace(DataMin, DataMax, round(round(DataMax - DataMin) * 2))
plt.hist(Boy50, X_Show, density=1, color='yellowgreen', histtype='bar', alpha=0.5, edgecolor='white', linewidth=4)
plt.hist(Gir50, X_Show, density=1, color='pink', histtype='bar', alpha=0.5, edgecolor='white', linewidth=4)
plt.xlabel('X_hight')
plt.ylabel('Y_Frequency')
plt.title('50MeterTime DistributionHist')
fig = plt.gcf()
plt.show()
#训练数据读取#(4)
Tain_sheet = Tain_set["Sheet1"]
#(1)男女50米跑直方图显示
Meter50_HistShow(Tain_sheet)
```
报错显示'>' not supported between instances of 'NoneType' and 'int'
一直报错,刚开始通过加print(max(Boy50))测试,发现还是报错。以为是max()问题,然后单独写了函数测试
list1 = [1,2,3,4,5,3,2,0]
print(type(list1))
print(max(list1))
发现执行没有问题
然后才想到'>'的问题,考虑到可能是数据缺失,写一部分进行处理,发现果然是这样。
用list.index(None)查找缺失值的位置,例子如下:
list1 = [1,None,3,4,5,3,2,0]
list2 = list1[1:4]
print(type(list2))
print(list1.index(None))
然后专门写了行处理数据缺失的命令
if file_data.cell(i,ClassNum).value != None:
然后正常运行
|