数据挖掘课程作业题,信息熵还用手算,我心想这不是累傻小子吗?
于是用python读表,让电脑自己算去。
应该根据算出来的结果选取最大值继续算的,懒得写了,先发上来,哪天心情好补上。
gain.py
import math
def I(s1, s2):
    """Return the information entropy (in bits) of a two-class sample.

    :param s1: number of samples whose value is 1
    :param s2: number of samples whose value is 0
    :return: the entropy of the split; 0 when either count is 0 (a pure or
        empty sample), in which case nothing is printed
    """
    # A pure sample carries no information; returning early also avoids
    # evaluating log(0).
    if s1 == 0 or s2 == 0:
        return 0
    s = s1 + s2
    p1 = s1 / s
    p2 = s2 / s
    # math.log2 is more accurate than math.log(x, 2), which divides two
    # natural logarithms.
    ex = -p1 * math.log2(p1) - p2 * math.log2(p2)
    print("I = {}".format(ex))
    return ex
def E(s1, s2, s11, s21, s12, s22):
    """Return the entropy of the class label after splitting on attribute A.

    The result is the weighted sum of the entropies of the two partitions
    (attribute == 1 and attribute == 0), each weighted by its share of the
    samples. The value is also printed as ``E(A) = ...``.

    :param s1: number of samples whose attribute is 1
    :param s2: number of samples whose attribute is 0
    :param s11: samples with attribute 1 whose final class is 1
    :param s21: samples with attribute 1 whose final class is 0
    :param s12: samples with attribute 0 whose final class is 1
    :param s22: samples with attribute 0 whose final class is 0
    :return: the entropy obtained by partitioning on attribute A
    :raises SystemExit: with code -1 (after printing an error message) when
        the four class counts do not add up to ``s1 + s2``
    """
    s = s1 + s2
    if s11 + s12 + s21 + s22 != s:
        print("Error param! Please check!")
        # Raise SystemExit directly: the bare exit() helper is injected by the
        # site module for interactive use and is not guaranteed to exist.
        raise SystemExit(-1)
    if s == 0:
        # Empty sample: there is nothing to split, so avoid dividing by zero.
        entropy = 0.0
    else:
        entropy = (s1 / s) * I(s11, s21) + (s2 / s) * I(s12, s22)
    print("E(A) = {}".format(entropy))
    return entropy
def Gain(s):
    """Return the information gain of one binary attribute.

    The gain is ``I(s[6], s[7]) - E(s[0], ..., s[5])``; it is also printed as
    ``Gain = ...`` followed by a blank line.

    :param s: list of exactly 8 counts (the original notes use a
        "hairy chicken / lays eggs" example):
        0 [attribute == 1 count]              1 [attribute == 0 count]
        2 [attribute == 1 and class == 1]     3 [attribute == 1 and class == 0]
        4 [attribute == 0 and class == 1]     5 [attribute == 0 and class == 0]
        6 [total with class == 1]             7 [total with class == 0]
    :return: the information gain for the attribute
    :raises SystemExit: with code -1 (after printing an error message) when
        ``s`` does not contain exactly 8 items
    """
    if len(s) != 8:
        print("The third param must be a list as 8 member")
        # Raise SystemExit directly: the bare exit() helper is injected by the
        # site module for interactive use and is not guaranteed to exist.
        raise SystemExit(-1)
    gain = I(s[6], s[7]) - E(s[0], s[1], s[2], s[3], s[4], s[5])
    print("Gain = {}\n".format(gain))
    return gain
主函数
main.py
import numpy as np
from gain import Gain
def read_csv(path=r'./layegg.csv'):
    """Load the integer data table from a comma-separated text file.

    The first line of the file is skipped as a header row and every remaining
    field is parsed as an integer.

    :param path: file to read; defaults to ``./layegg.csv`` in the current
        working directory, which is the path this function originally
        hard-coded
    :return: a 2-D numpy integer array with one row per data line
    """
    with open(path, encoding='utf-8') as f:
        # ndmin=2 keeps the result two-dimensional even when the file holds a
        # single data row; otherwise loadtxt returns a 1-D array.
        data = np.loadtxt(f, dtype=int, delimiter=",", skiprows=1, ndmin=2)
    return data
def find_col(tb):
    """Compute the information gain of every attribute column of a table.

    Column 0 is skipped and the last column is used as the class label; every
    column in between is treated as a binary attribute. A cell counts as "1"
    when it equals 1 and as "0" otherwise. For each attribute the 8 counts
    passed to ``Gain`` are printed before the gain is computed.

    :param tb: 2-D array exposing ``shape`` (e.g. the result of ``read_csv``)
    :return: list of ``[column_index, gain]`` pairs, one per attribute column,
        in column order
    """
    table = np.asarray(tb)
    label_is_one = table[:, -1] == 1
    s6 = int(np.count_nonzero(label_is_one))  # rows whose class label is 1
    s7 = table.shape[0] - s6                  # rows whose class label is not 1

    gain_list = []
    for j in range(1, table.shape[1] - 1):
        attr_is_one = table[:, j] == 1
        # Contingency counts; int() keeps them plain Python ints so the
        # printed list looks the same as with hand-counted values.
        s11 = int(np.count_nonzero(attr_is_one & label_is_one))    # attr 1, class 1
        s21 = int(np.count_nonzero(attr_is_one & ~label_is_one))   # attr 1, class 0
        s12 = int(np.count_nonzero(~attr_is_one & label_is_one))   # attr 0, class 1
        s22 = int(np.count_nonzero(~attr_is_one & ~label_is_one))  # attr 0, class 0
        s_1 = s11 + s21  # rows with attribute == 1
        s_2 = s12 + s22  # rows with attribute != 1
        s_list = [s_1, s_2, s11, s21, s12, s22, s6, s7]
        print(s_list)
        gain_list.append([j, Gain(s_list)])
    return gain_list
if __name__ == '__main__':
    # Script entry point: load the table, score every attribute column and
    # print the resulting [column_index, gain] pairs.
    tb = read_csv()
    gain_list = find_col(tb)
    print(gain_list)
layegg.csv
| NO | warmblooded | feathers | fur | swims | layeggs |
|---|---|---|---|---|---|
| 1 | 1 | 1 | 0 | 0 | 1 |
| 2 | 0 | 0 | 0 | 1 | 1 |
| 3 | 1 | 1 | 0 | 0 | 1 |
| 4 | 1 | 1 | 0 | 0 | 1 |
| 5 | 1 | 0 | 0 | 1 | 0 |
| 6 | 1 | 0 | 1 | 0 | 0 |
| 7 | 1 | 1 | 0 | 0 | 1 |
| 8 | 0 | 0 | 0 | 1 | 1 |
| 9 | 1 | 1 | 0 | 0 | 1 |
| 10 | 1 | 1 | 1 | 0 | 1 |