熵权法是计算指标权重的经典算法之一,它用熵值来判断某个指标的离散程度。指标的离散程度越大,其包含的信息量越大,不确定性就越小,熵也就越小;反之,信息量越小,不确定性越大,熵也越大。根据熵的这一特性,可以通过计算熵值来判断一个事件的随机性及无序程度,也可以用熵值来判断某个指标的离散程度:指标的离散程度越大,该指标对综合评价的影响(权重)越大。
python代码
import pandas as pd
import numpy as np
from numpy import array
# Load the example data set from a hard-coded local CSV path.
doctor = pd.read_csv(
    r'D:\WorkSpace\PythonWork\Python学习\数据挖掘Baseline\熵权法实例.csv'
)
# Keep the '科室' column aside as row labels, then remove it from the table.
index = doctor['科室']
doctor = doctor.drop(columns=['科室'])
def cal_weight(x):
    """Compute indicator weights with the entropy weight method.

    Every column of ``x`` is treated as one indicator and every row as one
    sample.  Each column is min-max normalised, the normalised values are
    turned into proportions ``p_ij`` of their column total, and the entropy
    ``e_j = -(1 / ln n) * sum_i(p_ij * ln p_ij)`` is computed with the
    convention ``0 * ln 0 = 0`` (``n`` is the number of rows).  The weight of
    column ``j`` is ``(1 - e_j) / sum_j(1 - e_j)``.

    Args:
        x: ``pandas.DataFrame`` holding numeric indicator values.  The
            computation assumes there are no missing values.

    Returns:
        ``pandas.DataFrame`` with a single ``'weight'`` column, indexed by
        the column labels of ``x``.  A column whose values are all equal
        gets weight 0.

    Raises:
        ValueError: If ``x`` has fewer than two rows, or if every column is
            constant so that no weight can be assigned.
    """
    labels = x.columns
    values = np.asarray(x, dtype=float)
    rows = values.shape[0]
    if rows < 2:
        # ln(1) == 0 would make the entropy scale factor 1 / ln(n) undefined.
        raise ValueError('cal_weight needs at least two rows')

    # Min-max normalisation per column; constant columns would be 0 / 0, so
    # divide them by 1 instead and handle them explicitly further down.
    col_min = values.min(axis=0)
    col_range = values.max(axis=0) - col_min
    constant = col_range == 0
    normalized = (values - col_min) / np.where(constant, 1.0, col_range)

    # Proportion of each sample within its column (column totals are
    # computed once instead of once per cell).
    col_sum = normalized.sum(axis=0)
    p = normalized / np.where(col_sum == 0, 1.0, col_sum)

    # Entropy terms -k * p * ln(p); cells with p == 0 contribute 0.
    k = 1.0 / np.log(rows)
    nonzero = p != 0
    terms = np.zeros_like(p)
    terms[nonzero] = -k * p[nonzero] * np.log(p[nonzero])
    entropy = terms.sum(axis=0)

    # Degree of divergence; a constant column discriminates nothing.
    divergence = 1.0 - entropy
    divergence[constant] = 0.0

    total = divergence.sum()
    if total == 0:
        raise ValueError('cal_weight cannot assign weights: every column is constant')

    # Label the weights with the input's own columns, not a module global.
    return pd.DataFrame({'weight': divergence / total}, index=labels)
# Compute the weight table for the loaded data, then print it followed by
# the completion message (each on its own line).
w = cal_weight(doctor)
print(w, '运行完成!', sep='\n')
计算得分
# Weighted score per row: matrix product of the data table with the weights.
array1 = np.array(doctor)
array2 = np.array(w)
score = pd.DataFrame(array1 @ array2, columns=['score'])
# Label every score with the values kept in `index`.
score.index = index
print(score)
|