1. Python 实现代码
def stepwise_select(data, label, cols_all, method='forward'):
    """Select logistic-regression features by AIC-driven stepwise search.

    At every step each candidate move (adding one excluded column and/or
    dropping one included column, depending on ``method``) is scored by
    the AIC of a ``statsmodels`` Logit model fitted with an intercept.
    The move with the lowest AIC is applied if it strictly improves on
    the current best AIC; otherwise the search stops.

    Args:
        data: Data source, indexable by a column name and by a list of
            column names (the original documents it as a DataFrame).
        label: Name of the target column in ``data``.
        cols_all: All candidate feature columns. The list is copied and
            never modified.
        method: ``'forward'`` starts from no features and only adds;
            ``'backward'`` starts from all features and only drops;
            ``'both'`` starts from all features and may drop a feature
            or re-add a previously dropped one at each step.

    Returns:
        A tuple ``(select_col, summary, AIC)``: the list of retained
        columns, the ``summary()`` of the final fitted model, and that
        model's AIC.

    Raises:
        ValueError: If ``method`` is not one of the three supported values.
    """
    if method not in ('forward', 'backward', 'both'):
        raise ValueError(
            "method must be 'forward', 'backward' or 'both', got %r" % (method,)
        )

    import statsmodels.api as sm

    y = data[label]

    def fit(cols):
        # Fit a Logit model on the given columns plus an intercept term.
        X = sm.add_constant(data[list(cols)])
        return sm.Logit(y, X).fit()

    # Work on private copies so the caller's ``cols_all`` stays intact.
    if method == 'forward':
        selected = []
        excluded = list(cols_all)
        # Starting from +inf means the first forward step always adds the
        # best single feature (there is no intercept-only baseline).
        best_aic = float('inf')
    else:
        selected = list(cols_all)
        excluded = []
        best_aic = fit(selected).aic

    may_drop = method in ('backward', 'both')
    may_add = method in ('forward', 'both')

    while True:
        # Maps a column to the AIC of the model obtained by toggling it.
        # ``selected`` and ``excluded`` are disjoint, so keys never clash.
        candidates = {}
        if may_drop:
            for col in selected:
                print(col)
                candidates[col] = fit([c for c in selected if c != col]).aic
        if may_add:
            for col in excluded:
                print(col)
                candidates[col] = fit(selected + [col]).aic

        # Nothing left to try (e.g. every column was already dropped).
        if not candidates:
            break

        best_col = min(candidates, key=candidates.get)
        if candidates[best_col] < best_aic:
            if best_col in excluded:
                excluded.remove(best_col)
                selected.append(best_col)
            else:
                selected.remove(best_col)
                excluded.append(best_col)
            best_aic = candidates[best_col]
        else:
            break

    select_col = selected
    final_model = fit(select_col)
    summary = final_model.summary()
    AIC = final_model.aic
    return select_col, summary, AIC
2. R 实现代码
# Read the spreadsheet, fit a logistic regression of `label` on every other
# column, then run bidirectional AIC-based stepwise selection on that model.
library('xlsx')
library('MASS')  # stepAIC() is provided by MASS; it must be attached first
data <- read.xlsx('C:\\Users\\dw\\Desktop\\ss1.xlsx', sheetName = 'Sheet1')
glm1 <- glm(label ~ ., family = binomial(link = logit), data = data)
stepAIC(glm1, direction = 'both')
想了解更多,可以关注我哦~
|