# Using the RandomForestClassifier algorithm for feature selection
"""
Random forest feature-importance ranking.

Created on Tue Feb 15 22:07:32 2022
@author: 17584
"""
import pandas as pd
# Fetch the UCI Wine dataset; the raw file has no header row.
url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data'
df = pd.read_csv(url, header=None)
# Label the 14 columns with the strings '1'..'14'.
df.columns = [str(col_no) for col_no in range(1, 15)]
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
# The first column is the target; every remaining column is a feature.
y = df.iloc[:, 0].values
x = df.iloc[:, 1:].values
feat_labels = df.columns[1:]

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=0
)

# Fit a 10,000-tree forest on the training split, then read the per-feature
# importance scores off the fitted model.
forest = RandomForestClassifier(n_estimators=10000, random_state=0, n_jobs=-1)
forest.fit(x_train, y_train)
importances = forest.feature_importances_
import numpy as np
# Distinct values of the first column. The result is discarded, so this only
# shows something when evaluated in an interactive session.
np.unique(df['1'])

# Feature positions ordered from most to least important.
indices = np.argsort(importances)[::-1]

# Print one line per feature: zero-padded rank, the label left-justified in a
# 30-character field, then the importance score.
for rank, feat_idx in enumerate(indices[:x_train.shape[1]], start=1):
    print(f'({rank:02d}){feat_labels[feat_idx]:<30} {importances[feat_idx]:f}')
import matplotlib.pyplot as plt
# Bar chart of the feature importances, drawn in the order given by `indices`.
positions = range(x_train.shape[1])
plt.title('Feature Importance')
plt.bar(positions, importances[indices], color='lightblue', align='center')
# The tick labels must be reordered with the same `indices` as the bars;
# otherwise each bar is captioned with the wrong feature.
plt.xticks(positions, feat_labels[indices], rotation=90)
plt.xlim([-1, x_train.shape[1]])
plt.tight_layout()
plt.show()