from time import time
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import mean_squared_error
# Rank the input factors of a data set by random-forest feature importance.
#
# The argument to np.loadtxt below is the path of the CSV data file; change it
# to point at your own copy of the data.
data = np.loadtxt('C:/Users/Administrator.DESKTOP-BT0S13O/Desktop/数据1.csv',delimiter=',')

# Column 0 is used as the regression target, every other column as a feature.
RON = data[:, 0]
factors = data[:, 1:]

# Number of top-ranked features to keep, also used as the per-split feature
# budget of the forest.
TOP_K = 100

# Clamp the per-split budget to the number of available columns so the script
# also works on data sets with fewer than TOP_K features.
# NOTE(review): some scikit-learn releases appear to reject max_features larger
# than the feature count - confirm against the installed version.
n_split_features = min(TOP_K, factors.shape[1])

X_train, X_test, y_train, y_test = train_test_split(
    factors, RON, test_size=0.2, shuffle=True, random_state=0
)

forest = RandomForestRegressor(
    n_estimators=500,
    random_state=0,
    max_features=n_split_features,
    n_jobs=2,
)

# Time the training step only.
t0 = time()
forest.fit(X_train, y_train)
print("done in %0.3fs" % (time() - t0))

# Mean squared error on the training split (not a generalisation estimate).
y_pred = forest.predict(X_train)
mse = mean_squared_error(y_train, y_pred)
print(mse)

# forest.score reports the R^2 coefficient on the given split.
print("Traing Score:%f" %forest.score(X_train, y_train))
print("Testing Score:%f" %forest.score(X_test, y_test))

# Importance of each feature column, in column order.
importances = forest.feature_importances_
print('每个维度对应的重要性因子:\n',importances)

# The same importances sorted from largest to smallest, first as raw values
# and then scaled by 100.
print(sorted(importances,reverse=True))
print(sorted(importances*100,reverse=True))

# Column indices ordered from most to least important ([::-1] reverses the
# ascending order returned by argsort).
indices = np.argsort(importances)[::-1]
print('得到按维度重要性因子排序的维度的序号:\n',indices)

# Keep the TOP_K most important columns (slicing yields fewer if the data set
# has fewer columns).
most_import = indices[:TOP_K]

# NOTE(review): this prints the full training matrix followed by the selected
# indices; if the intent was to show only the selected columns it should be
# X_train[:, most_import] - confirm before changing the output.
print(X_train,most_import)