# 波士顿房价数据链接:https://pan.baidu.com/s/1JPrcNl1AgNCKEHCjOGyHvQ? 提取码:1234
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import metrics #评价函数库
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor #导入随机森林
from sklearn.model_selection import GridSearchCV #网格搜索验证
from sklearn import tree
import pydotplus #绘制随机森林
from IPython.display import Image,display #显示图像
%matplotlib inline #在当前环境中显示图像
# Read the housing table and discard every row that contains a missing value.
df = pd.read_csv("D:/波士顿房价预测/boston_housing_data.csv").dropna()

# The MEDV column is the regression target; all remaining columns are features.
y = df["MEDV"]
x = df.drop(columns=["MEDV"])

# Hold out a test set. random_state=0 makes the shuffle repeatable; the split
# proportions are left at the library default.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=0,
)
# Hyper-parameter grid for the random forest. The candidate values are
# arbitrary presets and can be adjusted freely.
param_grid = {
    "n_estimators": [5, 10, 20, 100, 200],
    "max_depth": [3, 5, 7],
    # Floats are fractions of the feature count. The last entry must be the
    # float 1.0 (all features): the int 1 would mean "exactly one feature per
    # split", which breaks the 0.6 / 0.7 / 0.8 progression.
    "max_features": [0.6, 0.7, 0.8, 1.0],
}

# Seed the forest so the search result is repeatable, matching the seeded
# train/test split.
rf = RandomForestRegressor(random_state=0)

# Exhaustive search over param_grid with 3-fold cross-validation to find the
# best-scoring parameter combination.
grid = GridSearchCV(rf, param_grid=param_grid, cv=3)
grid.fit(x_train, y_train)  # run the search on the training split

# Bare expression: shows the best parameter combination as notebook output.
grid.best_params_
data:image/s3,"s3://crabby-images/0a75e/0a75e57e2b417cab4ca370557fdb43e9dac4b23e" alt=""
# Take the estimator that the grid search selected as the final model.
model = grid.best_estimator_ #estimator built with the best parameter combination
# Bare expression: shows the estimator's repr as notebook output when it is the
# last statement of the cell.
model
data:image/s3,"s3://crabby-images/a71d6/a71d6b3a190b5096afb975f75e2bce2515e768ab" alt=""
# NOTE(review): nothing below draws on this Matplotlib figure (the tree is
# rendered by Graphviz and shown through IPython's Image), so this call looks
# redundant -- kept to preserve the original cell output; confirm and remove.
plt.figure(figsize=(20,20))

# Choose which tree of the forest to draw. The grid search can select a forest
# with fewer than ten trees (n_estimators=5 is one of the candidates), so the
# index is clamped to the last tree instead of raising IndexError on [9].
tree_index = min(9, len(model.estimators_) - 1)
estimator = model.estimators_[tree_index]

# Export the tree as Graphviz DOT source (out_file=None returns it as a string).
# feature_names labels each split with the real column name instead of X[i].
data = tree.export_graphviz(
    estimator,
    out_file=None,
    feature_names=list(x.columns),
    filled=True,
    rounded=True,
)

# Build the graph from the DOT text and display it as a PNG in the notebook.
graph = pydotplus.graph_from_dot_data(data)
display(Image(graph.create_png()))
data:image/s3,"s3://crabby-images/283c2/283c21fb33de61c5faa0bea9a5ba43676cef91f2" alt=""
# Bare expression: shows the per-feature importance array as notebook output.
model.feature_importances_ #feature importance analysis: the larger the value, the greater the feature's influence
data:image/s3,"s3://crabby-images/aad8c/aad8cb502d22c2a0811dc397aa6c27283f88d2d9" alt=""
# Bare expression: shows the model's predictions for the test features as
# notebook output (the result is not stored).
model.predict(x_test) #predict
data:image/s3,"s3://crabby-images/27dc8/27dc8563a199c61e44f93b34d2fda889ee36a60f" alt=""
# Mean squared error (MSE) of the model's predictions on the held-out test set.
y_pred = model.predict(x_test)
MSE = metrics.mean_squared_error(y_test, y_pred)

# Root mean squared error (RMSE): the square root of the MSE, expressed in the
# same units as the target.
RMSE = np.sqrt(MSE)

# Bare expression: shows the MSE as notebook output.
MSE
data:image/s3,"s3://crabby-images/5c8ec/5c8ecd2faccb83e560d4b282dd158f83f8094b93" alt=""
|