from sklearn import linear_model # 线性模型
前提是先准备好数据:推荐使用糖尿病数据集,但本案例使用的是金融数据集,它同样具有强相关性。
数据处理在此不多解释。然后通过切片划分训练集和测试集:一般数据量巨大时按 9:1 划分,数据量较少时按 8:2 划分。
# Train/test split: the final 20 rows are held out for testing and every
# row before them is used for training.
# NOTE(review): data_X and target must already be defined earlier in the
# file; presumably they are row-aligned features and labels -- confirm
# against the data-preparation code.
data_X_train, data_X_test = data_X[:-20], data_X[-20:]
data_Y_train, data_Y_test = target[:-20], target[-20:]

# Instantiate the linear regression model and fit it on the training split.
# fit() returns the estimator itself, so construction and fitting are chained.
regr = linear_model.LinearRegression().fit(data_X_train, data_Y_train)

# Use the trained model to predict the target for the held-out features.
data_y_pred = regr.predict(data_X_test)
print('预测值:\n', data_y_pred)

# Fitted linear regression coefficients.
print('Coefficients线性回归系数:\n', regr.coef_)

# --- Optional evaluation (disabled) ---------------------------------------
# mean_squared_error / r2_score are not imported in the visible part of the
# file; import them (e.g. from sklearn.metrics) before enabling these lines.
# print('Mean squared error:%.2f' % mean_squared_error(data_Y_test, data_y_pred))
# print('Variance score:%.2f' % r2_score(data_Y_test, data_y_pred))
# Mean squared error: take (true value - predicted value), square it, sum
# over all samples and average. It is the loss function of linear
# regression, so the smaller it is, the better.
# print(mean_squared_error(data_Y_test, data_y_pred) / data_Y_test.mean())

# --- Optional plot (disabled) ---------------------------------------------
# NOTE(review): `plot` is presumably an alias for matplotlib.pyplot -- confirm.
# plot.scatter(data_X_test, data_Y_test, color='black')
# plot.plot(data_X_test, data_y_pred, color='blue', linewidth=2)
# plot.xticks(())
# plot.yticks(())
# plot.show()
|