回归算法
线性回归
API
代码示例
波士顿房价预测
from sklearn.linear_model import LinearRegression
from sklearn.datasets import load_boston
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Boston house-price prediction with ordinary least-squares linear regression.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and REMOVED in
# 1.2; on newer versions substitute fetch_california_housing() or
# fetch_openml(name="boston", version=1).
data = load_boston()

# Fixed random_state so the split (and the printed coefficients) is
# reproducible, consistent with the Ridge example in these notes.
x_train, x_test, y_train, y_test = train_test_split(
    data.data, data.target, random_state=0
)

# Standardize features: fit the scaler on the training set only, then apply
# the same transform to the test set to avoid information leakage.
transfer = StandardScaler()
x_train = transfer.fit_transform(x_train)
x_test = transfer.transform(x_test)

reg = LinearRegression()
reg.fit(x_train, y_train)
print(reg.coef_, reg.intercept_)  # learned weights and bias

y_predict = reg.predict(x_test)
print(y_test)
print(y_predict)

# Plot ground truth (red crosses) against predictions (blue line),
# indexed by test-sample position.
x = list(range(len(y_test)))
plt.plot(x, y_test, 'rx')
plt.plot(x, y_predict, 'b-')
plt.show()
from sklearn.linear_model import SGDRegressor
from sklearn.datasets import load_boston
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Boston house-price prediction with linear regression fitted by stochastic
# gradient descent (SGDRegressor) instead of the closed-form solution.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and REMOVED in
# 1.2; on newer versions substitute fetch_california_housing() or
# fetch_openml(name="boston", version=1).
data = load_boston()

# Fixed random_state for a reproducible split, consistent with the Ridge
# example in these notes. (SGDRegressor itself is still stochastic unless
# given its own random_state.)
x_train, x_test, y_train, y_test = train_test_split(
    data.data, data.target, random_state=0
)

# Standardization is essential for SGD: fit on the training set only, then
# reuse the fitted scaler on the test set.
transfer = StandardScaler()
x_train = transfer.fit_transform(x_train)
x_test = transfer.transform(x_test)

reg = SGDRegressor()
reg.fit(x_train, y_train)
print(reg.coef_, reg.intercept_)  # learned weights and bias

y_predict = reg.predict(x_test)
print(y_test)
print(y_predict)

# Plot ground truth (red crosses) against predictions (blue line),
# indexed by test-sample position.
x = list(range(len(y_test)))
plt.plot(x, y_test, 'rx')
plt.plot(x, y_predict, 'b-')
plt.show()
回归模型评估
- 方法: 均方误差
- API
sklearn.metrics.mean_squared_error
mse = mean_squared_error(y_test,y_predict)
岭回归
实质上是一种改良的最小二乘估计法,通过放弃最小二乘法的无偏性,以损失部分信息、降低精度为代价获得回归系数更为符合实际、更可靠的回归方法,对病态数据的拟合要强于最小二乘法。
代码示例
波士顿房价预测
from sklearn.linear_model import Ridge
from sklearn.datasets import load_boston
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Boston house-price prediction with Ridge regression (L2-regularized least
# squares), evaluated with mean squared error.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and REMOVED in
# 1.2; on newer versions substitute fetch_california_housing() or
# fetch_openml(name="boston", version=1).
data = load_boston()

# random_state=0 makes the split, coefficients, and MSE reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    data.data, data.target, random_state=0
)

# Standardize features: fit on the training set only, reuse on the test set.
transfer = StandardScaler()
x_train = transfer.fit_transform(x_train)
x_test = transfer.transform(x_test)

# alpha controls the regularization strength; larger alpha shrinks the
# coefficients more.
reg = Ridge(max_iter=10000, alpha=0.8)
reg.fit(x_train, y_train)
print(reg.coef_, reg.intercept_)  # learned weights and bias

y_predict = reg.predict(x_test)
mse = mean_squared_error(y_test, y_predict)
# BUG FIX: the original label '方差为:' ("variance") mislabeled this metric —
# mean_squared_error computes the MSE, not the variance.
print('均方误差(MSE):', mse)

# Plot ground truth (red crosses) against predictions (blue line),
# indexed by test-sample position.
x = list(range(len(y_test)))
plt.plot(x, y_test, 'rx')
plt.plot(x, y_predict, 'b-')
plt.show()
逻辑回归
y=1时,预测结果(横轴)越接近1,损失越小
y=0时,预测结果(横轴)越接近1,损失越大
- 损失函数
$$\mathrm{cost}(h_\theta(x), y) = \sum_{i=1}^{m}\Big(-y_i \log\big(h_\theta(x)\big) - (1 - y_i)\log\big(1 - h_\theta(x)\big)\Big)$$