Model Formula
Linear regression, logistic regression, Poisson regression... all of these regressions can be abstracted into one common form.
That form is the generalized linear model (Generalized Linear Models, GLM):
$$\hat{y}(w, X) = h(Xw)$$
Different regression models correspond to different functions $h$.
For standard linear regression, $h$ is the identity function:

$$\hat{y}_{linear} = Xw$$
For Gamma regression, $h$ is the exponential function: $e$ raised to the power $\hat{y}_{linear}$. The complete Gamma model is:

$$\hat{y} = e^{Xw}$$
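As a quick illustration of the two inverse link functions, here is a minimal numpy sketch (toy numbers of my own, not from the original post):

```python
import numpy as np

X = np.array([[1.0, 2.0], [2.0, 3.0]])  # hypothetical design matrix
w = np.array([0.1, 0.2])                # hypothetical coefficients

lin_pred = X @ w              # the linear predictor Xw
y_linear = lin_pred           # identity h: standard linear regression
y_gamma = np.exp(lin_pred)    # exponential h: Gamma regression
print(y_linear, y_gamma)
```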
Loss Function
Likewise, all of these regressions share one generic loss function:
$$\min_{w} \frac{1}{2 n_{\text{samples}}} \sum_i d(y_i, \hat{y}_i) + \frac{\alpha}{2} ||w||_2^2$$
- where $||w||_2$ is the L2 norm of the coefficient vector and $\alpha$ controls the strength of the L2 penalty, as the short sketch below illustrates
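For concreteness, the penalty term on its own can be computed like this (a tiny sketch with made-up coefficients):

```python
import numpy as np

w = np.array([0.5, -1.0])  # hypothetical coefficients, intercept excluded
alpha = 1.0
penalty = 0.5 * alpha * np.dot(w, w)  # (alpha / 2) * ||w||_2^2
print(penalty)  # 0.625
```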
Different regression models correspond to different functions $d$.
For standard linear regression, $d$ is the squared error

$$d(y_i, \hat{y}_i) = (y_i - \hat{y}_i)^2$$

with $\alpha = 0$, which recovers ordinary least squares.
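With these choices the generic loss reduces to half the mean squared error, which a quick numpy check confirms (toy numbers of my own):

```python
import numpy as np

X = np.array([[1.0, 2.0], [2.0, 3.0], [3.0, 4.0]])
w = np.array([0.5, 1.5])
y = np.array([3.0, 5.0, 8.0])

y_hat = X @ w  # identity link, so y_hat is the linear predictor itself
# generic loss with d = squared error and alpha = 0
loss = np.sum((y - y_hat) ** 2) / (2 * len(y))
print(loss, 0.5 * np.mean((y - y_hat) ** 2))  # the two values agree
```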
For Gamma regression, $d$ is the unit deviance of the Gamma distribution:

$$d(y_i, \hat{y}_i) = 2\left(\log\frac{\hat{y}_i}{y_i} + \frac{y_i}{\hat{y}_i} - 1\right)$$
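The gradient code below relies on the derivative of this deviance with respect to the linear predictor $\eta_i = (Xw)_i$. Since $\hat{y}_i = e^{\eta_i}$ implies $\partial \hat{y}_i / \partial \eta_i = \hat{y}_i$, the chain rule gives (a derivation added here for clarity; it matches the `temp` expression in the code):

$$\frac{\partial d(y_i, \hat{y}_i)}{\partial \eta_i} = \hat{y}_i \cdot \frac{-2(y_i - \hat{y}_i)}{\hat{y}_i^2} = 2\left(1 - \frac{y_i}{\hat{y}_i}\right)$$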
Implementing the Loss and Gradient in Code
This section first implements the loss and gradient by hand in numpy, then recomputes the gradient with PyTorch's automatic differentiation to verify that the results agree.
```python
import torch
import numpy as np


def grad(coef, X, y, alpha=1):
    """Gradient of the penalized Gamma deviance w.r.t. coef (intercept first)."""
    offset = 1
    lin_pred = X @ coef[offset:] + coef[0]
    y_pred = np.exp(lin_pred)
    d1 = np.exp(lin_pred)  # d(y_pred)/d(lin_pred); equals y_pred for the exp link
    coef_scaled = alpha * coef[offset:]
    # chain rule: d(deviance)/d(lin_pred) = 2 * (1 - y / y_pred)
    temp = d1 * -2 * (y - y_pred) / np.power(y_pred, 2)
    devp = np.concatenate(([temp.sum()], temp @ X))
    grad = 0.5 * devp
    grad[offset:] += coef_scaled  # L2 penalty gradient; the intercept is not penalized
    return grad


def loss(coef, y, y_pred, alpha=1):
    """Penalized Gamma deviance (the 1/n_samples factor from the formula is omitted)."""
    offset = 1
    coef_scaled = alpha * coef[offset:]
    dev = np.sum(2 * (np.log(y_pred / y) + y / y_pred - 1))
    loss = 0.5 * dev + 0.5 * (coef[offset:] @ coef_scaled)
    return loss


X = np.array([[1, 2], [2, 3], [3, 4], [4, 3]])
y = np.array([19, 26, 33, 30])

# --- numpy implementation ---
coef = np.zeros(X.shape[1] + 1)
bias = np.log(np.average(y))  # initialize the intercept so that exp(bias) = mean(y)
coef[0] = bias
linear_pred = X @ coef[1:] + coef[0]
gamma_pred = np.exp(linear_pred)
loss_np = loss(coef, y, gamma_pred, 1)
grad_np = grad(coef, X, y, 1)[1:]
print(f"Loss: {loss_np} Grad: {grad_np}")

# --- PyTorch verification via autograd ---
X = torch.from_numpy(X).double()
y = torch.from_numpy(y).double()
coef = torch.from_numpy(coef)
coef.requires_grad = True
linear_pred = X @ coef[1:] + coef[0]
gamma_pred = torch.exp(linear_pred)
coef_scaled = 1 * coef[1:]
dev = (2 * (torch.log(gamma_pred / y) + y / gamma_pred - 1)).sum()
loss_pt = 0.5 * dev + 0.5 * (coef[1:] @ coef_scaled)
loss_pt.backward()
grad_pt = coef.grad[1:].numpy()
print(f"Loss: {loss_pt} Grad: {grad_pt}")

# compare with a tolerance; exact `==` and the `.all() == .all()` checks were unreliable
assert np.isclose(loss_np, loss_pt.item())
assert np.allclose(grad_np, grad_pt)
```
Complete Implementation
```python
import numpy as np
from scipy.optimize import minimize

X = np.array([[1, 2], [2, 3], [3, 4], [4, 3]])
y = np.array([19, 26, 33, 30])


class GammaRegressor:
    def __init__(self, alpha=1):
        self.alpha = alpha

    def init_model(self, X, y):
        _, n_features = X.shape
        coef = np.zeros(n_features + 1)
        coef[0] = np.log(np.average(y))  # start from exp(intercept) = mean(y)
        return coef

    def show_coef(self, coef):
        print(f"[INFO] current coefficients: {coef}")

    def fit(self, X, y):
        """Fit by minimizing the penalized Gamma deviance with L-BFGS-B."""
        coef = self.init_model(X, y)

        def cal_loss_and_grad(coef, obj, X, y):
            offset = 1
            lin_pred = X @ coef[offset:] + coef[0]
            y_pred = np.exp(lin_pred)
            coef_scaled = obj.alpha * coef[offset:]
            dev = np.sum(2 * (np.log(y_pred / y) + y / y_pred - 1))
            loss = 0.5 * dev + 0.5 * (coef[offset:] @ coef_scaled)
            d1 = np.exp(lin_pred)
            temp = d1 * -2 * (y - y_pred) / np.power(y_pred, 2)
            devp = np.concatenate(([temp.sum()], temp @ X))
            grad = 0.5 * devp
            grad[offset:] += coef_scaled
            print(f"[INFO] current gradient: {grad}")
            print(f"[INFO] current loss: {loss}")
            return loss, grad

        args = (self, X, y)
        opt_res = minimize(
            cal_loss_and_grad,
            coef,
            method="L-BFGS-B",
            jac=True,  # the objective returns (loss, grad) together
            options={
                "maxiter": 5,
                "disp": True,
            },
            args=args,
            callback=self.show_coef,
        )
        coef = opt_res.x
        return coef


gmr = GammaRegressor()
gmr.fit(X, y)
```
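The class above only returns the fitted coefficients; to actually use the model, the prediction step applies the inverse link to the linear predictor. A minimal sketch (this `predict` helper is my own addition, assuming the intercept-first coefficient layout used above):

```python
import numpy as np

def predict(coef, X):
    # Gamma GLM prediction: exp(intercept + X @ weights)
    return np.exp(X @ coef[1:] + coef[0])

# usage sketch: capture the coefficients returned by fit, then predict
# coef = gmr.fit(X, y)
# print(predict(coef, X))
```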