Section 3 - Gradient Descent [Learning PyTorch by following Bilibili uploader "刘二大人"]
Preface
- This column collects the homework that I, a complete beginner, finished while following Bilibili uploader 刘二大人's PyTorch course; for the original video, click here.
Problem
- This lecture has no assigned exercise, so I decided to implement stochastic gradient descent and batch gradient descent myself.
Analysis
- The model is \hat{y} = w * x
- This time we use a bit more training data: take x \in [0.0, 10.0] with a step of 0.1, and simply set y = 2 * x:
X = np.arange(0.0, 10.0, 0.1)
y = X * 2
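Before coding the training loops, it is worth writing out the gradients they rely on (a quick derivation; it matches the loss/gradient and cost/gradient functions used in the code below). For a single sample, the squared error and its derivative with respect to w are

\ell(w) = (\hat{y} - y)^2 = (w x - y)^2, \qquad \frac{\partial \ell}{\partial w} = 2 x\,(w x - y)

and for a batch of N samples the cost is the average of these, so the gradient is averaged as well:

J(w) = \frac{1}{N}\sum_{n=1}^{N}(w x_n - y_n)^2, \qquad \frac{\partial J}{\partial w} = \frac{1}{N}\sum_{n=1}^{N} 2 x_n (w x_n - y_n)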
随机梯度下降
Stochastic Gradient Descent
Stochastic gradient descent uses only one sample for every update, so everything hinges on that single sample; it is the extreme case opposite to batch gradient descent.
- The advantages of this method:
- Each update runs very fast, the memory overhead is small, and it can train on massive datasets.
- When the loss function is irregular (has multiple local minima), it is more likely to jump out of a local minimum and end up close to the global minimum.
- There is also a chance of jumping past saddle points.
- The implementation simply picks one random sample from the training set for each update; usually the training set is also shuffled before each epoch (the loop below just draws a random index each step; a shuffle-per-epoch variant is sketched after the prediction results). The code is as follows:
import time

import numpy as np
import matplotlib.pyplot as plt

X = np.arange(0.0, 10.0, 0.1)
y = X * 2

w = 0.0        # initial weight
eta = 0.001    # learning rate
result = {
    'w': [],
    'loss': []
}

def predict(w, x):
    return w * x

def loss(y, y_pred):
    # squared error of a single sample
    return (y_pred - y) ** 2

def gradient(x, y, y_pred):
    # gradient of the squared error of a single sample
    return 2 * x * (y_pred - y)

for step in range(1, 10001):
    # pick one random sample for this update
    index = np.random.randint(0, len(X))
    y_pred = predict(w, X[index])
    loss_val = loss(y[index], y_pred)   # keep a separate name so the loss() function is not shadowed
    grad = gradient(X[index], y[index], y_pred)
    result['w'].append(w)
    result['loss'].append(loss_val)
    print('\rEpoch: {:>5d}/{} [{}{}] w={:>.4f} loss={:>.4f}'.format(
        step, 10000,
        '■' * int(step/10000*20),
        '□' * (20 - int(step/10000*20)),
        w, loss_val
    ), end='')
    w -= eta * grad
    time.sleep(0.01)   # slow down so the progress bar stays visible

plt.plot(result['w'], result['loss'], '-.', label='loss')
plt.xlabel('w')
plt.ylabel('loss')
plt.legend()
plt.show()
- Take a look at the training result
- You can see that the loss keeps bouncing up and down, but its amplitude gradually shrinks
- Make a prediction
In[]: print('Predict: x = %.2f --> y = %.4f' % (4.0, predict(w, 4.0)))
Out[]: Predict: x = 4.00 --> y = 8.0000
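The loop above draws one random index per step with np.random.randint. The shuffle-before-each-epoch variant mentioned earlier visits every sample exactly once per epoch instead; here is a minimal sketch of that variant, reusing the predict/gradient helpers and the w, eta, X, y defined above (the epoch count of 100 is an arbitrary choice for illustration):

for epoch in range(100):
    # shuffle the sample order once per epoch
    perm = np.random.permutation(len(X))
    for index in perm:
        # one update per sample, exactly as in the loop above
        y_pred = predict(w, X[index])
        grad = gradient(X[index], y[index], y_pred)
        w -= eta * grad

Either way, each update is driven by a single sample; the only difference is whether samples are drawn with or without replacement within an epoch.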
Mini-Batch Gradient Descent
Mini-batch gradient descent trains on a small subset of the full training set at every step. It combines the advantages of batch gradient descent and stochastic gradient descent; neither advantage is as pronounced as in the pure versions, but it takes the best of both worlds.
- Note: choose the mini-batch size according to the problem at hand (an epoch-based batching variant is sketched after the prediction results). The code is as follows:
import time

import numpy as np
import matplotlib.pyplot as plt

X = np.arange(0.0, 10.0, 0.1)
y = X * 2

w = 0.0        # initial weight
eta = 0.001    # learning rate
result = {
    'w': [],
    'cost': []
}

def predict(w, X):
    return w * X

def cost(y, y_pred):
    # mean squared error over the mini-batch
    return ((y_pred - y) ** 2).sum() / y.shape[0]

def gradient(X, y, y_pred):
    # mean gradient over the mini-batch
    return (2 * X * (y_pred - y)).sum() / X.shape[0]

for step in range(1, 1001):
    # draw a mini-batch of 5 distinct samples
    index = np.random.choice(X.shape[0], 5, replace=False)
    train_x, train_y = X[index], y[index]
    y_pred = predict(w, train_x)
    cost_val = cost(train_y, y_pred)   # keep a separate name so the cost() function is not shadowed
    result['w'].append(w)
    result['cost'].append(cost_val)
    print('\rEpoch: {:>3d}/{} [{}{}] w={:>.4f} loss={:>.4f}'.format(
        step, 1000,
        '■' * int(step/1000*20),
        '□' * (20 - int(step/1000*20)),
        w, cost_val
    ), end='')
    grad = gradient(train_x, train_y, y_pred)
    w -= eta * grad
    # reshuffle the training set (not strictly needed here, since indices are drawn at random anyway)
    perm = np.random.permutation(len(X))
    X, y = X[perm], y[perm]
    time.sleep(0.01)   # slow down so the progress bar stays visible

plt.plot(result['w'], result['cost'], '-.', label='cost')
plt.xlabel('w')
plt.ylabel('cost')
plt.legend()
plt.show()
- Take a look at the training result
In[]: print('Predict: x = %.2f --> y = %.4f' % (4.0, predict(w, 4.0)))
Out[]: Predict: x = 4.00 --> y = 8.0000
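The loop above samples a fresh random batch at every step. An equivalent and also common formulation shuffles the data once per epoch and then walks through it in consecutive chunks, so every sample is used exactly once per epoch. A minimal sketch, reusing the vectorized predict/gradient helpers and the w, eta, X, y defined above (the epoch count of 100 and batch size of 5 are illustrative choices):

batch_size = 5
for epoch in range(100):
    # shuffle the sample order once per epoch
    perm = np.random.permutation(len(X))
    for start in range(0, len(X), batch_size):
        batch = perm[start:start + batch_size]
        # one update per mini-batch; gradient() already averages over the batch
        y_pred = predict(w, X[batch])
        grad = gradient(X[batch], y[batch], y_pred)
        w -= eta * grad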
Full Code
Batch Gradient Descent
import time

import numpy as np
import matplotlib.pyplot as plt


class TotalGradient:
    def __init__(self, w_initial=0.0, n_iter=1000, eta=0.005):
        self.w = w_initial
        self.n_iter = n_iter
        self.eta = eta
        # small per-step delay so the progress bar animation stays visible
        self.delay = 0.1 ** (len(str(n_iter))-1) * n_iter / pow(10, (len(str(n_iter)))-1)

    def __predict(self, w, X):
        return w * X

    def __cost(self, y, y_pred):
        # mean squared error over the whole training set
        return ((y_pred - y) ** 2).sum() / y.shape[0]

    def __gradient(self, X, y, y_pred):
        # mean gradient over the whole training set
        return (2 * X * (y_pred - y)).sum() / X.shape[0]

    def __process(self, data):
        return data if isinstance(data, np.ndarray) else np.array(data)

    def train(self, X, y):
        X, y = self.__process(X), self.__process(y)
        result = {
            'w': [],
            'loss': []
        }
        for step in range(1, self.n_iter+1):
            # every step uses the full training set
            y_pred = self.__predict(self.w, X)
            loss = self.__cost(y, y_pred)
            result['w'].append(self.w)
            result['loss'].append(loss)
            print('\rEpoch: {:>2d}/{:>2d} [{}{}] w={:>.4f} loss={:>.4f}'.format(
                step, self.n_iter,
                '■' * int(step/self.n_iter*20),
                '□' * (20-int(step/self.n_iter*20)),
                self.w, loss
            ), end='')
            grad = self.__gradient(X, y, y_pred)
            self.w -= self.eta * grad
            time.sleep(self.delay)
        plt.plot(result['w'], result['loss'], '-.', label='loss')
        plt.xlabel('w')
        plt.ylabel('loss')
        plt.legend()
        plt.show()

    def predict(self, x):
        pred = self.__predict(self.w, x)
        print('Predict: x = %.2f --> y = %.4f' % (x, pred))
        return pred
In[]: total = TotalGradient()
total.train(X, y)
Out[]: Epoch: 1000/1000 [■■■■■■■■■■■■■■■■■■■■] w=2.0000 loss=0.0000
In[]: total.predict(4)
Out[]: Predict: x = 4.00 --> y = 8.0000
Stochastic Gradient Descent
class Stochastic_gradient:
    def __init__(self, w_initial=0.0, n_iter=1000, eta=0.001):
        self.w = w_initial
        self.n_iter = n_iter
        self.eta = eta
        # small per-step delay so the progress bar animation stays visible
        self.delay = 0.1 ** (len(str(n_iter))-1) * n_iter / pow(10, (len(str(n_iter)))-1)

    def __predict(self, w, x):
        return w * x

    def __loss(self, y, y_pred):
        # squared error of a single sample
        return (y_pred - y) ** 2

    def __gradient(self, x, y, y_pred):
        # gradient of the squared error of a single sample
        return 2 * x * (y_pred - y)

    def __process(self, data):
        return data if isinstance(data, np.ndarray) else np.array(data)

    def train(self, X, y):
        X, y = self.__process(X), self.__process(y)
        result = {
            'w': [],
            'loss': []
        }
        for step in range(1, self.n_iter + 1):
            # every step uses one randomly chosen sample
            index = np.random.randint(0, len(X))
            y_pred = self.__predict(self.w, X[index])
            loss = self.__loss(y[index], y_pred)
            result['w'].append(self.w)
            result['loss'].append(loss)
            print('\rEpoch: {:>{}d}/{} [{}{}] w={:>.2f} loss={:>.4f}'.format(
                step, len(str(self.n_iter)), self.n_iter,
                '■' * int(step/self.n_iter*20),
                '□' * (20 - int(step/self.n_iter*20)),
                self.w, loss
            ), end='')
            grad = self.__gradient(X[index], y[index], y_pred)
            self.w -= self.eta * grad
            time.sleep(self.delay)
        plt.plot(result['w'], result['loss'], '-.', label='loss')
        plt.xlabel('w')
        plt.ylabel('loss')
        plt.legend()
        plt.show()

    def predict(self, x):
        pred = self.__predict(self.w, x)
        print('Predict: x = %.2f --> y = %.4f' % (x, pred))
        return pred
In[]: sgd = Stochastic_gradient(n_iter=10000)
sgd.train(X, y)
Out[]: Epoch: 10000/10000 [■■■■■■■■■■■■■■■■■■■■] w=2.00 loss=0.000012
In[]: sgd.predict(4)
Out[]: Predict: x = 4.00 --> y = 8.0000
Mini-Batch Gradient Descent
class Batch_gradient:
    def __init__(self, w_initial=0.0, n_iter=1000, eta=0.001, batch_size=5):
        self.w = w_initial
        self.n_iter = n_iter
        self.eta = eta
        self.batch = batch_size
        # small per-step delay so the progress bar animation stays visible
        self.delay = 0.1 ** (len(str(n_iter))-1) * n_iter / pow(10, (len(str(n_iter)))-1)

    def __predict(self, w, X):
        return w * X

    def __cost(self, y, y_pred):
        # mean squared error over the mini-batch
        return ((y_pred - y) ** 2).sum() / y.shape[0]

    def __gradient(self, X, y, y_pred):
        # mean gradient over the mini-batch
        return (2 * X * (y_pred - y)).sum() / X.shape[0]

    def __process(self, data):
        return data if isinstance(data, np.ndarray) else np.array(data)

    def train(self, X, y):
        X, y = self.__process(X), self.__process(y)
        result = {
            'w': [],
            'cost': []
        }
        for step in range(1, self.n_iter+1):
            # draw a mini-batch of distinct samples
            index = np.random.choice(X.shape[0], self.batch, replace=False)
            train_x, train_y = X[index], y[index]
            y_pred = self.__predict(self.w, train_x)
            cost = self.__cost(train_y, y_pred)
            result['w'].append(self.w)
            result['cost'].append(cost)
            print('\rEpoch: {:>{}d}/{} [{}{}] w={:>.2f} loss={:>.4f}'.format(
                step, len(str(self.n_iter)), self.n_iter,
                '■' * int(step/self.n_iter*20),
                '□' * (20 - int(step/self.n_iter*20)),
                self.w, cost
            ), end='')
            grad = self.__gradient(train_x, train_y, y_pred)
            self.w -= self.eta * grad
            # reshuffle the training set (not strictly needed, since indices are drawn at random anyway)
            perm = np.random.permutation(len(X))
            X, y = X[perm], y[perm]
            time.sleep(self.delay)
        plt.plot(result['w'], result['cost'], '-.', label='cost')
        plt.xlabel('w')
        plt.ylabel('cost')
        plt.legend()
        plt.show()

    def predict(self, x):
        pred = self.__predict(self.w, x)
        print('Predict: x = %.2f --> y = %.4f' % (x, pred))
        return pred
In[]: batch = Batch_gradient()
batch.train(X, y)
Out[]: Epoch: 1000/1000 [■■■■■■■■■■■■■■■■■■■■] w=2.00 loss=0.000062
In[]: batch.predict(4)
Out[]: Predict: x = 4.00 --> y = 8.0000
Mission accomplished!!!
Closing
That is everything I wanted to share. My knowledge is still limited, so there are bound to be shortcomings; corrections from experienced readers are very welcome. If you have any questions, feel free to leave them in the comments.