Gradient Descent

When minimizing a loss function, gradient descent solves for the minimum iteratively, one step at a time, yielding the minimized loss and the corresponding model parameter values. A toy example of such a loss curve is loss = x² · sin(x).

Consider the linear equation y = w · x + b. For example, from
1.567 = w * 1 + b
3.043 = w * 2 + b
we can solve for w = 1.477, b = 0.089.

Real-world data, however, contains noise, so we still need a noise term in the model:
y = w · x + b + ε, where ε ∼ N(0.01, 1) (normally distributed).
The loss for a single sample is therefore loss = (w · x + b − y)².

Computation steps:
1. loss = Σᵢ (w · xᵢ + b − yᵢ)²
2. Minimize loss
3. Predict with the learned parameters: w′ · x + b′ → y
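The code that follows reads training pairs from data.csv, which is not included here. As an illustration only, assuming the file simply holds two comma-separated columns (x and y), a minimal sketch such as the hypothetical make_data below could generate that file from the noisy linear model described above:

import numpy as np

def make_data(path="data.csv", n=100, w=1.477, b=0.089, seed=0):
    # Hypothetical helper (not part of the original code): sample points from
    # the noisy linear model y = w*x + b + eps, with eps ~ N(0.01, 1).
    rng = np.random.default_rng(seed)
    x = rng.uniform(0, 10, size=n)        # inputs
    eps = rng.normal(0.01, 1, size=n)     # noise term
    y = w * x + b + eps                   # noisy observations
    np.savetxt(path, np.column_stack([x, y]), delimiter=",")

if __name__ == "__main__":
    make_data()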
import numpy as np

def error_Sum(b, w, points):
    # Mean squared error of the line y = w*x + b over all points.
    totalError = 0
    for i in range(0, len(points)):
        x = points[i, 0]
        y = points[i, 1]
        totalError += (y - (w * x + b)) ** 2
    return totalError / float(len(points))
def step_gradient(b_current, w_current, points, learningRate):
    # One gradient-descent step: accumulate the average gradients of the
    # squared error with respect to b and w, then move against them.
    b_gradient = 0
    w_gradient = 0
    N = float(len(points))
    for i in range(0, len(points)):
        x = points[i, 0]
        y = points[i, 1]
        b_gradient += (2 / N) * (((w_current * x) + b_current) - y)
        w_gradient += (2 / N) * x * (((w_current * x) + b_current) - y)
    new_b = b_current - learningRate * b_gradient
    new_w = w_current - learningRate * w_gradient
    return [new_b, new_w]
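The two accumulator lines in step_gradient come from differentiating the mean squared error with respect to b and w:

loss = (1/N) Σᵢ (w · xᵢ + b − yᵢ)²
∂loss/∂b = (2/N) Σᵢ (w · xᵢ + b − yᵢ)
∂loss/∂w = (2/N) Σᵢ xᵢ · (w · xᵢ + b − yᵢ)

Each step then moves b and w a small distance (scaled by learningRate) against these gradients.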
def gradient_run(points, starting_b, starting_w, learning_rate, num_iterations):
    # Run gradient descent for a fixed number of iterations.
    b = starting_b
    w = starting_w
    for i in range(num_iterations):
        b, w = step_gradient(b, w, np.array(points), learning_rate)
    return [b, w]
def run():
    points = np.genfromtxt("data.csv", delimiter=",")   # load (x, y) pairs
    learning_rate = 0.0001
    initial_b = 0
    initial_w = 0
    num_iterations = 1000
    print("Starting gradient descent at b = {0}, w = {1}, error = {2}"
          .format(initial_b, initial_w, error_Sum(initial_b, initial_w, points)))
    print("Running...")
    [b, w] = gradient_run(points, initial_b, initial_w, learning_rate, num_iterations)
    print("After {0} iterations b = {1}, w = {2}, error = {3}"
          .format(num_iterations, b, w, error_Sum(b, w, points)))
Final complete code
import numpy as np

def error_Sum(b, w, points):
    totalError = 0
    for i in range(0, len(points)):
        x = points[i, 0]
        y = points[i, 1]
        totalError += (y - (w * x + b)) ** 2
    return totalError / float(len(points))

def step_gradient(b_current, w_current, points, learningRate):
    b_gradient = 0
    w_gradient = 0
    N = float(len(points))
    for i in range(0, len(points)):
        x = points[i, 0]
        y = points[i, 1]
        b_gradient += (2 / N) * (((w_current * x) + b_current) - y)
        w_gradient += (2 / N) * x * (((w_current * x) + b_current) - y)
    new_b = b_current - learningRate * b_gradient
    new_w = w_current - learningRate * w_gradient
    return [new_b, new_w]

def gradient_run(points, starting_b, starting_w, learning_rate, num_iterations):
    b = starting_b
    w = starting_w
    for i in range(num_iterations):
        b, w = step_gradient(b, w, np.array(points), learning_rate)
    return [b, w]

def run():
    points = np.genfromtxt("data.csv", delimiter=",")
    learning_rate = 0.0001
    initial_b = 0
    initial_w = 0
    num_iterations = 1000
    print("Starting gradient descent at b = {0}, w = {1}, error = {2}"
          .format(initial_b, initial_w, error_Sum(initial_b, initial_w, points)))
    print("Running...")
    [b, w] = gradient_run(points, initial_b, initial_w, learning_rate, num_iterations)
    print("After {0} iterations b = {1}, w = {2}, error = {3}"
          .format(num_iterations, b, w, error_Sum(b, w, points)))

if __name__ == '__main__':
    run()
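The per-point Python loop in step_gradient is easy to follow but slow for large datasets. As a sketch of an alternative design (not part of the original code, and assuming the same (x, y) array layout), the same gradients can be computed with vectorized NumPy operations:

import numpy as np

def step_gradient_vec(b, w, points, lr):
    # Vectorized variant of step_gradient: same gradients, computed on whole arrays.
    x, y = points[:, 0], points[:, 1]
    err = (w * x + b) - y            # prediction error for every point
    b_grad = 2 * err.mean()          # d(loss)/db averaged over N points
    w_grad = 2 * (x * err).mean()    # d(loss)/dw averaged over N points
    return b - lr * b_grad, w - lr * w_grad

It could be dropped into gradient_run via b, w = step_gradient_vec(b, w, points, learning_rate).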