Preface
This article is a code implementation of training a multilayer perceptron; for the underlying theory and derivations, see the companion article BP神经网络原理(详细推导) (Principles of BP Neural Networks, with detailed derivation).
Step 1. Reading the Data
import os
import struct
import numpy as np
import matplotlib.pyplot as plt

def getMNIST(kind='train'):
    # Load the raw MNIST idx files for the given split from the working directory.
    path = os.getcwd()
    labels_path = os.path.join(path, '%s-labels-idx1-ubyte' % kind)
    images_path = os.path.join(path, '%s-images-idx3-ubyte' % kind)
    with open(labels_path, 'rb') as lbpath:
        # Label header: two big-endian unsigned ints (magic number, item count).
        magic, n = struct.unpack('>II', lbpath.read(8))
        labels = np.fromfile(lbpath, dtype=np.uint8)
    with open(images_path, 'rb') as imgpath:
        # Image header additionally carries the row and column counts (28 x 28).
        magic, num, rows, cols = struct.unpack('>IIII', imgpath.read(16))
        images = np.fromfile(imgpath, dtype=np.uint8).reshape(len(labels), rows * cols)
        # Rescale pixel values from [0, 255] to [-1, 1].
        images = ((images / 255) - 0.5) * 2
    return images, labels

X_train, y_train = getMNIST(kind='train')
X_test, y_test = getMNIST(kind='t10k')
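As a quick sanity check (a minimal sketch; it assumes the four idx files are present in the working directory and loading succeeded), the arrays should come back with the standard MNIST shapes:

print(X_train.shape, y_train.shape)  # (60000, 784) (60000,)
print(X_test.shape, y_test.shape)    # (10000, 784) (10000,)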
Step 2. Building the Multilayer Perceptron
class MutiLayerNeuralNetWork(object):
    def __init__(self,
                 l2=0.01,
                 eta=0.001,
                 epochs=10,
                 shuffle=True,
                 Random_seed=1,
                 hidden_unit=100,
                 minibatch_size=200):
        self.l2 = l2                            # L2 regularization strength
        self.eta = eta                          # learning rate
        self.epochs = epochs
        self.shuffle = shuffle
        self.hidden_unit = hidden_unit
        self.minibatch_size = minibatch_size
        self.random = np.random.RandomState(Random_seed)
        self.cost = {'Cost': []}

    def ReadData(self, X_train, y_train, X_test, y_test):
        # Store the train/test split on the model for later use.
        self.X_train = X_train
        self.y_train = y_train
        self.X_test = X_test
        self.y_test = y_test
        return self
    def OneHot(self, y):
        num_samples = y.shape[0]
        num_classes = np.unique(y).shape[0]
        y_onehot = np.zeros((num_samples, num_classes))
        for i, j in enumerate(y):
            y_onehot[i, j] = 1
        return y_onehot
    def Initialization(self, X, y):
        num_feature = X.shape[1]
        num_hidden = self.hidden_unit
        num_output = np.unique(y).shape[0]
        # Draw initial weights from the seeded RandomState for reproducibility;
        # biases start at zero.
        self.weight_hidden = self.random.normal(loc=0.0, scale=1.0, size=(num_feature, num_hidden))
        self.bias_hidden = np.zeros(shape=(1, num_hidden))
        self.weight_output = self.random.normal(loc=0.0, scale=1.0, size=(num_hidden, num_output))
        self.bias_output = np.zeros(shape=(1, num_output))
        return self
    def Sigmoid(self, z):
        # Clip z so np.exp cannot overflow for extreme inputs.
        return 1. / (1 + np.exp(-np.clip(z, -200, 200)))

    def SigmoidDerivative(self, z):
        # sigma'(z) = sigma(z) * (1 - sigma(z))
        return self.Sigmoid(z) * (1 - self.Sigmoid(z))
    def ComputeCost(self, y_enc, output):
        # Clip activations away from exactly 0 and 1 so np.log never sees 0.
        output = np.clip(output, 1e-10, 1. - 1e-10)
        # Cross-entropy cost plus an L2 penalty on both weight matrices.
        L2_term = (self.l2 *
                   (np.sum(self.weight_hidden ** 2.) +
                    np.sum(self.weight_output ** 2.)))
        term1 = -y_enc * np.log(output)
        term2 = (1. - y_enc) * np.log(1. - output)
        cost = np.sum(term1 - term2) + L2_term
        return cost
    def ForwardPropagation(self, X):
        z_hidden = np.dot(X, self.weight_hidden) + self.bias_hidden
        a_hidden = self.Sigmoid(z_hidden)
        z_output = np.dot(a_hidden, self.weight_output) + self.bias_output
        a_output = self.Sigmoid(z_output)
        return z_hidden, a_hidden, z_output, a_output
    def BackPropagation(self, X, y):
        y_enc = self.OneHot(y)
        for epoch in range(self.epochs):
            indices = np.arange(X.shape[0])
            if self.shuffle:
                self.random.shuffle(indices)
            for start_indx in range(0, X.shape[0] + 1 - self.minibatch_size, self.minibatch_size):
                minibatch_indx = indices[start_indx: start_indx + self.minibatch_size]
                minibatch_X = X[minibatch_indx]
                minibatch_y_enc = y_enc[minibatch_indx]
                z_hidden, a_hidden, z_output, a_output = self.ForwardPropagation(minibatch_X)
                # With a sigmoid output and cross-entropy cost the sigmoid derivative
                # cancels, so the output error is simply (activation - target).
                delta_output = a_output - minibatch_y_enc
                # Propagate the error back through the output weights and the
                # hidden layer's sigmoid derivative.
                Sigmoid_prime_hidden = self.SigmoidDerivative(z_hidden)
                delta_hidden = np.dot(delta_output, self.weight_output.T) * Sigmoid_prime_hidden
                gradient_output = np.dot(a_hidden.T, delta_output)
                gradient_bias_output = np.sum(delta_output, axis=0)
                gradient_hidden = np.dot(minibatch_X.T, delta_hidden)
                gradient_bias_hidden = np.sum(delta_hidden, axis=0)
                # Gradient-descent updates; the L2 term is added to the weight
                # gradients only, so biases are not regularized.
                Delta_output = gradient_output + self.l2 * self.weight_output
                Delta_bias_output = gradient_bias_output
                self.weight_output -= self.eta * Delta_output
                self.bias_output -= self.eta * Delta_bias_output
                Delta_hidden = gradient_hidden + self.l2 * self.weight_hidden
                Delta_bias_hidden = gradient_bias_hidden
                self.weight_hidden -= self.eta * Delta_hidden
                self.bias_hidden -= self.eta * Delta_bias_hidden
            # Record the cost on the full training set once per epoch.
            z_hidden, a_hidden, z_output, a_output = self.ForwardPropagation(X)
            cost = self.ComputeCost(y_enc, a_output)
            self.cost['Cost'].append(cost)
        return self
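Note that the class ends at training and has no prediction routine. A minimal sketch of one is below (the standalone function Predict is my own addition, not part of the original code); it reuses the ForwardPropagation method and takes the index of the largest output activation as the predicted class:

def Predict(model, X):
    # Forward pass, then argmax over the output activations.
    _, _, _, a_output = model.ForwardPropagation(X)
    return np.argmax(a_output, axis=1)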
Step 3. Training
if __name__ == "__main__":
    MLP = MutiLayerNeuralNetWork(
        l2=0.01,
        eta=0.001,
        epochs=50,
        shuffle=True,
        Random_seed=1,
        hidden_unit=100,
        minibatch_size=200)
    MLP.ReadData(X_train, y_train, X_test, y_test)
    MLP.Initialization(X_train, y_train)
    MLP.BackPropagation(X_train, y_train)
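With the hypothetical Predict helper sketched at the end of Step 2, a rough test-set check could look like this (assuming the t10k split loaded in Step 1):

y_test_pred = Predict(MLP, X_test)
accuracy = np.mean(y_test == y_test_pred)
print('Test accuracy: %.2f%%' % (accuracy * 100))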
Step 4. Visualizing the Cost Function
plt.figure(figsize=(8, 5), dpi=110)
plt.plot(range(MLP.epochs), MLP.cost['Cost'], color='firebrick')
plt.title('Cost and Epochs')
plt.ylabel('Cost')
plt.xlabel('Epochs')
plt.show()
Summary
The dataset in this article is MNIST; readers can download it (along with the code for my other machine learning models) from my GitHub: Sunsky's GitHub.