Example 1: Implementing a simple BP neural network
Code flow:
Input: the input vector fed to the input layer
Feed forward
Output error: the error at the output layer
Back-propagate error: the error at each hidden layer
Output: the partial derivatives of the loss function with respect to the weights and biases
import numpy as np
import pprint

pp = pprint.PrettyPrinter(indent=4)

# Network architecture: 3 input nodes, 4 hidden nodes, 2 output nodes
network_sizes = [3, 4, 2]
sizes = network_sizes
num_layers = len(sizes)
# One bias vector for each layer after the input layer
biases = [np.random.randn(h, 1) for h in sizes[1:]]
# One weight matrix of shape (next_layer, prev_layer) for each pair of adjacent layers
weights = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]
'''
Return the partial derivative of the loss function; the loss is MSE
(mean squared error):
L = 1/2 * (network_y - real_y)^2
delta_L = network_y - real_y
'''
def loss_der(network_y, real_y):
    return network_y - real_y
''' The activation function is the sigmoid '''
def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

''' Derivative of the sigmoid: sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z)) '''
def sigmoid_der(z):
    return sigmoid(z) * (1 - sigmoid(z))
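As a quick sanity check on the derivative formula (a sketch added here, not part of the original listing; the helper name check_sigmoid_der is hypothetical), the analytic derivative can be compared against a central finite difference:

# Sanity check (added sketch, not in the original): the analytic derivative
# should agree with a central finite difference to high precision.
def check_sigmoid_der(z, eps=1e-6):
    numeric = (sigmoid(z + eps) - sigmoid(z - eps)) / (2 * eps)
    return np.max(np.abs(numeric - sigmoid_der(z)))

print(check_sigmoid_der(np.linspace(-5, 5, 11)))  # prints a value near 0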
'''
Back-propagation for the cost function C.
Return a tuple (delta_w, delta_b) representing the gradient of the
cost function C_x. delta_w and delta_b are layer-by-layer lists of
numpy arrays, with the same shapes as weights and biases.
'''
def backprop(x, y):
    delta_w = [np.zeros(w.shape) for w in weights]
    delta_b = [np.zeros(b.shape) for b in biases]
    # Feed forward, storing the weighted inputs z and activations layer by layer
    activation = x
    activations = [x]
    zs = []
    for w, b in zip(weights, biases):
        z = np.dot(w, activation) + b
        activation = sigmoid(z)
        activations.append(activation)
        zs.append(z)
    # Error at the output layer
    delta_L = loss_der(activations[-1], y) * sigmoid_der(zs[-1])
    delta_b[-1] = delta_L
    delta_w[-1] = np.dot(delta_L, activations[-2].transpose())
    # Back-propagate the error through the hidden layers; note the indices
    # run over -l, not -1, as the error moves back one layer at a time
    delta_l = delta_L
    for l in range(2, num_layers):
        z = zs[-l]
        sp = sigmoid_der(z)
        delta_l = np.dot(weights[-l + 1].transpose(), delta_l) * sp
        delta_b[-l] = delta_l
        delta_w[-l] = np.dot(delta_l, activations[-l - 1].transpose())
    return (delta_w, delta_b)
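A numerical gradient check is a convenient way to verify the indices above (a sketch added here, not part of the original; mse_loss and check_one_bias are hypothetical helper names). It perturbs a single bias entry and compares the finite-difference slope of the MSE loss against the corresponding entry returned by backprop:

# Gradient check (added sketch, not in the original listing).
def mse_loss(x, y):
    a = x
    for w, b in zip(weights, biases):
        a = sigmoid(np.dot(w, a) + b)
    return 0.5 * np.sum((a - y) ** 2)

def check_one_bias(x, y, layer=0, idx=0, eps=1e-5):
    _, grad_b = backprop(x, y)
    biases[layer][idx, 0] += eps
    loss_plus = mse_loss(x, y)
    biases[layer][idx, 0] -= 2 * eps
    loss_minus = mse_loss(x, y)
    biases[layer][idx, 0] += eps  # restore the original value
    numeric = (loss_plus - loss_minus) / (2 * eps)
    return abs(numeric - grad_b[layer][idx, 0])  # should be near 0

# e.g. print(check_one_bias(training_x, training_y)) once the training
# data below have been defined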
'''
Main script. Input: an input vector for the input layer; feed forward;
compute the output-layer error; back-propagate it through the hidden
layers; output the partial derivatives of the loss function.
'''
training_x = np.random.rand(3).reshape(3, 1)
training_y = np.array([0, 1]).reshape(2, 1)
print("training data x:\n{},\n training data y:\n{}".format(training_x, training_y))
delta_w, delta_b = backprop(training_x, training_y)
print("delta_w:\n{},\n delta_b:\n{}".format(delta_w, delta_b))
Run result:
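The listing above computes a single gradient evaluation. To actually train the network, the returned gradients can be applied in a gradient-descent loop; the sketch below is an addition to the original text, and the learning rate and iteration count are illustrative assumptions:

# Minimal training loop (added sketch, not in the original): repeatedly
# back-propagate the same sample and take a gradient-descent step.
learning_rate = 0.5  # assumed value for illustration
for epoch in range(1000):
    grad_w, grad_b = backprop(training_x, training_y)
    for i in range(len(weights)):
        weights[i] -= learning_rate * grad_w[i]
        biases[i] -= learning_rate * grad_b[i]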
Example 2: Medical data diagnosis
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

class Config:
    input_dim = 2      # dimension of the input layer
    output_dim = 2     # number of classes at the output layer
    epsilon = 0.01     # learning rate for gradient descent
    reg_lambda = 0.01  # L2 regularization strength
def generate_data():
    # Use the two-class 'moons' dataset as the diagnostic data
    np.random.seed(0)
    X, y = datasets.make_moons(200, noise=0.20)
    return X, y
def display_model(model):
    print("W1 {}: \n{}\n".format(model['W1'].shape, model['W1']))
    print("b1 {}: \n{}\n".format(model['b1'].shape, model['b1']))
    print("W2 {}: \n{}\n".format(model['W2'].shape, model['W2']))
    print("b2 {}: \n{}\n".format(model['b2'].shape, model['b2']))
def plot_decision_boundary(pred_func, data, labels):
    # Evaluate pred_func on a dense mesh covering the data, then draw
    # the predicted regions and overlay the data points
    x_min, x_max = data[:, 0].min() - 0.5, data[:, 0].max() + 0.5
    y_min, y_max = data[:, 1].min() - 0.5, data[:, 1].max() + 0.5
    h = 0.01  # mesh step size
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    z = pred_func(np.c_[xx.ravel(), yy.ravel()])
    z = z.reshape(xx.shape)
    plt.contourf(xx, yy, z, cmap=plt.cm.Spectral, alpha=0.2)
    plt.scatter(data[:, 0], data[:, 1], s=40, c=labels, cmap=plt.cm.Spectral)
'''Loss function: softmax cross-entropy plus L2 regularization'''
def calculate_loss(model, X, y):
    num_examples = len(X)
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    # Forward propagation
    z1 = X.dot(W1) + b1
    a1 = np.tanh(z1)
    z2 = a1.dot(W2) + b2
    # Softmax probabilities
    exp_scores = np.exp(z2)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    # Cross-entropy of the probability assigned to the correct class
    correct_logprobs = -np.log(probs[range(num_examples), y])
    data_loss = np.sum(correct_logprobs)
    # L2 regularization term
    data_loss += Config.reg_lambda / 2 * \
        (np.sum(np.square(W1)) + np.sum(np.square(W2)))
    return 1. / num_examples * data_loss
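To make the softmax step concrete, the snippet below (an illustration added here, not part of the original listing) converts two toy score rows into class probabilities that sum to 1 per row:

# Illustration (added, not in the original): softmax on toy scores.
z2_toy = np.array([[2.0, 0.5],
                   [0.1, 3.0]])
exp_toy = np.exp(z2_toy)
probs_toy = exp_toy / np.sum(exp_toy, axis=1, keepdims=True)
print(probs_toy)              # approximately [[0.82, 0.18], [0.05, 0.95]]
print(probs_toy.sum(axis=1))  # [1., 1.]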
'''Prediction function: return the most probable class for each sample'''
def predict(model, x):
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    # Forward propagation
    z1 = x.dot(W1) + b1
    a1 = np.tanh(z1)
    z2 = a1.dot(W2) + b2
    exp_scores = np.exp(z2)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    # The predicted class is the one with the highest probability
    return np.argmax(probs, axis=1)
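predict also makes it easy to measure training accuracy; the small helper below is an addition to the original text (the name accuracy is hypothetical) and can be called after the training run at the end of this example:

# Helper (added, not in the original): fraction of correctly classified samples.
def accuracy(model, X, y):
    return np.mean(predict(model, X) == y)

# e.g. print(accuracy(model, data, labels)) after the model is trained below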
'''Network training function; returns the learned model.
nn_hdim: number of neurons in the hidden layer
num_passes: number of gradient-descent iterations
print_loss: whether to print the loss value during training
'''
def ANN_modle(X, y, nn_hdim, num_passes=20000, print_loss=False):
    num_examples = len(X)
    model = {}
    # Initialize the parameters with small random values
    np.random.seed(0)
    W1 = np.random.randn(Config.input_dim, nn_hdim) / np.sqrt(Config.input_dim)
    b1 = np.zeros((1, nn_hdim))
    W2 = np.random.randn(nn_hdim, Config.output_dim) / np.sqrt(nn_hdim)
    b2 = np.zeros((1, Config.output_dim))
    display_model({'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2})
    # Full-batch gradient descent
    for i in range(0, num_passes + 1):
        # Forward propagation
        z1 = X.dot(W1) + b1
        a1 = np.tanh(z1)
        z2 = a1.dot(W2) + b2
        exp_scores = np.exp(z2)
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
        # Back propagation: the gradient of softmax cross-entropy
        # with respect to z2 is probs - one_hot(y)
        delta3 = probs
        delta3[range(num_examples), y] -= 1
        delta2 = delta3.dot(W2.T) * (1 - np.power(a1, 2))  # tanh'(z) = 1 - tanh(z)^2
        dW2 = (a1.T).dot(delta3)
        db2 = np.sum(delta3, axis=0, keepdims=True)
        dW1 = np.dot(X.T, delta2)
        db1 = np.sum(delta2, axis=0)
        # Add the gradient of the L2 regularization term
        dW1 += Config.reg_lambda * W1
        dW2 += Config.reg_lambda * W2
        # Gradient-descent parameter update
        W1 += -Config.epsilon * dW1
        b1 += -Config.epsilon * db1
        W2 += -Config.epsilon * dW2
        b2 += -Config.epsilon * db2
        model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
        if print_loss and i % 1000 == 0:
            print("Loss after iteration %i: %f" % (i, calculate_loss(model, X, y)))
    return model
data, labels = generate_data()
model = ANN_modle(data, labels, 3, print_loss=True)
display_model(model)
plot_decision_boundary(lambda x: predict(model, x), data, labels)
plt.title("Hidden layer size 3")
plt.show()
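As an extension (a sketch added here, not part of the original text), the same training function can be rerun with different hidden-layer sizes to compare how the decision boundary changes:

# Extension sketch (added, not in the original): compare decision boundaries
# for several hidden-layer sizes on the same data.
plt.figure(figsize=(12, 8))
for idx, nn_hdim in enumerate([1, 2, 3, 5]):
    plt.subplot(2, 2, idx + 1)
    plt.title("Hidden layer size %d" % nn_hdim)
    m = ANN_modle(data, labels, nn_hdim)
    plot_decision_boundary(lambda x: predict(m, x), data, labels)
plt.show()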
Experimental result: