[人工智能] 吴恩达神经网络-数字识别正向传播和反向传播整合 python实现

开发: C++知识库 Java知识库 JavaScript Python PHP知识库人工智能区块链大数据移动开发嵌入式开发工具数据结构与算法开发测试游戏开发网络协议系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑笔记本显卡显示器固态硬盘硬盘耳机手机 iphone vivo oppo 小米华为单反装机图拉丁

-> 人工智能 -> 吴恩达神经网络-数字识别正向传播和反向传播整合 python实现 -> 正文阅读

[人工智能]吴恩达神经网络-数字识别正向传播和反向传播整合 python实现

主函数

暂时看不懂的同学可以先直接复制粘贴（Ctrl+C / Ctrl+V）下面的代码运行一遍，再结合注释逐步理解。

import time
from Four_Week.Regularized import regularized_cost, regularized_gradient
from Four_Week.Tool import random_init, serialize, deserialize, accuracy
from Three_Week.dispaly_Data import display_data
from Three_Week.predict import predict
import numpy as np
import scipy.io as scio
import scipy.optimize as opt
# Training script: load the digit images, train the two-layer network with
# the regularized cost/gradient, then report accuracy and show predictions.
picture_DataFile = 'ex3data1.mat'
picture_Data = scio.loadmat(picture_DataFile)
# X holds one flattened image per row; per the original notes these are
# 5000 grayscale 20x20 images, i.e. a 5000x400 matrix.
X = picture_Data['X']

# y holds the label of each row of X, flattened to a 1-D vector.
y = picture_Data['y'].flatten()
print(y)

# One-hot encode the labels: label k sets column k-1 of its row, giving the
# 5000x10 target matrix needed for the 10-unit output layer.
yarray = np.zeros((y.size, 10))
yarray[np.arange(y.size), y - 1] = 1

init_theta = random_init(10285)  # 25*401 + 10*26 unrolled weights
# x0 is the unrolled (theta1, theta2) vector being trained; args are the
# extra arguments forwarded to both fun and jac.
res = opt.minimize(fun=regularized_cost,
                   x0=init_theta,
                   args=(X, yarray),  # X: 5000x400, yarray: 5000x10
                   method='TNC',
                   jac=regularized_gradient,
                   options={'maxiter': 400})

theta1, theta2 = deserialize(res.x)
rand_indices = np.random.permutation(range(5000))  # shuffled indices 0..4999
accuracy(res.x, X, y, res)
for sample_index in rand_indices:
    example = X[sample_index]
    example = example.reshape((1, example.size))  # single image as a 1x400 row
    display_data(example)
    pred = predict(theta1, theta2, example)
    # The original format string had one placeholder for two arguments, so
    # the digit value (label modulo 10) was never printed.
    print('Neural network prediction is: {} (digit {})'.format(pred, np.mod(pred, 10)))

工具函数

def costFunction(theta,X,yarray):
    """Return the unregularized cross-entropy cost of the two-layer network.

    Args:
        theta: Unrolled weight vector; ``deserialize`` splits it into
            theta1 (25x401) and theta2 (10x26).
        X: Input matrix with one sample per row and no bias column
            (5000x400 for the full training set).
        yarray: One-hot label matrix with one row per sample (5000x10).

    Returns:
        The cost summed over all samples and output units, divided by the
        number of samples.
    """
    theta1, theta2 = deserialize(theta)
    m = X.shape[0]

    # Forward pass; a column of ones is prepended before each layer as the bias.
    a1 = np.c_[np.ones(m), X]                                     # m x 401
    a2 = np.c_[np.ones(m), 1 / (1 + np.exp(-a1.dot(theta1.T)))]   # m x 26
    h = 1 / (1 + np.exp(-a2.dot(theta2.T)))                       # m x 10

    # Vectorized over all samples and classes; replaces the per-sample Python
    # loop, whose accumulator also shadowed the builtin ``sum``.
    total = np.sum(-yarray * np.log(h) - (1 - yarray) * np.log(1 - h))
    return total / m


def gradient(theta,X,yarray):
    """Return the unregularized backpropagation gradient as a flat vector.

    Args:
        theta: Unrolled weight vector (theta1 followed by theta2).
        X: Input matrix with one sample per row and no bias column.
        yarray: One-hot label matrix with one row per sample.

    Returns:
        The gradients of theta1 and theta2, averaged over the samples and
        unrolled with ``serialize`` into the same layout as ``theta`` so an
        optimizer can consume them.
    """
    _, theta2 = deserialize(theta)
    a1, z2, a2, z3, h = feed_forward(theta, X)

    # Output-layer error. The input layer has no error term.
    delta3 = h - yarray  # m x 10

    # a2 is the hidden sigmoid activation with a bias column prepended, so
    # dropping column 0 gives sigmoid(z2); its derivative is s * (1 - s).
    # This replaces ``sigmod(z2)[1]``, which depended on a helper not
    # defined in this file.
    hidden = a2[:, 1:]
    # theta2's bias column is skipped: the bias unit receives no error.
    delta2 = delta3.dot(theta2[:, 1:]) * hidden * (1 - hidden)  # m x 25

    D1 = delta2.T.dot(a1)  # 25 x 401
    D2 = delta3.T.dot(a2)  # 10 x 26

    # The original ``return`` sat at column 0, outside the function body,
    # which is a SyntaxError.
    return (1 / len(X)) * serialize(D1, D2)


import numpy as np
from Four_Week.CostFunction import *
from Four_Week.Gradient import gradient
from Four_Week.Tool import serialize,deserialize
def regularized_cost(theta, X, y, l=1):
    """Return the network cost plus an L2 penalty on the non-bias weights.

    Args:
        theta: Unrolled weight vector (theta1 followed by theta2).
        X: Input matrix with one sample per row.
        y: One-hot label matrix with one row per sample.
        l: Regularization strength. Defaults to 1, the value that was
            previously hard-coded, and mirrors the ``l`` parameter of
            ``regularized_gradient``.

    Returns:
        ``costFunction(theta, X, y)`` plus ``l / (2 * m)`` times the sum of
        squared weights, where m is ``len(X)``.
    """
    theta1, theta2 = deserialize(theta)
    # Column 0 of each matrix holds the bias weights, which are not penalized.
    reg = np.sum(theta1[:, 1:] ** 2) + np.sum(theta2[:, 1:] ** 2)
    return l / (2 * len(X)) * reg + costFunction(theta, X, y)


def regularized_gradient(theta, X, y, l=1):
    """Return the backpropagation gradient with L2 regularization added.

    The bias weights (column 0 of each weight matrix) are not penalized.

    Args:
        theta: Unrolled weight vector (theta1 followed by theta2). It is
            not modified.
        X: Input matrix with one sample per row.
        y: One-hot label matrix with one row per sample.
        l: Regularization strength.

    Returns:
        The regularized gradient, unrolled in the same layout as ``theta``.
    """
    theta1_d, theta2_d = deserialize(gradient(theta, X, y))
    theta1, theta2 = deserialize(theta)

    # deserialize returns reshaped views of ``theta``; zeroing the bias
    # column on those views would overwrite the caller's parameters, so the
    # penalty terms are built on copies.
    penalty1 = theta1.copy()
    penalty2 = theta2.copy()
    penalty1[:, 0] = 0
    penalty2[:, 0] = 0

    scale = l / len(X)
    reg_d1 = theta1_d + scale * penalty1
    reg_d2 = theta2_d + scale * penalty2

    return serialize(reg_d1, reg_d2)


def serialize(a, b):
    """Unroll two arrays into a single 1-D vector, ``a`` first then ``b``."""
    flat_parts = (a.flatten(), b.flatten())
    return np.concatenate(flat_parts)
def deserialize(seq, input_size=400, hidden_size=25, num_labels=10):
    """Split an unrolled parameter vector back into the two weight matrices.

    Args:
        seq: 1-D array holding theta1 followed by theta2.
        input_size: Number of input features, excluding the bias unit.
        hidden_size: Number of hidden units, excluding the bias unit.
        num_labels: Number of output units.

    Returns:
        A tuple ``(theta1, theta2)`` with shapes
        ``(hidden_size, input_size + 1)`` and ``(num_labels, hidden_size + 1)``;
        with the defaults that is 25x401 and 10x26. Each matrix is a reshape
        of a slice of ``seq``.
    """
    split = hidden_size * (input_size + 1)
    theta1 = seq[:split].reshape(hidden_size, input_size + 1)
    theta2 = seq[split:].reshape(num_labels, hidden_size + 1)
    return theta1, theta2

def random_init(size):
    """Draw ``size`` initial weights uniformly from the range [-0.12, 0.12)."""
    epsilon = 0.12
    return np.random.uniform(low=-epsilon, high=epsilon, size=size)


def feed_forward(theta,X):
    """Run forward propagation through the two-layer network.

    Args:
        theta: Unrolled weight vector; ``deserialize`` splits it into
            theta1 (25x401) and theta2 (10x26).
        X: Input matrix with one sample per row and no bias column.

    Returns:
        A tuple ``(a1, z2, a2, z3, a3)``: the bias-augmented input, the
        hidden pre-activation, the bias-augmented hidden activation, the
        output pre-activation and the output activation.
    """
    def _logistic(z):
        return 1 / (1 + np.exp(-z))

    weights_hidden, weights_out = deserialize(theta)
    bias = np.ones(X.shape[0])

    a1 = np.c_[bias, X]                # inputs with a leading column of ones
    z2 = a1.dot(weights_hidden.T)
    a2 = np.c_[bias, _logistic(z2)]    # hidden activations with a leading column of ones
    z3 = a2.dot(weights_out.T)
    return a1, z2, a2, z3, _logistic(z3)