1. Code Source
I am just getting started; following other bloggers' code, this post implements handwritten digit recognition on top of TensorFlow 2.2.0. The TensorFlow 1.x version of the code can be found at mnist/mnist_cnn.py at master · zonghua94/mnist · GitHub
or at 基于MNIST数据集实现手写数字识别_小崔的博客-CSDN博客_mnist手写数字识别
2. Background
For the fundamentals of convolutional neural networks, see the blog post CNN卷积神经网络原理讲解+图片识别应用(附源码)_学海无涯-CSDN博客_卷积神经网络图像识别
While reading, the input-channel and output-channel dimensions of a convolution kernel (also called a filter, or a weight matrix) may be unfamiliar, and it is easy to confuse them with the channel count of the input image and the number of kernels; for that, see 卷积神经网络卷积核的输入通道数与输出通道数_8月5日星期4的博客-CSDN博客
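As a quick illustration of those shape conventions, here is a minimal sketch of my own using tf.nn.conv2d in eager mode (the sizes are arbitrary): the kernel's third dimension must equal the input's channel count, and its fourth dimension, i.e. the number of kernels, becomes the output's channel count.
import tensorflow as tf
# a batch of 4 grayscale 28x28 images: [batch, height, width, in_channels]
images = tf.random.normal([4, 28, 28, 1])
# kernel shape is [kernel_h, kernel_w, in_channels, out_channels]:
# 32 filters, each spanning the single input channel
kernel = tf.random.normal([5, 5, 1, 32])
features = tf.nn.conv2d(images, kernel, strides=[1, 1, 1, 1], padding='SAME')
print(features.shape)  # (4, 28, 28, 32): out_channels is the new channel axis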
3. Code and Comments
The code and comments follow. The comments are extensive; most of them record my checks of each stage's output during the actual experiments, which stretched over a long period in which I kept failing to get the expected results.
'''
import tensorflow as tf
from matplotlib import pyplot as plt
import numpy as np
mnist=tf.keras.datasets.mnist
(x_train,label_train),(x_test,label_test)=mnist.load_data()
plt.imshow(x_train[0],'binary')
plt.imshow(x_train[5])
#weight function
def weight(shape):
    return np.random.random(shape)
#bias function
def bias(shape):
    return np.random.random(shape)
'''
'''
import tensorflow as tf
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
mnist=tf.keras.datasets.mnist
(x_train,label_train),(x_test,label_test)=mnist.load_data()
tf.compat.v1.disable_eager_execution()
x=tf.compat.v1.placeholder(tf.float32,[None,784])
y_=tf.compat.v1.placeholder(tf.float32,[None,10])
#weight generator: truncated normal with stddev 0.1, so values fall within (-0.2, 0.2)
def weight(shape):
    initial=tf.compat.v1.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)
#bias generator: a constant matrix with every element initialized to 1e-2
def bias(shape):
    initial=tf.constant(1e-2,shape=shape)
    return tf.Variable(initial)
#convolution
def conv(x,W):
    result=tf.nn.conv2d(x,W,strides=[1,1,1,1], padding='SAME')
    return result
def pooling(x):
    result=tf.nn.max_pool(x,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')#for this pooling, padding='SAME' and padding='VALID' give the same output, since 28 and 14 divide evenly by 2
    return result
#first convolutional layer
W1_conv=weight([5,5,1,32])#the first conv layer has 32 kernels
b1_conv=bias([32])
x_image=tf.reshape(x,[-1,28,28,1])
result1_conv=conv(x_image,W1_conv)
h1_conv=tf.nn.relu(result1_conv+b1_conv)
#first pooling layer
result1_pooling=pooling(h1_conv)
#second convolutional layer
W2_conv=weight([5,5,32,64])
b2_conv=bias([64])
result2_conv=conv(result1_pooling,W2_conv)
h2_conv=tf.nn.relu(result2_conv+b2_conv)
#second pooling layer
result2_pooling=pooling(h2_conv)
W3_full_connect=weight([7*7*64,1024])
b3_full_connect=bias([1024])
result2_pooling_flat=tf.reshape(result2_pooling,[-1,7*7*64])
result3_full_connect=tf.matmul(result2_pooling_flat,W3_full_connect)+b3_full_connect
h3_full_connect=tf.nn.relu(result3_full_connect)
keep_prob=tf.compat.v1.placeholder("float")
h_fc1_drop=tf.nn.dropout(h3_full_connect, keep_prob)
W4_full_connect=weight([1024,10])
b4_full_connect=bias([10])
result4_fc=tf.matmul(h_fc1_drop,W4_full_connect)
y_fc=tf.nn.softmax(result4_fc+b4_full_connect)
cost=-tf.reduce_sum(y_*tf.compat.v1.log(y_fc))
train_step=tf.compat.v1.train.AdamOptimizer(1e-4).minimize(cost)
correct_prediction=tf.equal(tf.argmax(y_fc,1), tf.argmax(y_,1))
correct_prediction=tf.cast(correct_prediction,"float")
accuracy=tf.reduce_mean(correct_prediction)
saver=tf.compat.v1.train.Saver()
with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    for i in range(20000):
        #note: next_batch belongs to the old TF 1.x input_data DataSet API;
        #tf.keras.datasets.mnist has no .train attribute, so this block cannot run under TF 2.x as-is
        batch=mnist.train.next_batch(50)
        if i%100==0:
            train_accuracy=accuracy.eval(feed_dict={x:batch[0],y_:batch[1],keep_prob:1})
            print('step %d,training accuracy:%g'%(i,train_accuracy))
        train_step.run(feed_dict={x:batch[0],y_:batch[1],keep_prob:0.5})
    saver.save(sess,'WModel/model.ckpt')
    print('test accuracy %g '%accuracy.eval(feed_dict={x:mnist.test.images,y_:mnist.test.labels,keep_prob:1}))
'''
'''Code for testing tf.Variable()
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
sess = tf.compat.v1.Session()
v1=tf.Variable(tf.random.normal(shape=[4,3],mean=0,stddev=1),name='v1')
v2=tf.Variable(tf.constant(2),name='v2')
v3=tf.Variable(tf.ones([4,3]),name='v3')
alpha=tf.Variable(10)
sess.run(tf.compat.v1.global_variables_initializer())
print(sess.run(v1))
print(sess.run(v2))
print(sess.run(v3))
print(sess.run(alpha))
'''
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from random import sample
from tensorflow.keras.utils import to_categorical
import random
b_size=100 #mini-batch size; the functions below access it via `global b_size`
tf.compat.v1.disable_eager_execution()
mnist=tf.keras.datasets.mnist
(x_train,y_train),(x_test,y_test)=mnist.load_data()
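#x_train: 60000 grayscale 28x28 images (uint8), y_train: 60000 integer labels 0-9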
'''
x_train=np.reshape(x_train,[-1,28*28])
x_test=np.reshape(x_test,[-1,28*28])
'''
x_train=x_train/255 #scale pixel values from [0,255] to [0,1]
x_test=x_test/255
y_train=to_categorical(y_train, num_classes=None) #one-hot encode the labels into length-10 vectors
y_test=to_categorical(y_test, num_classes=None)
'''
def batch_read(data1,data2,batch_size):
    #x_result=np.zeros([None,28,28])
    #y_result=np.zeros(10)
    x_result=np.zeros([batch_size,28,28])
    y_result=np.zeros([batch_size,10])
    for i in range(batch_size):
        index=random.randint(0,60000-1)
        x_result[i]=data1[index]
        y_result[i]=data2[index]
    return x_result,y_result
    #return x_result,y_result
'''
'''Test code
def batch_read(data1,data2,batch_size):
    x_result=np.zeros(784)
    y_result=np.zeros(784)
    for i in range(50):
        index=random.randint(0,60000-1) #randint's upper bound is inclusive, so 60000 itself would index out of range
        if i==0:
            x_result=x_train[index]
            y_result=y_train[index]
        else:
            x_result=np.stack((x_result,x_train[index]))
            y_result=np.stack((y_result,y_train[index]))
    return x_result,y_result
'''
'''Test code
def batch_read(data1,data2,batch_size):
    x_result=np.zeros(784)
    y_result=np.zeros(784)
    for i in range(batch_size):
        index=random.randint(0,60000-1)
        if i==0:
            x_result=data1[index]
            y_result=data2[index]
        else:
            x_result=np.vstack((x_result,data1[index]))
            y_result=np.vstack((y_result,data2[index]))
    return x_result,y_result
c=batch_read(x_train,y_train,20)
'''
'''Test code
def check(data):
    for i in range(len(data)):
        temp=data[i]
        temp1=np.zeros(10)
        for j in range(10):
            if j==temp:
                temp1[j]=1
        data[i]=temp1
    return data
c1=check(c[1])
'''
def batch_read(data1,data2):
    #draw a random mini-batch of b_size image/label pairs from the training set
    global b_size
    x=np.zeros([b_size,28,28])
    y=np.zeros([b_size,10])
    for i in range(b_size):
        index=random.randint(0,60000-1)
        x[i]=data1[index]
        y[i]=data2[index]
    return x,y
'''
b=batch_read(x_train,y_train)
print(b[0].shape)
'''
def acc_compute(x_test,y_test):
    global prediction
    y_pre=sess.run(prediction,feed_dict={x:x_test,keep_prob:0.8}) #deliberately not keep_prob:1; see the discussion at the end of this post
    correct_pred=sess.run(tf.equal(tf.argmax(y_pre,1),tf.argmax(y_test,1)))
    #y_pre has shape [batch,10], so argmax along axis=1 picks the predicted digit
    accuracy=sess.run(tf.reduce_mean(tf.cast(correct_pred,tf.float32)))
    #result=sess.run(accuracy,feed_dict={x:x_test,y:y_test})
    return accuracy
def weight(shape):
    initial=tf.compat.v1.truncated_normal(shape,mean=0,stddev=0.1)
    return tf.Variable(initial)
def bias(shape):
    initial=tf.constant(0.1,dtype=tf.float32,shape=shape)
    return tf.Variable(initial)
def conv(x,W):
    #strides=[1,x_movement,y_movement,1]
    return tf.nn.conv2d(x,W,strides=[1,1,1,1],padding='SAME')
def max_pool_2x2(x):
    return tf.nn.max_pool(x,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')
x=tf.compat.v1.placeholder(tf.float32,[None,28,28])
y=tf.compat.v1.placeholder(tf.float32,[None,10])
keep_prob=tf.compat.v1.placeholder(tf.float32)
x_image=tf.reshape(x,[-1,28,28,1])
# [-1, height, width, channels]
#***********************conv1***************************
shape1=[5,5,1,32]
w1=weight(shape1)
b1=bias([32])
f1=conv(x_image,w1)
h1=tf.nn.relu(f1+b1)
#*********************max_pooling1**********************
p1=max_pool_2x2(h1)
#********************conv2******************************
shape2=[5,5,32,64]
w2=weight(shape2)
b2=bias([64])
f2=conv(p1,w2)
h2=tf.nn.relu(f2+b2)
#********************max_pooling2***********************
p2=max_pool_2x2(h2)
#**********************full_connect1********************
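#28x28 input -> 14x14 after pool1 -> 7x7 after pool2, with 64 feature maps,
#so the flattened vector fed to this layer has 7*7*64 entries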
shape3=[7*7*64,1024]
w3=weight(shape3)
b3=bias([1024])
## reshape the image from 7,7,64 into a flat (7*7*64)
p2_flat=tf.reshape(p2,[-1,7*7*64])
f3=tf.nn.relu(tf.matmul(p2_flat,w3)+b3)
h3=tf.nn.dropout(f3,keep_prob)
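#note: in TF 2.x the second argument of tf.nn.dropout above is rate (the
#fraction to drop), not keep_prob as in TF 1.x; see the discussion after the code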
#*********************full_connect2*********************
shape4=[1024,10]
w4=weight(shape4)
b4=bias([10])
logits=tf.matmul(h3,w4)+b4
prediction=tf.nn.softmax(logits)
#h4=tf.nn.dropout(f4,keep_prob)
#********************training************************
#softmax_cross_entropy_with_logits expects raw logits; feeding it the softmax
#output would apply softmax twice and hurt convergence
cost=tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))
#cost=-tf.reduce_sum(y*tf.compat.v1.log(tf.clip_by_value(prediction,1e-3,1)),1)
#cost=tf.nn.softmax_cross_entropy_with_logits(y,prediction,axis=-1)
#cost=(-tf.reduce_sum(y*tf.compat.v1.log(prediction),1))
#cost=(-tf.reduce_sum(y*tf.compat.v1.log(prediction)+(1-y)*tf.compat.v1.log(1-prediction),1))
train_step=tf.compat.v1.train.AdamOptimizer(1e-3).minimize(cost)
with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    for i in range(1000):
        b=batch_read(x_train,y_train)
        sess.run(train_step,feed_dict={x:b[0],y:b[1],keep_prob:0.5})
    #accuracy on the test set
    print(acc_compute(x_test[:100],y_test[:100]))
    #accuracy on the training set
    # print(acc_compute(x_train[:10],y_train[:10]))
    #y_pre=sess.run(prediction,feed_dict={x:x_test,keep_prob:0.8})
    #correct_pred=sess.run(tf.equal(tf.argmax(y_pre,1),tf.argmax(y_test,1)))
    #accuracy=sess.run(tf.reduce_mean(tf.cast(correct_pred,tf.float32)))
    #print(accuracy)
'''Test code
def ad(a,b):
    return a,b
a=np.ones([2,2])
b=np.random.random([3,3])
c,d=ad(a,b)
'''
'''Test code
def f(x_train):
    b=100
    x=np.zeros([b,28,28])
    for i in range(b):
        index=random.randint(0,60000-1)
        x[i]=x_train[index]
    #print(x.shape)
    return x
b=f(x_train)
print(b.shape)
'''
'''
batch_x=np.zeros([b_size,28,28])
batch_y=np.zeros([b_size,10])
for i in range(b_size):
    index=random.randint(0,60000-1)
    batch_x[i]=x_train[index]
    batch_y[i]=y_train[index]
'''
'''Test code
with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    for i in range(1000):
        b=batch_read(x_train,y_train)
        sess.run(train_step,feed_dict={x:b[0],y:b[1],keep_prob:0.5})
    y_pre=sess.run(tf.nn.relu(tf.matmul(p2_flat,w3)+b3),feed_dict={x:x_test[:100],keep_prob:0.6})
    #correct_pred=sess.run(tf.equal(tf.argmax(y_pre,1),tf.argmax(y_test[:100],1)))
    print(y_pre[0:3])
'''
'''Test code
sess.close()
sess=tf.compat.v1.Session()
sess.run(tf.compat.v1.global_variables_initializer())
for i in range(1000):
    b=batch_read(x_train,y_train)
    sess.run(train_step,feed_dict={x:b[0],y:b[1],keep_prob:0.5})
y_pre=sess.run(tf.nn.softmax(tf.matmul(h3,w4)+b4),feed_dict={x:x_test[:10000],keep_prob:0.8})
correct_pred=sess.run(tf.equal(tf.argmax(y_pre,1),tf.argmax(y_test[:10000],1)))
print(y_pre[0:3])
print('***********************************************************')
print(y_test[:3])
print('***********************************************************')
print(correct_pred)
print('***********************************************************')
accuracy=sess.run(tf.reduce_mean(tf.cast(correct_pred,tf.float32)))
print(accuracy)
sess.close()
'''
'''Test code
sess.close()
sess=tf.compat.v1.Session()
sess.run(tf.compat.v1.global_variables_initializer())
for i in range(1000):
    b=batch_read(x_train,y_train)
    sess.run(train_step,feed_dict={x:b[0],y:b[1],keep_prob:0.5})
y_pre=sess.run(prediction,feed_dict={x:x_test,keep_prob:0.8})
correct_pred=sess.run(tf.equal(tf.argmax(y_pre,1),tf.argmax(y_test,1)))
print(y_pre[0:3])
print('***********************************************************')
print(y_test[:3])
print('***********************************************************')
print(correct_pred)
print('***********************************************************')
accuracy=sess.run(tf.reduce_mean(tf.cast(correct_pred,tf.float32)))
print(accuracy)
sess.close()
'''
I don't really know Python; I only have a basic grounding in MATLAB, so I took plenty of detours around data types and function outputs, looking up each method as I needed it and printing the type of the data produced at each stage. One lesson learned: the same code segment can sometimes yield a different output type or shape inside a function than outside it. Most likely my theoretical grounding is simply insufficient, so I can only summarize from practical experience.
The main difference between this post and other articles is that I do not feed keep_prob=1 when measuring accuracy on the test set. In my experiments, with keep_prob=1 the test data (x_test) came out of the dropout stage as [nan,nan,nan,nan,nan,nan,nan,nan,nan,nan]. Such an output is counted as a correct prediction only when test_label=[1,0,0,0,0,0,0,0,0,0], i.e. when the label digit is "0" (presumably because argmax over an all-NaN row falls back to index 0), and as wrong for every other label, so the overall accuracy sits between 0.08 and 0.1. I tried several common fixes from other blogs, such as
【tensorflow】记一次cnn训练MNIST数据集时遇到的权值为nan的问题_wunaidev的博客-CSDN博客
Mnist模型识别自己手写数字正确率低的原因_xiaqi4145-CSDN博客
but none of them solved the problem. The data were normal before the dropout stage and became NaN right after it; only choosing keep_prob inside (0,1) worked around it, which surely is not the best solution. The likely root cause (my own diagnosis, not from the referenced posts): in TF 2.x the second argument of tf.nn.dropout is rate, the fraction of units to drop, whereas in TF 1.x it was keep_prob, the fraction to keep. Feeding 1 therefore drops every unit, and the internal rescaling by 1/(1-rate) divides by zero, producing exactly the all-NaN output described above; passing rate=1-keep_prob restores the intended behaviour.
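A minimal sketch of that diagnosis (my own check, run separately in TF 2.x eager mode, not part of the original experiment):
import tensorflow as tf
x = tf.ones([2, 4])
# In TF 2.x, tf.nn.dropout's second argument is rate (the fraction to DROP),
# not keep_prob as in TF 1.x. A rate of 1.0 passed as a tensor slips past the
# up-front range check, zeroes every unit, and rescales by 1/(1-1.0):
print(tf.nn.dropout(x, tf.constant(1.0)).numpy())       # all NaN
# Converting keep_prob into a rate restores the TF 1.x behaviour, so
# keep_prob=1 becomes safe at test time:
keep_prob = 1.0
print(tf.nn.dropout(x, rate=1.0 - keep_prob).numpy())   # unchanged: all ones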
Any other questions are welcome in the comments.