import os
from keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img

datagen = ImageDataGenerator(rotation_range=20,
                             width_shift_range=0.15,
                             height_shift_range=0.15,
                             zoom_range=0.15,
                             shear_range=0.2,
                             horizontal_flip=True,
                             fill_mode='nearest')
dirs = os.listdir("picture")
print(len(dirs))
for filename in dirs:
    img = load_img("picture//{}".format(filename))
    x = img_to_array(img)
    print(x.shape)
    x = x.reshape((1,) + x.shape)  # datagen.flow requires a rank-4 array
    print(x.shape)
    datagen.fit(x)
    prefix = filename.split('.')[0]
    print(prefix)
    counter = 0
    for batch in datagen.flow(x, batch_size=4, save_to_dir='generater_pic',
                              save_prefix=prefix, save_format='jpg'):
        counter += 1
        if counter > 100:
            break  # the generator is infinite, so we have to break out of the loop ourselves
2. Processing the Dataset
1. Resizing the Images
Next we process these 21,500 photos. The first step is to scale every photo down to 64x64, for the following reasons:
- Photos taken by different phones come in different sizes, so they have to be unified.
- High-resolution photos straight from a phone are too large; GPU memory is limited, so the images need to be shrunk to reduce their footprint.
- Photos captured through the app with the phone camera also differ between models, so they must be standardized.
Resizing an image is not just a matter of naively shrinking it, which would distort it badly, so a proper resampling algorithm is needed. TensorFlow provides four of them: bilinear interpolation, nearest-neighbor interpolation, bicubic interpolation, and area interpolation. I use area interpolation here. The code is:
# Resize every image to 64x64
def resize_img():
    dirs = os.listdir("split_pic//6")
    counter = 0
    for filename in dirs:
        im = tf.gfile.FastGFile("split_pic//6//{}".format(filename), 'rb').read()
        counter += 1
        print("Processing image %d" % counter)
        with tf.Session() as sess:
            img_data = tf.image.decode_jpeg(im)
            image_float = tf.image.convert_image_dtype(img_data, tf.float32)
            resized = tf.image.resize_images(image_float, [64, 64], method=3)  # method=3: area interpolation
            resized_im = resized.eval()
            new_mat = np.asarray(resized_im).reshape(1, 64, 64, 3)
            scipy.misc.imsave("resized_img6//{}".format(filename), resized_im)
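For readability, the numeric method=3 above can also be written with the named constant; in TF 1.x the integer codes map to 0 = bilinear, 1 = nearest neighbor, 2 = bicubic, 3 = area. A minimal equivalent wrapper (resize_area is just an illustrative name, not from the original script):

import tensorflow as tf

def resize_area(image_float):
    # same as method=3 in the code above: area interpolation
    return tf.image.resize_images(image_float, [64, 64],
                                  method=tf.image.ResizeMethod.AREA)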
2. Converting the Images to an .h5 File
The benefits of the h5 format have been covered elsewhere, so I won't repeat them here. First we convert each image to an RGB matrix, so every image becomes a 64*64*3 matrix (3 channels because the images are in color). I deliberately do not normalize at this stage: in my view normalization belongs in the code that consumes the data, and baking it into the dataset makes it rigid and inflexible. When writing the matrices into the h5 file, make sure every label stays aligned with its image (matrix). Here is the code:
# convert the images into an h5 file
def image_to_h5():
    dirs = os.listdir("resized_img")
    Y = []  # labels
    X = []  # data
    print(len(dirs))
    for filename in dirs:
        label = int(filename.split('_')[0])
        Y.append(label)
        im = Image.open("resized_img//{}".format(filename)).convert('RGB')
        mat = np.asarray(im)  # image to matrix
        X.append(mat)

    file = h5py.File("dataset//data.h5", "w")
    file.create_dataset('X', data=np.array(X))
    file.create_dataset('Y', data=np.array(Y))
    file.close()
# test
data = h5py.File("dataset//data.h5", "r")
X_data = data['X']
print(X_data.shape)
Y_data = data['Y']
print(Y_data[123])
image = Image.fromarray(X_data[123])  # convert the matrix back to an image and display it
image.show()
3. Training the Model
Next we train the model. First split the dataset into a training set and a test set, then normalize the inputs and convert the labels into one-hot vectors. The code is as follows:
# load dataset
def load_dataset():
    # split into training and test sets
    data = h5py.File("dataset//data.h5", "r")
    X_data = np.array(data['X'])  # data['X'] is an h5py._hl.dataset.Dataset; convert it to an ndarray
    Y_data = np.array(data['Y'])
    print(type(X_data))
    X_train, X_test, y_train, y_test = train_test_split(X_data, Y_data, train_size=0.9, test_size=0.1, random_state=22)
    print(X_train.shape)
    print(y_train[456])
    image = Image.fromarray(X_train[456])
    image.show()
    y_train = y_train.reshape(1, y_train.shape[0])
    y_test = y_test.reshape(1, y_test.shape[0])
    print(X_train.shape)
    print(X_train[0])
    X_train = X_train / 255.  # normalize
    X_test = X_test / 255.
    print(X_train[0])
    # one-hot
    y_train = np_utils.to_categorical(y_train, num_classes=11)
    print(y_train.shape)
    y_test = np_utils.to_categorical(y_test, num_classes=11)
    print(y_test.shape)

    return X_train, X_test, y_train, y_test
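To make the one-hot step concrete, here is a tiny self-contained check (the toy labels are made up purely for illustration): each label in 0-10 becomes an 11-dimensional vector with a single 1 at the label's index.

import numpy as np
from keras.utils import np_utils

toy_labels = np.array([0, 3, 10])                              # hypothetical labels, illustration only
one_hot = np_utils.to_categorical(toy_labels, num_classes=11)
print(one_hot.shape)                                           # (3, 11)
print(one_hot[1])                                              # label 3 -> a 1 at index 3, 0 elsewhere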
Now we build the CNN. I use the simplest LeNet-5-style architecture: two convolutional layers, two pooling layers, one fully connected layer, and a softmax output layer. The usual tricks are applied: dropout, ReLU, regularization, mini-batches, and Adam. See the code below:
def weight_variable(shape):
    tf.set_random_seed(1)
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    return tf.Variable(tf.constant(0.0, shape=shape))

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(z):
    return tf.nn.max_pool(z, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
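A quick shape check for the helpers above (this snippet is only illustrative and not part of the original script; it assumes TensorFlow 1.x and the helpers just defined): with 'SAME' padding the convolution preserves the spatial size and each 2x2 max-pool halves it, which is where the 64 -> 32 -> 16 shape comments in the model below come from.

import tensorflow as tf

x_check = tf.placeholder(tf.float32, [None, 64, 64, 3])  # dummy input, only for shape checking
W_check = weight_variable([5, 5, 3, 32])
print(max_pool_2x2(conv2d(x_check, W_check)).shape)      # (?, 32, 32, 32)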
def random_mini_batches(X, Y, mini_batch_size=16, seed=0):
    """
    Creates a list of random minibatches from (X, Y)

    Arguments:
    X -- input data, of shape (number of examples, 64, 64, 3)
    Y -- one-hot label matrix, of shape (number of examples, 11)
    mini_batch_size -- size of the mini-batches, integer
    seed -- random seed, so that the shuffling is reproducible

    Returns:
    mini_batches -- list of synchronous (mini_batch_X, mini_batch_Y) tuples
    """
    m = X.shape[0]  # number of training examples
    mini_batches = []
    np.random.seed(seed)

    # Step 1: Shuffle (X, Y)
    permutation = list(np.random.permutation(m))
    shuffled_X = X[permutation]
    shuffled_Y = Y[permutation, :].reshape((m, Y.shape[1]))

    # Step 2: Partition (shuffled_X, shuffled_Y). Minus the end case.
    num_complete_minibatches = math.floor(m / mini_batch_size)  # number of full mini-batches of size mini_batch_size
    for k in range(0, num_complete_minibatches):
        mini_batch_X = shuffled_X[k * mini_batch_size: k * mini_batch_size + mini_batch_size]
        mini_batch_Y = shuffled_Y[k * mini_batch_size: k * mini_batch_size + mini_batch_size]
        mini_batch = (mini_batch_X, mini_batch_Y)
        mini_batches.append(mini_batch)

    # Handling the end case (last mini-batch < mini_batch_size)
    if m % mini_batch_size != 0:
        mini_batch_X = shuffled_X[num_complete_minibatches * mini_batch_size: m]
        mini_batch_Y = shuffled_Y[num_complete_minibatches * mini_batch_size: m]
        mini_batch = (mini_batch_X, mini_batch_Y)
        mini_batches.append(mini_batch)

    return mini_batches
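A small, self-contained sanity check of this helper (the array sizes below are made up purely for illustration, and it assumes the function above plus numpy and math are available): with 50 examples and a mini-batch size of 16 we expect three full batches plus one remainder batch of 2.

import numpy as np

X_toy = np.random.rand(50, 64, 64, 3)             # 50 fake examples, illustrative only
Y_toy = np.eye(11)[np.random.randint(0, 11, 50)]  # 50 fake one-hot labels over 11 classes
batches = random_mini_batches(X_toy, Y_toy, mini_batch_size=16, seed=0)
print(len(batches))                               # 4: three full batches and one remainder
print(batches[0][0].shape, batches[0][1].shape)   # (16, 64, 64, 3) (16, 11)
print(batches[-1][0].shape)                       # (2, 64, 64, 3)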
def cnn_model(X_train, y_train, X_test, y_test, keep_prob, lamda, num_epochs=450, minibatch_size=16):
    X = tf.placeholder(tf.float32, [None, 64, 64, 3], name="input_x")
    y = tf.placeholder(tf.float32, [None, 11], name="input_y")
    kp = tf.placeholder_with_default(1.0, shape=(), name="keep_prob")
    lam = tf.placeholder(tf.float32, name="lamda")

    # conv1
    W_conv1 = weight_variable([5, 5, 3, 32])
    b_conv1 = bias_variable([32])
    z1 = tf.nn.relu(conv2d(X, W_conv1) + b_conv1)
    maxpool1 = max_pool_2x2(z1)  # after max_pool1 the shape is [?, 32, 32, 32]

    # conv2
    W_conv2 = weight_variable([5, 5, 32, 64])
    b_conv2 = bias_variable([64])
    z2 = tf.nn.relu(conv2d(maxpool1, W_conv2) + b_conv2)
    maxpool2 = max_pool_2x2(z2)  # after max_pool2 the shape is [?, 16, 16, 64]

    # conv3 -- the best-performing run actually omitted this layer: only two conv layers, 100 hidden units, trained for 20 epochs
    W_conv3 = weight_variable([5, 5, 64, 128])
    b_conv3 = bias_variable([128])
    z3 = tf.nn.relu(conv2d(maxpool2, W_conv3) + b_conv3)
    maxpool3 = max_pool_2x2(z3)  # after max_pool3 the shape is [?, 8, 8, 128]