I. Project Preparation
1. Problem Introduction
Image classification, the task of distinguishing images of different categories by their semantic content, is a fundamental problem in computer vision. In this exercise we build a deep learning model on the VGG19 convolutional neural network, which automatically extracts high-quality features, to recognize marine fish species.
2. Dataset Overview
This experiment uses a fish image dataset collected between October 1, 2010 and September 30, 2013 by the Taiwan Power Company, the Taiwan Ocean Research Institute, and Kenting National Park, from underwater observation platforms at Nanwan, Lanyu Island, and Houbihu in Taiwan, China. The dataset contains 27,370 fish images covering 23 species; 90% of them are used as the training set and the remaining 10% as the test set.
Dataset download link: Fish4Knowledge 23-species fish dataset - AI Studio
3. The VGG Model
VGG is a classic deep convolutional architecture built from stacked 3x3 convolutions. The VGG19 variant used here is described in detail in the network-configuration step below; a compact sketch of its layout follows.
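As a quick reference, VGG19's convolutional backbone can be written compactly as below, where each number is the output-channel count of a 3x3 convolution and "M" marks a 2x2 max-pool. The name VGG19_CFG is ours, for illustration only; this is the same structure the VGG19 class in step 2 builds.
# 16 conv layers in five groups, each group closed by a 2x2 max-pool
VGG19_CFG = [64, 64, "M",
             128, 128, "M",
             256, 256, 256, 256, "M",
             512, 512, 512, 512, "M",
             512, 512, 512, 512, "M"]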
II. Experiment Steps
0. Preliminaries
Note: this example only works on PaddlePaddle 2.0+.
import os
import zipfile
import random
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import paddle
from paddle import nn
from paddle import metric as M
from paddle.io import DataLoader, Dataset
from paddle.nn import functional as F
from paddle.optimizer import Adam
from paddle.optimizer.lr import NaturalExpDecay
BATCH_SIZE = 64       # samples per batch
EPOCHS = 8            # number of passes over the training set
LOG_GAP = 150         # print a log line every LOG_GAP batches
INIT_LR = 3e-4        # initial learning rate
LR_DECAY = 0.75       # gamma of the learning-rate scheduler
SRC_PATH = "./data/data14492/fish_image23.zip"   # dataset archive
DST_PATH = "./data"                              # extraction destination
DATA_PATH = DST_PATH + "/fish_image"             # folder with the extracted images
INFER_PATH = "./work/infer.jpg"                  # image used for the final prediction
MODEL_PATH = "VGG19.pdparams"                    # file for the trained weights
# Mapping from dataset folder names to species names
LAB_DICT = {'fish_1': 'Dascyllus reticulatus', 'fish_2': 'Plectroglyphidodon dickii',
'fish_3': 'Chromis chrysura', 'fish_4': 'Amphiprion clarkii',
'fish_5': 'Chaetodon lunulatus', 'fish_6': 'Chaetodon trifascialis',
'fish_7': 'Myripristis kuntee', 'fish_8': 'Acanthurus nigrofuscus',
'fish_9': 'Hemigymnus fasciatus', 'fish_10': 'Neoniphon sammara',
'fish_11': 'Abudefduf vaigiensis', 'fish_12': 'Canthigaster valentini',
'fish_13': 'Pomacentrus moluccensis', 'fish_14': 'Zebrasoma scopas',
'fish_15': 'Hemigymnus melapterus', 'fish_16': 'Lutjanus fulvus',
'fish_17': 'Scolopsis bilineata', 'fish_18': 'Scaridae',
'fish_19': 'Pempheris vanicolensis', 'fish_20': 'Zanclus cornutus',
'fish_21': 'Neoglyphidodon nigroris', 'fish_22': 'Balistapus undulatus',
'fish_23': 'Siganus fuscescens'}
1. Data Preparation
- Extract the dataset
The data is stored as a compressed archive, so we first need to unzip it.
if not os.path.isdir(DATA_PATH):
    z = zipfile.ZipFile(SRC_PATH, "r")
    z.extractall(path=DST_PATH)
    z.close()
    print("Dataset extracted!")
- Split the dataset
We split the data into training and test sets at a 9:1 ratio, producing two lists that map each image path to its numeric label.
type_num, lab_dict = 0, {}          # number of classes & id-to-name mapping
train_list, test_list = [], []      # [path, label] pairs
file_folders = os.listdir(DATA_PATH)
for folder in file_folders:
    lab_dict[str(type_num)] = LAB_DICT[folder]
    imgs = os.listdir(os.path.join(DATA_PATH, folder))
    for idx, img in enumerate(imgs):
        path = os.path.join(DATA_PATH, folder, img)
        if idx % 10 == 0:           # every 10th image goes to the test set
            test_list.append([path, type_num])
        else:
            train_list.append([path, type_num])
    type_num += 1
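As a quick optional sanity check (not part of the original flow), we can confirm the split is roughly 9:1 across all 23 classes:
# The two list lengths should stand in roughly a 9:1 ratio
print("train: %d, test: %d, classes: %d"
      % (len(train_list), len(test_list), type_num))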
- Data preprocessing
We resize each image to the network's input size and normalize its pixel values.
class MyDataset(Dataset):
    ''' Custom dataset class '''
    def __init__(self, label_list, transform):
        '''
        * `label_list`: list mapping file paths to labels
        * `transform`: preprocessing function
        '''
        super(MyDataset, self).__init__()
        random.shuffle(label_list)
        self.label_list = label_list
        self.transform = transform

    def __getitem__(self, index):
        ''' Fetch one sample by index '''
        img_path, label = self.label_list[index]
        img = self.transform(img_path)
        return img, int(label)

    def __len__(self):
        ''' Return the total number of samples '''
        return len(self.label_list)
def data_mapper(img_path, show=False):
    ''' Image preprocessing function '''
    img = Image.open(img_path)
    if show:
        display(img)                                  # notebook helper to render the image
    img = img.resize((224, 224), Image.ANTIALIAS)     # scale to the VGG input size
    img = np.array(img).astype("float32")
    img = img.transpose((2, 0, 1))                    # HWC -> CHW
    img = paddle.to_tensor(img / 255.0)               # normalize pixel values to [0, 1]
    return img

train_dataset = MyDataset(train_list, data_mapper)
test_dataset = MyDataset(test_list, data_mapper)
- Define the data loaders
We build separate loaders for training and testing; the training loader shuffles the samples and serves them in batches.
train_loader = DataLoader(train_dataset,
                          batch_size=BATCH_SIZE,
                          num_workers=1,
                          shuffle=True,
                          drop_last=False)
test_loader = DataLoader(test_dataset,
                         batch_size=BATCH_SIZE,
                         num_workers=1,
                         shuffle=False,
                         drop_last=False)
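To confirm the loaders produce what the model expects, here is an optional peek at one batch (a small check, assuming the loaders above):
# Fetch a single batch: images (N, 3, 224, 224), integer labels (N,)
for x, y in train_loader():
    print(x.shape, y.shape)
    break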
2. Network Configuration
- Model
This experiment uses VGG19, a model commonly applied to classification problems. It contains 16 convolutional layers, 5 pooling layers, and 3 fully connected layers. Only the convolutional and fully connected layers carry weights, and since there are 19 of them in total, the model is called VGG19.
class ConvPool(nn.Layer):
    ''' Convolution-pooling block
    * `conv_args` (list): conv parameters [in_channels, out_channels, kernel_size, stride, padding]
    * `pool_args` (list): pooling parameters [kernel_size, stride, padding]
    * `conv_num` (int): number of convolution layers
    * `pool_type` (str): pooling type (Max/Avg)
    '''
    def __init__(self, conv_args, pool_args, conv_num=1, pool_type="Max"):
        super(ConvPool, self).__init__()
        for i in range(conv_num):
            conv = nn.Conv2D(in_channels=conv_args[0],
                             out_channels=conv_args[1],
                             kernel_size=conv_args[2],
                             stride=conv_args[3],
                             padding=conv_args[4])
            conv_args[0] = conv_args[1]   # the next conv consumes the channels this one produces
            self.add_sublayer("conv_%d" % i, conv)
            self.add_sublayer("relu_%d" % i, nn.ReLU())
        if pool_type == "Max":
            pool = nn.MaxPool2D(kernel_size=pool_args[0],
                                stride=pool_args[1],
                                padding=pool_args[2])
        else:
            pool = nn.AvgPool2D(kernel_size=pool_args[0],
                                stride=pool_args[1],
                                padding=pool_args[2])
        self.add_sublayer("pool", pool)

    def forward(self, x):
        for prefix, sub_layer in self.named_children():
            x = sub_layer(x)
        return x
class VGG19(nn.Layer):
    def __init__(self, out_dim):
        super(VGG19, self).__init__()
        # five conv-pool groups with 2 + 2 + 4 + 4 + 4 = 16 conv layers
        self.conv1 = ConvPool([  3,  64, 3, 1, 1], [2, 2, 0], 2, "Max")
        self.conv2 = ConvPool([ 64, 128, 3, 1, 1], [2, 2, 0], 2, "Max")
        self.conv3 = ConvPool([128, 256, 3, 1, 1], [2, 2, 0], 4, "Max")
        self.conv4 = ConvPool([256, 512, 3, 1, 1], [2, 2, 0], 4, "Max")
        self.conv5 = ConvPool([512, 512, 3, 1, 1], [2, 2, 0], 4, "Max")
        # three fully connected layers form the classifier head
        self.linear = nn.Sequential(nn.Linear(512*7*7, 4096),
                                    nn.ReLU(),
                                    nn.Dropout(0.25),
                                    nn.Linear(4096, 4096),
                                    nn.ReLU(),
                                    nn.Dropout(0.25),
                                    nn.Linear(4096, out_dim))

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = paddle.flatten(x, 1, -1)   # (N, 512, 7, 7) -> (N, 512*7*7)
        y = self.linear(x)
        return y

model = VGG19(out_dim=type_num)
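Each of the five pooling layers halves the spatial resolution, so a 224x224 input shrinks to 7x7 before the classifier, which is why the first Linear layer takes 512*7*7 inputs. An optional dummy forward pass confirms the shapes:
# Run a zero tensor through the untrained network to verify the output shape
dummy = paddle.zeros([1, 3, 224, 224])
print(model(dummy).shape)   # expected: [1, 23]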
3. Model Training
model.train()
scheduler = NaturalExpDecay(learning_rate=INIT_LR,
                            gamma=LR_DECAY)
optimizer = Adam(learning_rate=scheduler,
                 parameters=model.parameters())

loss_arr, acc_arr = [], []
for ep in range(EPOCHS):
    for batch_id, data in enumerate(train_loader()):
        x_data, y_data = data
        y_data = y_data[:, np.newaxis]          # (N,) -> (N, 1), as accuracy/cross_entropy expect
        y_pred = model(x_data)
        acc = M.accuracy(y_pred, y_data)
        loss = F.cross_entropy(y_pred, y_data)
        if batch_id != 0 and batch_id % LOG_GAP == 0:
            print("Epoch:%d,Batch:%3d,Loss:%.5f,Acc:%.5f"
                  % (ep, batch_id, loss, acc))
        acc_arr.append(acc.item())
        loss_arr.append(loss.item())
        optimizer.clear_grad()
        loss.backward()
        optimizer.step()
    scheduler.step()                            # decay the learning rate once per epoch
paddle.save(model.state_dict(), MODEL_PATH)
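NaturalExpDecay multiplies the base rate by exp(-gamma * epoch), so with gamma = 0.75 the learning rate shrinks to about 0.47 of its previous value each epoch. A short illustration, using the constants defined above:
# The schedule this produces over the 8 training epochs
for t in range(EPOCHS):
    print("epoch %d: lr = %.2e" % (t, INIT_LR * np.exp(-LR_DECAY * t)))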
The training output is shown below:
Epoch:0,Batch:150,Loss:0.57758,Acc:0.82812
Epoch:0,Batch:300,Loss:0.22912,Acc:0.93750
Epoch:1,Batch:150,Loss:0.12195,Acc:0.95312
Epoch:1,Batch:300,Loss:0.19877,Acc:0.93750
Epoch:2,Batch:150,Loss:0.07112,Acc:0.98438
Epoch:2,Batch:300,Loss:0.09523,Acc:0.98438
Epoch:3,Batch:150,Loss:0.07677,Acc:0.98438
Epoch:3,Batch:300,Loss:0.01776,Acc:1.00000
Epoch:4,Batch:150,Loss:0.02427,Acc:0.98438
Epoch:4,Batch:300,Loss:0.00211,Acc:1.00000
Epoch:5,Batch:150,Loss:0.00695,Acc:1.00000
Epoch:5,Batch:300,Loss:0.02889,Acc:0.98438
Epoch:6,Batch:150,Loss:0.01456,Acc:0.98438
Epoch:6,Batch:300,Loss:0.00660,Acc:1.00000
Epoch:7,Batch:150,Loss:0.00719,Acc:1.00000
Epoch:7,Batch:300,Loss:0.00266,Acc:1.00000
# Plot the loss and accuracy curves recorded over the training steps
fig = plt.figure(figsize=[10, 8])
ax1 = fig.add_subplot(211, facecolor="#E8E8F8")
ax1.set_ylabel("Loss", fontsize=18)
plt.tick_params(labelsize=14)
ax1.plot(range(len(loss_arr)), loss_arr, color="orangered")
ax1.grid(linewidth=1.5, color="white")
ax2 = fig.add_subplot(212, facecolor="#E8E8F8")
ax2.set_xlabel("Training Steps", fontsize=18)
ax2.set_ylabel("Accuracy", fontsize=18)
plt.tick_params(labelsize=14)
ax2.plot(range(len(acc_arr)), acc_arr, color="dodgerblue")
ax2.grid(linewidth=1.5, color="white")
fig.tight_layout()
plt.show()
plt.close()
4. Model Evaluation
model.eval()
test_costs, test_accs = [], []
with paddle.no_grad():                  # no gradients needed during evaluation
    for batch_id, data in enumerate(test_loader()):
        x_data, y_data = data
        y_data = y_data[:, np.newaxis]
        y_pred = model(x_data)
        acc = M.accuracy(y_pred, y_data)
        loss = F.cross_entropy(y_pred, y_data)
        test_accs.append(acc.item())
        test_costs.append(loss.item())
test_loss = np.mean(test_costs)
test_acc = np.mean(test_accs)
print("Eval \t Loss:%.5f,Acc:%.5f" % (test_loss, test_acc))
The evaluation result is shown below:
Eval Loss:0.11719,Acc:0.98067
5. Model Prediction
truth_lab = "Dascyllus reticulatus"            # ground-truth species of the inference image
infer_img = data_mapper(INFER_PATH, show=True)
infer_img = infer_img[np.newaxis, :, :, :]     # add a batch dimension: (3, 224, 224) -> (1, 3, 224, 224)

model.eval()
model.set_state_dict(paddle.load(MODEL_PATH))  # restore the trained weights
result = model(infer_img)
infer_lab = lab_dict[str(np.argmax(result))]   # map the top class id back to a species name
print("Ground truth: %s, prediction: %s" % (truth_lab, infer_lab))
The prediction result is shown below:
Ground truth: Dascyllus reticulatus, prediction: Dascyllus reticulatus
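Beyond the top-1 label, you can also inspect the full predicted distribution. A minimal sketch, assuming `result` from the step above:
# Convert logits to probabilities and list the three most likely species
probs = F.softmax(result, axis=1).numpy()[0]
for i in probs.argsort()[-3:][::-1]:
    print("%-28s %.4f" % (lab_dict[str(i)], probs[i]))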
Closing Remarks