This article walks through a simple CNN image-classification task in PyTorch. If you want to go over the theory behind CNNs first, see my earlier post, 深度学习(一)——CNN卷积神经网络 (Deep Learning (1): Convolutional Neural Networks).
Image Classification
We use food image classification as the example; the dataset contains testing, training, and validation folders. The download link is below (click to extract; extraction code: nefu). In each filename, the number before the underscore is the class label (a leading 0 means class 0) and the number after it is the image's index within that class.
Import the necessary packages
import os
import numpy as np
import cv2
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import pandas as pd
from torch.utils.data import DataLoader, Dataset
import time
I installed cv2 with the following command:
pip install opencv-python
For torch I installed the CUDA 10.2 build; the install command is below. For details on setting up GPU acceleration, there are plenty of guides online.
pip3 install torch==1.10.0+cu102 torchvision==0.11.1+cu102 torchaudio===0.10.0+cu102 -f https://download.pytorch.org/whl/cu102/torch_stable.html
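As a quick sanity check that the GPU build actually works, a common pattern (my addition, not from the original post) is to query torch.cuda and fall back to the CPU when no GPU is visible. Note that the rest of this post simply calls .cuda(), which assumes a GPU is present.
import torch

# Use the GPU if PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)             # "cuda" on a machine with a working CUDA 10.2 install
print(torch.__version__)  # should report 1.10.0+cu102 for the command above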
Reading the Data
Read the training, validation, and test sets into numpy arrays: x holds the image pixel data, y holds the labels.
def readfile(path, label):
    # label: whether the folder contains labeled images (training/validation)
    image_dir = sorted(os.listdir(path))
    x = np.zeros((len(image_dir), 128, 128, 3), dtype=np.uint8)
    y = np.zeros((len(image_dir)), dtype=np.uint8)
    for i, file in enumerate(image_dir):
        img = cv2.imread(os.path.join(path, file))
        # Resize every image to 128x128 so they share a fixed shape
        x[i, :, :] = cv2.resize(img, (128, 128))
        if label:
            # Filenames look like "<class>_<index>.jpg"
            y[i] = int(file.split("_")[0])
    if label:
        return x, y
    else:
        return x
workspace_dir = './food-11'
print("Reading data")
print("...")
train_x, train_y = readfile(os.path.join(workspace_dir, "training"), True)
val_x, val_y = readfile(os.path.join(workspace_dir, "validation"), True)
test_x = readfile(os.path.join(workspace_dir, "testing"), False)
print("Reading data complicated")
Data Processing
Define the data-augmentation operations (random horizontal flips, random rotations) and set the batch size.
''' Dataset '''
print("Dataset")
print("...")
train_transform = transforms.Compose([
    transforms.ToPILImage(),            # numpy HWC uint8 -> PIL image
    transforms.RandomHorizontalFlip(),  # augmentation: random left-right flip
    transforms.RandomRotation(15),      # augmentation: random rotation within ±15 degrees
    transforms.ToTensor(),              # PIL -> float tensor in [0, 1], CHW layout
])
# No augmentation at validation/test time, only the tensor conversion
test_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
])
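To see what the pipeline does to a single sample, here is a quick sketch of mine (it assumes train_x has already been loaded by readfile above):
img_t = train_transform(train_x[0])
print(img_t.shape, img_t.dtype)  # torch.Size([3, 128, 128]) torch.float32, values scaled to [0, 1]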
class ImgDataset(Dataset):
    def __init__(self, x, y=None, transform=None):
        self.x = x
        self.y = y
        if y is not None:
            # CrossEntropyLoss expects integer class labels of type long
            self.y = torch.LongTensor(y)
        self.transform = transform

    def __len__(self):
        return len(self.x)

    def __getitem__(self, index):
        X = self.x[index]
        if self.transform is not None:
            X = self.transform(X)
        if self.y is not None:
            Y = self.y[index]
            return X, Y
        else:
            return X
batch_size = 32
train_set = ImgDataset(train_x, train_y, train_transform)
val_set = ImgDataset(val_x, val_y, test_transform)  # validation uses the un-augmented transform
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)   # shuffle for training
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)
print("Dataset complicated")
Model Architecture
Define the structure of the CNN.
''' Model '''
print("Model")
print("...")
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        # Input dimension: [3, 128, 128]
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),    # [64, 128, 128]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),        # [64, 64, 64]

            nn.Conv2d(64, 128, 3, 1, 1),  # [128, 64, 64]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),        # [128, 32, 32]

            nn.Conv2d(128, 256, 3, 1, 1), # [256, 32, 32]
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),        # [256, 16, 16]

            nn.Conv2d(256, 512, 3, 1, 1), # [512, 16, 16]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),        # [512, 8, 8]

            nn.Conv2d(512, 512, 3, 1, 1), # [512, 8, 8]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),        # [512, 4, 4]
        )
        self.fc = nn.Sequential(
            nn.Linear(512 * 4 * 4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 11)            # 11 food classes
        )

    def forward(self, x):
        out = self.cnn(x)
        out = out.view(out.size()[0], -1)  # flatten to [batch, 512 * 4 * 4]
        return self.fc(out)
print("Model complicated")
Training the Model
Train the model for 30 epochs, evaluating on the validation set after each epoch; at the end, merge the training and validation sets and train again on the combined data.
''' Training '''
print("Training")
print("...")
model = Classifier().cuda()  # move the model to the GPU
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
num_epoch = 30
for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0
    val_acc = 0.0
    val_loss = 0.0

    model.train()  # enable training-time behavior of BatchNorm
    for i, data in enumerate(train_loader):
        optimizer.zero_grad()                          # clear gradients from the last step
        train_pred = model(data[0].cuda())             # forward pass
        batch_loss = loss(train_pred, data[1].cuda())
        batch_loss.backward()                          # backpropagate
        optimizer.step()                               # update weights
        train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
        train_loss += batch_loss.item()

    model.eval()  # switch BatchNorm to inference mode
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            val_pred = model(data[0].cuda())
            batch_loss = loss(val_pred, data[1].cuda())
            val_acc += np.sum(np.argmax(val_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
            val_loss += batch_loss.item()

    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' % \
          (epoch + 1, num_epoch, time.time() - epoch_start_time, \
           train_acc / len(train_set), train_loss / len(train_set), val_acc / len(val_set),
           val_loss / len(val_set)))
# Merge the training and validation data for the final run
train_val_x = np.concatenate((train_x, val_x), axis=0)
train_val_y = np.concatenate((train_y, val_y), axis=0)
train_val_set = ImgDataset(train_val_x, train_val_y, train_transform)
train_val_loader = DataLoader(train_val_set, batch_size=batch_size, shuffle=True)
model_best = Classifier().cuda()  # fresh model, trained from scratch on the merged data
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_best.parameters(), lr=0.001)
num_epoch = 30
for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0

    # No validation loop this time: all labeled data is used for training
    model_best.train()
    for i, data in enumerate(train_val_loader):
        optimizer.zero_grad()
        train_pred = model_best(data[0].cuda())
        batch_loss = loss(train_pred, data[1].cuda())
        batch_loss.backward()
        optimizer.step()
        train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
        train_loss += batch_loss.item()

    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f' % \
          (epoch + 1, num_epoch, time.time() - epoch_start_time, \
           train_acc / len(train_val_set), train_loss / len(train_val_set)))
print("Training complicated")
Output:
[001/030] 70.94 sec(s) Train Acc: 0.260997 Loss: 0.065946 | Val Acc: 0.303499 loss: 0.060955
[002/030] 56.79 sec(s) Train Acc: 0.362051 Loss: 0.057194 | Val Acc: 0.372595 loss: 0.057390
[003/030] 57.03 sec(s) Train Acc: 0.409588 Loss: 0.053193 | Val Acc: 0.395335 loss: 0.054268
[004/030] 57.81 sec(s) Train Acc: 0.455504 Loss: 0.049251 | Val Acc: 0.454519 loss: 0.048942
[005/030] 57.97 sec(s) Train Acc: 0.499899 Loss: 0.045356 | Val Acc: 0.455977 loss: 0.051678
[006/030] 58.28 sec(s) Train Acc: 0.535982 Loss: 0.042452 | Val Acc: 0.378717 loss: 0.074250
[007/030] 59.17 sec(s) Train Acc: 0.553720 Loss: 0.040124 | Val Acc: 0.568513 loss: 0.039936
[008/030] 59.99 sec(s) Train Acc: 0.579769 Loss: 0.038209 | Val Acc: 0.556268 loss: 0.041599
[009/030] 59.79 sec(s) Train Acc: 0.596392 Loss: 0.036224 | Val Acc: 0.502332 loss: 0.045684
[010/030] 60.00 sec(s) Train Acc: 0.618690 Loss: 0.033986 | Val Acc: 0.552770 loss: 0.043603
[011/030] 60.34 sec(s) Train Acc: 0.640482 Loss: 0.032332 | Val Acc: 0.569679 loss: 0.040512
[012/030] 60.72 sec(s) Train Acc: 0.664403 Loss: 0.030329 | Val Acc: 0.537609 loss: 0.047755
[013/030] 59.88 sec(s) Train Acc: 0.685181 Loss: 0.028571 | Val Acc: 0.534111 loss: 0.045569
[014/030] 60.37 sec(s) Train Acc: 0.690249 Loss: 0.028063 | Val Acc: 0.612828 loss: 0.037240
[015/030] 60.48 sec(s) Train Acc: 0.709811 Loss: 0.026130 | Val Acc: 0.649563 loss: 0.034160
[016/030] 60.54 sec(s) Train Acc: 0.720961 Loss: 0.025035 | Val Acc: 0.641691 loss: 0.034600
[017/030] 60.84 sec(s) Train Acc: 0.738901 Loss: 0.023416 | Val Acc: 0.635277 loss: 0.034863
[018/030] 60.48 sec(s) Train Acc: 0.757551 Loss: 0.022210 | Val Acc: 0.616035 loss: 0.039769
[019/030] 59.98 sec(s) Train Acc: 0.774073 Loss: 0.020678 | Val Acc: 0.649271 loss: 0.035323
[020/030] 60.62 sec(s) Train Acc: 0.779343 Loss: 0.019825 | Val Acc: 0.662099 loss: 0.033701
[021/030] 60.04 sec(s) Train Acc: 0.790999 Loss: 0.018790 | Val Acc: 0.682216 loss: 0.032581
[022/030] 60.72 sec(s) Train Acc: 0.800426 Loss: 0.017761 | Val Acc: 0.620408 loss: 0.041586
[023/030] 60.28 sec(s) Train Acc: 0.810156 Loss: 0.016732 | Val Acc: 0.674344 loss: 0.036074
[024/030] 60.17 sec(s) Train Acc: 0.825461 Loss: 0.015653 | Val Acc: 0.649271 loss: 0.039717
[025/030] 59.96 sec(s) Train Acc: 0.838638 Loss: 0.014406 | Val Acc: 0.639067 loss: 0.041005
[026/030] 58.78 sec(s) Train Acc: 0.842793 Loss: 0.014155 | Val Acc: 0.657434 loss: 0.040948
[027/030] 60.47 sec(s) Train Acc: 0.854247 Loss: 0.013192 | Val Acc: 0.664140 loss: 0.042358
[028/030] 59.34 sec(s) Train Acc: 0.861443 Loss: 0.012012 | Val Acc: 0.687755 loss: 0.038089
[029/030] 59.39 sec(s) Train Acc: 0.876748 Loss: 0.010853 | Val Acc: 0.676385 loss: 0.038813
[030/030] 59.35 sec(s) Train Acc: 0.882222 Loss: 0.010558 | Val Acc: 0.648105 loss: 0.043327
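The trained model_best stays in memory and is used directly below; if you want to reuse the weights across sessions, the standard approach (my addition; the filename is hypothetical) is to save the state dict:
torch.save(model_best.state_dict(), "model_best.pth")
# Later:
# model = Classifier().cuda()
# model.load_state_dict(torch.load("model_best.pth"))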
Testing
Make predictions on the test set.
''' Testing '''
print("Testing")
print("...")
test_set = ImgDataset(test_x, transform=test_transform)  # no labels for the test set
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)
model_best.eval()  # inference mode
prediction = []
with torch.no_grad():
    for i, data in enumerate(test_loader):
        test_pred = model_best(data.cuda())
        # Take the highest-scoring class as the predicted label
        test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
        for y in test_label:
            prediction.append(y)
with open("predict.csv", 'w') as f:
f.write('Id,Category\n')
for i, y in enumerate(prediction):
f.write('{},{}\n'.format(i, y))
print("Testing complicated")