前言
跟着大佬在学习 深度学习在图像处理中的应用(tensorflow2.4以及pytorch1.10实现)
第一部分:深度学习----分类篇
一、AlexNet
1. 学习教程
2.学习笔记
- 模型
在model.py里面加入代码并调试
#测试
import torch
input1 = torch.rand([32,3,32,32])
model = LeNet()
print(model)
output = model(input1)
训练数据集cifar10 下载数据集
import torch
import torchvision
import torch.nn as nn
from model import LeNet
import torch.optim as optim
import torchvision.transforms as transforms
def main():
train_set = torchvision.datasets.CIFAR10(root='./data', train=False,
download=True)
if __name__ == '__main__':
main()
代码中可以查看torch官方的数据集
torchvision.datasets.
训练完成 测试 这里只能用jpg格式的图片,不知道为什么
二、AlexNet网络训练花分类数据集
1. 下载数据集并分好文件夹
https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz
2. 网络结构在代码中的详解
2.1 各个层的参数
深度学习在图像处理中的应用(tensorflow2.4以及pytorch1.10实现)
pytorch中的卷积操作详解 经卷积后的矩阵尺寸大小计算公式为:
N = (W ? F + 2P ) / S + 1
但在实际应用中,有时会出现N为非整数的情况(例如在alexnet,googlenet网络的第一层输出),再例如输入的矩阵 H=W=5,卷积核的F=2,S=2,Padding=1。经计算我们得到的N =(5 - 2 + 21)/ 2 +1 = 3.5 此时在Pytorch中是如何处理呢,先直接告诉你结论:在卷积过程中会直接将最后一行以及最后一列给忽略掉,以保证N为整数,此时N = (5 - 2 + 21 - 1)/ 2 + 1 = 3。
model.py
import torch.nn as nn
import torch
class AlexNet(nn.Module):
def __init__(self, num_classes=1000, init_weights=False):
super(AlexNet, self).__init__()
self.features = nn.Sequential(
nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=2),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2),
nn.Conv2d(48, 128, kernel_size=5, padding=2),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2),
nn.Conv2d(128, 192, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.Conv2d(192, 192, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.Conv2d(192, 128, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2),
)
self.classifier = nn.Sequential(
nn.Dropout(p=0.5),
nn.Linear(128 * 6 * 6, 2048),
nn.ReLU(inplace=True),
nn.Dropout(p=0.5),
nn.Linear(2048, 2048),
nn.ReLU(inplace=True),
nn.Linear(2048, num_classes),
)
if init_weights:
self._initialize_weights()
def forward(self, x):
x = self.features(x)
x = torch.flatten(x, start_dim=1)
x = self.classifier(x)
return x
def _initialize_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
nn.init.constant_(m.bias, 0)
train.py
测试
validate_loader = torch.utils.data.DataLoader(validate_dataset,
batch_size=4, shuffle=True,
num_workers=nw)
test_data_iter = iter(validate_loader)
test_image, test_label = test_data_iter.next()
def imshow(img):
img = img / 2 + 0.5
npimg = img.numpy()
plt.imshow(np.transpose(npimg, (1, 2, 0)))
plt.show()
print(' '.join('%5s' % cla_dict[test_label[j].item()] for j in range(4)))
imshow(utils.make_grid(test_image))
3. 使用CPU训练
import os
import sys
import json
import time
import torch
import torch.nn as nn
from torchvision import transforms, datasets, utils
import matplotlib.pyplot as plt
import numpy as np
import torch.optim as optim
from tqdm import tqdm
from model import AlexNet
def main():
device = torch.device( "cpu")
print("using {} device.".format(device))
data_transform = {
"train": transforms.Compose([transforms.RandomResizedCrop(224),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
"val": transforms.Compose([transforms.Resize((224, 224)),
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}
data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))
image_path = os.path.join(data_root, "data_set", "flower_data")
assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
transform=data_transform["train"])
train_num = len(train_dataset)
flower_list = train_dataset.class_to_idx
cla_dict = dict((val, key) for key, val in flower_list.items())
json_str = json.dumps(cla_dict, indent=4)
with open('class_indices.json', 'w') as json_file:
json_file.write(json_str)
batch_size = 32
nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])
print('Using {} dataloader workers every process'.format(nw))
train_loader = torch.utils.data.DataLoader(train_dataset,
batch_size=batch_size, shuffle=True,
num_workers=nw)
validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
transform=data_transform["val"])
val_num = len(validate_dataset)
validate_loader = torch.utils.data.DataLoader(validate_dataset,
batch_size=batch_size, shuffle=True,
num_workers=nw)
print("using {} images for training, {} images for validation.".format(train_num,
val_num))
net = AlexNet(num_classes=5, init_weights=True)
net.to(device)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.0002)
epochs = 10
save_path = './AlexNet.pth'
best_acc = 0.0
train_steps = len(train_loader)
for epoch in range(epochs):
net.train()
running_loss = 0.0
t1 = time.perf_counter()
train_bar = tqdm(train_loader, file=sys.stdout)
for step, data in enumerate(train_bar):
images, labels = data
optimizer.zero_grad()
outputs = net(images.to(device))
loss = loss_function(outputs, labels.to(device))
loss.backward()
optimizer.step()
running_loss += loss.item()
rate = (step+1)/len(train_loader)
a = "*" * int(rate*50)
b = "." * int((1-rate)*50)
train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,epochs,loss)
print(time.perf_counter()-t1)
net.eval()
acc = 0.0
with torch.no_grad():
val_bar = tqdm(validate_loader, file=sys.stdout)
for val_data in val_bar:
val_images, val_labels = val_data
outputs = net(val_images.to(device))
predict_y = torch.max(outputs, dim=1)[1]
acc += (predict_y == val_labels.to(device)).sum().item()
val_accurate = acc / val_num
print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
(epoch + 1, running_loss / train_steps, val_accurate))
if val_accurate > best_acc:
best_acc = val_accurate
torch.save(net.state_dict(), save_path)
print('Finished Training')
if __name__ == '__main__':
main()
平均一轮40ms,精度在68%左右 预测
4. 使用GPU训练
无法使用GPU训练,暂时不知道什么原因 解决方法 将num_workers改为1即可
5. 总结
- AlexNet 是2012年提出的可使用GPU加速的神经网络。有5个卷积层,3个池化层,3个全连接层
三、VGG训练花分类数据集
3.1 VGG网络详解
3.2 GPU训练
cfgs = {
'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}
训练30轮,平均一轮34s,最终得到78的精度
3.3 总结
- VGG是2014年牛津大学一个实验团队提出来
- 大致思路是堆叠2个3x3卷积核代替5x5的卷积核,堆叠3个3x3的卷积核代替7x7的卷积核
- 提出感受野的概念
- 网络大概分为A-E模型,分别对应11-19层
- train.py文件都大部分相同
四、GoogleNet训练花分类数据集
4.1 GoogleNet网络详解
4.2 GPU训练
最好结果是:81.6
第二部分:深度学习----目标检测
一、 Faster R-CNN
1. 网络详解
2. GPU训练
2.1
- 代码提示没有安装pycocotools包,使用conda安装不了,于是用pip install pycocotools成功安装。
from pycocotools.coco import COCO
- 下载预训练模型
- 分割训练集与验证集
通过split_data.py文件,修改files_path文件路径,最后生成val.txt和train.txt文件
|