Load the handwritten digit data
Build the training and test sets. The data has already been downloaded here, so download is set to False.
import torch
import torchvision
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np

train_dataset = torchvision.datasets.MNIST(root='./data1',
                                            train=True,
                                            transform=torchvision.transforms.ToTensor(),
                                            download=False
                                            )
test_dataset = torchvision.datasets.MNIST(root='./data1',
                                           train=False,
                                           transform=torchvision.transforms.ToTensor(),
                                           download=False
                                           )
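As a quick sanity check (an optional sketch, not part of the original script), the lines below confirm the dataset sizes and the tensor shape produced by ToTensor(): MNIST ships 60,000 training images and 10,000 test images, each 1x28x28 with pixel values scaled to [0, 1].

# Optional sanity check on the datasets created above
print(len(train_dataset), len(test_dataset))            # 60000 10000
sample_img, sample_label = train_dataset[0]
print(sample_img.shape, sample_img.min().item(), sample_img.max().item(), sample_label)
# torch.Size([1, 28, 28]), values in [0.0, 1.0], label is an int in 0~9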
Data loaders (load the data in batches)
batch_size = 100
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=batch_size,
                          shuffle=True
                          )
test_loader = DataLoader(dataset=test_dataset,
                         batch_size=batch_size,
                         shuffle=True
                         )
examples = iter(test_loader)
example_data, example_targets = next(examples)
for i in range(6):
    plt.subplot(2, 3, i + 1)
    plt.imshow(example_data[i][0], cmap='gray')
    print(example_data[i][0].shape)
plt.show()
Build the model
- Before building the model, define the input size and the number of output classes.
  The images shown above are 28*28*1, so the input size is 784.
  Digit recognition only covers the digits 0~9, so this is a 10-class classification problem.
input_size = 784
num_classes = 10
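The fully connected layers expect each image as a flat 784-dimensional vector, so every batch of 1x28x28 images has to be flattened before the forward pass; a minimal sketch of that reshaping, reusing the example_data batch drawn above:

# Flatten a batch of [N, 1, 28, 28] images into [N, 784] vectors
flat = example_data.reshape(-1, 28 * 28)
print(example_data.shape, '->', flat.shape)   # torch.Size([100, 1, 28, 28]) -> torch.Size([100, 784])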
- Create the model class
class NeuralNet(torch.nn.Module):
    def __init__(self, n_input_size, hidden_size, n_num_classes):
        """
        Initialize the neural network
        :param n_input_size: number of input features
        :param hidden_size: number of hidden-layer nodes
        :param n_num_classes: number of output classes
        """
        super(NeuralNet, self).__init__()
        self.input_size = n_input_size
        self.l1 = torch.nn.Linear(n_input_size, hidden_size)
        self.relu = torch.nn.ReLU()
        self.l2 = torch.nn.Linear(hidden_size, n_num_classes)

    def forward(self, x):
        """
        Override the forward pass to produce the predictions
        :param x: input data
        :return: predictions (raw logits)
        """
        out = self.l1(x)
        out = self.relu(out)
        out = self.l2(out)
        return out
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = NeuralNet(input_size, 500, num_classes).to(device)
print(model)
- The printout shows that the model has three layers; the parameter-count sketch below makes the layer sizes concrete
  - Input layer (the number of nodes equals the flattened image size)
  - Hidden layer (500 nodes)
  - Output layer (10 nodes, one for each digit 0~9)
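With these sizes, the two Linear layers hold 784*500 + 500 = 392,500 and 500*10 + 10 = 5,010 weights and biases, 397,510 trainable parameters in total; an optional sketch to verify this from the model object:

# Count the trainable parameters layer by layer and in total
for name, p in model.named_parameters():
    print(name, tuple(p.shape), p.numel())
print('total:', sum(p.numel() for p in model.parameters() if p.requires_grad))   # 397510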
Define the loss function and optimizer
learning_rate = 0.01
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
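Note that torch.nn.CrossEntropyLoss applies LogSoftmax and NLLLoss internally, which is why forward() returns raw logits without a softmax. A small sketch on a dummy batch (not part of the original script) illustrating the equivalence:

# CrossEntropyLoss(logits, labels) == NLLLoss(log_softmax(logits), labels)
dummy_logits = torch.randn(4, num_classes)
dummy_labels = torch.tensor([0, 3, 7, 9])
ce = torch.nn.CrossEntropyLoss()(dummy_logits, dummy_labels)
nll = torch.nn.NLLLoss()(torch.nn.functional.log_softmax(dummy_logits, dim=1), dummy_labels)
print(ce.item(), nll.item())   # the two values match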
Model training
Training steps:
- Run the forward pass through the model class to get the predictions
- Pass the predictions and the true labels to the loss function to compute the loss
- Run backpropagation to obtain the gradients
- Update the model weights with gradient descent
- Clear the gradients so they do not accumulate into the next step
- Loop until the loss drops to the level we want (improving the model accuracy)
num_epochs = 10
total_steps = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Flatten the images to [batch_size, 784] and move the data to the device
        images = images.reshape(-1, 28 * 28).to(device)
        labels = labels.to(device)
        # Forward pass and loss
        outputs = model(images)
        loss_val = criterion(outputs, labels)
        # Backward pass and parameter update
        optimizer.zero_grad()
        loss_val.backward()
        optimizer.step()
        if i % 100 == 0:
            print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i}/{total_steps}], Loss: {loss_val.item():.4f}')
print('Training complete')
torch.save(model.state_dict(), './last.pt')
- The loss value is clearly converging
- A .pt model file has been generated
Draw a few samples from the test set and inspect the predictions
examples = iter(test_loader)
example_data, example_targets = next(examples)
for i in range(3):
    plt.subplot(1, 3, i + 1)
    plt.imshow(example_data[i][0], cmap='gray')
plt.show()
images = example_data
images = images.reshape(-1, 28 * 28).to(device)
outputs = model(images)
print(f'Ground truth: {example_targets[0:3].detach().numpy()}')
print(f'Predictions:  {np.argmax(outputs[0:3].cpu().detach().numpy(), axis=1)}')
Compute the model accuracy
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for images, labels in test_loader:
        images = images.reshape(-1, 28 * 28).to(device)
        labels = labels.to(device)
        outputs = model(images)
        # The index of the largest logit is the predicted class
        _, predicted = torch.max(outputs.data, 1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()
    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy: {acc}%')
Predict with your own handwritten digit
import cv2
import numpy as np
import torch
from 手写数字神经网络结构 import NeuralNet

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
input_size = 784
num_classes = 10
model = NeuralNet(input_size, 500, num_classes)
model.load_state_dict(torch.load('./last.pt', map_location=device))
model.to(device)
model.eval()
with torch.no_grad():
    # Read the image as grayscale and binarize it (white digit on black background, as in MNIST)
    images = cv2.imread('./number_four.png', cv2.IMREAD_GRAYSCALE)
    ret, thresh_img = cv2.threshold(images, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
    cv2.imshow('png1', thresh_img)
    cv2.waitKey()
    # Resize to 28x28 and scale to [0, 1] to match the ToTensor() preprocessing used in training
    my_image = cv2.resize(thresh_img, (28, 28))
    my_image = np.array(my_image, np.float32) / 255.0
    my_image = torch.from_numpy(my_image)
    my_image = my_image.reshape(-1, 28 * 28).to(device)
    outputs = model(my_image)
    pred = np.argmax(outputs.cpu().detach().numpy(), axis=1)
    print(f'Prediction: {pred[0]}')
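To see how confident the network is rather than just the argmax, the logits can be passed through a softmax; a small optional sketch, reusing the outputs tensor from above:

# Convert the logits to class probabilities and show the top prediction with its confidence
probs = torch.nn.functional.softmax(outputs, dim=1)
confidence, pred_class = torch.max(probs, dim=1)
print(f'Predicted digit {pred_class.item()} with confidence {confidence.item():.2%}')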