如何使用PyTorch定义神经网络?
首先是模型层要用到的一些基本函数
# --- Model layers demo ---
# The original snippet was not self-contained: it used torch / nn without
# importing them, so it raised NameError when run on its own.
import torch
from torch import nn

# Random batch of three 28x28 "images" used as example input.
input_image = torch.rand(3, 28, 28)
print(input_image.size())

# nn.Flatten: collapses each 28x28 image into a 784-element vector
# (the batch dimension is kept).
flatten = nn.Flatten()  # the layer must be instantiated first
flat_image = flatten(input_image)  # then called like a function
print(flat_image.size())

# nn.Linear: fully-connected layer mapping 784 input features to 20.
layer1 = nn.Linear(in_features=28 * 28, out_features=20)
hidden1 = layer1(flat_image)  # output width is the 20 configured above
print(hidden1.size())

# nn.ReLU: clamps negative activations to zero.
print(f"before relu:{hidden1}\n\n")
hidden1 = nn.ReLU()(hidden1)
print(f"after relu :{hidden1}")

# nn.Sequential runs the given modules in order -- a quick way to
# assemble a small network from the pieces above.
seq_modules = nn.Sequential(
    flatten,
    layer1,
    nn.ReLU(),
    nn.Linear(20, 10),
)
input_image = torch.rand(3, 28, 28)
logits = seq_modules(input_image)

# nn.Softmax: the last linear layer returns logits; softmax rescales them
# into [0, 1] so each row represents per-class probabilities. dim=1 is the
# dimension along which the values must sum to 1.
softmax = nn.Softmax(dim=1)
pred_probab = softmax(logits)
输出结果:
torch.Size([3, 28, 28])
torch.Size([3, 784])
torch.Size([3, 20])
before relu:tensor([[-0.1643, -0.6735, -0.1328, -0.4160, 0.0519, 0.0962, 0.5110, -0.0060,
0.4754, -0.1763, -0.1053, -0.1048, 0.3832, 0.4390, 0.2825, 0.1255,
0.0655, -0.0166, 0.2684, 0.2844],
[-0.0868, -0.3997, 0.0816, -0.1037, 0.2405, -0.4162, 0.2308, -0.1436,
0.8886, -0.2782, -0.0973, -0.1159, 0.1795, -0.0238, 0.2312, 0.2655,
0.8309, -0.2080, 0.1703, 0.6288],
[-0.3683, -0.4862, -0.1711, -0.1568, 0.2857, 0.3082, 0.1831, 0.2739,
0.6198, -0.1501, 0.2008, -0.5275, 0.4669, 0.1893, 0.1369, 0.3056,
0.7309, 0.1656, 0.0215, 0.2588]], grad_fn=<AddmmBackward>)
after relu :tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0519, 0.0962, 0.5110, 0.0000, 0.4754,
0.0000, 0.0000, 0.0000, 0.3832, 0.4390, 0.2825, 0.1255, 0.0655, 0.0000,
0.2684, 0.2844],
[0.0000, 0.0000, 0.0816, 0.0000, 0.2405, 0.0000, 0.2308, 0.0000, 0.8886,
0.0000, 0.0000, 0.0000, 0.1795, 0.0000, 0.2312, 0.2655, 0.8309, 0.0000,
0.1703, 0.6288],
[0.0000, 0.0000, 0.0000, 0.0000, 0.2857, 0.3082, 0.1831, 0.2739, 0.6198,
0.0000, 0.2008, 0.0000, 0.4669, 0.1893, 0.1369, 0.3056, 0.7309, 0.1656,
0.0215, 0.2588]], grad_fn=<ReluBackward0>)
使用torch.autograd自动计算梯度,举例:
# --- torch.autograd: automatic gradient computation ---
# Made self-contained with an explicit import (the original snippet used
# torch before any import statement appeared).
import torch

x = torch.ones(5)   # input tensor
y = torch.zeros(3)  # expected output
# requires_grad marks a tensor as needing gradients (the default is False).
w = torch.randn(5, 3, requires_grad=True)
b = torch.randn(3, requires_grad=True)  # gradients are needed for w and b
z = torch.matmul(x, w) + b
# Loss function: binary cross-entropy applied to the raw logits z.
loss = torch.nn.functional.binary_cross_entropy_with_logits(z, y)
# grad_fn records how a tensor was produced so autograd can replay the
# computation backwards (e.g. for y = x * 3 it records the multiply).
print('Gradient function for z= ', z.grad_fn)
# Fixed the "funciton" typo in the original message below.
print('Gradient function for loss = ', loss.grad_fn)
# .grad holds the gradient values populated by backward().
loss.backward()
print(w.grad)
print(b.grad)
输出结果:
Gradient function for z= <AddBackward0 object at 0x000002ABB2BB4040>
Gradient function for loss = <BinaryCrossEntropyWithLogitsBackward object at 0x000002ABB2BB49D0>
tensor([[0.2510, 0.2536, 0.3221],
[0.2510, 0.2536, 0.3221],
[0.2510, 0.2536, 0.3221],
[0.2510, 0.2536, 0.3221],
[0.2510, 0.2536, 0.3221]])
tensor([0.2510, 0.2536, 0.3221])
一个神经网络的完整实例 (使用FashionMNIST数据集)
# 一个神经网络完整实例
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda
# Select the GPU when available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using {} device'.format(device))
# Hyperparameters.
# NOTE(review): "learing_rate" is a typo for "learning_rate"; kept as-is
# because the optimizer construction below references this exact name.
learing_rate = 1e-3
batch_size = 64
# NOTE(review): "epoches" is defined here but the training driver further
# down uses its own `epoch = 10` instead -- confirm which is intended.
epoches = 5
# Training split of FashionMNIST (downloaded into ./data on first run).
training_data = datasets.FashionMNIST(
root='data',
train=True,
download=True,
transform=ToTensor()
)
# Test split of FashionMNIST.
test_data = datasets.FashionMNIST(
root='data',
train=False,
download=True,
transform=ToTensor()
)
# DataLoaders batch the datasets for iteration in the train/test loops.
train_dataloader = DataLoader(training_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)
#for X, Y in test_dataloader:
# print("shape of X [N,C,H,W]:", X.shape) # N = batch size, C = channels, H/W = height and width
# print("Shape of Y: ", Y.shape, Y.type) # Y holds the labels
# break
# 创建模型
class NeuralNetwork(nn.Module):
    """Simple MLP classifier for flattened 28x28 images (10 classes)."""

    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            # Final projection to the 10 FashionMNIST classes. The original
            # stack stopped at the 512-wide ReLU, so the "logits" had 512
            # features while CrossEntropyLoss with 10-class labels and the
            # 10-entry `classes` list below expect exactly 10.
            nn.Linear(512, 10),
        )

    def forward(self, x):
        """Return raw class logits of shape (batch, 10)."""
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
# Instantiate the model and move it to the selected device (GPU if available).
model = NeuralNetwork().to(device)
# Loss function: cross-entropy for multi-class classification.
loss_fn = nn.CrossEntropyLoss()
# Optimizer: plain SGD over all model parameters.
# NOTE(review): `learing_rate` (sic) is the hyperparameter defined above.
optimizer = torch.optim.SGD(model.parameters(), lr=learing_rate)
# 每个epoch由两部分组成
# 训练loop:在训练集上进行迭代 试图收敛到最佳参数
# 测试loop :在测试集上进行迭代,检查模型的性能是否提高
# 训练loop三步:
# 1.调用optimizer.zero_grad()重置模型参数的梯度。因为梯度模型累加,为了防止重复计算,每次迭代都要归零
# 2.调用loss.backward()对预测值进行反向传播
# 3.调用optimizer.step()根据向后传递中收集的梯度调整参数(更新优化的参数)
# 定义训练loop:负责循环优化代码
def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one epoch of training over `dataloader`.

    Args:
        dataloader: yields (X, y) batches of inputs and integer labels.
        model: the network to optimize (already moved to `device`).
        loss_fn: criterion comparing predictions to labels.
        optimizer: optimizer constructed over model.parameters().
    """
    size = len(dataloader.dataset)  # total sample count, for progress output
    # Put the model back into training mode. The original omitted this, so
    # after the first test_loop() call (which switches to eval mode) every
    # later epoch would train with eval-mode behavior for layers such as
    # dropout or batch-norm.
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        pred = model(X)          # forward pass
        loss = loss_fn(pred, y)  # error between prediction and ground truth
        # Backpropagation: clear old gradients (they accumulate by default),
        # compute fresh ones, then take an optimizer step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Report progress every 100 batches.
        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss:{loss:>7f} [{current:>5d}/{size:>5d}]")
# 定义测试loop:负责根据测试数据评估模型的性能
def test_loop(dataloader, model, loss_fn):
size = len(dataloader.dataset)
num_batches = len(dataloader)
model.eval() # 将模型转换为验证模式
test_loss, corrent = 0, 0
with torch.no_grad(): # 测试时参数不用更新,只需要正向传播预测结果,这个函数范围内的数停止计算梯度
for X, y in dataloader:
X, y = X.to(device), y.to(device)
pred = model(X)
test_loss += loss_fn(pred, y).item()
corrent += (pred.argmax(1) == y).type(torch.float).sum().item() # 统计正确的个数
test_loss /= num_batches
corrent /= size
print(f"Test ERROR :\n Accuracy:{(100 * corrent):>0.1f}%,Avg loss:{test_loss:>8f}\n")
# Train for a fixed number of epochs, evaluating on the test set after each.
# NOTE(review): this uses its own `epoch = 10`, not the `epoches = 5`
# hyperparameter defined earlier -- confirm which value is intended.
epoch = 10
for t in range(epoch):
    print(f"Epoch {t + 1}\n--------------------")
    train_loop(train_dataloader,model,loss_fn,optimizer)
    test_loop(test_dataloader,model,loss_fn)
print("Done")
# Save the model: PyTorch keeps all learned parameters in the internal
# state_dict dictionary.
torch.save(model.state_dict(),"model.pth")
print("Save Pytorch Model state to model.pth")
# Load the model: build a network with the same architecture, then
# torch.load reads model.pth and load_state_dict restores the parameters.
model = NeuralNetwork() # create a model with the same structure
model.load_state_dict(torch.load("model.pth"))
# Run a single prediction and map the result to a human-readable label.
# FashionMNIST class names indexed by label id. Fixed two label typos from
# the original ("Pollover" -> "Pullover", "coat" -> "Coat") to match the
# canonical FashionMNIST class names.
classes = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]
model.eval()  # evaluation mode: forward passes only, no gradient updates
# Take one (image, label) pair from the test set for the demo.
x, y = test_data[0][0], test_data[0][1]
with torch.no_grad():
    pred = model(x)
    print(pred.size())
    # argmax over the logits gives the predicted class id.
    predicted, actual = classes[pred[0].argmax(0)], classes[y]
    print(f'Predicted: "{predicted}", Actual: "{actual}"')
|