import pickle
import gzip
# Load the gzip-compressed pickle found at PATH / FILENAME. The payload
# unpacks into two (inputs, labels) pairs -- training and validation -- plus a
# third element that is discarded here.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only use it on an archive from a trusted source.
# NOTE(review): encoding="latin-1" is presumably needed because the pickle was
# written by Python 2 -- confirm against the data source.
with gzip.open((PATH / FILENAME).as_posix(), mode="rb") as f:
    (x_train, y_train), (x_valid, y_valid), _ = pickle.load(
        f, encoding="latin-1"
    )

# Notebook-style inspection of the training inputs' shape (no effect in a script).
x_train.shape
(50000, 784)
784 是 MNIST 数据集中每个样本的像素点个数(28 × 28 = 784)。
注意:数据需要先转换成 tensor,才能参与后续的建模和训练。
import torch
# Convert every loaded array into a torch tensor so it can be fed to the
# models and loss functions used later on.
x_train, y_train, x_valid, y_valid = (
    torch.tensor(array) for array in (x_train, y_train, x_valid, y_valid)
)

# n = number of training samples, c = number of features per sample.
n, c = x_train.shape

# Notebook-style inspection of the converted data (no effect in a script).
x_train, x_train.shape, y_train.min(), y_train.max()

print(x_train, y_train)
print(x_train.shape)
print(y_train.min(), y_train.max())
torch.nn.functional:很多层和函数都可以在这里找到
torch.nn.functional 中有很多功能,后续会经常用到。那么什么时候使用 nn.Module,什么时候使用 nn.functional 呢?一般情况下,如果模型含有可学习的参数,最好使用 nn.Module;其他情况下使用 nn.functional 相对更简单一些。
import torch.nn.functional as F
# Cross-entropy loss from torch.nn.functional; used both for the hand-written
# linear model check and for the fit() training run further down.
loss_func = F.cross_entropy
def model(xb):
    """Apply the hand-written linear layer to a batch.

    Multiplies ``xb`` by the module-level ``weights`` matrix and adds the
    module-level ``bias`` vector, returning the resulting tensor.
    """
    projected = xb.mm(weights)
    return projected + bias
# Batch size; also reused later when the DataLoaders are built.
# (The original cell assigned `bs = 64` twice; the redundant copy is removed.)
bs = 64

# Parameters of the hand-written linear model: a 784 x 10 weight matrix drawn
# from a standard normal distribution and a zero bias of length 10. Both track
# gradients so autograd can differentiate the loss with respect to them.
weights = torch.randn([784, 10], dtype=torch.float, requires_grad=True)
bias = torch.zeros(10, requires_grad=True)

# First mini-batch of the training data.
xb = x_train[0:bs]
yb = y_train[0:bs]

# Loss of the untrained linear model on that batch.
print(loss_func(model(xb), yb))
创建一个 model 来进一步简化代码
1. 必须继承 nn.Module,并且在其构造函数中调用 nn.Module 的构造函数;2. 无需编写反向传播函数,nn.Module 能够利用 autograd 自动实现反向传播;3. Module 中的可学习参数可以通过 named_parameters() 或者 parameters() 返回的迭代器获取。
from torch import nn
class Mnist_NN(nn.Module):
    """Fully connected classifier with two hidden ReLU layers and dropout.

    With the default arguments the layout is 784 -> 128 -> 256 -> 10 with a
    dropout probability of 0.5, i.e. exactly the previously hard-coded
    network, so ``Mnist_NN()`` keeps working unchanged.

    Args:
        in_features: Number of features in each (flattened) input sample.
        hidden1_size: Width of the first hidden layer.
        hidden2_size: Width of the second hidden layer.
        num_classes: Number of output logits.
        dropout_p: Drop probability of the dropout applied after each hidden
            activation.
    """

    def __init__(self, in_features=784, hidden1_size=128, hidden2_size=256,
                 num_classes=10, dropout_p=0.5):
        super().__init__()
        # Registration order is kept (hidden1, hidden2, out, dropout) so the
        # printed summary and the parameter ordering match the original.
        self.hidden1 = nn.Linear(in_features, hidden1_size)
        self.hidden2 = nn.Linear(hidden1_size, hidden2_size)
        self.out = nn.Linear(hidden2_size, num_classes)
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, x):
        """Return the raw output-layer values (logits) for batch ``x``.

        No softmax is applied here; the values are returned straight from the
        final linear layer.
        """
        x = F.relu(self.hidden1(x))
        x = self.dropout(x)
        x = F.relu(self.hidden2(x))
        x = self.dropout(x)
        x = self.out(x)
        return x
# Instantiate the network and print its layer summary.
net = Mnist_NN()
print(net)
Mnist_NN( (hidden1): Linear(in_features=784, out_features=128, bias=True) (hidden2): Linear(in_features=128, out_features=256, bias=True) (out): Linear(in_features=256, out_features=10, bias=True) (dropout): Dropout(p=0.5, inplace=False) )
可以按名称打印模型中各层的权重和偏置项
# Print every learnable parameter of the network: its name, its current
# values, and its size.
for name, parameter in net.named_parameters():
    print(name, parameter, parameter.shape)
使用 TensorDataset 和 DataLoader 来简化数据的批量读取
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
# Pair inputs with labels so each dataset item is an (x, y) tuple.
train_ds = TensorDataset(x_train, y_train)
valid_ds = TensorDataset(x_valid, y_valid)

# Training batches are reshuffled on every pass; validation batches keep their
# order and use twice the training batch size.
train_dl = DataLoader(train_ds, batch_size=bs, shuffle=True)
valid_dl = DataLoader(valid_ds, batch_size=2 * bs)
def get_data(train_ds, valid_ds, bs):
    """Wrap the two datasets in DataLoaders.

    Args:
        train_ds: Dataset used for training.
        valid_ds: Dataset used for validation.
        bs: Training batch size; the validation loader uses ``bs * 2``.

    Returns:
        A ``(train_loader, valid_loader)`` tuple. Only the training loader
        shuffles its data.
    """
    train_loader = DataLoader(train_ds, batch_size=bs, shuffle=True)
    valid_loader = DataLoader(valid_ds, batch_size=bs * 2)
    return train_loader, valid_loader
一般在训练模型时调用 model.train(),此时 Batch Normalization 使用当前批次的统计量并更新滑动平均,Dropout 正常随机失活;测试时一般调用 model.eval(),此时 Batch Normalization 改用训练阶段累积的滑动统计量,Dropout 被关闭。
import numpy as np
def fit(steps, model, loss_func, opt, train_dl, valid_dl):
    """Train ``model`` and report the validation loss after every pass.

    Args:
        steps: Number of full passes over ``train_dl``.
        model: Module to train; switched between train and eval mode here.
        loss_func: Callable computing the loss from predictions and targets.
        opt: Optimizer handed to ``loss_batch`` for the training batches.
        train_dl: Iterable of ``(xb, yb)`` training batches.
        valid_dl: Iterable of ``(xb, yb)`` validation batches.

    Returns:
        None. The validation loss of each pass is printed.
    """
    for epoch in range(steps):
        # Training phase: one optimizer update per batch.
        model.train()
        for xb, yb in train_dl:
            loss_batch(model, loss_func, xb, yb, opt)

        # Validation phase: no optimizer is passed and gradients are off.
        model.eval()
        with torch.no_grad():
            batch_results = [
                loss_batch(model, loss_func, xb, yb) for xb, yb in valid_dl
            ]

        # Split the (loss, batch_size) pairs into two parallel tuples and
        # average the losses weighted by batch size, so a smaller final batch
        # is not over-represented.
        losses, nums = zip(*batch_results)
        weighted_losses = np.multiply(losses, nums)
        val_loss = np.sum(weighted_losses) / np.sum(nums)
        print('当前step:' + str(epoch), '验证集损失:' + str(val_loss))
# Small demonstration of zip() and of undoing it with zip(*...), the idiom
# fit() uses to split (loss, batch_size) pairs into two tuples.
a = [1, 2, 3]
b = [4, 5, 6]

# zip() yields pairs lazily; materialising it for printing exhausts the iterator.
zipped = zip(a, b)
print([*zipped])

# Unpacking the pairs back into zip() transposes them into the original columns.
a2, b2 = zip(*zip(a, b))
print(a2, b2, sep="\n")
from torch import optim
def get_model():
    """Create a fresh network together with its optimizer.

    Returns:
        A ``(model, optimizer)`` tuple: a new ``Mnist_NN`` instance and an
        Adam optimizer over its parameters with a learning rate of 0.001.
    """
    network = Mnist_NN()
    optimizer = optim.Adam(network.parameters(), lr=0.001)
    return network, optimizer
def loss_batch(model, loss_func, xb, yb, opt=None):
    """Compute the loss for one batch and optionally take an optimizer step.

    Args:
        model: Callable mapping the input batch to predictions.
        loss_func: Callable computing the loss from predictions and targets.
        xb: Input batch.
        yb: Target batch.
        opt: Optional optimizer. When given, the loss is back-propagated, one
            update step is taken, and the gradients are cleared afterwards.

    Returns:
        A ``(loss_value, batch_size)`` tuple, where ``loss_value`` is the
        Python number from ``loss.item()`` and ``batch_size`` is ``len(xb)``.
    """
    predictions = model(xb)
    loss = loss_func(predictions, yb)
    batch_size = len(xb)

    # Evaluation-only call: nothing to update.
    if opt is None:
        return loss.item(), batch_size

    loss.backward()
    opt.step()
    opt.zero_grad()
    return loss.item(), batch_size
# Build the loaders, create the network and its optimizer, then train for 20
# passes over the training data, printing the validation loss after each one.
# NOTE: this rebinds the module-level name `model`, replacing the hand-written
# linear function defined earlier in the file.
train_dl, valid_dl = get_data(train_ds, valid_ds, bs)
model, opt = get_model()
fit(20, model, loss_func, opt, train_dl, valid_dl)
当前step:0 验证集损失:0.18639104866981507 当前step:1 验证集损失:0.1372520131058991 当前step:2 验证集损失:0.12028736076653004 当前step:3 验证集损失:0.10732126496359706 当前step:4 验证集损失:0.10093651054650545 当前step:5 验证集损失:0.09517242526896298 当前step:6 验证集损失:0.09194612504523247 当前step:7 验证集损失:0.08943103497959673 当前step:8 验证集损失:0.0877913200291805 当前step:9 验证集损失:0.08958465236043557 当前step:10 验证集损失:0.08797709809066728 当前step:11 验证集损失:0.08352214076635428 当前step:12 验证集损失:0.0866958644344937 当前step:13 验证集损失:0.08074293819144368 当前step:14 验证集损失:0.08045687620015815 当前step:15 验证集损失:0.08040115665267222 当前step:16 验证集损失:0.07971061569196172 当前step:17 验证集损失:0.08116058965921402 当前step:18 验证集损失:0.0811522187425755 当前step:19 验证集损失:0.0807436868159566
# Measure the classification accuracy of the trained network on valid_dl.
correct = 0
total = 0

# Put the model in eval mode explicitly so dropout is disabled no matter what
# ran before this cell, and turn off autograd since no gradients are needed
# for inference.
model.eval()
with torch.no_grad():
    for xb, yb in valid_dl:
        outputs = model(xb)
        # The predicted class is the index of the largest output per sample.
        predicted = outputs.argmax(dim=1)
        total += yb.size(0)
        correct += (predicted == yb).sum().item()

# NOTE(review): the message says "10000 test images" but the loop iterates the
# validation loader -- confirm the wording and the hard-coded count.
print('Accuracy of the network on the 10000 test images:%d %%' % (
    100 * correct / total))
Accuracy of the network on the 10000 test images:97 %
|