项目一 手写数字识别
做这个小项目的目的是熟悉paddle的基本框架、如何导入数据,以及其中所涉及到的paddle API。
1、模型架构
Linear层的导入
from paddle.nn import Linear
class Regressor(paddle.nn.Layer):
    """Minimal model made of a single fully connected layer.

    The layer is built with 13 input features and 1 output feature.
    """

    def __init__(self):
        super().__init__()
        # One Linear layer: 13 input units -> 1 output unit.
        self.fc = Linear(in_features=13, out_features=1)

    def forward(self, inputs):
        """Return the output of the fully connected layer for ``inputs``."""
        return self.fc(inputs)
Linear层的说明:
in_features (int): The number of input units.
out_features (int): The number of output units.
# Define the linear layer.
weight_attr = paddle.ParamAttr(
    name="weight",
    initializer=paddle.nn.initializer.Constant(value=0.5))
bias_attr = paddle.ParamAttr(
    name="bias",
    initializer=paddle.nn.initializer.Constant(value=1.0))
linear = paddle.nn.Linear(2, 4, weight_attr=weight_attr, bias_attr=bias_attr)
# linear.weight: [[0.5 0.5 0.5 0.5]
#                 [0.5 0.5 0.5 0.5]]
# linear.bias: [1. 1. 1. 1.]
2、数据导入
dataloader
# Select the 'cv2' image backend before the dataset is constructed.
paddle.vision.set_image_backend('cv2')
# Wrap the MNIST training split in a loader that yields shuffled batches of 16.
train_loader = paddle.io.DataLoader(
    dataset=paddle.vision.datasets.MNIST(mode='train'),
    batch_size=16,
    shuffle=True,
)
其中,该loader所读取的数据集可以返回PIL等数据类型的图像(具体类型由上面设置的图像后端决定)。
3、训练策略
优化器
# SGD optimizer with learning rate 1e-3, updating every parameter returned by
# model.parameters().
opt=paddle.optimizer.SGD(learning_rate=1e-3,parameters=model.parameters())
4、损失函数
import paddle.nn.functional as F
# Squared-error cost between `predict` and `label`; the project code further
# down reduces it to a scalar with paddle.mean.
loss=F.square_error_cost(predict,label)
5、模型权重保存与加载
# Save: write the model's parameter dictionary to disk.
paddle.save(model.state_dict(), 'xxx.pdparams')

# Load: read the parameter dictionary back and copy it into a new model.
params_file_path = 'xxx.pdparams'
# The name must match the one passed to load_dict below (the original snippet
# assigned `param_dict` but then read `params_dict`, which raises NameError).
params_dict = paddle.load(params_file_path)
model = MODEL()  # MODEL is a placeholder for your own paddle.nn.Layer subclass
model.load_dict(params_dict)
6、其余边边角角
(1)to tensor
# Convert `tensor_img` into a paddle Tensor.
tensor_result=paddle.to_tensor(tensor_img)
(2)reshape
# Flatten each image: [batch_size, img_h, img_w] -> [batch_size, img_h * img_w].
# Uses the lowercase name `img_w`, matching the project code further down.
img = paddle.reshape(img, [batch_size, img_h * img_w])
(3) 求平均
# Reduce `loss` to its mean value.
avg_loss=paddle.mean(loss)
最后,综合上述所有的API,我们搭建一套简易版的项目代码
import paddle
import paddle.nn.functional as F
import os
import numpy as np
def norm_img(img):
    """Scale a batch of images by 1/255 and flatten each image to one row.

    Args:
        img: A 3-D batch; its three axes are read as
            (batch_size, img_h, img_w).
            NOTE(review): presumably raw pixel values in [0, 255] -- confirm.

    Returns:
        The scaled batch reshaped to [batch_size, img_h * img_w].

    Raises:
        AssertionError: If ``img`` does not have exactly three dimensions.
    """
    assert len(img.shape) == 3
    batch_size, img_h, img_w = img.shape[:3]
    scaled = img / 255
    flat_shape = [batch_size, img_h * img_w]
    return paddle.reshape(scaled, flat_shape)
class MNIST_module(paddle.nn.Layer):
    """Single-layer regressor mapping a flattened 784-value image to 1 output."""

    def __init__(self):
        super(MNIST_module, self).__init__()
        # 784 input features -> 1 output value.
        self.fc = paddle.nn.Linear(in_features=784, out_features=1)

    def forward(self, inputs):
        """Return the fully connected layer's output for ``inputs``."""
        # The original assigned to a misspelled name (`outptus`) and then
        # returned `outputs`, which raised NameError on every forward pass.
        outputs = self.fc(inputs)
        return outputs
def train(model, epoch_num=10, batch_size=16, learning_rate=1e-3):
    """Train ``model`` on the MNIST training split with SGD.

    Each batch is normalised with ``norm_img``, scored with squared-error
    cost averaged over the batch, and used for one optimizer step. The
    average loss is printed every 1000 batches.

    Args:
        model: The network to train; it is called on the flattened images
            and its parameters are updated in place.
        epoch_num: Number of passes over the training set (default 10).
        batch_size: Samples per batch (default 16).
        learning_rate: SGD learning rate (default 1e-3).
    """
    model.train()
    paddle.vision.set_image_backend('cv2')
    train_loader = paddle.io.DataLoader(
        paddle.vision.datasets.MNIST(mode='train'),
        batch_size=batch_size,
        shuffle=True,
    )
    opt = paddle.optimizer.SGD(
        learning_rate=learning_rate,
        parameters=model.parameters(),
    )
    for epoch in range(epoch_num):
        for batch_id, data in enumerate(train_loader):
            # Each batch is an (images, labels) pair. Only the images go
            # through norm_img; the original passed the whole pair, which
            # fails as soon as norm_img reads `.shape`.
            # NOTE(review): assumes the cv2 backend yields image batches of
            # shape [batch, 28, 28], as norm_img's 3-D check requires.
            images = norm_img(data[0]).astype('float32')
            labels = data[1].astype('float32')

            predicts = model(images)
            loss = F.square_error_cost(predicts, labels)
            avg_loss = paddle.mean(loss)

            if batch_id % 1000 == 0:
                print("epoch id:{},batch_id:{} loss is {}".format(epoch, batch_id, avg_loss.numpy()))

            # Backpropagate, apply one SGD update, then reset the gradients
            # for the next batch.
            avg_loss.backward()
            opt.step()
            opt.clear_grad()
# Build the model, run the training loop, then write the trained parameters
# to 'mnist.pdparams'.
model=MNIST_module()
train(model)
paddle.save(model.state_dict(),'mnist.pdparams')
测试
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import paddle
def load_image(img_path):
    """Load an image file as a normalised, flattened 28x28 grayscale row.

    Args:
        img_path: Path of the image file to read.

    Returns:
        A float32 numpy array of shape (1, 784). Pixel values are scaled by
        1/255 and inverted (``1 - x / 255``).
    """
    # Close the file handle once the grayscale copy has been made.
    with Image.open(img_path) as src:
        im = src.convert('L')
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
    # under its current name.
    im = im.resize((28, 28), Image.LANCZOS)
    im = np.array(im).reshape(1, -1).astype(np.float32)
    # Divide by 255 (not 256) so the scale matches norm_img used in training.
    # NOTE(review): the inversion presumably maps a dark-digit-on-light-
    # background picture onto MNIST's polarity -- confirm with the test image.
    im = 1 - im / 255
    return im
# Read and preprocess the image to classify.
img_path = './1.png'
tensor_img = load_image(img_path)

# Build a fresh model and restore the saved weights, so this script does not
# depend on a `model` object left over from the training session.
params_file_path = 'mnist.pdparams'
params_dict = paddle.load(params_file_path)
model = MNIST_module()
model.load_dict(params_dict)

# Switch to evaluation mode and run one forward pass.
model.eval()
result = model(paddle.to_tensor(tensor_img))
# astype('int32') truncates the regression output toward zero (no rounding).
print("本次预测的数字是", result.numpy().astype('int32'))
当然上述的模型的智力水平和出生婴儿差不多,后续将通过不断学习,提高模型智商
|