一.项目说明
①数据集
    ModelNet总共有662种目标分类,127915个CAD模型,以及十类标记过方向朝向的数据。其中包含了三个子集:
    1、ModelNet10:十个标记朝向的子集数据;
    2、ModelNet40:40个类别的三维模型;
    3、Aligned40:40类标记的三维模型。
    这里使用了ModelNet40,并且归一化了。文件中的数据的意义:
    1、横轴有六个数字,分别代表:x, y, z, r, g, b;
    2、纵轴为点,每份数据一共有10000个点,项目中每份数据抽取其中1024个点进行训练。
!unzip data/data50045/modelnet40_normal_resampled.zip
!mv modelnet40_normal_resampled dataset  # 解压后重命名为dataset(注意:shell注释要用"#","//"会被当作mv的多余参数导致报错)
②PointNet简介
    一、PointNet是斯坦福大学研究人员提出的一个点云处理网络,与先前工作的不同在于这一网络可以直接输入无序点云进行处理,而无需将数据处理成规则的3D voxel形式进行处理。输入点云顺序对于网络的输出结果没有影响,同时也可以处理旋转平移后的点云数据。
????二、几个重要的知识点:
????1、将点云体素化会改变点云数据的原始特征,造成不必要的数据损失,并且额外增加了工作量,而 PointNet 采用了原始点云的输入方式,最大限度地保留了点云的空间特征,并在最终的测试中取得了很好的效果。
????2、基本思想:对输入点云中的每一个点学习其对应的空间编码,之后再利用所有点的特征,得到一个全局的点云特征。
????3、第一次input transform(T-Net)是对空间中点云进行调整,直观上理解是旋转出一个有利于分类或分割的角度(点云的刚性变化),第二次feature transform(T-Net)是对提取出的特征进行变换,类似点的刚性变化,想利用这个得到一个有利于分类的特征角度。
????4、分类任务的完成:点云生成的1024维特征通过最后一个MLP来进行学习,其中k是最后一层的输出数量,代表分类的类别,以此计算与labels的交叉熵损失,从而完成分类任务。
    三、网络结构:(见下图)
二.项目主体
①导入需要的库
import os
import numpy as np
import random #生成随机数
import paddle
import paddle.nn as nn #paddle.nn 目录下包含飞桨框架支持的神经网络层和相关函数的相关API
import paddle.nn.functional as F #函数相关包
②数据处理
1、类别
# Integer label for each of the ten ModelNet10 shape classes.
# Labels are assigned in the listed (alphabetical) order: bathtub -> 0 ... toilet -> 9.
category = {
    name: label
    for label, name in enumerate([
        'bathtub', 'bed', 'chair', 'desk', 'dresser',
        'monitor', 'night_stand', 'sofa', 'table', 'toilet',
    ])
}
2、生成训练和测试样本的list
def getDatalist(file_path='./dataset/modelnet10_shape_names.txt', base_dir='./dataset/'):
    """Split the dataset into train/test sample lists.

    Reads one category (sub-directory) name per line from *file_path*, walks
    the matching sub-directory of *base_dir*, and writes lines of the form
    "<sample-path> <category>" into ``train.txt`` / ``test.txt`` under
    *base_dir*.  Every 60th sample of a category (count % 60 == 0) is held
    out for testing; the rest go to training.

    Args:
        file_path: text file listing one category name per line.
        base_dir: dataset root containing the category sub-directories and
            receiving the generated lists (new parameter; defaults to the
            original hard-coded './dataset/').
    """
    # 'w' truncates existing files, so the lists are rebuilt from scratch.
    # Context managers guarantee the handles are closed even on error
    # (the original closed them manually and would leak on an exception).
    with open(file_path, 'r') as names_file, \
            open(os.path.join(base_dir, 'train.txt'), 'w') as f_train, \
            open(os.path.join(base_dir, 'test.txt'), 'w') as f_test:
        for line in names_file:
            # Renamed from `category` — the original loop variable shadowed
            # the module-level class-name -> label dict of the same name.
            category_name = line.rstrip('\n')
            if not category_name:
                continue  # tolerate a trailing blank line in the names file
            dict_path = os.path.join(base_dir, category_name)
            for count, data_path in enumerate(os.listdir(dict_path)):
                target = f_test if count % 60 == 0 else f_train
                # Always terminate with '\n' (the original relied on the
                # newline still attached to the category line, which is
                # missing for the file's last line).
                target.write(os.path.join(dict_path, data_path) + ' ' + category_name + '\n')


if __name__ == '__main__':  # only runs when executed as a script
    getDatalist()
3、数据读取
def pointDataLoader(file_path='./dataset/train.txt', mode='train', label_map=None):
    """Load point-cloud samples and return a batch-generator factory.

    Each line of *file_path* is "<sample-path> <class-name>"; each sample
    file has one comma-separated point per line, of which only the first
    three fields (x, y, z) are kept and only the first 1024 points are used.

    Args:
        file_path: list file produced by ``getDatalist``.
        mode: 'train' shuffles the sample order on every pass; anything else
            keeps file order.
        label_map: class-name -> int mapping (new parameter; defaults to the
            module-level ``category`` dict, preserving original behaviour).

    Returns:
        A zero-argument generator function yielding
        (data, labels) numpy batches of shapes (B, 3, 1024, 1) / (B, 1).
    """
    BATCHSIZE = 256
    MAX_POINT = 1024
    if label_map is None:
        label_map = category  # module-level ModelNet10 label dict
    datas = []
    labels = []
    with open(file_path) as f:
        for data_list in f:
            # Split "<path> <class>" once instead of re-splitting per field.
            data_path, _, class_name = data_list.rstrip('\n').partition(' ')
            point_data = []
            # The original opened each sample file without ever closing it,
            # leaking one file handle per sample.
            with open(data_path) as data_file:
                for points in data_file:
                    if len(point_data) == MAX_POINT:
                        break
                    x, y, z = points.split(',')[:3]  # drop r/g/b (or normal) fields
                    point_data.append([float(x), float(y), float(z)])
            datas.append(point_data)
            labels.append(label_map[class_name])
    datas = np.array(datas)    # (num_samples, MAX_POINT, 3)
    labels = np.array(labels)  # (num_samples,)
    index_list = list(range(len(datas)))

    def pointDataGenerator():
        if mode == 'train':
            random.shuffle(index_list)  # fresh order on every epoch
        datas_list = []
        labels_list = []
        # Debug prints removed: the original printed every sample's shape,
        # i.e. thousands of lines per epoch.
        for i in index_list:
            # (MAX_POINT, 3) -> (3, MAX_POINT, 1): channels-first plus a
            # trailing singleton axis so Conv2D (1,1) kernels act per point.
            data = np.expand_dims(datas[i].T, axis=-1).astype('float32')
            label = np.reshape(labels[i], [1]).astype('int64')
            datas_list.append(data)
            labels_list.append(label)
            if len(datas_list) == BATCHSIZE:
                yield np.array(datas_list), np.array(labels_list)
                datas_list = []
                labels_list = []
        if datas_list:  # emit the final partial batch
            yield np.array(datas_list), np.array(labels_list)
    return pointDataGenerator
补充注释:
1.np.expand_dims()的作用是通过在指定位置插入新的轴来扩展数组形状。.T 是NumPy中实现矩阵转置的属性。
③定义网络
1、定义网络
class PointNet(paddle.nn.Layer):
    """PointNet point-cloud classifier (Qi et al., CVPR 2017).

    Input:  tensor of shape (batch, 3, num_point, 1) — xyz channels first.
    Output: (batch, num_classes) log-probabilities (LogSoftmax), intended to
            be paired with ``F.nll_loss`` during training.
    """

    def __init__(self, name_scope='PointNet_', num_classes=10, num_point=1024):
        super(PointNet, self).__init__()
        # T-Net #1: predicts a 3x3 transform that aligns the raw input cloud.
        # Conv2D with (1,1) kernels acts as a per-point shared MLP; the final
        # MaxPool2D over (num_point, 1) is a global max over all points.
        self.input_transform_net = nn.Sequential(
            nn.Conv2D(3, 64, (1, 1)),
            nn.BatchNorm(64),
            nn.ReLU(),
            nn.Conv2D(64, 128, (1, 1)),
            nn.BatchNorm(128),
            nn.ReLU(),
            nn.Conv2D(128, 1024, (1, 1)),
            nn.BatchNorm(1024),
            nn.ReLU(),
            nn.MaxPool2D((num_point, 1))
        )
        # Regresses the flattened 3x3 matrix. Zero-initialised weights plus an
        # identity (eye(3)) bias make the initial transform exactly identity,
        # so training starts from "no rotation".
        self.input_fc = nn.Sequential(
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 9,
            weight_attr=paddle.framework.ParamAttr(initializer=paddle.nn.initializer.Assign(paddle.zeros((256, 9)))),
            bias_attr=paddle.framework.ParamAttr(initializer=paddle.nn.initializer.Assign(paddle.reshape(paddle.eye(3), [-1])))
            )
        )
        # Shared per-point MLP lifting xyz (3) to 64-D point features.
        self.mlp_1 = nn.Sequential(
            nn.Conv2D(3, 64, (1, 1)),
            nn.BatchNorm(64),
            nn.ReLU(),
            nn.Conv2D(64, 64,(1, 1)),
            nn.BatchNorm(64),
            nn.ReLU(),
        )
        # T-Net #2: predicts a 64x64 transform for the feature space.
        # NOTE(review): unlike input_fc, feature_fc below has no identity
        # initialisation (and no orthogonality regulariser as in the paper) —
        # confirm this is intended.
        self.feature_transform_net = nn.Sequential(
            nn.Conv2D(64, 64, (1, 1)),
            nn.BatchNorm(64),
            nn.ReLU(),
            nn.Conv2D(64, 128, (1, 1)),
            nn.BatchNorm(128),
            nn.ReLU(),
            nn.Conv2D(128, 1024, (1, 1)),
            nn.BatchNorm(1024),
            nn.ReLU(),
            nn.MaxPool2D((num_point, 1))
        )
        # Regresses the flattened 64x64 feature-space transform.
        self.feature_fc = nn.Sequential(
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 64*64)
        )
        # Shared MLP expanding per-point features 64 -> 1024 before pooling.
        self.mlp_2 = nn.Sequential(
            nn.Conv2D(64, 64, (1, 1)),
            nn.BatchNorm(64),
            nn.ReLU(),
            nn.Conv2D(64, 128,(1, 1)),
            nn.BatchNorm(128),
            nn.ReLU(),
            nn.Conv2D(128, 1024,(1, 1)),
            nn.BatchNorm(1024),
            nn.ReLU(),
        )
        # Classification head on the 1024-D global feature; LogSoftmax output
        # matches the nll_loss used in training.
        self.fc = nn.Sequential(
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(p=0.7),
            nn.Linear(256, num_classes),
            nn.LogSoftmax(axis=-1)
        )

    def forward(self, inputs):
        """Classify a batch of point clouds.

        Args:
            inputs: (B, 3, num_point, 1) float tensor.

        Returns:
            (B, num_classes) log-probabilities.
        """
        batchsize = inputs.shape[0]
        # --- input transform: align the raw cloud with a learned 3x3 matrix ---
        t_net = self.input_transform_net(inputs)           # (B, 1024, 1, 1)
        t_net = paddle.squeeze(t_net, axis=[-2, -1])       # (B, 1024)
        t_net = self.input_fc(t_net)                       # (B, 9)
        t_net = paddle.reshape(t_net, [batchsize, 3, 3])   # (B, 3, 3)
        x = paddle.squeeze(inputs, axis=-1)                # (B, 3, num_point)
        x = paddle.transpose(x, (0, 2, 1))                 # (B, num_point, 3)
        x = paddle.matmul(x, t_net)                        # apply 3x3 transform per point
        x = paddle.transpose(x, (0, 2, 1))                 # back to channels-first
        x = paddle.unsqueeze(x, axis=-1)                   # (B, 3, num_point, 1)
        x = self.mlp_1(x)                                  # (B, 64, num_point, 1)
        # --- feature transform: align the 64-D per-point features ---
        t_net = self.feature_transform_net(x)              # (B, 1024, 1, 1)
        t_net = paddle.squeeze(t_net, axis=[-2, -1])       # (B, 1024)
        t_net = self.feature_fc(t_net)                     # (B, 64*64)
        t_net = paddle.reshape(t_net, [batchsize, 64, 64]) # (B, 64, 64)
        x = paddle.squeeze(x, axis=-1)                     # (B, 64, num_point)
        x = paddle.transpose(x, (0, 2, 1))                 # (B, num_point, 64)
        x = paddle.matmul(x, t_net)                        # apply 64x64 transform
        x = paddle.transpose(x, (0, 2, 1))                 # (B, 64, num_point)
        x = paddle.unsqueeze(x, axis=-1)                   # (B, 64, num_point, 1)
        x = self.mlp_2(x)                                  # (B, 1024, num_point, 1)
        x = paddle.max(x, axis=2)                          # global max over points -> (B, 1024, 1)
        x = paddle.squeeze(x, axis=-1)                     # (B, 1024) global feature
        x = self.fc(x)                                     # (B, num_classes) log-probs
        return x
补充注释:
1.paddle.nn.Conv2D(in_channels, out_channels, kernel_size, ...)
2.paddle.nn.MaxPool2D(kernel_size, stride=None, ...)
3.paddle.squeeze(x, axis, name=None):该OP会根据axis压缩输入Tensor的维度。如果指定了axis,则删除axis中指定的、大小为1的维度;如果没有指定axis,那么所有大小为1的维度都会被删除。
4.paddle.matmul:支持两个tensor的矩阵乘操作。
5.paddle.unsqueeze(x, axis, name=None):该OP向输入(x)的shape中一个或多个位置(axis)插入维度。
6.paddle.max(x, axis=None, keepdim=False, name=None):对指定维度上的Tensor元素求最大值运算,并输出相应的计算结果。
7.LogSoftmax其实就是对softmax的结果取log,即log(softmax(x))。
2、模型结构可视化
# Instantiate the network and print a layer-by-layer summary for an input of
# shape (batch=64, channels=3, points=1024, 1).
pointnet = PointNet()
paddle.summary(pointnet, (64, 3, 1024, 1))
⑤训练
def train():
    """Train PointNet on the generated train list and checkpoint to ./model/.

    Uses Adam with L2 weight decay and nll_loss, which pairs with the model's
    LogSoftmax output to give a cross-entropy objective.
    """
    train_loader = pointDataLoader(file_path='./dataset/train.txt', mode='train')
    model = PointNet()
    model.train()  # enable training-mode Dropout / BatchNorm statistics
    optim = paddle.optimizer.Adam(parameters=model.parameters(), weight_decay=0.001)
    epoch_num = 50
    # Make sure the checkpoint directory exists (harmless if it already does).
    os.makedirs('./model', exist_ok=True)
    for epoch in range(epoch_num):
        for batch_id, data in enumerate(train_loader()):
            inputs = paddle.to_tensor(data[0])   # (B, 3, 1024, 1)
            labels = paddle.to_tensor(data[1])   # (B, 1)
            predicts = model(inputs)             # (B, 10) log-probabilities
            loss = F.nll_loss(predicts, labels)  # NLL on log-probs == cross-entropy
            acc = paddle.metric.accuracy(predicts, labels)
            loss.backward()
            optim.step()        # apply the parameter update
            optim.clear_grad()  # reset gradients before the next batch
            if batch_id % 8 == 0:
                print("epoch: {}, batch_id: {}, loss is: {}, acc is: {}".format(epoch, batch_id, loss.numpy(), acc.numpy()))
        # BUG FIX: the original saved only on epochs 0/20/40, so the weights
        # from the final epoch (49) were never persisted. Also save after the
        # last epoch.
        if epoch % 20 == 0 or epoch == epoch_num - 1:
            paddle.save(model.state_dict(), './model/PointNet.pdparams')
            paddle.save(optim.state_dict(), './model/PointNet.pdopt')


if __name__ == '__main__':
    train()
补充注释:
1.paddle.optimizer.Adam:class paddle.optimizer.Adam(learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-08, parameters=None, weight_decay=None, grad_clip=None, name=None, lazy_mode=False)
weight_decay(可选) - 正则化方法。可以是float类型的L2正则化系数或者正则化策略。
2.paddle.metric.accuracy(input, label, k=1, correct=None, total=None, name=None)
使用输入和标签计算准确率。
⑥评估
def evaluation():
    """Evaluate a saved PointNet checkpoint on the test list."""
    test_loader = pointDataLoader(file_path='./dataset/test.txt', mode='test')
    model = PointNet()
    model_state_dict = paddle.load('./model/PointNet.pdparams')
    model.load_dict(model_state_dict)
    # BUG FIX: the original never called eval(), leaving Dropout active and
    # BatchNorm in training mode during evaluation, which skews the metrics.
    model.eval()
    with paddle.no_grad():  # inference only — no need to track gradients
        for batch_id, data in enumerate(test_loader()):
            inputs = paddle.to_tensor(data[0])
            labels = paddle.to_tensor(data[1])
            predicts = model(inputs)
            # BUG FIX: the model outputs log-probabilities (LogSoftmax), so use
            # nll_loss as in training; F.cross_entropy would apply softmax a
            # second time and report a wrong loss.
            loss = F.nll_loss(predicts, labels)
            acc = paddle.metric.accuracy(predicts, labels)
            if batch_id % 100 == 0:
                print("batch_id: {}, loss is: {}, acc is: {}".format(batch_id, loss.numpy(), acc.numpy()))


if __name__ == '__main__':
    evaluation()
⑦预测
1、可视化预测样本
import numpy as np
zdata = []
xdata = []
ydata = []
f = open('./dataset/table/table_0015.txt', 'r')
for i in f:
xdata.append(float(i.split(',')[0]))
ydata.append(float(i.split(',')[1]))
zdata.append(float(i.split(',')[2]))
f.close()
xdata = np.array(xdata)
ydata = np.array(ydata)
zdata = np.array(zdata)
from mpl_toolkits import mplot3d #3D画图库
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
ax = plt.axes(projection='3d') #创建3D坐标轴
ax.scatter3D(xdata, ydata, zdata, c='r') #3D散点图 c为颜色
plt.show()
补充注释:
1.mpl_toolkits.mplot3d 是Matplotlib里面专门用来画三维图的工具包。
⑦预测
1、可视化预测样本
import numpy as np
zdata = []
xdata = []
ydata = []
f = open('./dataset/table/table_0015.txt', 'r')
for i in f:
xdata.append(float(i.split(',')[0]))
ydata.append(float(i.split(',')[1]))
zdata.append(float(i.split(',')[2]))
f.close()
xdata = np.array(xdata)
ydata = np.array(ydata)
zdata = np.array(zdata)
from mpl_toolkits import mplot3d
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
ax = plt.axes(projection='3d')
ax.scatter3D(xdata, ydata, zdata, c='r')
plt.show()
2、开始预测
def test():
    """Run prediction on the samples listed in ./dataset/predict.txt."""
    test_loader = pointDataLoader(file_path='./dataset/predict.txt', mode='test')
    model = PointNet()
    model_state_dict = paddle.load('./model/PointNet.pdparams')
    model.load_dict(model_state_dict)
    # BUG FIX: without eval(), Dropout stays active and BatchNorm uses batch
    # statistics, making predictions noisy and non-deterministic.
    model.eval()
    for batch_id, data in enumerate(test_loader()):
        inputs = paddle.to_tensor(data[0])
        label = paddle.to_tensor(data[1])
        predict = model(inputs)
        # argmax over axis 1 picks the most likely class per sample.
        print("predict: {}, label: {}".format(np.argmax(predict.numpy(), 1), np.squeeze(label.numpy())))


if __name__ == '__main__':
    test()
|