I am a beginner myself, learning and taking notes as I go; I plan to organize the algorithms I learn for later review. If anything here is wrong, please point it out so we can improve together.
Goal: solve a multi-class classification problem with a BP (backpropagation) neural network. Libraries: pytorch, plus matplotlib for plotting the loss.
The solution breaks into four parts: reading the dataset, building the model, training, and prediction.
Part 1: Reading the dataset
The data are stored in a txt file. Each line is one sample: the feature values and the class number are separated by spaces or tabs, and the last number on each line is the class label (classes are counted from zero); see the sample file sketched below. The dataset is wrapped with PyTorch's Dataset and DataLoader classes; for details, see these two articles:
1. 详解PyTorch中加载数据的方法–Dataset、Dataloader、Sampler、collate_fn等
2. pytorch中DataLoader函数
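For concreteness, a hypothetical traindata.txt with six features and three classes (matching the network built below) could look like this; the numbers are made up purely for illustration:
5.1 3.5 1.4 0.2 4.3 1.1 0
6.2 2.9 4.3 1.3 3.8 0.7 1
7.3 2.8 6.1 1.9 5.0 1.5 2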
class TxtDataset(Dataset):  # reads samples from the lines of a whitespace-separated txt file
    def __init__(self, train_lines):
        super(TxtDataset, self).__init__()
        self.lens = len(train_lines)
        self.x_data = []
        self.y_data = []
        for i in range(self.lens):
            parts = train_lines[i].split()
            self.x_data.append(parts[:-1])  # all columns except the last are features
            self.y_data.append(parts[-1])   # the last column is the class label

    def __len__(self):
        return self.lens

    def __getitem__(self, index):
        x_data = torch.tensor(list(map(float, self.x_data[index])))
        # the label is a single number per line, so parse the string directly
        # instead of mapping float over its characters (which breaks for labels >= 10)
        y_data = torch.tensor(float(self.y_data[index]))
        return x_data, y_data.long()
Part 2: Building the model
The number of nodes in each layer can be chosen however you like; the structure built here is just something I put together for fun.
class BPModel(nn.Module):
    def __init__(self):
        super(BPModel, self).__init__()
        self.layer1 = nn.Linear(6, 32)
        self.layer2 = nn.Linear(32, 16)
        self.layer3 = nn.Linear(16, 3)
        self.dropout1 = nn.Dropout(p=0.15)
        self.dropout2 = nn.Dropout(p=0.15)
        self.BN0 = nn.BatchNorm1d(6, momentum=0.5)
        self.BN1 = nn.BatchNorm1d(32, momentum=0.5)
        self.BN2 = nn.BatchNorm1d(16, momentum=0.5)

    def forward(self, x):
        x = self.BN0(x)
        x = torch.tanh(self.BN1(self.layer1(x)))
        x = self.dropout1(x)
        x = torch.tanh(self.BN2(self.layer2(x)))
        x = self.dropout2(x)
        # return raw logits: nn.CrossEntropyLoss applies softmax itself, so the
        # output layer must not go through relu/softmax (see note 1 below)
        return self.layer3(x)
Part 3: Training
class BPTrain(object):
def __init__(self, train_path, val_path, lr=0.01, epochs=500, gpu=False):
self.gpu = gpu
self.lr = lr
self.epochs = epochs
self.loss = []
self.num_epoch = []
with open(train_path) as f:
lines = f.readlines()
        train_dataset = TxtDataset(lines)
self.gen = DataLoader(train_dataset, shuffle=True, batch_size=64, drop_last=True)
with open(val_path) as f:
lines = f.readlines()
        val_dataset = TxtDataset(lines)
self.val_gen = DataLoader(val_dataset, batch_size=32, shuffle=False)
def weights_init(self, m):
if isinstance(m, nn.Conv2d):
m.weight.data.normal_(0, 0.02)
m.bias.data.zero_()
elif isinstance(m, nn.Linear):
m.weight.data.normal_(0, 0.02)
m.bias.data.zero_()
def train(self, model_path=None):
if not self.gpu:
device = 'cpu'
else:
if torch.cuda.is_available():
device = torch.device("cuda:0")
else:
                print('GPU is unavailable, falling back to CPU.')
device = 'cpu'
best_val_acc = 0
model = BPModel()
if model_path:
model.load_state_dict(torch.load(model_path))
else:
model.apply(self.weights_init)
model = model.to(device)
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=self.lr)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, 100, gamma=0.9)
        for epoch in range(self.epochs):
            all_loss = 0
            train_rights = 0
            train_falses = 0
            model.train()  # make sure dropout/BN are in training mode
            for i, data in enumerate(self.gen, 0):
                inputs, labels = data
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()
                output = model(inputs)
                loss = loss_func(output, labels)
                loss.backward()
                optimizer.step()
                pred = torch.argmax(output, dim=1)
                train_rights += (pred == labels).sum().item()
                train_falses += (pred != labels).sum().item()
                # accumulate a plain float so the computation graph is not kept alive
                all_loss += loss.item()
            # decay the learning rate once per epoch, not once per batch
            lr_scheduler.step()
            self.loss.append(all_loss)
            self.num_epoch.append(epoch)
            if epoch % 50 == 0:
                print()
                print(f'Epoch {epoch}: loss = {all_loss}')
                print(f'Train accuracy: {train_rights / (train_rights + train_falses)}, correct: {train_rights}, wrong: {train_falses}')
                val_rights = 0
                val_falses = 0
                model.eval()  # switch dropout/BN to evaluation mode
                with torch.no_grad():
                    for j, data in enumerate(self.val_gen, 0):
                        inputs, labels = data
                        inputs = inputs.to(device)
                        labels = labels.to(device)
                        output = model(inputs)
                        pred = torch.argmax(output, dim=1)
                        val_rights += (pred == labels).sum().item()
                        val_falses += (pred != labels).sum().item()
                val_acc = val_rights / (val_rights + val_falses)
                if val_acc > best_val_acc:
                    best_val_acc = val_acc
                    # file name becomes 'best_False' or 'best_True' depending on the gpu flag
                    torch.save(model.state_dict(), f'best_{self.gpu}')
                print(f'Validation accuracy: {val_acc}, correct: {val_rights}, wrong: {val_falses}')
def draw_loss(self):
fig = plt.figure(figsize=(10, 6))
ax1 = fig.add_subplot(1, 1, 1)
ax1.set_xlabel('epoch')
ax1.set_ylabel('loss')
        ax1.set_title("Training loss")
ax1.plot(self.num_epoch, self.loss)
plt.savefig('Loss_chart.jpg')
plt.show()
All hyperparameters in the program can be adjusted.
Part 4: Prediction
The prediction file has the same layout as the training set, except that the last column (the class label) is missing: each line holds only a group of feature values.
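A matching, hypothetical predata.txt would then hold only the six feature columns (again, made-up values):
5.0 3.3 1.4 0.2 4.1 1.0
6.4 3.2 4.5 1.5 3.9 0.8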
def predict(model_path, pre_path, gpu=False):
    inputs = []
    with open(pre_path) as f:
        lines = f.readlines()
    for i in range(len(lines)):
        x = list(map(float, lines[i].split()))
        inputs.append(x)
    inputs = torch.tensor(inputs)
    if not gpu:
        device = 'cpu'
    else:
        if torch.cuda.is_available():
            device = torch.device("cuda:0")
        else:
            print('GPU is unavailable, falling back to CPU.')
            device = 'cpu'
    model = BPModel()
    model.load_state_dict(torch.load(model_path))
    model = model.to(device)
    model.eval()
    inputs = inputs.to(device)
    # no gradients are needed at inference time
    with torch.no_grad():
        output = model(inputs)
    output = torch.argmax(output, dim=1)
    print(f'Predicted classes: {output}')
Full program:
"""
作者:张世达
日期:2021年03月29日
"""
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
class TxtDataset(Dataset):  # reads samples from the lines of a whitespace-separated txt file
    def __init__(self, train_lines):
        super(TxtDataset, self).__init__()
        self.lens = len(train_lines)
        self.x_data = []
        self.y_data = []
        for i in range(self.lens):
            parts = train_lines[i].split()
            self.x_data.append(parts[:-1])  # all columns except the last are features
            self.y_data.append(parts[-1])   # the last column is the class label

    def __len__(self):
        return self.lens

    def __getitem__(self, index):
        x_data = torch.tensor(list(map(float, self.x_data[index])))
        # labels are already counted from zero, so no offset is applied
        y_data = torch.tensor(float(self.y_data[index]))
        return x_data, y_data.long()
class BPModel(nn.Module):
    def __init__(self):
        super(BPModel, self).__init__()
        self.layer1 = nn.Linear(6, 32)
        self.layer2 = nn.Linear(32, 16)
        self.layer3 = nn.Linear(16, 3)
        self.dropout1 = nn.Dropout(p=0.15)
        self.dropout2 = nn.Dropout(p=0.15)
        self.BN0 = nn.BatchNorm1d(6, momentum=0.5)
        self.BN1 = nn.BatchNorm1d(32, momentum=0.5)
        self.BN2 = nn.BatchNorm1d(16, momentum=0.5)

    def forward(self, x):
        x = self.BN0(x)
        x = torch.tanh(self.BN1(self.layer1(x)))
        x = self.dropout1(x)
        x = torch.tanh(self.BN2(self.layer2(x)))
        x = self.dropout2(x)
        # return raw logits: nn.CrossEntropyLoss applies softmax itself, so the
        # output layer must not go through relu/softmax (see note 1 below)
        return self.layer3(x)
class BPTrain(object):
def __init__(self, train_path, val_path, lr=0.01, epochs=500, gpu=False):
self.gpu = gpu
self.lr = lr
self.epochs = epochs
self.loss = []
self.num_epoch = []
with open(train_path) as f:
lines = f.readlines()
        train_dataset = TxtDataset(lines)
self.gen = DataLoader(train_dataset, shuffle=True, batch_size=64, drop_last=True)
with open(val_path) as f:
lines = f.readlines()
        val_dataset = TxtDataset(lines)
self.val_gen = DataLoader(val_dataset, batch_size=32, shuffle=False)
def weights_init(self, m):
if isinstance(m, nn.Conv2d):
m.weight.data.normal_(0, 0.02)
m.bias.data.zero_()
elif isinstance(m, nn.Linear):
m.weight.data.normal_(0, 0.02)
m.bias.data.zero_()
def train(self, model_path=None):
if not self.gpu:
device = 'cpu'
else:
if torch.cuda.is_available():
device = torch.device("cuda:0")
else:
                print('GPU is unavailable, falling back to CPU.')
device = 'cpu'
best_val_acc = 0
model = BPModel()
if model_path:
model.load_state_dict(torch.load(model_path))
else:
model.apply(self.weights_init)
model = model.to(device)
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=self.lr)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, 100, gamma=0.9)
        for epoch in range(self.epochs):
            all_loss = 0
            train_rights = 0
            train_falses = 0
            model.train()  # make sure dropout/BN are in training mode
            for i, data in enumerate(self.gen, 0):
                inputs, labels = data
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()
                output = model(inputs)
                loss = loss_func(output, labels)
                loss.backward()
                optimizer.step()
                pred = torch.argmax(output, dim=1)
                train_rights += (pred == labels).sum().item()
                train_falses += (pred != labels).sum().item()
                # accumulate a plain float so the computation graph is not kept alive
                all_loss += loss.item()
            # decay the learning rate once per epoch, not once per batch
            lr_scheduler.step()
            self.loss.append(all_loss)
            self.num_epoch.append(epoch)
            if epoch % 50 == 0:
                print()
                print(f'Epoch {epoch}: loss = {all_loss}')
                print(f'Train accuracy: {train_rights / (train_rights + train_falses)}, correct: {train_rights}, wrong: {train_falses}')
                val_rights = 0
                val_falses = 0
                model.eval()  # switch dropout/BN to evaluation mode
                with torch.no_grad():
                    for j, data in enumerate(self.val_gen, 0):
                        inputs, labels = data
                        inputs = inputs.to(device)
                        labels = labels.to(device)
                        output = model(inputs)
                        pred = torch.argmax(output, dim=1)
                        val_rights += (pred == labels).sum().item()
                        val_falses += (pred != labels).sum().item()
                val_acc = val_rights / (val_rights + val_falses)
                if val_acc > best_val_acc:
                    best_val_acc = val_acc
                    # file name becomes 'best_False' or 'best_True' depending on the gpu flag
                    torch.save(model.state_dict(), f'best_{self.gpu}')
                print(f'Validation accuracy: {val_acc}, correct: {val_rights}, wrong: {val_falses}')
def draw_loss(self):
fig = plt.figure(figsize=(10, 6))
ax1 = fig.add_subplot(1, 1, 1)
ax1.set_xlabel('epoch')
ax1.set_ylabel('loss')
        ax1.set_title("Training loss")
ax1.plot(self.num_epoch, self.loss)
plt.savefig('Loss_chart.jpg')
plt.show()
def predict(model_path, pre_path, gpu=False):
    inputs = []
    with open(pre_path) as f:
        lines = f.readlines()
    for i in range(len(lines)):
        x = list(map(float, lines[i].split()))
        inputs.append(x)
    inputs = torch.tensor(inputs)
    if not gpu:
        device = 'cpu'
    else:
        if torch.cuda.is_available():
            device = torch.device("cuda:0")
        else:
            print('GPU is unavailable, falling back to CPU.')
            device = 'cpu'
    model = BPModel()
    model.load_state_dict(torch.load(model_path))
    model = model.to(device)
    model.eval()
    inputs = inputs.to(device)
    # no gradients are needed at inference time
    with torch.no_grad():
        output = model(inputs)
    output = torch.argmax(output, dim=1)
    print(f'Predicted classes: {output}')
if __name__ == '__main__':
train_path = r'traindata.txt'
val_path = r'testdata.txt'
model_path = r'best_False'
pre_path = 'predata.txt'
training_BP = BPTrain(train_path, val_path)
training_BP.train()
training_BP.draw_loss()
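After training, prediction can be run with the saved weights; a minimal usage sketch reusing the paths defined in the main block above:
predict(model_path, pre_path)  # loads 'best_False' and prints the predicted classes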
Notes!!
1. Because this program solves a multi-class problem, the cross-entropy loss nn.CrossEntropyLoss() is used. This function applies softmax to its input internally, so the input must be the raw network outputs (logits): do not feed it results that have already been softmaxed, normalized, or turned via argmax into one-hot vectors like [0, 1, 0]. The targets must not be one-hot encoded either; pass the class index of each sample directly, e.g. [1, 6, 2, 4]. In other words, if the network output has shape N×C (N samples, C classes), the target must be a one-dimensional tensor of length N stating which class each sample belongs to; the function takes care of the one-hot handling internally. A small sketch follows these notes.
2. A batch normalization (BN) layer normalizes using the statistics of the current batch and then rescales with learned shift and scale parameters, which change on every forward pass. When the batch size is small, the batch's mean and variance are poor representatives of the data statistics, so they strongly affect the final result. As the batch size shrinks, the statistics computed by the BN layer become less and less reliable, which tends to drive the error rate up. For two good explanations of this, see:
1. 什么是批标准化 (Batch Normalization)
2. Batch Normalization 超详细解读
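As a concrete illustration of note 1, here is a minimal, self-contained sketch (the tensor values are made up):
import torch
import torch.nn as nn

loss_func = nn.CrossEntropyLoss()
logits = torch.randn(4, 3)            # raw N x C network outputs, no softmax applied
targets = torch.tensor([0, 2, 1, 0])  # length-N class indices, not one-hot vectors
loss = loss_func(logits, targets)     # softmax + negative log-likelihood happen inside
print(loss.item())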