介绍
这里用简单的网络来预测CSDN的阅读量。
我希望训练后的模型,给它前7天的阅读量,让它预测出第八天的阅读量。
阅读量的数据(很少)采用CSDN提供的excel文件:
这里采用 xlrd 工具包来读取文件。
def get_readings(path='./data/article_readings.xlsx', sheet_name='sheet1'):
    """Build sliding-window training samples from the readings spreadsheet.

    Column index 1 of the sheet is read top to bottom. Every sample is a list
    of 8 ints: 7 consecutive values followed by the value right after them
    (the value the network is asked to predict).

    Args:
        path: Workbook to open; defaults to the file used by the article.
        sheet_name: Name of the sheet that holds the readings.

    Returns:
        A list of 8-element int lists, one per window. The list is empty when
        the column has fewer than 9 cells.

    Raises:
        ValueError: If a cell inside a window cannot be converted by int().
    """
    window = 7  # number of preceding values per sample
    book = xlrd.open_workbook(path)
    cols = book.sheet_by_name(sheet_name).col_values(1)
    # Index 0 is never read -- presumably a header cell; confirm against the file.
    first_target = window + 1
    return [
        [int(value) for value in cols[target - window:target + 1]]
        for target in range(first_target, len(cols))
    ]
训练的xlsx文件:
阿里云盘
链接:https://www.aliyundrive.com/s/eQcRpbuWakp
百度网盘
链接:https://pan.baidu.com/s/1Icz5QOtFWe58pX37XhcPsw 提取码:91dt
效果
loss的下降趋势:
环境
pytorch=1.10.1
xlrd=1.2.0(注意:xlrd 2.0 及以上版本不再支持 .xlsx 文件,请不要升级)
步骤
1.准备数据
目录结构为:数据文件位于 ./data/article_readings.xlsx,所有代码都在 train.py 中。
def get_readings(path='./data/article_readings.xlsx', sheet_name='sheet1'):
    """Build sliding-window training samples from the readings spreadsheet.

    Column index 1 of the sheet is read top to bottom. Every sample is a list
    of 8 ints: 7 consecutive values followed by the value right after them
    (the value the network is asked to predict).

    Args:
        path: Workbook to open; defaults to the file used by the article.
        sheet_name: Name of the sheet that holds the readings.

    Returns:
        A list of 8-element int lists, one per window. The list is empty when
        the column has fewer than 9 cells.

    Raises:
        ValueError: If a cell inside a window cannot be converted by int().
    """
    window = 7  # number of preceding values per sample
    book = xlrd.open_workbook(path)
    cols = book.sheet_by_name(sheet_name).col_values(1)
    # Index 0 is never read -- presumably a header cell; confirm against the file.
    first_target = window + 1
    return [
        [int(value) for value in cols[target - window:target + 1]]
        for target in range(first_target, len(cols))
    ]
2.定义网络
class MyNet(nn.Module):
    """Small fully connected regressor: 7 inputs -> 16 -> 32 -> 1 output.

    The instance also carries its own optimizer and loss function, so a
    training loop only needs the model object.
    """

    def __init__(self, lr=None):
        """Create the layers, the Adam optimizer and the MSE loss.

        Args:
            lr: Learning rate for Adam. ``None`` (the default) falls back to
                the module-level ``LR`` constant, which is the value the
                zero-argument constructor has always used.
        """
        super().__init__()
        self.fc = nn.Sequential(
            nn.Linear(7, 16, bias=True),
            nn.ReLU(),
            nn.Linear(16, 32, bias=True),
            nn.ReLU(),
            nn.Linear(32, 1, bias=True),
        )
        if lr is None:
            lr = LR
        self.optimizer = torch.optim.Adam(self.fc.parameters(), lr=lr)
        self.loss_fn = nn.MSELoss()

    def forward(self, input):
        """Pass ``input`` (last dimension of size 7) through the layers."""
        return self.fc(input)
3.定义训练方法
def train(model: "MyNet", readings: list[list[int]]):
    """Run one pass of per-sample (batch size 1) training over ``readings``.

    Args:
        model: Network exposing ``optimizer`` and ``loss_fn`` attributes.
        readings: Samples of 8 numbers each: 7 inputs followed by the target.

    Returns:
        The loss of every sample in iteration order, each measured before
        that sample's optimizer step.
    """
    losses = []
    for sample in readings:
        features = torch.tensor(sample[:7], dtype=torch.float)
        # Shape (1,) so the target lines up with the single network output.
        target = torch.tensor([sample[7]], dtype=torch.float)
        # Named ``output`` so the module-level predict() is not shadowed.
        output = model(features)
        # Loss modules take (input, target) in that order.
        loss = model.loss_fn(output, target)
        losses.append(loss.item())
        model.optimizer.zero_grad()
        loss.backward()
        model.optimizer.step()
    return losses
4.预测
def predict(model: "MyNet", pre_reading) -> int:
    """Predict the next value from the preceding readings.

    Args:
        model: Network that returns a single value for one input vector.
        pre_reading: Sequence of numbers fed to the network as one sample.

    Returns:
        The network output truncated toward zero by int().
    """
    features = torch.tensor(pre_reading, dtype=torch.float)
    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        output = model(features).item()
    return int(output)
train.py
__main__ 中的 epoch 需要自己设定。如果你有大量数据,可以每轮随机取出一部分样本来训练;像本文这样数据太少时,epoch 设得太大很容易过拟合。
import random
import torch.optim
import xlrd
import torch.nn as nn
import matplotlib.pyplot as plt
# Learning rate passed to the Adam optimizer created in MyNet.__init__.
LR = 0.01
def get_readings(path='./data/article_readings.xlsx', sheet_name='sheet1'):
    """Build sliding-window training samples from the readings spreadsheet.

    Column index 1 of the sheet is read top to bottom. Every sample is a list
    of 8 ints: 7 consecutive values followed by the value right after them
    (the value the network is asked to predict).

    Args:
        path: Workbook to open; defaults to the file used by the article.
        sheet_name: Name of the sheet that holds the readings.

    Returns:
        A list of 8-element int lists, one per window. The list is empty when
        the column has fewer than 9 cells.

    Raises:
        ValueError: If a cell inside a window cannot be converted by int().
    """
    window = 7  # number of preceding values per sample
    book = xlrd.open_workbook(path)
    cols = book.sheet_by_name(sheet_name).col_values(1)
    # Index 0 is never read -- presumably a header cell; confirm against the file.
    first_target = window + 1
    return [
        [int(value) for value in cols[target - window:target + 1]]
        for target in range(first_target, len(cols))
    ]
class MyNet(nn.Module):
    """Small fully connected regressor: 7 inputs -> 16 -> 32 -> 1 output.

    The instance also carries its own optimizer and loss function, so a
    training loop only needs the model object.
    """

    def __init__(self, lr=None):
        """Create the layers, the Adam optimizer and the MSE loss.

        Args:
            lr: Learning rate for Adam. ``None`` (the default) falls back to
                the module-level ``LR`` constant, which is the value the
                zero-argument constructor has always used.
        """
        super().__init__()
        self.fc = nn.Sequential(
            nn.Linear(7, 16, bias=True),
            nn.ReLU(),
            nn.Linear(16, 32, bias=True),
            nn.ReLU(),
            nn.Linear(32, 1, bias=True),
        )
        if lr is None:
            lr = LR
        self.optimizer = torch.optim.Adam(self.fc.parameters(), lr=lr)
        self.loss_fn = nn.MSELoss()

    def forward(self, input):
        """Pass ``input`` (last dimension of size 7) through the layers."""
        return self.fc(input)
def train(model: "MyNet", readings: list[list[int]]):
    """Run one pass of per-sample (batch size 1) training over ``readings``.

    Args:
        model: Network exposing ``optimizer`` and ``loss_fn`` attributes.
        readings: Samples of 8 numbers each: 7 inputs followed by the target.

    Returns:
        The loss of every sample in iteration order, each measured before
        that sample's optimizer step.
    """
    losses = []
    for sample in readings:
        features = torch.tensor(sample[:7], dtype=torch.float)
        # Shape (1,) so the target lines up with the single network output.
        target = torch.tensor([sample[7]], dtype=torch.float)
        # Named ``output`` so the module-level predict() is not shadowed.
        output = model(features)
        # Loss modules take (input, target) in that order.
        loss = model.loss_fn(output, target)
        losses.append(loss.item())
        model.optimizer.zero_grad()
        loss.backward()
        model.optimizer.step()
    return losses
def predict(model: "MyNet", pre_reading) -> int:
    """Predict the next value from the preceding readings.

    Args:
        model: Network that returns a single value for one input vector.
        pre_reading: Sequence of numbers fed to the network as one sample.

    Returns:
        The network output truncated toward zero by int().
    """
    features = torch.tensor(pre_reading, dtype=torch.float)
    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        output = model(features).item()
    return int(output)
def main():
    """Train the network on the spreadsheet data, plot the loss, print one prediction."""
    model = MyNet()
    readings = get_readings()
    print(readings)
    if not readings:
        # Without this guard the sample pick below fails with a bare IndexError.
        raise SystemExit('no training samples: at least 8 readings are required')

    # random.choice always returns an existing element. The previous
    # random.randint(0, len(readings)) has an inclusive upper bound and
    # could index one past the end of the list.
    pre_reading = random.choice(readings)
    real_reading = pre_reading[-1]

    # NOTE: the sample comes from the training data itself, so the final
    # print is a sanity check rather than a held-out evaluation.
    loss_list_y = []
    epoch = 200
    for _ in range(epoch):
        loss_list_y.extend(train(model, readings))
    print('train done!')

    # One x position per recorded per-sample loss, starting at 1.
    loss_list_x = list(range(1, len(loss_list_y) + 1))
    plt.plot(loss_list_x, loss_list_y)
    plt.show()
    print(f'predict:{predict(model, pre_reading[:7])}, real:{real_reading}')


if __name__ == "__main__":
    main()
|