# NOTE: after adding the temporal (sliding-window) structure, results got worse...?
from torch.utils.data import DataLoader,Dataset
import torch.nn as nn
import torch
from torchvision import transforms
import pandas as pd
import numpy as np
import torch.optim as optim
# Reproducibility seed left disabled by the original author.
# torch.manual_seed(999)

# Assumes tib.csv has >= 3 columns: features are columns 2 onward and the
# target is the last column -- TODO confirm against the CSV schema.
path = r'C:\Users\Administrator\Desktop\tib.csv'
df = pd.read_csv(path)

TIME_STEP = 5            # length of each sliding input window
total_len = df.shape[0]  # total number of rows in the CSV

# Build sliding windows: X[i] = feature rows i .. i+TIME_STEP-1,
# Y[i] = last-column value at row i+TIME_STEP (one-step-ahead target).
X = []
Y = []
for start in range(total_len - TIME_STEP):
    window = df.iloc[start:start + TIME_STEP, 2:].values
    X.append(np.array(window, dtype=np.float32))
    Y.append(np.array(df.iloc[start + TIME_STEP, -1], dtype=np.float32))
class MyDataset(Dataset):
    """Wrap pre-built window/label sequences as a torch ``Dataset``.

    Parameters
    ----------
    xx : sequence
        Input samples (one window per element).
    yy : sequence
        Targets aligned index-for-index with ``xx``.
    transform : callable, optional
        Applied to each input sample on access.
    """

    def __init__(self, xx, yy, transform=None):
        self.x = xx
        self.y = yy
        # Fixed misspelled attribute name (was `self.tranform`).
        self.transform = transform

    def __getitem__(self, index):
        x1 = self.x[index]
        y1 = self.y[index]
        # PEP 8: identity test against None, not `!= None`.
        if self.transform is not None:
            return self.transform(x1), y1
        return x1, y1

    def __len__(self):
        return len(self.x)
TRAIN_SIZE = 0.5  # fraction of raw rows that goes to training

# Chronological split of the window lists.
# NOTE(review): the split index is computed from total_len, not
# len(X) == total_len - TIME_STEP, so train gets slightly more than half.
split_idx = int(TRAIN_SIZE * total_len)
trainx, trainy = X[:split_idx], Y[:split_idx]
testx, testy = X[split_idx:], Y[split_idx:]

# ToTensor() adds a leading channel dim to each window (the training loop
# squeezes it back out); the test loader keeps raw float32 arrays.
train_loader = DataLoader(
    dataset=MyDataset(trainx, trainy, transform=transforms.ToTensor()),
    batch_size=2,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=MyDataset(testx, testy),
    batch_size=2,
    shuffle=False,
)
class LSTM(nn.Module):
    """Sequence regressor: 3-layer GRU over the window, then a linear head
    on the final step's hidden state.

    NOTE(review): despite the class name, the recurrent layer is ``nn.GRU``,
    not ``nn.LSTM``.
    """

    def __init__(self):
        super(LSTM, self).__init__()
        # batch_first=True -> inputs are (batch, seq_len, input_size).
        self.lstm = nn.GRU(
            input_size=16,
            hidden_size=64,
            num_layers=3,
            batch_first=True,
        )
        self.linear = nn.Linear(64, 1)

    def forward(self, x):
        # GRU returns (per-step outputs, final hidden state); passing None
        # as the initial hidden state means zeros.
        outputs, _ = self.lstm(x, None)
        # Regress from the last time step only -> (batch, 1).
        last_step = outputs[:, -1, :]
        return self.linear(last_step)
net = LSTM()
criterion = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

for epoch in range(100):
    for batch_x, batch_y in train_loader:
        # ToTensor added a channel dim: (batch, 1, TIME_STEP, 16) ->
        # (batch, TIME_STEP, 16), the layout a batch_first GRU expects.
        batch_x = batch_x.squeeze(1)
        prediction = net(batch_x)
        # Targets come out of the loader as (batch,); match (batch, 1) preds.
        target = batch_y.unsqueeze(1)
        loss = criterion(prediction, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # Last batch's loss of this epoch (printed as a tensor, as before).
    print(loss)
import matplotlib.pyplot as plt

# Evaluate on the held-out (chronologically later) windows and plot
# predictions against the real series.
preds = []
labels = []
# eval() is the correct mode switch for inference (a no-op for GRU/Linear
# here); no_grad() skips autograd bookkeeping during evaluation.
net.eval()
with torch.no_grad():
    for x, label in test_loader:
        # Test loader has no ToTensor transform, so this squeeze is a
        # no-op unless a channel dim is present; kept for symmetry with
        # the training loop. Shape: (batch, seq_len, input_size).
        x = x.squeeze(1)
        # detach-free access instead of the deprecated `.data` attribute.
        pred = net(x)
        preds.extend(pred.squeeze(1).tolist())
        labels.extend(label.tolist())

print(len(preds))
# Test-set MSE over all predictions.
print(criterion(torch.tensor(preds), torch.tensor(labels)))

plt.plot(labels, "r", label="real")
plt.plot(preds, "b", label="pred")
plt.legend()  # without this the `label=` kwargs above are never shown
plt.show()