【金融】【pytorch】使用深度学习预测期货收盘价涨跌——数据处理
读取数据
其中 pandas_techinal_indicators 模块参考自 GitHub 仓库 jmartinezheras/reproduce-stock-market-direction-random-forests。
import torch
from torch.autograd import Variable
import torch.nn as nn
import pandas as pd
from pandas import DataFrame
import matplotlib.pyplot as plt
import numpy as np
import random
import pandas_techinal_indicators as ta
# Seed every random number generator this script imports so that repeated
# runs produce the same shuffles and splits.
np.random.seed(42)
random.seed(42)
# torch keeps its own generator, independent of numpy and `random`; seed it
# too so that any torch-side randomness is repeatable as well.
torch.manual_seed(42)
# Load the raw quotes. Code further down reads the 'year' and 'month' columns
# and the 'Open', 'High', 'Low', 'Close', 'Volume' columns from this frame.
df1 = pd.read_csv(r'xxxxxxx/hengSheng_0404.csv')
df1.head()
将数据按3年+3月分段
每次取连续3年的数据作为训练段,其后3个月作为测试集;下一组的训练段和测试集整体后移3个月。每个训练段中,80%用作训练集,20%用作验证集。参考:M'Ng J, Mehralizadeh M. Forecasting East Asian Indices Futures via a Novel Hybrid of Wavelet-PCA Denoising and Artificial Neural Network Models[J]. PLOS ONE, 2016, 11.
# Row positions in df1 that delimit the rolling windows:
#   train_ptr[k] - first row of the k-th 3-year training span
#   test_ptr[k]  - first row of the 3-month test span that follows it
#   end_ptr[k]   - first row after that test span
train_ptr = []
test_ptr = []
end_ptr = []
# Month index each list is waiting for next, counted as
# (year - 2006) * 12 + month. Every hit pushes the target 3 months ahead, so
# only the first row of each target month is recorded.
date_flag = [2, 3 * 12 + 2, 3 * 12 + 5]
# Compute the month index for the whole frame once instead of building a row
# Series with df1.iloc[i] twice per iteration.
month_index = ((df1['year'] - 2006) * 12 + df1['month']).values
for i, num in enumerate(month_index):
    if num == date_flag[0]:
        train_ptr.append(i)
        date_flag[0] += 3
    if num == date_flag[1]:
        test_ptr.append(i)
        date_flag[1] += 3
    if num == date_flag[2]:
        end_ptr.append(i)
        date_flag[2] += 3
print(len(end_ptr))
print(train_ptr)
取数据并进行初步处理
# Keep only the price and volume columns of the raw frame.
ohlcv_columns = ['Open', 'High', 'Low', 'Close', 'Volume']
aapl = df1[ohlcv_columns]
aapl.head()
def get_exp_preprocessing(df, alpha=0.9):
    """Return the exponentially weighted mean of *df*.

    Args:
        df: pandas object providing ``ewm`` (a DataFrame in this script).
        alpha: smoothing factor passed straight to ``ewm``.

    Returns:
        The result of ``df.ewm(alpha=alpha).mean()``.
    """
    return df.ewm(alpha=alpha).mean()
# Smooth the selected columns with the function's default alpha; the smoothed
# frame is what the feature-building code below consumes.
saapl = get_exp_preprocessing(aapl)
saapl.head()
处理出各种期货金融指标
def feature_extraction(data):
    """Run the technical-indicator helpers over *data* and add EMA/MACD columns.

    For each look-back window every ``ta`` indicator helper is called as
    ``helper(data, n=window)`` and its return value replaces ``data``.
    Afterwards four Close-to-EWM ratio columns are added and ``ta.macd`` is
    applied.

    Args:
        data: frame with at least a ``'Close'`` column; the ``ta`` helpers
            presumably need the other OHLCV columns too (verify against the
            ``pandas_techinal_indicators`` module).

    Returns:
        The frame returned by the final ``ta.macd`` call.
    """
    windows = [5, 14, 26, 44, 66]
    indicators = (
        ta.relative_strength_index,
        ta.stochastic_oscillator_d,
        ta.accumulation_distribution,
        ta.average_true_range,
        ta.momentum,
        ta.money_flow_index,
        ta.rate_of_change,
        ta.on_balance_volume,
        ta.commodity_channel_index,
        ta.ease_of_movement,
        ta.trix,
        ta.vortex_indicator,
    )
    # Same call order as a hand-written sequence: all indicators for one
    # window, then the next window.
    for window in windows:
        for indicator in indicators:
            data = indicator(data, n=window)

    # Ratio of Close to its exponentially weighted mean.
    # NOTE(review): the positional argument of ``ewm`` is ``com`` (centre of
    # mass), not ``span`` - confirm that is what the 'emaN' names intend.
    close = data['Close']
    ema_columns = (('ema50', 50), ('ema21', 21), ('ema14', 14), ('ema5', 5))
    for column, com in ema_columns:
        data[column] = close / close.ewm(com).mean()

    return ta.macd(data, n_fast=12, n_slow=26)
def compute_prediction_int(df, n):
    """Label each row 1 if Close ``n`` rows later is >= the current Close.

    Args:
        df: frame with a ``'Close'`` column.
        n: look-ahead distance in rows.

    Returns:
        Integer Series (1 = not lower, 0 = lower) indexed like ``df`` with the
        last ``n`` rows removed, because those rows have no future value to
        compare against. For ``n == 0`` every row is kept.
    """
    # Shift only the column that is needed rather than the whole frame.
    close = df['Close']
    not_lower = close.shift(-n) >= close
    # ``iloc[:-0]`` would select nothing, so only trim when there is a tail.
    if n:
        not_lower = not_lower.iloc[:-n]
    return not_lower.astype(int)
def prepare_data(df, horizon):
    """Build the feature table and attach the ``'pred'`` target column.

    Args:
        df: input frame handed to ``feature_extraction``.
        horizon: look-ahead distance in rows used for the target.

    Returns:
        Frame of features plus ``'pred'``, with every row containing NaN
        removed.
    """
    features = feature_extraction(df).dropna()
    # Take an explicit copy: assigning a column to an ``.iloc`` slice is a
    # chained assignment and triggers pandas' SettingWithCopyWarning.
    data = features.iloc[:-horizon].copy()
    # The assignment aligns on the index; rows that receive no label stay NaN
    # and are removed by the final dropna().
    data['pred'] = compute_prediction_int(data, n=horizon)
    return data.dropna()
# Build the labelled table with a 10-row look-ahead.
data = prepare_data(saapl, 10)
y = data['pred']
# Every column except the target (and 'gain', if present) is a model input.
excluded = ('gain', 'pred')
features = [column for column in data.columns if column not in excluded]
X = data[features]
print(list(X.columns))
制作数据集
# Min-max scale every feature column: (value - column min) / column range.
miData = X.values
column_min = np.min(miData, axis=0)
scalarX = np.max(miData, axis=0) - column_min
# A constant column has a range of 0 and would turn into NaN (0 / 0); divide
# by 1 instead so such a column becomes all zeros.
scalarX = np.where(scalarX == 0, 1, scalarX)
miData = (miData - column_min) / scalarX
yData = y.values
print(yData)
def create_dataset(dataset, label, look_back, batch_size=50):
    """Cut shuffled sliding windows out of *dataset* and group them in batches.

    Window start rows ``0 .. len(dataset) - look_back - 1`` are shuffled with
    the ``random`` module and consumed ``batch_size`` at a time; a trailing
    group smaller than ``batch_size`` is dropped.

    Args:
        dataset: 2-D array, rows are time steps and columns are features.
        label: per-row labels, same length as ``dataset``.
        look_back: number of consecutive rows in one window.
        batch_size: number of windows per batch (default 50).

    Returns:
        ``(data_x, data_y)``: two lists with one entry per batch. Each x entry
        is a float32 tensor of shape ``(look_back, batch_size, n_features)``
        and each y entry an int64 tensor of shape
        ``(look_back, batch_size, 1)``.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer')

    # Convert once up front instead of once per window.
    features = torch.from_numpy(np.asarray(dataset, dtype=np.float32))
    targets = torch.from_numpy(np.asarray(label, dtype=np.float32))
    n_features = features.shape[1]

    starts = list(range(len(dataset) - look_back))
    random.shuffle(starts)

    data_x = []
    data_y = []
    # Step through full batches only; the stop value excludes a short tail.
    for first in range(0, len(starts) - batch_size + 1, batch_size):
        batch_starts = starts[first:first + batch_size]
        x_windows = [
            features[s:s + look_back].reshape(look_back, 1, n_features)
            for s in batch_starts
        ]
        y_windows = [
            targets[s:s + look_back].reshape(look_back, 1, 1)
            for s in batch_starts
        ]
        # One concatenation per batch along the batch axis, rather than
        # re-copying a growing tensor for every window.
        data_x.append(torch.cat(x_windows, dim=1))
        data_y.append(torch.cat(y_windows, dim=1).long())
    return data_x, data_y
def create_Test_dataset(dataset, label, look_back):
    """Pack every sliding window of *dataset*, in order, into a single batch.

    Args:
        dataset: 2-D array, rows are time steps and columns are features.
        label: per-row labels, same length as ``dataset``.
        look_back: number of consecutive rows in one window.

    Returns:
        ``(data_x, data_y)``: two one-element lists. The x tensor is float32
        of shape ``(look_back, n_windows, n_features)`` and the y tensor int64
        of shape ``(look_back, n_windows, 1)``, where
        ``n_windows = len(dataset) - look_back``. When there are no windows
        both tensors are empty 1-D tensors.
    """
    n_windows = len(dataset) - look_back
    if n_windows > 0:
        features = torch.from_numpy(np.asarray(dataset, dtype=np.float32))
        targets = torch.from_numpy(np.asarray(label, dtype=np.float32))
        n_features = features.shape[1]
        # Build all windows first and concatenate once along the batch axis,
        # rather than re-copying a growing tensor for every window.
        x_item = torch.cat(
            [
                features[s:s + look_back].reshape(look_back, 1, n_features)
                for s in range(n_windows)
            ],
            dim=1,
        )
        y_item = torch.cat(
            [
                targets[s:s + look_back].reshape(look_back, 1, 1)
                for s in range(n_windows)
            ],
            dim=1,
        )
    else:
        # No complete window: hand back empty tensors.
        x_item = torch.tensor([])
        y_item = torch.tensor([])
    return [x_item], [y_item.long()]
def trainSet_split(dataX, dataY, train_ratio=0.8):
    """Randomly split paired samples into a training and a validation part.

    By default 80% of the samples go to the training set and 20% to the
    validation set. The assignment is drawn with ``random.shuffle``, so it
    follows the ``random`` module's seed.

    Args:
        dataX: indexable sequence of inputs.
        dataY: indexable sequence of targets, paired with ``dataX`` by
            position.
        train_ratio: fraction of samples placed in the training set
            (default 0.8).

    Returns:
        ``(trainLoaderX, trainLoaderY, validateLoaderX, validateLoaderY)`` as
        four lists.

    Raises:
        ValueError: if ``train_ratio`` is outside ``[0, 1]``.
    """
    if not 0 <= train_ratio <= 1:
        raise ValueError('train_ratio must be between 0 and 1')

    order = list(range(len(dataX)))
    random.shuffle(order)
    train_size = int(len(dataX) * train_ratio)
    train_ids = order[:train_size]
    validate_ids = order[train_size:]

    trainLoaderX = [dataX[i] for i in train_ids]
    trainLoaderY = [dataY[i] for i in train_ids]
    validateLoaderX = [dataX[i] for i in validate_ids]
    validateLoaderY = [dataY[i] for i in validate_ids]
    return trainLoaderX, trainLoaderY, validateLoaderX, validateLoaderY
|