依赖环境
- python3.x
- tensorflow 2.x
- Linux(我用的Ubuntu 18.04LTS)
- numpy
- pandas
- sklearn
核心代码
搭建模型
import sys
import os
from tensorflow import keras
from tensorflow.keras.models import Model
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K
# Cap TensorFlow's GPU memory at 1 GB so other processes can share the card.
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
if gpus:  # guard: gpus[0] raises IndexError on a CPU-only machine
    tf.config.experimental.set_virtual_device_configuration(
        gpus[0],
        [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=1024 * 1)])
# --- Standalone illustration of the architecture used in rnn_model() ---
# NOTE(review): `train_X`, `n_forecast` and `smape` are not defined at this
# point in the snippet; this fragment assumes they exist in the caller's scope.
model = keras.models.Sequential()
# First GRU returns the full sequence so it can feed the second GRU.
# assumes input samples are shaped (n_input, 1) per example — TODO confirm against load_data
model.add(keras.layers.GRU(units=256, return_sequences=True, activation='tanh', input_shape=(train_X.shape[1],1)))
# Second GRU collapses the sequence into a single 64-dim vector.
model.add(keras.layers.GRU(units=64, return_sequences=False))
# Dropout for regularization before the output head.
model.add(keras.layers.Dropout(0.5))
# Linear head: one output unit per forecast step.
model.add(keras.layers.Dense(n_forecast, activation='linear'))
# SMAPE is a dimensionless loss, suitable here regardless of data units.
model.compile(loss=smape, optimizer=keras.optimizers.Adam())
model.summary()
注意: 1、这里使用的是GRU,主要是数据量较少,数据量特别大的话建议使用LSTM; 2、第一个隐藏层使用256个单元(units),可以根据需要自己定义,建议不要过大或者过小; 3、loss函数选取特别重要,有量纲的话用MAE、MSE、RMSE、Huber Loss(将MSE与MAE结合起来),无量纲的话使用SMAPE、NMB;
def rnn_model(data_name, n_input, n_forecast, train_data_path, test_data_path):
    """Train a 2-layer GRU forecaster and report SMAPE on the test set.

    Args:
        data_name: label used in the checkpoint filename and the report line.
        n_input: number of input timesteps per sample.
        n_forecast: number of future steps to predict (size of the Dense head).
        train_data_path: path handed to ``load_data`` for training samples.
        test_data_path: path handed to ``load_data`` for test samples.
    """
    X, Y = load_data('train', train_data_path, n_input, n_forecast)
    train_X, train_Y, valid_X, valid_Y = split_data(X, Y)

    # assumes load_data returns X shaped (samples, n_input, 1) to match the
    # GRU's input_shape — TODO confirm
    model = keras.models.Sequential()
    model.add(keras.layers.GRU(units=256, return_sequences=True,
                               activation='tanh',
                               input_shape=(train_X.shape[1], 1)))
    model.add(keras.layers.GRU(units=64, return_sequences=False))
    model.add(keras.layers.Dropout(0.5))
    model.add(keras.layers.Dense(n_forecast, activation='linear'))
    # SMAPE is dimensionless, so the loss is comparable across datasets.
    model.compile(loss=smape, optimizer=keras.optimizers.Adam())
    model.summary()

    print('training model')
    batch_size = 32
    epochs = 1000  # early stopping normally terminates long before this
    model_dir = './model_res/'
    os.makedirs(model_dir, exist_ok=True)  # ModelCheckpoint fails if the dir is missing
    model_path = os.path.join(
        model_dir, 'lstm-%s-%d-%d.h5' % (data_name, n_input, n_forecast))
    # Stop once val_loss plateaus for 10 epochs; keep only the best weights on disk.
    callback = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
    checkpoint_callback = keras.callbacks.ModelCheckpoint(
        filepath=model_path,
        save_weights_only=True,
        monitor='val_loss',
        mode='min',
        save_best_only=True)
    model.fit(x=train_X,
              y=train_Y,
              batch_size=batch_size,
              epochs=epochs,
              callbacks=[callback, checkpoint_callback],
              verbose=2,
              shuffle=True,
              validation_data=(valid_X, valid_Y))

    # NOTE(review): evaluation below uses the final-epoch weights, not the
    # checkpointed best — load weights from `model_path` first if best-weights
    # evaluation is intended.
    test_X, test_Y = load_data('test', test_data_path, n_input, n_forecast)
    y_pred = model.predict(test_X)
    scores = calcu_smape(y_pred, test_Y)
    print('-' * 50)
    print('evaluate result:')  # fixed typo: was 'evalate'
    print(data_name, 'n_input: %d, n_forecast: %d, %s: %.4f'
          % (n_input, n_forecast, model.metrics_names, scores))
loss函数
def smape(y_true, y_pred):
    """Symmetric mean absolute percentage error, as a Keras loss.

    Computes 2 * mean(|pred - true| / (|true| + |pred| + eps)); the tiny
    epsilon keeps the denominator non-zero when both values are 0.
    """
    numerator = K.abs(y_pred - y_true)
    denominator = K.abs(y_true) + K.abs(y_pred) + 1e-31
    return 2.0 * K.mean(numerator / denominator)
其他辅助函数
def split_data(X, Y):
    """Randomly split (X, Y) into 80% train / 20% eval with a fixed seed.

    Returns (X_train, y_train, X_eval, y_eval). Indexing with index lists
    assumes X and Y support fancy indexing (e.g. numpy arrays) — same
    contract as the original.
    """
    indices = list(range(len(X)))
    train_idx, eval_idx = train_test_split(indices, test_size=0.2, random_state=2021)
    return X[train_idx], Y[train_idx], X[eval_idx], Y[eval_idx]
模型概览
n_input = 48, n_forecast = 48 时的模型概览(注意:下面的输出与上文代码的超参数不完全一致——序列长度为432、第二层GRU为96个单元,应是另一组配置下的运行结果,仅供参考):
Layer (type) Output Shape Param
=================================================================
gru (GRU) (None, 432, 256) 198912
_________________________________________________________________
gru_1 (GRU) (None, 96) 101952
_________________________________________________________________
dropout (Dropout) (None, 96) 0
_________________________________________________________________
dense (Dense) (None, 48) 4656
=================================================================
Total params: 305,520
Trainable params: 305,520
Non-trainable params: 0