A recurrent neural network keeps a state vector; at each time step the state is combined with the current input to produce an output, and the updated state is carried into the next time step. Concretely, in the example below state_t = tanh(state_{t-1} · w_cell_state + x_t · w_cell_input + b_cell), and the output is read out of state_t through a fully connected layer.
import numpy as np

X = [1, 2]           # input sequence
state = [0.0, 0.0]   # state vector

# Define the weights of the two input parts separately.
w_cell_state = np.asarray([[0.1, 0.2], [0.3, 0.4]])
w_cell_input = np.asarray([0.5, 0.6])
b_cell = np.asarray([0.1, -0.1])

# Parameters of the fully connected layer used for the output.
w_output = np.asarray([[1.0], [2.0]])
b_output = 0.1

# Run the RNN forward pass in time order.
for i in range(len(X)):
    print(i)
    # Combine the previous state with the current input.
    before_activation = np.dot(state, w_cell_state) + X[i] * w_cell_input + b_cell
    state = np.tanh(before_activation)
    # Compute the final output from the current state.
    final_output = np.dot(state, w_output) + b_output
    # Print the information for this time step.
    print('before activation:', before_activation)
    print('state:', state)
    print('output:', final_output)
'''
0
before activation: [ 0.6  0.5]
state: [ 0.53704957  0.46211716]
output: [ 1.56128388]
1
before activation: [ 1.2923401   1.39225678]
state: [ 0.85973818  0.88366641]
output: [ 2.72707101]
'''
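Packaged as a step function, the recurrence reads more clearly. This is a minimal sketch; rnn_step is a name introduced here for illustration, and it reuses the weights defined above:

def rnn_step(x_t, state, w_state, w_input, b):
    # One vanilla RNN step: mix the previous state with the current input,
    # then squash through tanh to get the new state.
    return np.tanh(np.dot(state, w_state) + x_t * w_input + b)

state = np.zeros(2)
for x_t in X:
    state = rnn_step(x_t, state, w_cell_state, w_cell_input, b_cell)
print(state)   # matches the final state printed above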
A first look at LSTM
# The following is pseudocode.
import tensorflow as tf

# In TensorFlow an LSTM cell can be created with a single call.
lstm = tf.nn.rnn_cell.BasicLSTMCell(lstm_hidden_size)
# Initialize the LSTM state to all zeros.
# state is a tuple of two tensors: state.c is the cell state and
# state.h is the hidden state (the cell's output).
state = lstm.zero_state(batch_size, tf.float32)
loss = 0.0
for i in range(num_steps):
    # Declare the variables used by the LSTM at the first time step;
    # every later time step must reuse the variables defined before.
    if i > 0:
        tf.get_variable_scope().reuse_variables()
    # Feed one time step of the sequence together with the previous state
    # into the LSTM to get the current output and the new state.
    lstm_output, state = lstm(current_input, state)
    final_output = fully_connected(lstm_output)
    loss += calc_loss(final_output, expected_output)
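What BasicLSTMCell computes internally can be sketched in plain NumPy. The following is a minimal illustration, not TensorFlow's actual implementation; the weight layout and the names lstm_step, W, and b are assumptions made for this sketch:

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def lstm_step(x, h_prev, c_prev, W, b):
    # W stacks the four transforms column-wise:
    # input gate, forget gate, candidate values, output gate.
    n = h_prev.shape[0]
    z = np.dot(np.concatenate([x, h_prev]), W) + b
    i = sigmoid(z[:n])            # input gate
    f = sigmoid(z[n:2 * n])       # forget gate
    g = np.tanh(z[2 * n:3 * n])   # candidate cell values
    o = sigmoid(z[3 * n:])        # output gate
    c = f * c_prev + i * g        # new cell state   (state.c)
    h = o * np.tanh(c)            # new hidden state (state.h)
    return h, c

# Toy usage: input_dim = 2, hidden_dim = 3, zero initial state.
rng = np.random.default_rng(0)
W = rng.normal(size=(2 + 3, 4 * 3))
b = np.zeros(4 * 3)
h, c = lstm_step(rng.normal(size=2), np.zeros(3), np.zeros(3), W, b)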
Complete code example
import numpy as np
import tensorflow as tf
import matplotlib as mpl
mpl.use('agg')
from matplotlib import pyplot as plt

HIDDEN_SIZE = 30           # number of hidden units in each LSTM cell
NUM_LAYERS = 2             # number of stacked LSTM layers
TIMESTEPS = 10             # length of the training sequences
TRAINING_STEPS = 10000     # number of training steps
BATCH_SIZE = 32            # batch size
TRAINING_EXAMPLES = 10000  # number of training examples
TESTING_EXAMPLES = 1000    # number of test examples
SAMPLE_GAP = 0.01          # sampling interval
def generate_data(seq):
    # Slide a window of length TIMESTEPS over seq: each input X[i] is a
    # window of TIMESTEPS consecutive values, and the label Y[i] is the
    # value that immediately follows that window.
    X = []
    Y = []
    for i in range(len(seq) - TIMESTEPS):
        X.append(seq[i:i + TIMESTEPS])
        Y.append(seq[i + TIMESTEPS])
    return np.array(X, dtype=np.float32), np.array(Y, dtype=np.float32)
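# Quick shape check (illustration only; the leading underscores mark these
# as throwaway names, not part of the original script): a sequence of
# length 50 yields 50 - TIMESTEPS = 40 (window, next value) pairs.
_seq = np.sin(np.linspace(0, 1, 50, dtype=np.float32))
_X_demo, _Y_demo = generate_data(_seq)
assert _X_demo.shape == (40, TIMESTEPS) and _Y_demo.shape == (40,)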
def lstm_model(X, Y, is_training):
    # Stack NUM_LAYERS basic LSTM cells.
    cell = tf.nn.rnn_cell.MultiRNNCell(
        [tf.nn.rnn_cell.BasicLSTMCell(HIDDEN_SIZE) for _ in range(NUM_LAYERS)])
    # Run the forward pass. outputs holds the LSTM output at every step,
    # with shape (batch_size, time, HIDDEN_SIZE); this problem only needs
    # the output of the last time step.
    outputs, _ = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
    output = outputs[:, -1, :]
    print('get predictions')
    predictions = tf.contrib.layers.fully_connected(output, 1, activation_fn=None)
    # Only compute the loss and the optimization step during training;
    # at test time just return the predictions.
    if not is_training:
        return predictions, None, None
    # Squared loss between labels and predictions.
    loss = tf.losses.mean_squared_error(labels=Y, predictions=predictions)
    # Create the training op.
    print('start opt')
    train_op = tf.contrib.layers.optimize_loss(
        loss, tf.train.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
    return predictions, loss, train_op
def train(sess, train_X, train_Y):
    print('start training')
    # from_tensor_slices slices the tensors along the first dimension so
    # that each dataset element pairs one input window X[i] with its label Y[i].
    ds = tf.data.Dataset.from_tensor_slices((train_X, train_Y))
    # repeat loops over the data, shuffle randomizes the order, and batch
    # groups the examples into batches.
    ds = ds.repeat().shuffle(1000).batch(BATCH_SIZE)
    # Read through the data, one batch at a time.
    X, Y = ds.make_one_shot_iterator().get_next()
    # tf.nn.dynamic_rnn expects a 3-D input of shape
    # (batch_size, sequence_length, num_features); the feature here is a
    # scalar, so the feature dimension has to be added explicitly.
    X = tf.expand_dims(X, axis=2)
    Y = tf.expand_dims(Y, axis=1)
    with tf.variable_scope('model'):
        print('load model')
        predictions, loss, train_op = lstm_model(X, Y, True)
    print('start initializer')
    sess.run(tf.global_variables_initializer())
    for i in range(TRAINING_STEPS):
        _, l = sess.run([train_op, loss])
        if i % 100 == 0:
            print('train step:' + str(i) + ', loss:' + str(l))
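# Standalone illustration of the expand_dims shape changes above (plain
# NumPy, safe to run at import time; _bx and _by are throwaway names):
_bx = np.expand_dims(np.zeros((BATCH_SIZE, TIMESTEPS), np.float32), axis=2)
_by = np.expand_dims(np.zeros((BATCH_SIZE,), np.float32), axis=1)
assert _bx.shape == (BATCH_SIZE, TIMESTEPS, 1) and _by.shape == (BATCH_SIZE, 1)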
def run_eval(sess, test_X, test_Y):
    print('start eval')
    ds = tf.data.Dataset.from_tensor_slices((test_X, test_Y))
    ds = ds.batch(1)
    X, Y = ds.make_one_shot_iterator().get_next()
    X = tf.expand_dims(X, axis=2)
    Y = tf.expand_dims(Y, axis=1)
    # Reuse the variables trained above; the label is not needed for
    # prediction, so a placeholder value is passed instead.
    with tf.variable_scope('model', reuse=True):
        prediction, _, _ = lstm_model(X, [0.0], False)
    predictions = []
    labels = []
    for i in range(TESTING_EXAMPLES):
        P, L = sess.run([prediction, Y])
        predictions.append(P)
        labels.append(L)
    predictions = np.array(predictions).squeeze()
    labels = np.array(labels).squeeze()
    rmse = np.sqrt(((predictions - labels) ** 2).mean(axis=0))
    print('Root Mean Square Error is: %f' % rmse)
    plt.figure()
    plt.plot(predictions, label='predictions')
    plt.plot(labels, label='real_sin')
    plt.legend()
    # With the 'agg' backend plt.show() cannot open a window, so save the
    # figure to a file instead.
    plt.savefig('sin_prediction.png')
# The training data cover the interval [0, test_start) of the sine curve;
# the test data cover the interval that immediately follows it.
test_start = (TRAINING_EXAMPLES + TIMESTEPS) * SAMPLE_GAP
test_end = test_start + (TESTING_EXAMPLES + TIMESTEPS) * SAMPLE_GAP
train_X, train_Y = generate_data(
    np.sin(np.linspace(0, test_start, TRAINING_EXAMPLES + TIMESTEPS, dtype=np.float32)))
test_X, test_Y = generate_data(
    np.sin(np.linspace(test_start, test_end, TESTING_EXAMPLES + TIMESTEPS, dtype=np.float32)))

with tf.Session() as sess:
    train(sess, train_X, train_Y)
    run_eval(sess, test_X, test_Y)
'''
start training
load model
get predictions
start opt
start initializer
train step:0, loss:0.185513
train step:100, loss:0.00154637
train step:200, loss:0.000674907
train step:300, loss:0.000369885
train step:400, loss:0.000176506
train step:500, loss:0.000405856
train step:600, loss:0.000148857
train step:700, loss:0.000147915
train step:800, loss:0.000330028
train step:900, loss:0.00017602
train step:1000, loss:0.000111503
train step:1100, loss:0.000399117
train step:1200, loss:0.000182472
train step:1300, loss:0.000353213
train step:1400, loss:0.000293578
train step:1500, loss:0.000148629
train step:1600, loss:5.75956e-05
train step:1700, loss:0.00154916
train step:1800, loss:8.46601e-05
train step:1900, loss:6.13297e-05
train step:2000, loss:0.000141122
train step:2100, loss:0.00010441
train step:2200, loss:5.33387e-05
train step:2300, loss:0.000171484
train step:2400, loss:9.26147e-05
train step:2500, loss:3.65687e-05
train step:2600, loss:0.000103446
train step:2700, loss:4.82868e-05
train step:2800, loss:3.58153e-05
train step:2900, loss:1.79731e-05
train step:3000, loss:2.28823e-05
train step:3100, loss:5.06626e-05
train step:3200, loss:2.70808e-05
train step:3300, loss:7.32284e-05
train step:3400, loss:4.31858e-05
train step:3500, loss:4.22013e-05
train step:3600, loss:8.48996e-05
train step:3700, loss:4.91826e-05
train step:3800, loss:0.00170027
train step:3900, loss:0.000926301
train step:4000, loss:6.00364e-05
train step:4100, loss:3.65914e-05
train step:4200, loss:0.000148781
train step:4300, loss:3.73674e-05
train step:4400, loss:2.47654e-05
train step:4500, loss:2.06553e-05
train step:4600, loss:1.7806e-05
train step:4700, loss:9.61706e-06
train step:4800, loss:3.406e-05
train step:4900, loss:1.45551e-05
train step:5000, loss:5.3717e-06
train step:5100, loss:9.09046e-06
train step:5200, loss:1.53424e-05
train step:5300, loss:1.31456e-05
train step:5400, loss:3.54437e-06
train step:5500, loss:1.82684e-05
train step:5600, loss:1.67606e-05
train step:5700, loss:8.02673e-06
train step:5800, loss:1.85304e-05
train step:5900, loss:1.31649e-05
train step:6000, loss:2.03883e-05
train step:6100, loss:2.66873e-05
train step:6200, loss:1.3216e-05
train step:6300, loss:2.27932e-05
train step:6400, loss:0.000473492
train step:6500, loss:3.72205e-05
train step:6600, loss:5.09785e-05
train step:6700, loss:1.34629e-05
train step:6800, loss:1.62583e-05
train step:6900, loss:4.78306e-06
train step:7000, loss:6.65108e-06
train step:7100, loss:1.09091e-06
train step:7200, loss:7.09125e-06
train step:7300, loss:3.03532e-06
train step:7400, loss:7.27224e-07
train step:7500, loss:6.92976e-06
train step:7600, loss:1.03977e-06
train step:7700, loss:4.32182e-06
train step:7800, loss:1.82465e-05
train step:7900, loss:4.04126e-06
train step:8000, loss:3.22635e-06
train step:8100, loss:1.1027e-05
train step:8200, loss:7.95563e-06
train step:8300, loss:5.7349e-06
train step:8400, loss:1.37268e-05
train step:8500, loss:9.72122e-06
train step:8600, loss:8.64096e-06
train step:8700, loss:1.08009e-05
train step:8800, loss:0.000105467
train step:8900, loss:0.000228173
train step:9000, loss:8.84291e-06
train step:9100, loss:2.86701e-05
train step:9200, loss:4.04763e-05
train step:9300, loss:7.29185e-06
train step:9400, loss:6.88334e-06
train step:9500, loss:2.23952e-06
train step:9600, loss:1.56085e-06
train step:9700, loss:7.58298e-06
train step:9800, loss:1.87605e-06
train step:9900, loss:2.38302e-06
start eval
get predictions
Root Mean Square Error is: 0.033945
'''