1.背景
最近在烦恼怎么对深度学习进行调参,发现在optuna上可以实现。
optuna可以和主流的机器学习框架进行融合,然后进行调参。同时调参后,还有查看结果对比的功能。
2.安装
直接使用pip进行安装:
pip install optuna
3.在tensorflow上使用
首先看一下官方案例:
import tensorflow as tf
import optuna
def objective(trial):
    """Optuna objective from the official example: build a Keras MLP whose
    depth and layer widths are sampled from the trial, then return its accuracy.

    Note: this is the abbreviated official snippet — `CLASSES`, the actual
    training/evaluation code, and `accuracy` are elided by the `...` placeholder.
    """
    # Sample how many hidden layers to use (1 to 3).
    n_layers = trial.suggest_int('n_layers', 1, 3)
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Flatten())
    for i in range(n_layers):
        # Width of each hidden layer, sampled log-uniformly in [4, 128].
        num_hidden = trial.suggest_int(f'n_units_l{i}', 4, 128, log=True)
        model.add(tf.keras.layers.Dense(num_hidden, activation='relu'))
    model.add(tf.keras.layers.Dense(CLASSES))  # CLASSES is defined outside this snippet
    ...
    return accuracy  # `accuracy` comes from the elided training/evaluation code

# Maximize the objective using the TPE sampler, over 100 trials.
study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler())
study.optimize(objective, n_trials=100)
案例分为三步:定义 objective、创建 study、调用 optimize。其中 create_study 的两个关键参数:
direction
控制是以最大化还是最小化为准
sampler
指定 Optuna 使用的采样器(sampler)方法。
4.实例
4.1 定义模型、训练过程
class Trainer(object):
    """Drives training and validation of the transformer encoder + classifier head,
    and returns the best validation F1 for Optuna to maximize.

    Relies on several module-level names defined elsewhere in this file:
    loss_function, the train_*/val_* streaming metrics, checkpoint_path,
    tensorboard(), rd.read_dataset, precision_recall_fscore_support, and the
    shape constants duel_seq_nums / seq_len / sta_len.
    """

    def __init__(self,
                 transformer_encoder,
                 discriminator_class,
                 optimizer,
                 epochs,
                 train_dataset_white,
                 val_dataset_white
                 ) -> None:
        """Store the models, optimizer, epoch count and datasets.

        Args:
            transformer_encoder: sequence encoder model.
            discriminator_class: classification head applied to the encoder output.
            optimizer: tf.keras optimizer shared by both sub-models.
            epochs: number of training epochs to run.
            train_dataset_white: training dataset of serialized examples.
            val_dataset_white: validation dataset of serialized examples.
        """
        super(Trainer, self).__init__()
        self.train_dataset_white = train_dataset_white
        self.val_dataset_white = val_dataset_white
        self.epochs = epochs
        self.transformer_encoder = transformer_encoder
        self.discriminator_class = discriminator_class
        self.optimizer = optimizer

    # Fixed input signature so tf.function traces each step exactly once.
    # Assumes inputs are (batch, duel_seq_nums, seq_len) float sequences,
    # (batch, sta_len) static features, and (batch, 1) int labels — the
    # shape constants come from module level; confirm against the data pipeline.
    train_step_signature = [
        tf.TensorSpec(shape=(None, duel_seq_nums, seq_len), dtype=tf.float32),
        tf.TensorSpec(shape=(None, sta_len), dtype=tf.float32),
        tf.TensorSpec(shape=(None, 1), dtype=tf.int32),
    ]

    @tf.function(input_signature=train_step_signature)
    def train_step(self, inp_seq, inp_sta, label):
        """One optimization step; updates the train loss/precision/recall metrics.

        Returns the raw (pre-threshold) predictions.
        """
        with tf.GradientTape() as tape:
            enc_output = self.transformer_encoder(inp_seq, training=True)
            predictions = self.discriminator_class(enc_output, inp_sta)
            loss = loss_function(label, predictions)
        # Jointly optimize the encoder and the classifier head.
        trainable_variables = (self.transformer_encoder.trainable_variables
                               + self.discriminator_class.trainable_variables)
        gradients = tape.gradient(loss, trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, trainable_variables))
        train_loss_class(loss)
        # Threshold the scores at 0.5 to get hard 0/1 labels for the metrics.
        predictions_label = tf.where(predictions >= 0.5, 1, 0)
        train_precision_class(label, predictions_label)
        train_recall_class(label, predictions_label)
        return predictions

    @tf.function(input_signature=train_step_signature)
    def val_step(self, inp_seq, inp_sta, label):
        """Forward pass without gradient updates; updates the validation metrics.

        Returns the raw (pre-threshold) predictions.
        """
        enc_output = self.transformer_encoder(inp_seq, training=False)
        predictions = self.discriminator_class(enc_output, inp_sta)
        loss = loss_function(label, predictions)
        val_loss_class(loss)
        predictions_label = tf.where(predictions >= 0.5, 1, 0)
        val_precision_class(label, predictions_label)
        val_recall_class(label, predictions_label)
        return predictions

    def train(self):
        """Run the full training/validation loop.

        Returns:
            The best validation F1 (positive class) seen across all epochs.
        """
        # NOTE(review): the checkpoint object and path are created but
        # ckpt.save() is never called — confirm whether saving was intended.
        ckpt = tf.train.Checkpoint(transformer_encoder=self.transformer_encoder,
                                   discriminator_class=self.discriminator_class,
                                   optimizer=self.optimizer)
        current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        _checkpoint_path = checkpoint_path + "_" + current_time
        train_summary_writer, val_summary_writer = tensorboard()
        discriminator_step = 0  # fix: was assigned twice in the original
        max_val_f1 = 0
        f1_score = 0.0  # fix: defined even if the training dataset yields no batch
        # fix: honor the epochs passed to the constructor (was hard-coded range(10))
        for epoch in range(self.epochs):
            start_epoch = time.time()
            # Reset the streaming metrics at the start of every epoch.
            train_loss_class.reset_states()
            train_precision_class.reset_states()
            train_recall_class.reset_states()
            val_loss_class.reset_states()
            val_precision_class.reset_states()
            val_recall_class.reset_states()
            # fix: iterate the dataset stored on self, not the module-level global
            for start_index, parsed_dataset in enumerate(self.train_dataset_white):
                seq, sta, l = rd.read_dataset(parsed_dataset)
                predictions = self.train_step(seq, sta, l)
                discriminator_step += 1
                # fix: divide_no_nan guards the 0/0 case when precision + recall == 0
                f1_score = tf.math.divide_no_nan(
                    2 * train_precision_class.result() * train_recall_class.result(),
                    train_precision_class.result() + train_recall_class.result())
                with train_summary_writer.as_default():
                    tf.summary.scalar('Class loss', train_loss_class.result(), step=discriminator_step)
                    tf.summary.scalar('precision', train_precision_class.result(), step=discriminator_step)
                    tf.summary.scalar('recall', train_recall_class.result(), step=discriminator_step)
                    tf.summary.scalar('F1', f1_score, step=discriminator_step)
                if start_index % 1000 == 0:
                    logging.info('Epoch {:2d} | Index {:5d} | Class Loss {:.4f} | Secs {:.4f}'.format(epoch, start_index, train_loss_class.result(), time.time() - start_epoch))
                if start_index == 2000:
                    # Cap each epoch at 2001 batches to bound trial duration.
                    break
            logging.info('Epoch {:2d} | Train Loss {:.4f} | Precision {:.4f} Recall {:.4f} F1 {:.4f}'.format(epoch, train_loss_class.result(),
                                                                                                             train_precision_class.result(),
                                                                                                             train_recall_class.result(),
                                                                                                             f1_score))
            # Accumulate all validation scores/labels so sklearn can compute
            # an exact (non-streaming) F1 over the whole validation set.
            val_predicted_all = np.zeros([0, 1])
            val_label = np.zeros([0, 1])
            # fix: iterate the dataset stored on self, not the module-level global
            for start_index, parsed_dataset in enumerate(self.val_dataset_white):
                seq, sta, l = rd.read_dataset(parsed_dataset)
                predictions = self.val_step(seq, sta, l)
                val_predicted_all = np.concatenate([val_predicted_all, np.reshape(predictions.numpy(), [-1, 1])], axis=0)
                val_label = np.concatenate([val_label, np.reshape(l.numpy(), [-1, 1])], axis=0)
            pre, rec, val_f1, _ = precision_recall_fscore_support(val_label, np.where(val_predicted_all >= 0.5, 1, 0))
            val_f1 = val_f1[1]  # F1 of the positive class
            with val_summary_writer.as_default():
                tf.summary.scalar('Class loss', val_loss_class.result(), step=epoch)
                tf.summary.scalar('precision', val_precision_class.result(), step=epoch)
                tf.summary.scalar('recall', val_recall_class.result(), step=epoch)
                tf.summary.scalar('F1', val_f1, step=epoch)
            logging.info('Epoch {:2d} | Val Loss {:.4f} | Precision {:.4f} Recall {:.4f} F1 {:.4f}'.format(epoch, val_loss_class.result(),
                                                                                                           val_precision_class.result(),
                                                                                                           val_recall_class.result(),
                                                                                                           val_f1))
            if max_val_f1 <= val_f1:
                max_val_f1 = val_f1
                logging.info("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")
            logging.info("Finish Epoch in %f \n" % (time.time() - start_epoch))
        return max_val_f1
Trainer
- transformer_encoder、discriminator_class:模型
- optimizer:优化器
- train_dataset_white、val_dataset_white:训练集和验证集
train_step
训练过程
val_step
验证过程
train
启动模型训练
4.2 objective函数定义
def build_model(params):
    """Construct the encoder, classifier head and optimizer for one Optuna trial.

    Args:
        params: dict with keys 'num_layers', 'd_model', 'dff', 'dropout_rate'
            and 'learning_rate' (as produced by objective()).

    Returns:
        A Trainer wired to the module-level train/val datasets, ready to run
        .train().
    """
    transformer_encoder = TransformerEncoder(
        num_layers=params["num_layers"],
        d_model=params["d_model"],
        num_heads=hp.num_heads,  # head count comes from the global hp config, not the trial
        dff=params["dff"],
        input_vocab_size=20,
        target_vocab_size=22,
        pe_input=100,
        pe_target=100,
        target_feature_len=21,
        rate=params["dropout_rate"])
    discriminator_class = DecoderClass()
    optimizer = tf.keras.optimizers.Adam(params["learning_rate"])
    # fix: corrected misspelled log message ('bulid' -> 'build').
    # NOTE(review): start_time must be defined at module level — confirm.
    logging.info('Model build: %f' % (time.time() - start_time))
    trainer = Trainer(
        transformer_encoder=transformer_encoder,
        discriminator_class=discriminator_class,
        optimizer=optimizer,
        epochs=EPOCHS,
        train_dataset_white=train_dataset_white,
        val_dataset_white=val_dataset_white,
    )
    return trainer
def objective(trial):
    """Optuna objective: sample hyper-parameters, train, return the best val F1."""
    # Draw each hyper-parameter from the trial (same names/ranges as before).
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-3)
    num_layers = trial.suggest_categorical('num_layers', [1, 2, 3, 4])
    d_model = trial.suggest_categorical('d_model', [64, 128, 256, 512])
    dff = trial.suggest_int('dff', 128, 1024)
    dropout_rate = trial.suggest_categorical('dropout_rate', [0.0, 0.1, 0.2])
    params = {
        'learning_rate': learning_rate,
        'num_layers': num_layers,
        'd_model': d_model,
        'dff': dff,
        'dropout_rate': dropout_rate,
    }
    # Build the trainer for this configuration and report its best F1 to Optuna.
    return build_model(params).train()
params
传入需要控制的参数
4.3 启动optuna
# Maximize validation F1 with the TPE sampler; persist trial results to a
# local SQLite database so optuna-dashboard can visualize them afterwards.
study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler(), storage='sqlite:///db.sqlite3')
study.optimize(objective, n_trials=50)
storage
其中可以保存模型调参的结果
5.图形化显示
利用上面保存的 db.sqlite3 文件即可进行可视化,先安装 optuna-dashboard:
pip install optuna-dashboard
接着启动命令:
optuna-dashboard sqlite:///db.sqlite3
然后在浏览器输入:127.0.0.1:8080 ,就可以看到具体的图像结果: 哪个参数比较重要:
|