Loading and Deploying a Pretrained BERT Model with TensorFlow 2
1 Model Preparation
1.1 Download the pretrained BERT model
This walkthrough uses Google's pretrained bert-base-chinese model (the code below keeps the local directory name bert-case-chinese).
1.2 Install transformers
Install the transformers package, which is used to load the BERT model.
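The package installs from PyPI; a typical setup in an environment that already has TensorFlow 2 might be:
pip install transformers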
2 Model Training and Saving
Model training (full code at the end): 1) Convert the model's forward pass into a graph function with tf.function(model.call).
# model initialization
model = TFBertForSequenceClassification.from_pretrained(model_path, num_labels=num_classes)
# convert the forward pass into a graph function
callback = tf.function(model.call)
2) Fix the model's input signature by calling get_concrete_function:
# set the model's input shapes, dtypes, and names
concrete_function = callback.get_concrete_function([tf.TensorSpec([None, 50], tf.int32, name="input_ids"),
                                                    tf.TensorSpec([None, 50], tf.int32, name="attention_mask"),
                                                    tf.TensorSpec([None, 50], tf.int32, name="token_type_ids")])
- Save the model with the serving signature attached:
# save the model and set its signature
tf.saved_model.save(model, './tfsevingmodel/', signatures=concrete_function)
print(model.summary())
- Inspect the saved model:
python D:\pythonapp\anacondas\envs\torchenv\Lib\site-packages\tensorflow\python\tools\saved_model_cli.py show --dir tfsevingmodel --all
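(If TensorFlow's scripts are on your PATH, the shorter saved_model_cli show --dir tfsevingmodel --all is equivalent.)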
The output looks like this:
signature_def['serving_default']:
The given SavedModel SignatureDef contains the following input(s):
inputs['attention_mask'] tensor_info:
dtype: DT_INT32
shape: (-1, 50)
name: serving_default_attention_mask:0
inputs['input_ids'] tensor_info:
dtype: DT_INT32
shape: (-1, 50)
name: serving_default_input_ids:0
inputs['token_type_ids'] tensor_info:
dtype: DT_INT32
shape: (-1, 50)
name: serving_default_token_type_ids:0
The given SavedModel SignatureDef contains the following output(s):
outputs['logits'] tensor_info:
dtype: DT_FLOAT
shape: (-1, 10)
name: StatefulPartitionedCall:0
Method name is: tensorflow/serving/predict
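Before wiring up Docker, it can be worth loading the SavedModel back and calling the serving signature directly. The following is a minimal sanity-check sketch; the all-zeros batch is just a placeholder to confirm the shapes match the signature above, not a meaningful sentence:
import tensorflow as tf

loaded = tf.saved_model.load('./tfsevingmodel/')
infer = loaded.signatures['serving_default']
# a dummy batch of one all-zeros sequence, matching the (-1, 50) int32 inputs above
dummy = tf.zeros((1, 50), dtype=tf.int32)
outputs = infer(input_ids=dummy, attention_mask=dummy, token_type_ids=dummy)
print(outputs['logits'].shape)  # expect (1, 10)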
3 Deploying the Model with TensorFlow Serving
1) Pull the TF Serving image with Docker: docker pull tensorflow/serving
2) Put the saved model in a directory on the serving host. I trained the model on Windows and copied the SavedModel from the model directory to /opt/tfserving.
3) Bind the model directory into the container and start the service:
docker run -p 8501:8501 --mount type=bind,source=/opt/tfserving/model/,target=/models/model -e MODEL_NAME=model -t tensorflow/serving
4) By default the model accepts requests at http://localhost:8501/v1/models/model:predict.
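To check that the container is up and the model has loaded before sending prediction requests, TF Serving also exposes a model-status endpoint:
curl http://localhost:8501/v1/models/model
A ready model reports "state": "AVAILABLE" in the model_version_status field of the response.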
4 Querying the Deployed Model over HTTP
1) curl request
curl -H "Content-Type: application/json" -X POST -d "{\"signature_name\": \"serving_default\",\"instances\":[{\"input_ids\": [1,1159,1100,914,7218,2564,704,1068,3333,4178,7305,4324,3227,5011,6381,3315,3017,5384,102,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],\"attention_mask\": [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],\"token_type_ids\": [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]}] }" http://192.168.10.100:8501/v1/models/model:predict
The response looks like this: { "predictions": [[-2.01432037, -1.96287441, -2.01508093, -0.15862219, 10.0372896, -0.712031305, -1.18103349, -1.21998453, -0.111421183, -1.34767079]] } These are raw logits, one per class; the largest value sits at index 4, which maps to 科技 in the label table defined in the full code.
2) HTTP request inference in Python
tokenizer = BertTokenizer.from_pretrained(model_path)
headers = {"content-type": "application/json"}
def predict(text):
    input_dict = tokenizer(text, return_tensors='tf', max_length=max_length, padding='max_length')
    input_ids = input_dict["input_ids"].numpy().tolist()[0]
    attention_mask = input_dict["attention_mask"].numpy().tolist()[0]
    token_type_ids = input_dict["token_type_ids"].numpy().tolist()[0]
    features = [{'input_ids': input_ids, 'attention_mask': attention_mask, 'token_type_ids': token_type_ids}]
    data = json.dumps({"signature_name": "serving_default", "instances": features})
    json_response = requests.post('http://192.168.10.100:8501/v1/models/model:predict', data=data, headers=headers)
    predictions = json.loads(json_response.text)['predictions']
    return predictions
text = "上海2010上半年四六级考试报名4月8日前完成"
predictions = predict(text)
label_to_index = tf.math.argmax(predictions[0]).numpy()
index_to_label = new_label[label_to_index]
index_to_label
Output: '教育'
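The service returns raw logits rather than probabilities. If probabilities are needed, a softmax over the returned vector is enough; a minimal sketch, reusing the new_label mapping defined in the full code below:
import tensorflow as tf

probs = tf.nn.softmax(predictions[0]).numpy()
for i, p in enumerate(probs):  # print every class label with its probability
    print(new_label[i], round(float(p), 4))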
5 Full Code
5.1 Model training and saving
import logging
logging.basicConfig(level=logging.ERROR)
# from transformers import TFBertPreTrainedModel,TFBertMainLayer,BertTokenizer
from transformers import TFBertForSequenceClassification,BertTokenizer
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
def convert_example_to_feature(review):
    # tokenize to WordPiece ids, add special tokens, and truncate reviews longer than max_length
    return tokenizer.encode_plus(review,
                                 add_special_tokens=True,     # add [CLS], [SEP]
                                 max_length=max_length,       # max length of the text that can go to BERT
                                 pad_to_max_length=True,      # pad with [PAD] tokens up to max_length
                                 return_attention_mask=True,  # mask so attention skips the [PAD] tokens
                                 truncation=True)

# map to the expected input of TFBertForSequenceClassification
def map_example_to_dict(input_ids, attention_masks, token_type_ids, label):
    return {
        "input_ids": input_ids,
        "token_type_ids": token_type_ids,
        "attention_mask": attention_masks,
    }, label

def encode_examples(ds, limit=-1):
    # collect per-example features so the final TensorFlow dataset can be built from slices
    input_ids_list = []
    token_type_ids_list = []
    attention_mask_list = []
    label_list = []
    if limit > 0:
        ds = ds.take(limit)
    for index, row in ds.iterrows():
        review = row["text"]
        label = row["y"]
        bert_input = convert_example_to_feature(review)
        input_ids_list.append(bert_input['input_ids'])
        token_type_ids_list.append(bert_input['token_type_ids'])
        attention_mask_list.append(bert_input['attention_mask'])
        label_list.append([label])
    return tf.data.Dataset.from_tensor_slices((input_ids_list, attention_mask_list, token_type_ids_list, label_list)).map(map_example_to_dict)
def split_dataset(df):
    # 90/5/5 stratified split into train, validation, and test sets
    train_set, x = train_test_split(df, stratify=df['label'], test_size=0.1, random_state=42)
    val_set, test_set = train_test_split(x, stratify=x['label'], test_size=0.5, random_state=43)
    return train_set, val_set, test_set
data_path = "data.txt"  # path to the training data
model_path = "./bert-case-chinese/"  # local model directory; download in advance from https://huggingface.co/bert-base-chinese
max_length = 50
batch_size = 30
learning_rate = 2e-5
number_of_epochs = 5
num_classes = 10 # number of classes
# read data
df_raw = pd.read_csv(data_path,sep="\t",header=None,names=["text","label"])
df_label = pd.DataFrame({"label":["财经","房产","股票","教育","科技","社会","时政","体育","游戏","娱乐"],"y":list(range(10))})
new_label=df_label.to_dict()['label']
df_raw = pd.merge(df_raw,df_label,on="label",how="left")
# split data
train_data,val_data, test_data = split_dataset(df_raw)
# tokenizer
tokenizer = BertTokenizer.from_pretrained(model_path)
# train dataset
ds_train_encoded = encode_examples(train_data).shuffle(10000).batch(batch_size)
# val dataset
ds_val_encoded = encode_examples(val_data).batch(batch_size)
# test dataset
ds_test_encoded = encode_examples(test_data).batch(batch_size)
# model initialization
model = TFBertForSequenceClassification.from_pretrained(model_path, num_labels=num_classes)
callback = tf.function(model.call)
model.load_weights("./ckpt/news.ckpt")
concrete_function = callback.get_concrete_function([tf.TensorSpec([None, 50], tf.int32, name="input_ids"),
                                                    tf.TensorSpec([None, 50], tf.int32, name="attention_mask"),
                                                    tf.TensorSpec([None, 50], tf.int32, name="token_type_ids")])
tf.saved_model.save(model, './tfsevingmodel/', signatures=concrete_function)
print(model.summary())
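Note that the listing above restores weights from ./ckpt/news.ckpt rather than training from scratch. A minimal training sketch that would produce such a checkpoint, reusing the datasets and hyperparameters defined above (the optimizer and loss choices here are my assumptions, not taken from the original run):
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy

optimizer = Adam(learning_rate=learning_rate)
# TFBertForSequenceClassification outputs logits, so compute the loss from logits
loss = SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
model.fit(ds_train_encoded, epochs=number_of_epochs, validation_data=ds_val_encoded)
model.save_weights("./ckpt/news.ckpt")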
5.2 Model deployment
docker run -p 8501:8501 --mount type=bind,source=/opt/tfserving/model/,target=/models/model -e MODEL_NAME=model -t tensorflow/serving
5.3 Model inference
from transformers import BertTokenizer
import tensorflow as tf
import pandas as pd
import json
import requests
model_path = "./bert-case-chinese/"
max_length=50
df_label = pd.DataFrame({"label":["财经","房产","股票","教育","科技","社会","时政","体育","游戏","娱乐"],"y":list(range(10))})
new_label=df_label.to_dict()['label']
tokenizer = BertTokenizer.from_pretrained(model_path)
headers = {"content-type": "application/json"}
def predict(text):
    # tokenize and pad to the fixed length the serving signature expects
    input_dict = tokenizer(text, return_tensors='tf', max_length=max_length, padding='max_length')
    input_ids = input_dict["input_ids"].numpy().tolist()[0]
    attention_mask = input_dict["attention_mask"].numpy().tolist()[0]
    token_type_ids = input_dict["token_type_ids"].numpy().tolist()[0]
    features = [{'input_ids': input_ids, 'attention_mask': attention_mask, 'token_type_ids': token_type_ids}]
    data = json.dumps({"signature_name": "serving_default", "instances": features})
    json_response = requests.post('http://192.168.10.100:8501/v1/models/model:predict', data=data, headers=headers)
    predictions = json.loads(json_response.text)['predictions']
    return predictions
text = "上海2010上半年四六级考试报名4月8日前完成"
predictions = predict(text)
label_to_index = tf.math.argmax(predictions[0]).numpy()
index_to_label = new_label[label_to_index]
index_to_label