# Requirement: tensorflow > 2.6.0
from typing import Dict, Text
import keras.layers
import tensorflow as tf
from keras import Model
import tensorflow_recommenders as tfrs
###############################模型编写###############################
class RecUserModel(tf.keras.Model):
    """User tower: embeds the user's tag strings and adds the user's age.

    Only two input features are consumed by ``call``: ``"userTags"`` and
    ``"age"``. No other user attributes are read.

    Args:
        user_tag_vocabulary: Data handed to ``StringLookup.adapt`` to build
            the tag vocabulary (the script in this file passes a
            ``tf.data.Dataset`` of unique tag strings).
    """

    def __init__(self, user_tag_vocabulary):
        super().__init__()
        # Maps tag strings to integer ids; the vocabulary is learned from
        # the supplied data.
        self.userTags_vector = tf.keras.layers.StringLookup()
        self.userTags_vector.adapt(user_tag_vocabulary)
        # Use tf.keras consistently with the lookup layer and the base class
        # instead of mixing in the standalone ``keras`` package.
        self.userTags_embedding = tf.keras.layers.Embedding(
            input_dim=len(self.userTags_vector.get_vocabulary()),
            output_dim=4,
        )

    # Intended serving signature (kept for reference, not applied):
    #   {"userTags": tf.TensorSpec(shape=(None, None), dtype=tf.string),
    #    "age":      tf.TensorSpec(shape=(None, None), dtype=tf.float32)}
    def call(self, inputs):
        """Return the user representation for a batch of feature dicts.

        Args:
            inputs: Mapping with the keys ``"userTags"`` (string tensor) and
                ``"age"`` (float tensor that must broadcast against the
                summed embedding).

        Returns:
            The tag embeddings summed over the tag axis, plus ``age``.

        Raises:
            KeyError: If ``"userTags"`` or ``"age"`` is missing from
                ``inputs``.
        """
        # Index directly rather than ``.get``: a missing or mis-cased key
        # should fail here with a clear KeyError, not pass ``None`` onward.
        tags_lookup = self.userTags_vector(inputs["userTags"])
        # Sum over the tag axis (second to last) so each example collapses
        # to a single embedding vector.
        user_embedding = tf.math.reduce_sum(
            self.userTags_embedding(tags_lookup), axis=-2, keepdims=False
        )
        return user_embedding + inputs["age"]
class RecItemModel(tf.keras.Model):
    """Item tower: embeds the item's tag strings into one vector per item.

    Args:
        item_tag_vocabulary: Data handed to ``StringLookup.adapt`` to build
            the item tag vocabulary (the script in this file passes a
            ``tf.data.Dataset`` of unique tag strings).
    """

    def __init__(self, item_tag_vocabulary):
        super().__init__()
        # NOTE: the attributes are named ``userTags_*`` although they hold
        # item tags. The names are left unchanged so that anything keyed on
        # attribute names (e.g. previously saved weights) keeps matching.
        self.userTags_vector = tf.keras.layers.StringLookup()
        self.userTags_vector.adapt(item_tag_vocabulary)
        # Use tf.keras consistently with the lookup layer and the base class
        # instead of mixing in the standalone ``keras`` package.
        self.userTags_embedding = tf.keras.layers.Embedding(
            input_dim=len(self.userTags_vector.get_vocabulary()),
            output_dim=4,
        )

    # Intended serving signature (kept for reference, not applied):
    #   {"itemTags": tf.TensorSpec(shape=(None, None), dtype=tf.string)}
    def call(self, inputs):
        """Return the item representation for a batch of feature dicts.

        Args:
            inputs: Mapping with the key ``"itemTags"`` (string tensor).

        Returns:
            The tag embeddings summed over the tag axis.

        Raises:
            KeyError: If ``"itemTags"`` is missing from ``inputs``.
        """
        # Index directly rather than ``.get``: a missing or mis-cased key
        # should fail here with a clear KeyError, not pass ``None`` onward.
        tags_lookup = self.userTags_vector(inputs["itemTags"])
        # axis=-2 is the tag axis; for the 2-D (batch, tags) input used in
        # this file it is the same axis as the previous ``axis=1`` and it
        # matches the reduction used by the user tower.
        item_embedding = tf.math.reduce_sum(
            self.userTags_embedding(tags_lookup), axis=-2, keepdims=False
        )
        return item_embedding
class ItemRecModel(tfrs.Model):
    """Two-tower model that wires a user tower and an item tower to a task.

    Deriving from ``tfrs.Model`` means only ``compute_loss`` has to be
    provided here.
    """

    def __init__(
        self,
        user_model: tf.keras.Model,
        item_model: tf.keras.Model,
        task: tfrs.tasks.Retrieval,
    ):
        """Store the two towers and the task used to score them.

        Args:
            user_model: Model applied to ``features["user_features"]``.
            item_model: Model applied to ``features["item_features"]``.
            task: Callable that receives both embeddings and returns the
                loss.
        """
        super().__init__()
        # Query (user) tower.
        self.user_model = user_model
        # Candidate (item) tower; stored under the name ``movie_model``.
        self.movie_model = item_model
        # Task that turns the two embeddings into a loss.
        self.task = task

    def compute_loss(self, features: Dict[Text, Dict], training=False) -> tf.Tensor:
        """Embed both sides of the batch and hand them to the task.

        Args:
            features: Mapping with the keys ``"user_features"`` and
                ``"item_features"``.
            training: Accepted for the ``tfrs.Model`` interface; not used
                by this implementation.

        Returns:
            Whatever ``self.task`` returns for the two embeddings.
        """
        return self.task(
            self.user_model(features["user_features"]),
            self.movie_model(features["item_features"]),
        )
###################################数据处理逻辑########################################
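'''
Sample data (the columns are tab-separated in the file, and the first line is a
header row that is skipped when the file is loaded):
age userTag itemTag
13 a,b,c c,d,e
14 e,h,g n,k,m
15 e,f,g n,k,m
16 e,d,g 验,过,m
14 e,n,g n,e,m
11 e,m,g n,k,m
19 e,s,g n,c,m
'''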
def str_json(message, max_tags=5):
    """Convert one split input row into the nested feature dict.

    Tag lists of differing lengths are brought to a common length so rows
    can be batched together: each list is padded at the end and then cut to
    ``max_tags`` entries.

    Args:
        message: Indexable row whose element 0 is the age, element 1 the
            comma-separated user tags and element 2 the comma-separated
            item tags (all as strings).
        max_tags: Number of tags kept per list. Defaults to 5.

    Returns:
        ``{"user_features": {"age", "userTags"},
        "item_features": {"itemTags"}}``. The key spelling matches what the
        models and the vocabulary extraction in this file look up.
    """

    def _fixed_length_tags(raw_tags):
        # Split on commas, append ``max_tags`` padding entries, then keep
        # the first ``max_tags`` so every row has the same length.
        # NOTE(review): padding entries are presumably empty strings
        # (tf.pad's default fill for string tensors) - confirm.
        tags = tf.strings.split(raw_tags, sep=',')
        return tf.pad(tags, [[0, max_tags]])[:max_tags]

    return {
        "user_features": {
            # Wrapped in a list so age carries a trailing axis of size 1.
            "age": [tf.strings.to_number(message[0], tf.float32)],
            # Was "usertags": every consumer reads "userTags".
            "userTags": _fixed_length_tags(message[1]),
        },
        "item_features": {
            # Was "itemtags": every consumer reads "itemTags".
            "itemTags": _fixed_length_tags(message[2]),
        },
    }
def _unique_tags(dataset, group, key):
    """Flatten ``row[group][key]`` over all rows and de-duplicate the values."""
    return dataset.flat_map(
        lambda row: tf.data.Dataset.from_tensor_slices(row[group][key])
    ).unique()


# Raw text lines of the input file.
original_data = tf.data.TextLineDataset(
    ['G:/git_alg/prepare_project/recommenders/data/test.csv'],
    num_parallel_reads=2,
)
# Skip the first line, split each remaining line on tabs, then build the
# nested feature dict.
map_result = (
    original_data.skip(1)
    .map(lambda x: tf.strings.split(x, sep='\t'))
    .map(str_json)
)
# Distinct tag values, used to adapt the lookup layers of the two towers.
userTag_vocabulary = _unique_tags(map_result, "user_features", "userTags")
itemTag_vocabulary = _unique_tags(map_result, "item_features", "itemTags")
##########################模型训练######################################
# Build the two towers from their tag vocabularies.
user_model = RecUserModel(userTag_vocabulary)
item_model = RecItemModel(itemTag_vocabulary)

# Retrieval task; its top-K metric is given the item embeddings of the whole
# dataset as candidates.
task = tfrs.tasks.Retrieval(
    metrics=tfrs.metrics.FactorizedTopK(
        map_result.map(lambda x: x["item_features"]).batch(5).map(item_model),
        k=3,
    )
)

# Assemble and compile the combined model.
item_rec_model = ItemRecModel(user_model, item_model, task)
item_rec_model.compile(optimizer=tf.keras.optimizers.Adagrad(0.5))

# Checkpoint callback that stores weights only.
model_file = "G:/git_alg/prepare_project/recommenders/saved_model/"
callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=model_file,
    save_weights_only=True,
    verbose=1,
)
item_rec_model.fit(map_result.batch(2), epochs=3, callbacks=[callback])

# Export each tower separately so they can be reloaded on their own.
tf.keras.models.save_model(
    user_model, 'G:/git_alg/prepare_project/recommenders/user_model'
)
tf.keras.models.save_model(
    item_model, 'G:/git_alg/prepare_project/recommenders/item_model'
)
###############################检索样例#######################
# Reload the two exported towers (no compile step is needed for inference).
user_model = tf.keras.models.load_model('G:/git_alg/prepare_project/recommenders/user_model',compile=False)
item_model = tf.keras.models.load_model('G:/git_alg/prepare_project/recommenders/item_model',compile=False)
# Number of candidates to retrieve; also used in the message printed below so
# the output cannot disagree with the index configuration.
top_k = 2
index = tfrs.layers.factorized_top_k.BruteForce(user_model, k=top_k)
# Index the item embeddings of every row in the dataset as candidates.
index.index_from_dataset(
    map_result.map(lambda x: x["item_features"]).batch(3).map(lambda x: item_model(x)))
# Get some recommendations. Mind the shapes: both features carry a leading
# batch axis, matching what the user tower received during training.
titles = index({"age": tf.constant([[11]], dtype=tf.float32),
                "userTags": tf.constant([["n", "k"]], dtype=tf.string)})
# The message previously claimed "Top 3 ... for user 42", although k is 2 and
# the query is an ad-hoc sample user.
print(f"Top {top_k} recommendations for the sample user: {titles}")
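# Full runnable code: GitHub - guoyandan/rec_model: Python recommendation models (data loading, multi-feature processing, multi-feature combination/association recommendation, data label processing)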