Paper
Wide & Deep Learning for Recommender Systems
Core Idea
The Wide part is used to strengthen the model's "memorization" ability, and the Deep part to strengthen its "generalization" ability.
- Pros: pioneered the combined (wide-plus-deep) model design and had a major influence on the subsequent development of deep learning recommender systems
- Cons: the Wide part still requires manual selection of feature crosses
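For reference, the joint model in the paper feeds the sum of both parts' logits into one sigmoid; its prediction formula (paper notation: $\phi(\mathbf{x})$ are the manual cross-product transformations, $a^{(l_f)}$ is the final DNN activation):

$$P(Y=1\mid\mathbf{x})=\sigma\!\left(\mathbf{w}_{wide}^{T}\,[\mathbf{x},\phi(\mathbf{x})]+\mathbf{w}_{deep}^{T}\,a^{(l_f)}+b\right)$$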
Network Architecture
Two branches share the inputs: a linear Wide branch over the raw (and crossed) features, and a Deep branch that embeds the sparse features and runs them through an MLP; the two logits are summed and passed through a sigmoid.
Code Implementation
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import (Activation, Add, Concatenate, Dense,
                                     Dropout, Embedding, Flatten, Input)
from tensorflow.keras.models import Model
from tensorflow.keras.utils import plot_model
from sklearn.preprocessing import LabelEncoder

from utils import SparseFeat, DenseFeat
def data_process(data_df, dense_features, sparse_features):
    """Fill missing values, log-transform the dense features, and label-encode the sparse features."""
    data_df[dense_features] = data_df[dense_features].fillna(0.0)
    for f in dense_features:
        # log(x + 1) compresses the heavy-tailed count features
        data_df[f] = data_df[f].apply(lambda x: np.log(x + 1) if x > -1 else -1)
    data_df[sparse_features] = data_df[sparse_features].fillna("-1")
    for f in sparse_features:
        # Map every category string to a contiguous integer id
        lbe = LabelEncoder()
        data_df[f] = lbe.fit_transform(data_df[f])
    return data_df[dense_features + sparse_features]
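A minimal sketch of what data_process does, on a made-up toy frame (assumes pandas and scikit-learn; values are illustrative):

```python
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder

toy = pd.DataFrame({"I1": [1.0, np.nan, 9.0], "C1": ["a", None, "b"]})
toy["I1"] = toy["I1"].fillna(0.0).apply(lambda x: np.log(x + 1) if x > -1 else -1)
toy["C1"] = LabelEncoder().fit_transform(toy["C1"].fillna("-1"))
print(toy)
#          I1  C1
# 0  0.693147   1   # log(1 + 1); classes sorted "-1" < "a" < "b", so "a" -> 1
# 1  0.000000   0   # NaN -> 0.0 -> log(1) = 0; missing category -> "-1" -> 0
# 2  2.302585   2   # log(9 + 1)
```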
def build_input_layers(feature_columns):
    """Create one Keras Input per feature and split them into dense/sparse dicts."""
    dense_input_dict, sparse_input_dict = {}, {}
    for fc in feature_columns:
        if isinstance(fc, SparseFeat):
            sparse_input_dict[fc.name] = Input(shape=(1,), name=fc.name)
        elif isinstance(fc, DenseFeat):
            dense_input_dict[fc.name] = Input(shape=(fc.dimension,), name=fc.name)
    return dense_input_dict, sparse_input_dict
def build_embedding_layers(feature_columns, input_layers_dict, is_linear):
    """Build one Embedding layer per sparse feature: 1-dimensional tables for the
    Wide (linear) part, k-dimensional tables for the Deep part."""
    embedding_layers_dict = dict()
    sparse_feature_columns = list(
        filter(lambda x: isinstance(x, SparseFeat), feature_columns)) if feature_columns else []
    if is_linear:
        for fc in sparse_feature_columns:
            embedding_layers_dict[fc.name] = Embedding(fc.vocabulary_size, 1,
                                                       name='1d_emb_' + fc.name)
    else:
        for fc in sparse_feature_columns:
            embedding_layers_dict[fc.name] = Embedding(fc.vocabulary_size, fc.embedding_dim,
                                                       name='kd_emb_' + fc.name)
    return embedding_layers_dict
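A quick shape check showing why the callers below need Flatten after these lookups (a standalone sketch, assuming TensorFlow 2.x):

```python
import tensorflow as tf

emb_1d = tf.keras.layers.Embedding(input_dim=100, output_dim=1)  # Wide-part table
emb_kd = tf.keras.layers.Embedding(input_dim=100, output_dim=4)  # Deep-part table
ids = tf.constant([[7], [42]])    # batch of two ids, shape (2, 1)
print(emb_1d(ids).shape)          # (2, 1, 1) -> Flatten gives (2, 1)
print(emb_kd(ids).shape)          # (2, 1, 4) -> Flatten gives (2, 4)
```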
def get_linear_logits(dense_input_dict, sparse_input_dict, sparse_feature_columns):
    """Wide part: Dense(1) over the concatenated dense features, plus the sum of
    1-d embeddings of the sparse features."""
    concat_dense_inputs = Concatenate(axis=1)(list(dense_input_dict.values()))
    dense_logits_output = Dense(1)(concat_dense_inputs)
    linear_embedding_layers = build_embedding_layers(sparse_feature_columns, sparse_input_dict,
                                                     is_linear=True)
    sparse_1d_embed = []
    for fc in sparse_feature_columns:
        feat_input = sparse_input_dict[fc.name]
        # (batch, 1, 1) -> (batch, 1)
        embed = Flatten()(linear_embedding_layers[fc.name](feat_input))
        sparse_1d_embed.append(embed)
    sparse_logits_output = Add()(sparse_1d_embed)
    linear_logits = Add()([dense_logits_output, sparse_logits_output])
    return linear_logits
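The 1-d embedding plus Add is exactly a linear model over one-hot encodings: looking up w[idx] equals the dot product of the one-hot vector with the weight table. A tiny numpy check with made-up weights:

```python
import numpy as np

w = np.array([0.3, -1.2, 0.7, 0.0, 2.1])  # 1-d embedding table = per-category weights
idx = 2
one_hot = np.eye(len(w))[idx]
assert np.isclose(one_hot @ w, w[idx])    # linear term over one-hot == table lookup
```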
def concat_embedding_list(feature_columns, input_layer_dict, embedding_layer_dict, flatten=False):
    """Look up the k-d embedding of every sparse feature; optionally flatten each to (batch, k)."""
    sparse_feature_columns = list(filter(lambda x: isinstance(x, SparseFeat), feature_columns))
    embedding_list = []
    for fc in sparse_feature_columns:
        _input = input_layer_dict[fc.name]   # (batch, 1)
        _embed = embedding_layer_dict[fc.name]
        embed = _embed(_input)               # (batch, 1, k)
        if flatten:
            embed = Flatten()(embed)         # (batch, k)
        embedding_list.append(embed)
    return embedding_list
def get_dnn_logits(dense_input_dict, sparse_input_dict, sparse_feature_columns, dnn_embedding_layers):
    """Deep part: concatenate dense features with the flattened embeddings and run
    them through a Dense + Dropout tower down to a 1-d logit."""
    concat_dense_inputs = Concatenate(axis=1)(list(dense_input_dict.values()))
    sparse_kd_embed = concat_embedding_list(sparse_feature_columns, sparse_input_dict,
                                            dnn_embedding_layers, flatten=True)
    concat_sparse_kd_embed = Concatenate(axis=1)(sparse_kd_embed)
    dnn_input = Concatenate(axis=1)([concat_dense_inputs, concat_sparse_kd_embed])
    dnn_out = Dropout(0.5)(Dense(1024, activation='relu')(dnn_input))
    dnn_out = Dropout(0.3)(Dense(512, activation='relu')(dnn_out))
    dnn_out = Dropout(0.1)(Dense(256, activation='relu')(dnn_out))
    dnn_logits = Dense(1)(dnn_out)
    return dnn_logits
def WideNDeep(linear_feature_columns, dnn_feature_columns):
    """Assemble the full model: sigmoid(wide_logits + deep_logits)."""
    dense_input_dict, sparse_input_dict = build_input_layers(linear_feature_columns + dnn_feature_columns)
    input_layers = list(dense_input_dict.values()) + list(sparse_input_dict.values())
    # Wide part
    linear_sparse_feature_columns = list(filter(lambda x: isinstance(x, SparseFeat), linear_feature_columns))
    linear_logits = get_linear_logits(dense_input_dict, sparse_input_dict, linear_sparse_feature_columns)
    # Deep part
    embedding_layers = build_embedding_layers(dnn_feature_columns, sparse_input_dict, is_linear=False)
    dnn_sparse_feature_columns = list(filter(lambda x: isinstance(x, SparseFeat), dnn_feature_columns))
    dnn_logits = get_dnn_logits(dense_input_dict, sparse_input_dict, dnn_sparse_feature_columns, embedding_layers)
    # Joint output
    output_logits = Add()([linear_logits, dnn_logits])
    output_layer = Activation("sigmoid")(output_logits)
    return Model(input_layers, output_layer)
if __name__ == "__main__":
    data = pd.read_csv('../data/criteo_sample.txt')
    columns = data.columns.values
    dense_features = [feat for feat in columns if 'I' in feat]
    sparse_features = [feat for feat in columns if 'C' in feat]
    train_data = data_process(data, dense_features, sparse_features)
    train_data['label'] = data['label']

    # Both parts reuse the same columns here; in practice the Wide part would
    # additionally take manually crossed features
    linear_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].nunique(), embedding_dim=4)
                              for feat in sparse_features] + \
                             [DenseFeat(feat, 1) for feat in dense_features]
    dnn_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].nunique(), embedding_dim=4)
                           for feat in sparse_features] + \
                          [DenseFeat(feat, 1) for feat in dense_features]

    model = WideNDeep(linear_feature_columns, dnn_feature_columns)
    model.summary()
    plot_model(model, to_file='model.png')
    model.compile(optimizer="adam",
                  loss="binary_crossentropy",
                  metrics=["binary_crossentropy", tf.keras.metrics.AUC(name='auc')])

    train_model_input = {name: train_data[name] for name in dense_features + sparse_features}
    history = model.fit(train_model_input, train_data['label'].values,
                        batch_size=64, epochs=5, validation_split=0.2)
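Once training finishes, predictions come straight from the sigmoid output. A minimal follow-up sketch (not part of the original script; it reuses the model and input dict from above):

```python
# Click-probability predictions in (0, 1) for the same input dict
preds = model.predict(train_model_input, batch_size=64)
print(preds[:5])
```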
Summary
- Categorical features are embedded before being fed to later layers, while continuous features are fed in directly.
- Wide part: the continuous features are concatenated and passed through a Dense(1) layer to give a 1-dimensional output; each categorical feature goes through a 1-dimensional linear embedding, these are summed (Add) into a 1-dimensional vector, which is then added to the continuous-feature output to give the Wide logit.
- Deep part: the continuous features are concatenated into a P-dimensional vector, and the categorical features are embedded and concatenated into an N*K-dimensional vector (N is the number of categorical features, K the embedding dimension); the two are concatenated as the DNN input and passed through several "Dense + Dropout" blocks, with a final linear layer producing a 1-dimensional logit (a worked dimension count follows this list).
- The Wide and Deep outputs are summed (Add) and passed through an activation, here a Sigmoid, to give the prediction.
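Concretely, for the Criteo sample used above (13 continuous features I1–I13, 26 categorical features C1–C26, embedding dimension K = 4), the DNN input has P + N*K = 13 + 26*4 = 117 dimensions, which the 1024 → 512 → 256 → 1 tower then maps to a single logit.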