2018 Alibaba Ad Click-Through Rate Prediction Model --- DIN, a TensorFlow 2.0 Code Walkthrough (Part 4)


def call(self, inputs):
    # user: user ID; item: candidate item ID; hist: the user's behavior history, i.e. a list of item IDs;
    # sl: the valid (unpadded) length of each history list
    user, item, hist, sl = inputs[0], tf.squeeze(inputs[1], axis=1), inputs[2], tf.squeeze(inputs[3], axis=1)
    # user_embed = self.u_embed(user)
    item_embed = self.concat_embed(item)
    hist_embed = self.concat_embed(hist)
    ......
def concat_embed(self, item):
    """
    Concatenate the item embedding with its category embedding.
    :param item: item ID(s)
    :return: the concatenated embedding
    """
    # cate = tf.transpose(tf.gather_nd(self.cate_list, [item]))
    # look up the category ID of each item
    cate = tf.gather(self.cate_list, item)
    # drop a trailing singleton dimension if the lookup added one
    cate = tf.squeeze(cate, axis=1) if cate.shape[-1] == 1 else cate
    item_embed = self.item_embed(item)
    item_cate_embed = self.cate_embed(cate)
    embed = self.concat([item_embed, item_cate_embed])
    return embed
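
The snippet above uses several layers and lookup tables that are defined elsewhere in the model class. Below is a minimal sketch of what the corresponding __init__ might look like; the layer names match the snippet, but the constructor arguments (item_count, cate_count, hidden_units) and the cate_list handling are illustrative assumptions, not the author's exact code.

import tensorflow as tf
from tensorflow.keras import layers

class DIN(tf.keras.Model):
    def __init__(self, item_count, cate_count, cate_list, hidden_units=64):
        super().__init__()
        self.hidden_units = hidden_units
        # item-ID -> category-ID lookup table, one entry per item (assumed shape: [item_count])
        self.cate_list = tf.convert_to_tensor(cate_list, dtype=tf.int32)
        # separate embedding tables for item IDs and category IDs
        self.item_embed = layers.Embedding(item_count, hidden_units)
        self.cate_embed = layers.Embedding(cate_count, hidden_units)
        # joins [item_embed, cate_embed] along the last axis -> hidden_units * 2
        self.concat = layers.Concatenate(axis=-1)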

3. Following the model, weight the items in the user's behavior history by the content of the candidate ad, i.e. an attention mechanism that assigns larger weights to items similar to the candidate ad.
def call(self, inputs):
    ......
    # history item embeddings weighted by attention
    hist_att_embed = self.attention(item_embed, hist_embed, sl)
    hist_att_embed = self.bn1(hist_att_embed)
    hist_att_embed = tf.reshape(hist_att_embed, [-1, self.hidden_units * 2])
    u_embed = self.dense(hist_att_embed)
    ......
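# Shape walkthrough for the lines above (a sketch; the exact output width of
# self.dense depends on how it is configured in __init__):
#   attention(...) output : [None, 1, hidden_units * 2]
#   after self.bn1        : [None, 1, hidden_units * 2]
#   after tf.reshape      : [None, hidden_units * 2]
#   after self.dense      : u_embed, the user-interest vector passed on to the MLP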
def attention(self, queries, keys, keys_length):
    """
    Activation unit.
    :param queries: candidate ad (item) embedding
    :param keys: user behavior (history) embeddings
    :param keys_length: valid length of each user's behavior sequence
    :return: attention-weighted sum of the history embeddings
    """
    # hidden dimension of the candidate item embedding: hidden_unit * 2
    queries_hidden_units = queries.shape[-1]
    # Every history item embed is concatenated with the candidate item embed,
    # so the candidate embed is tiled keys.shape[1] times.
    # keys.shape[1] is the maximum sequence length, i.e. 431 here; tiling keeps the matrix math simple.
    # [None, 431 * hidden_unit * 2]
    queries = tf.tile(queries, [1, keys.shape[1]])
    # reshape the tiled candidate embed
    # [None, 431, hidden_unit * 2]
    queries = tf.reshape(queries, [-1, keys.shape[1], queries_hidden_units])
    # concatenate the candidate embed with the history embeds
    # [None, 431, hidden_unit * 2 * 4]
    embed = tf.concat([queries, keys, queries - keys, queries * keys], axis=-1)
    # fully connected layers produce the weights W
    d_layer_1 = self.att_dense1(embed)
    d_layer_2 = self.att_dense2(d_layer_1)
    # [None, 431, 1]
    d_layer_3 = self.att_dense3(d_layer_2)
    # reshape the output weights so each history item embed has one weight
    # [None, 1, 431]
    outputs = tf.reshape(d_layer_3, [-1, 1, keys.shape[1]])
    # Mask
    # positions holding real history records are set to True
    # [None, 431]
    key_masks = tf.sequence_mask(keys_length, keys.shape[1])
    # add a dimension
    # [None, 1, 431]
    key_masks = tf.expand_dims(key_masks, 1)
    # padding matrix
    paddings = tf.ones_like(outputs) * (-2 ** 32 + 1)
    # Build the output matrix; this is what makes the later sum pooling work.
    # True positions keep the original outputs values; False positions take the
    # padding value, which is very negative and becomes ~0 after softmax.
    # [None, 1, 431] ----> one weight per browsed history item
    outputs = tf.where(key_masks, outputs, paddings)
    # Scale: keys.shape[-1] is the hidden dimension of hist_embed
    outputs = outputs / (keys.shape[-1] ** 0.5)
    # Activation: normalize with softmax
    outputs = tf.nn.softmax(outputs)
    # weight the history embeds
    # [None, 1, 431] * [None, 431, hidden_unit * 2] = [None, 1, hidden_unit * 2]
    outputs = tf.matmul(outputs, keys)
    return outputs
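
To see the masking at work, here is a standalone toy run of the same mask-pad-softmax-matmul steps (a sketch on random tensors, not the model class itself): two users, a padded history of length 4, and valid lengths 2 and 3. Positions beyond the valid length receive effectively zero weight after the softmax, so the final matmul is a weighted sum pooling over real history items only.

import tensorflow as tf

batch, max_len, dim = 2, 4, 6
keys = tf.random.normal([batch, max_len, dim])   # padded history embeddings
keys_length = tf.constant([2, 3])                # valid history lengths per user

# stand-in for the attention scores produced by the dense layers: [batch, 1, max_len]
scores = tf.random.normal([batch, 1, max_len])

# True for real records, False for padding: [batch, 1, max_len]
key_masks = tf.expand_dims(tf.sequence_mask(keys_length, max_len), 1)
paddings = tf.ones_like(scores) * (-2 ** 32 + 1)
scores = tf.where(key_masks, scores, paddings) / (dim ** 0.5)
weights = tf.nn.softmax(scores)

print(weights.numpy().round(3))
# e.g. [[[0.52 0.48 0.   0.  ]]    first user: only 2 valid positions get weight
#       [[0.31 0.42 0.27 0.  ]]]   second user: 3 valid positions
pooled = tf.matmul(weights, keys)  # [batch, 1, dim]: weighted sum pooling
print(pooled.shape)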