"""
Module description:

TensorFlow implementation of NAIS (Neural Attentive Item Similarity), an
attentive item-to-item collaborative filtering model
(elliot.recommender.neural.NAIS.nais_model). An attention network learns how
much each item in a user's history should contribute when scoring a target item.
"""

__version__ = '0.3.1'
__author__ = 'Vito Walter Anelli, Claudio Pomo, Daniele Malitesta'
__email__ = 'vitowalter.anelli@poliba.it, claudio.pomo@poliba.it, daniele.malitesta@poliba.it'

import os

# Must be set before TensorFlow is imported, or the C++ log level is not applied.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import numpy as np
import tensorflow as tf
from tensorflow import keras


class NAIS_model(keras.Model):
    def __init__(self,
                 data,
                 algorithm,
                 weight_size,
                 factors,
                 lr,
                 l_w,
                 l_b,
                 alpha,
                 beta,
                 num_users,
                 num_items,
                 random_seed=42,
                 name="NAIS",
                 **kwargs):
        super().__init__(name=name, **kwargs)
        # (a toy end-to-end usage sketch follows this class definition)
        tf.random.set_seed(random_seed)

        self._data = data
        self._algorithm = algorithm
        self._weight_size = weight_size
        self._factors = factors
        self._lr = lr
        self._l_w = l_w
        self._l_b = l_b
        self._alpha = alpha
        self._beta = beta
        self._num_users = num_users
        self._num_items = num_items

        # Padded per-user interaction histories, their lengths, and a 0/1 padding mask.
        self._history_item_matrix, self._history_lens, self._mask_history_matrix = self.create_history_item_matrix()

        self.initializer = tf.keras.initializers.RandomUniform(-0.001, 0.001)

        # Item and user bias terms.
        self.Bi = tf.Variable(tf.zeros(self._num_items), name='Bi', dtype=tf.float32)
        self.Bu = tf.Variable(tf.zeros(self._num_users), name='Bu', dtype=tf.float32)
        # Two item embedding tables: Gi embeds history items, Gj embeds target items.
        self.Gi = tf.Variable(self.initializer(shape=[self._num_items, self._factors]), name='Gi', dtype=tf.float32)
        self.Gj = tf.Variable(self.initializer(shape=[self._num_items, self._factors]), name='Gj', dtype=tf.float32)

        # Attention MLP: its input is either the concatenation or the element-wise
        # product of a history-item embedding and the target-item embedding.
        self._mlp_layers = keras.Sequential()
        if self._algorithm == 'concat':
            self._mlp_layers.add(
                keras.layers.Dense(self._factors * 2, activation='relu', kernel_initializer=self.initializer))
        elif self._algorithm == 'product':
            self._mlp_layers.add(
                keras.layers.Dense(self._factors, activation='relu', kernel_initializer=self.initializer))
        else:
            raise Exception('Algorithm not found for NAIS. Please select either concat or product to use NAIS.')
        self._mlp_layers.add(
            keras.layers.Dense(self._weight_size, activation='linear', kernel_initializer=self.initializer))
        self._mlp_layers.add(
            keras.layers.Dense(1, activation='linear', kernel_initializer=self.initializer))

        self.optimizer = tf.optimizers.Adam(self._lr)
        self.loss = keras.losses.BinaryCrossentropy()

    @tf.function
    def attention(self, user_history, target):
        # Attention Layers
        if self._algorithm == 'concat':
            mlp_input = tf.concat([user_history,
                                   tf.broadcast_to(tf.expand_dims(target, axis=1), user_history.shape)],
                                  axis=2)  # batch_size x max_len x factors*2
        elif self._algorithm == 'product':
            mlp_input = user_history * tf.expand_dims(target, axis=1)  # batch_size x max_len x factors
        return tf.squeeze(self._mlp_layers(mlp_input))  # batch_size x max_len

    @tf.function
    def batch_attention(self, user_history, target):
        batch_eval = user_history.shape[0]
        # Attention Layers
        target = tf.repeat(target, user_history.shape[0], axis=0)
        user_history = tf.reshape(user_history,
                                  [user_history.shape[1] * user_history.shape[0],
                                   user_history.shape[2],
                                   user_history.shape[3]])
        if self._algorithm == 'concat':
            mlp_input = tf.concat([user_history,
                                   tf.broadcast_to(tf.expand_dims(target, axis=1), user_history.shape)],
                                  axis=2)  # batch_size x max_len x factors*2
        elif self._algorithm == 'product':
            mlp_input = user_history * tf.expand_dims(target, axis=1)  # batch_size x max_len x factors
        results = tf.squeeze(self._mlp_layers(mlp_input))
        return tf.reshape(results, [batch_eval, self._num_items, results.shape[1]])  # batch_eval x num_items x max_len

    @tf.function
    def batch_softmax(self, logits, item_num, similarity, user_bias, item_bias):
        # Mask Softmax
        batch_eval = logits.shape[0]
        exp_logits = tf.exp(logits)  # batch_size x max_len
        exp_sum = tf.reduce_sum(exp_logits, axis=1, keepdims=True)
        exp_sum = tf.pow(exp_sum, self._beta)  # beta smooths the softmax denominator
        weights = tf.divide(exp_logits, exp_sum)
        # alpha controls the normalization over the user history length.
        coeff = tf.reshape(tf.pow(tf.cast(item_num, tf.float32), -self._alpha),
                           [batch_eval, self._num_items])
        prod = coeff * tf.reduce_sum(weights * similarity, axis=2)
        return 1 / (1 + tf.math.exp(-(prod + tf.reshape(user_bias, prod.shape) + item_bias)))

    @tf.function
    def softmax(self, logits, item_num, similarity, user_bias, item_bias, batch_mask_mat=None):
        # Mask Softmax
        exp_logits = tf.exp(logits)  # batch_size x max_len
        if batch_mask_mat is not None:
            # Zero out the padded positions of each user history.
            exp_logits = batch_mask_mat * exp_logits  # batch_size x max_len
        exp_sum = tf.reduce_sum(exp_logits, axis=1, keepdims=True)
        exp_sum = tf.pow(exp_sum, self._beta)  # beta smooths the softmax denominator
        weights = tf.divide(exp_logits, exp_sum)
        coeff = tf.pow(tf.cast(item_num, tf.float32), -self._alpha)
        return 1 / (1 + tf.math.exp(-(coeff * tf.reduce_sum(weights * similarity, axis=1)
                                      + user_bias + item_bias)))
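
    # A minimal sketch (not part of the original module) of the smoothed, masked
    # softmax above on a toy input, to make the roles of the mask and beta
    # concrete; the name and all values are illustrative only.
    @staticmethod
    def _example_softmax_weights():  # hypothetical helper, illustrative only
        logits = tf.constant([[1.0, 2.0, 0.5]])  # batch_size=1, max_len=3
        mask = tf.constant([[1.0, 1.0, 0.0]])    # third history slot is padding
        beta = 0.5
        exp_logits = tf.exp(logits) * mask       # the padded slot contributes nothing
        denom = tf.pow(tf.reduce_sum(exp_logits, axis=1, keepdims=True), beta)
        # With beta < 1 the weights deliberately do not sum to one: this is the
        # NAIS smoothing that avoids over-penalizing users with long histories.
        return exp_logits / denom                # attention weight per history item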

    @tf.function
    def call(self, inputs, training=None):
        user, item = inputs
        user_inter = tf.nn.embedding_lookup(self._history_item_matrix, user)
        item_num = tf.nn.embedding_lookup(self._history_lens, user)
        batch_mask_mat = tf.nn.embedding_lookup(self._mask_history_matrix, user)
        user_history = tf.squeeze(tf.nn.embedding_lookup(self.Gi, user_inter))  # batch_size x max_len x factors
        target = tf.squeeze(tf.nn.embedding_lookup(self.Gj, item))  # batch_size x factors
        user_bias = tf.squeeze(tf.nn.embedding_lookup(self.Bu, user))  # batch_size
        item_bias = tf.squeeze(tf.nn.embedding_lookup(self.Bi, item))  # batch_size
        similarity = tf.squeeze(tf.matmul(user_history, tf.expand_dims(target, axis=2)))  # batch_size x max_len
        logits = self.attention(user_history, target)
        scores = self.softmax(logits, item_num, similarity, user_bias, item_bias, batch_mask_mat)
        return scores, user_bias, item_bias, user_history, target

    @tf.function
    def train_step(self, batch):
        # (see the usage sketch after this class for a toy end-to-end example)
        user, pos, label = batch
        with tf.GradientTape() as tape:
            # Clean Inference
            output, user_bias, item_bias, source, target = self(inputs=(user, pos), training=True)
            # Bias terms are weighted by l_b, embedding terms by l_w.
            reg_loss = self._l_b * tf.nn.l2_loss(user_bias) \
                       + self._l_b * tf.nn.l2_loss(item_bias) \
                       + self._l_w * tf.reduce_sum([tf.nn.l2_loss(source), tf.nn.l2_loss(target)])
            # Keras losses take (y_true, y_pred); everything stays in float32.
            loss = self.loss(label, output) + reg_loss
        grads = tape.gradient(loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        return loss

    @tf.function
    def predict(self, user, **kwargs):
        user_inters = tf.nn.embedding_lookup(self._history_item_matrix, user)
        item_num = tf.nn.embedding_lookup(self._history_lens, user)
        # The full padded history is scored (rather than slicing to item_num).
        user_input = user_inters[:]
        repeats = self._num_items
        user_bias = tf.squeeze(tf.nn.embedding_lookup(self.Bu, user))
        item_num = tf.repeat(item_num, repeats)
        user_history = tf.squeeze(tf.nn.embedding_lookup(self.Gi, user_input))  # inter_num x embedding_size
        user_history = tf.ones([repeats, 1, 1]) * user_history  # target_items x inter_num x embedding_size
        targets = self.Gj  # target_items x embedding_size
        item_bias = self.Bi
        similarity = tf.squeeze(tf.matmul(user_history, tf.expand_dims(targets, axis=2)))
        logits = self.attention(user_history, targets)
        scores = self.softmax(logits, item_num, similarity, user_bias, item_bias)
        return scores

    @tf.function
    def batch_predict(self, user_start, user_stop):
        users = tf.range(user_start, user_stop)
        user_inters = tf.nn.embedding_lookup(self._history_item_matrix, users)
        item_num = tf.nn.embedding_lookup(self._history_lens, users)
        # The full padded histories are scored (rather than slicing to item_num).
        user_input = user_inters[:]
        repeats = self._num_items
        user_bias = tf.repeat(tf.nn.embedding_lookup(self.Bu, users), repeats)
        item_num = tf.repeat(item_num, repeats)
        user_history = tf.squeeze(tf.nn.embedding_lookup(self.Gi, user_input))  # bs x inter_num x embedding_size
        user_history = tf.ones([repeats, 1, 1, 1]) * user_history  # target_items x bs x inter_num x embedding_size
        user_history = tf.reshape(user_history,
                                  [user_history.shape[1], user_history.shape[0],
                                   user_history.shape[2], user_history.shape[3]])
        targets = self.Gj  # target_items x embedding_size
        item_bias = self.Bi
        similarity = tf.squeeze(tf.matmul(user_history, tf.expand_dims(targets, axis=2)))
        logits = self.batch_attention(user_history, targets)
        scores = self.batch_softmax(logits, item_num, similarity, user_bias, item_bias)
        return scores

    def create_history_item_matrix(self):
        # Build, for every user, the padded row of item ids they interacted with
        # in training, plus the true history lengths and a 0/1 padding mask.
        user_ids, item_ids = self._data.sp_i_train.nonzero()
        row_num = self._num_users
        history_len = np.zeros(row_num, dtype=np.int64)
        for row_id in user_ids:
            history_len[row_id] += 1
        col_num = np.max(history_len)
        history_matrix = np.zeros((row_num, col_num), dtype=np.int64)
        mask_history_matrix = np.zeros((row_num, col_num), dtype=np.int64)
        history_len[:] = 0
        for row_id, col_id in zip(user_ids, item_ids):
            history_matrix[row_id, history_len[row_id]] = col_id
            mask_history_matrix[row_id, history_len[row_id]] = 1
            history_len[row_id] += 1
        # trainable=False: these are static lookup structures, not model parameters.
        return (tf.Variable(history_matrix, trainable=False),
                tf.Variable(history_len, dtype=tf.float32, trainable=False),
                tf.Variable(mask_history_matrix, dtype=tf.float32, trainable=False))
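
    # For intuition (illustrative example, not part of the original module):
    # with three users whose training items are {0, 2}, {1} and {0, 1, 3},
    # create_history_item_matrix would return (max history length 3):
    #   history_matrix      = [[0, 2, 0],
    #                          [1, 0, 0],
    #                          [0, 1, 3]]
    #   history_len         = [2., 1., 3.]
    #   mask_history_matrix = [[1., 1., 0.],
    #                          [1., 0., 0.],
    #                          [1., 1., 1.]]
    # Padding reuses item id 0, which is why the mask is needed.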

    @tf.function
    def get_top_k(self, predictions, train_mask, k=100):
        # Push already-seen training items to -inf, then take the k best per user.
        return tf.nn.top_k(tf.where(train_mask, predictions, -np.inf), k=k, sorted=True)
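
    # For intuition (illustrative example, not part of the original module):
    # with predictions = [0.9, 0.1, 0.7] and train_mask = [False, True, True]
    # (False marks items already seen in training), get_top_k with k=2 yields
    # values [0.7, 0.1] and indices [2, 1]: the seen item 0 is set to -inf and
    # can never enter the top k.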

    @tf.function
    def get_positions(self, predictions, train_mask, items, inner_test_user_true_mask):
        predictions = tf.gather(predictions, inner_test_user_true_mask)
        train_mask = tf.gather(train_mask, inner_test_user_true_mask)
        equal = tf.reshape(items, [len(items), 1])
        i = tf.argsort(tf.where(train_mask, predictions, -np.inf),
                       axis=-1, direction='DESCENDING', stable=False, name=None)
        positions = tf.where(tf.equal(equal, i))[:, 1]
        return 1 - (positions / tf.reduce_sum(tf.cast(train_mask, tf.int64), axis=1))

    def get_config(self):
        raise NotImplementedError
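

# --- Illustrative usage sketch (assumption: not part of the original module) --
# Builds the model on a toy dataset and runs a single training step. `_ToyData`
# is a hypothetical stand-in for Elliot's data object (the model only reads its
# scipy-sparse `sp_i_train` matrix), and every hyperparameter value below is a
# placeholder rather than a recommended setting.
def _example_nais_usage():  # hypothetical helper, illustrative only
    from scipy.sparse import csr_matrix

    class _ToyData:
        # 3 users x 4 items binary training matrix
        sp_i_train = csr_matrix(np.array([[1, 0, 1, 0],
                                          [0, 1, 0, 0],
                                          [1, 1, 0, 1]]))

    model = NAIS_model(data=_ToyData(), algorithm='product', weight_size=16,
                       factors=8, lr=0.001, l_w=0.001, l_b=0.001,
                       alpha=0.5, beta=0.5, num_users=3, num_items=4)
    users = np.array([0, 1, 2])
    items = np.array([2, 1, 3])
    labels = np.array([1., 1., 0.], dtype=np.float32)
    return model.train_step((users, items, labels))  # scalar training loss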


class LatentFactor(tf.keras.layers.Embedding):
    def __init__(self, num_instances, dim, zero_init=False, name=None):
        if zero_init:
            initializer = 'zeros'
        else:
            initializer = 'uniform'
        super(LatentFactor, self).__init__(input_dim=num_instances,
                                           output_dim=dim,
                                           embeddings_initializer=initializer,
                                           name=name)

    def censor(self, censor_id):
        # Rescale the embeddings of the given ids so their L2 norm is at most 1.
        unique_censor_id, _ = tf.unique(censor_id)
        embedding_gather = tf.gather(self.variables[0], indices=unique_censor_id)
        norm = tf.norm(embedding_gather, axis=1, keepdims=True)
        return self.variables[0].scatter_nd_update(indices=tf.expand_dims(unique_censor_id, 1),
                                                   updates=embedding_gather / tf.math.maximum(norm, 0.1))
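

# --- Illustrative sketch (assumption: not part of the original module) -------
# Shows LatentFactor together with its `censor` projection: after the update,
# the embeddings of the given ids have L2 norm at most 1. The name and all
# sizes and ids below are placeholders.
def _example_latent_factor_censor():  # hypothetical helper, illustrative only
    factor = LatentFactor(num_instances=10, dim=4, name='toy_factor')
    _ = factor(tf.constant([0]))  # call once so the embedding table is built
    factor.censor(tf.constant([0, 3, 3]))  # duplicate ids are handled via tf.unique
    return tf.norm(tf.gather(factor.variables[0], [0, 3]), axis=1)  # each entry <= 1.0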