# Source code for elliot.recommender.latent_factor_models.PMF.probabilistic_matrix_factorization

"""
Module description:

Mnih, Andriy, and Russ R. Salakhutdinov. "Probabilistic matrix factorization." Advances in neural information processing systems 20 (2007)

"""

__version__ = '0.3.1'
__author__ = 'Vito Walter Anelli, Claudio Pomo'
__email__ = 'vitowalter.anelli@poliba.it, claudio.pomo@poliba.it'

import numpy as np
import pickle
from tqdm import tqdm

from elliot.dataset.samplers import pointwise_pos_neg_sampler as pws
from elliot.recommender.latent_factor_models.PMF.probabilistic_matrix_factorization_model import ProbabilisticMatrixFactorizationModel
from elliot.recommender.recommender_utils_mixin import RecMixin
from elliot.utils.write import store_recommendation

from elliot.recommender.base_recommender_model import BaseRecommenderModel
from elliot.recommender.base_recommender_model import init_charger


class PMF(RecMixin, BaseRecommenderModel):
    r"""
    Probabilistic Matrix Factorization

    For further details, please refer to the `paper <https://papers.nips.cc/paper/2007/file/d7322ed717dedf1eb4e6e52a37ea7bcd-Paper.pdf>`_

    Args:
        factors: Number of latent factors
        lr: Learning rate
        reg: Regularization coefficient
        gaussian_variance: Variance of the Gaussian distribution

    To include the recommendation model, add it to the config file adopting the following pattern:

    .. code:: yaml

      models:
        PMF:
          meta:
            save_recs: True
          epochs: 10
          batch_size: 512
          factors: 50
          lr: 0.001
          reg: 0.0025
          gaussian_variance: 0.1
    """

    @init_charger
    def __init__(self, data, config, params, *args, **kwargs):
        """Read the hyperparameters and build the sampler and the PMF model.

        Args:
            data: Dataset object. This constructor reads ``transactions``,
                ``train_dict``, ``sp_i_train`` and ``i_train_dict`` from
                ``self._data`` (presumably bound to ``data`` by
                ``init_charger`` -- confirm in the decorator).
            config: Experiment configuration; not used directly here.
            params: Model parameters; not used directly here.
        """
        # One entry per hyperparameter. The second field matches the config
        # key documented in the class docstring and the fourth its default
        # value. NOTE(review): the remaining fields are interpreted by
        # ``autoset_params``, which is defined outside this class -- confirm
        # their exact meaning there.
        self._params_list = [
            ("_learning_rate", "lr", "lr", 0.001, None, None),
            ("_factors", "factors", "factors", 50, None, None),
            ("_l_w", "reg", "reg", 0.0025, None, None),
            ("_gvar", "gaussian_variance", "gvar", 0.1, None, None),
        ]
        self.autoset_params()

        # A batch size below 1 means "one batch spanning every transaction".
        if self._batch_size < 1:
            self._batch_size = self._data.transactions

        self._ratings = self._data.train_dict
        self._sp_i_train = self._data.sp_i_train
        # Every item index, reused to build the user x item scoring grid.
        self._i_items_set = list(range(self._num_items))

        self._sampler = pws.Sampler(self._data.i_train_dict)

        self._model = ProbabilisticMatrixFactorizationModel(
            self._num_users,
            self._num_items,
            self._factors,
            self._l_w,
            self._gvar,
            self._learning_rate,
            self._seed,
        )

    @property
    def name(self):
        """Model identifier: ``PMF`` followed by the base and model parameter shortcuts."""
        return f"PMF_{self.get_base_params_shortcut()}_{self.get_params_shortcut()}"

    def predict(self, u: int, i: int):
        """Placeholder for single (user, item) scoring; not implemented, returns ``None``."""
        pass

    def train(self):
        """Train for ``self._epochs`` epochs and evaluate after each one.

        When ``self._restore`` is truthy no training is performed and the
        result of ``self.restore_weights()`` is returned instead.

        The value handed to ``self.evaluate`` is the mean training loss per
        step of the epoch, i.e. the same quantity shown in the progress bar.
        """
        if self._restore:
            return self.restore_weights()

        for it in self.iterate(self._epochs):
            loss = 0
            steps = 0
            with tqdm(total=int(self._data.transactions // self._batch_size), disable=not self._verbose) as t:
                for batch in self._sampler.step(self._data.transactions, self._batch_size):
                    steps += 1
                    loss += self._model.train_step(batch)
                    t.set_postfix({'loss': f'{loss.numpy() / steps:.5f}'})
                    t.update()

            # ``loss`` restarts from zero every epoch, so it must be averaged
            # over the steps of this epoch, not over the epoch index. When the
            # sampler produced no batch ``loss`` is still the int 0 (it has no
            # ``.numpy()``), so report 0.0 rather than crashing.
            mean_loss = loss.numpy() / steps if steps else 0.0
            self.evaluate(it, mean_loss)

    def get_recommendations(self, k: int = 100):
        """Score every item for every user and collect the top-``k`` lists.

        Users are processed in chunks of ``self._batch_size``. For each chunk
        a (users, items) pair of index grids is passed to the model's
        ``get_recs`` and the scores are handed to ``self.process_protocol``.

        Args:
            k: Number of recommendations requested per user.

        Returns:
            A ``(validation, test)`` tuple of dicts, each filled by updating
            it with the corresponding result of ``process_protocol``.
        """
        predictions_top_k_val = {}
        predictions_top_k_test = {}

        # The row of item indices is identical for every user: build it once.
        item_row = np.asarray(self._i_items_set)

        for offset in range(0, self._num_users, self._batch_size):
            offset_stop = min(offset + self._batch_size, self._num_users)
            # Both grids have shape (users in chunk, num_items).
            user_grid = np.repeat(
                np.arange(offset, offset_stop)[:, None],
                repeats=self._num_items,
                axis=1,
            )
            item_grid = np.tile(item_row, (offset_stop - offset, 1))

            predictions = self._model.get_recs((user_grid, item_grid))
            recs_val, recs_test = self.process_protocol(k, predictions, offset, offset_stop)

            predictions_top_k_val.update(recs_val)
            predictions_top_k_test.update(recs_test)

        return predictions_top_k_val, predictions_top_k_test