# Source code for elliot.recommender.latent_factor_models.PureSVD.pure_svd_model

"""
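Module description:

PureSVD latent factor model. User and item factors are obtained from a
truncated randomized SVD of the training matrix exposed by the dataset object
(``sp_i_train``); a user/item score is the dot product of the two factor rows.
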
"""

__version__ = '0.3.1'
__author__ = 'Vito Walter Anelli, Claudio Pomo'
__email__ = 'vitowalter.anelli@poliba.it, claudio.pomo@poliba.it'

import pickle

import numpy as np
from scipy import sparse as sp
from sklearn.utils.extmath import randomized_svd


class PureSVDModel(object):
    """Latent factor model whose factors come from a truncated SVD.

    ``train_step`` factorises ``data.sp_i_train`` with scikit-learn's
    ``randomized_svd`` and stores ``U`` as the user factors and
    ``(diag(sigma) @ Vt).T`` as the item factors. A score for a user/item pair
    is the dot product of the corresponding factor rows.

    Args:
        factors: Number of singular components to keep (passed to
            ``randomized_svd`` as ``n_components``).
        data: Dataset object. This class reads ``private_users``,
            ``public_users``, ``private_items``, ``public_items``,
            ``train_dict``, ``num_users``, ``num_items`` and ``sp_i_train``
            from it.
        random_seed: Seed forwarded to ``randomized_svd`` as ``random_state``.
    """

    def __init__(self, factors, data, random_seed):
        self._data = data
        # NOTE(review): from their use below, the "public" maps translate
        # external ids to internal row indices and the "private" maps do the
        # reverse - confirm against the dataset class.
        self._private_users = data.private_users
        self._public_users = data.public_users
        self._private_items = data.private_items
        self._public_items = data.public_items
        self.factors = factors
        self.random_seed = random_seed
        self.train_dict = self._data.train_dict
        self.user_num, self.item_num = self._data.num_users, self._data.num_items
        # Populated by train_step() or set_model_state().
        self.user_vec, self.item_vec = None, None

    def train_step(self):
        """Compute the truncated SVD and store the user and item factors."""
        U, sigma, Vt = randomized_svd(self._data.sp_i_train,
                                      n_components=self.factors,
                                      random_state=self.random_seed)
        self.user_vec = U
        # Scale row i of Vt by sigma[i] (i.e. diag(sigma) @ Vt) with plain
        # broadcasting, then transpose so that each row is one item's factors.
        self.item_vec = (sigma[:, np.newaxis] * Vt).T

    def predict(self, user, item):
        """Return the predicted score for one user/item pair.

        Args:
            user: User id, looked up in the ``public_users`` map.
            item: Item id, looked up in the ``public_items`` map.

        Returns:
            The dot product of the user's and the item's factor rows.

        Raises:
            KeyError: If ``user`` or ``item`` is not present in its map.
        """
        user_row = self.user_vec[self._public_users[user], :]
        item_row = self.item_vec[self._public_items[item], :]
        return user_row.dot(item_row)

    def get_user_recs(self, user_id, mask, top_k=100):
        """Return the ``top_k`` best-scoring items for a user.

        Args:
            user_id: User id, looked up in the ``public_users`` map.
            mask: Object indexable by the internal user index that yields a
                boolean array over items. Items whose entry is ``False`` get
                a score of ``-inf``.
            top_k: Maximum number of recommendations to return.

        Returns:
            A list of ``(item_id, score)`` pairs sorted by decreasing score,
            where ``item_id`` comes from the ``private_items`` map. The list
            has ``min(top_k, number of items)`` entries, so masked items can
            appear (with score ``-inf``) when ``top_k`` exceeds the number of
            unmasked items. A non-positive ``top_k`` yields an empty list.

        Raises:
            KeyError: If ``user_id`` is not present in the ``public_users`` map.
        """
        # A direct lookup fails loudly for unknown users; ``.get`` would hand
        # ``None`` to NumPy, which interprets it as ``np.newaxis``.
        user_idx = self._public_users[user_id]

        # Work on a private float64 copy so the in-place masking below never
        # touches the stored factors.
        scores = np.array(self.user_vec[user_idx] @ self.item_vec.T, dtype=np.float64)
        scores[~mask[user_idx]] = -np.inf

        local_k = min(top_k, len(scores))
        if local_k <= 0:
            # Guard: ``[-0:]`` would select every item rather than none.
            return []

        # Select the k best in linear time, then sort only those k.
        top_idx = np.argpartition(scores, -local_k)[-local_k:]
        top_scores = scores[top_idx]
        # Map back to external item ids only for the selected candidates.
        top_items = np.array([self._private_items.get(idx) for idx in top_idx.tolist()])
        order = top_scores.argsort()[::-1]
        return [(top_items[pos], top_scores[pos]) for pos in order]

    def get_model_state(self):
        """Return the learned factors as a dict with keys ``user_vec`` and ``item_vec``."""
        return {'user_vec': self.user_vec, 'item_vec': self.item_vec}

    def set_model_state(self, saving_dict):
        """Restore the factors from a dict produced by ``get_model_state``."""
        self.user_vec = saving_dict['user_vec']
        self.item_vec = saving_dict['item_vec']

    def load_weights(self, path):
        """Load the factors from a pickle file written by ``save_weights``."""
        # SECURITY: pickle.load can execute arbitrary code; only load weight
        # files that come from a trusted source.
        with open(path, "rb") as f:
            self.set_model_state(pickle.load(f))

    def save_weights(self, path):
        """Pickle the current factors to ``path``."""
        with open(path, "wb") as f:
            pickle.dump(self.get_model_state(), f)