Source code for elliot.recommender.latent_factor_models.Slim.slim_model

"""
Module description:

"""

__version__ = '0.3.1'
__author__ = 'Felice Antonio Merra, Vito Walter Anelli, Claudio Pomo'
__email__ = 'felice.merra@poliba.it, vitowalter.anelli@poliba.it, claudio.pomo@poliba.it'

import pickle
import time
import numpy as np
import sys
import scipy.sparse as sp
from sklearn.linear_model import ElasticNet


class SlimModel(object):
    def __init__(self, data, num_users, num_items, l1_ratio, alpha, epochs, neighborhood, random_seed):
        self._data = data
        self._num_users = num_users
        self._num_items = num_items
        self._l1_ratio = l1_ratio
        self._alpha = alpha
        self._epochs = epochs
        self._neighborhood = neighborhood

        self.md = ElasticNet(alpha=self._alpha,
                             l1_ratio=self._l1_ratio,
                             positive=True,
                             fit_intercept=False,
                             copy_X=False,
                             precompute=True,
                             selection='random',
                             max_iter=100,
                             random_state=random_seed,
                             tol=1e-4)

        self._w_sparse = None
        self.pred_mat = None
    def train(self, verbose):
        train = self._data.sp_i_train_ratings

        dataBlock = 10000000

        rows = np.empty(dataBlock, dtype=np.int32)
        cols = np.empty(dataBlock, dtype=np.int32)
        values = np.empty(dataBlock, dtype=np.float32)

        numCells = 0

        start_time = time.time()
        start_time_printBatch = start_time

        for currentItem in range(self._num_items):
            # target: the ratings of the current item across all users
            y = train[:, currentItem].toarray()

            # set the j-th column of X to zero
            start_pos = train.indptr[currentItem]
            end_pos = train.indptr[currentItem + 1]

            current_item_data_backup = train.data[start_pos: end_pos].copy()
            train.data[start_pos: end_pos] = 0.0

            # fit one ElasticNet model per column
            self.md.fit(train, y)

            nonzero_model_coef_index = self.md.sparse_coef_.indices
            nonzero_model_coef_value = self.md.sparse_coef_.data

            # keep only the top-K largest coefficients as the item's neighbourhood
            local_topK = min(len(nonzero_model_coef_value) - 1, self._neighborhood)

            relevant_items_partition = (-nonzero_model_coef_value).argpartition(local_topK)[0:local_topK]
            relevant_items_partition_sorting = np.argsort(-nonzero_model_coef_value[relevant_items_partition])
            ranking = relevant_items_partition[relevant_items_partition_sorting]

            for index in range(len(ranking)):
                if numCells == len(rows):
                    rows = np.concatenate((rows, np.zeros(dataBlock, dtype=np.int32)))
                    cols = np.concatenate((cols, np.zeros(dataBlock, dtype=np.int32)))
                    values = np.concatenate((values, np.zeros(dataBlock, dtype=np.float32)))

                rows[numCells] = nonzero_model_coef_index[ranking[index]]
                cols[numCells] = currentItem
                values[numCells] = nonzero_model_coef_value[ranking[index]]
                numCells += 1

            # restore the j-th column of X
            train.data[start_pos:end_pos] = current_item_data_backup

            if verbose and (time.time() - start_time_printBatch > 300 or (
                    currentItem + 1) % 1000 == 0 or currentItem == self._num_items - 1):
                print('{}: Processed {} ( {:.2f}% ) in {:.2f} minutes. Items per second: {:.0f}'.format(
                    'SLIMElasticNetRecommender',
                    currentItem + 1,
                    100.0 * float(currentItem + 1) / self._num_items,
                    (time.time() - start_time) / 60,
                    float(currentItem) / (time.time() - start_time)))

                sys.stdout.flush()
                sys.stderr.flush()

                start_time_printBatch = time.time()

        # generate the sparse weight matrix
        self._w_sparse = sp.csr_matrix((values[:numCells], (rows[:numCells], cols[:numCells])),
                                       shape=(self._num_items, self._num_items), dtype=np.float32)
    def prepare_predictions(self):
        self.pred_mat = self._data.sp_i_train_ratings.dot(self._w_sparse).toarray()
    def predict(self, u, i):
        return self.pred_mat[u, i]
    def get_user_recs(self, user, mask, k=100):
        # user_items = self._data.train_dict[user].keys()
        # predictions = {i: self.predict(user, i) for i in self._data.items if i not in user_items}
        ui = self._data.public_users[user]
        user_mask = mask[ui]
        predictions = {self._data.private_items[i]: self.predict(ui, i)
                       for i in range(self._data.num_items) if user_mask[i]}

        indices, values = zip(*predictions.items())
        indices = np.array(indices)
        values = np.array(values)
        local_k = min(k, len(values))
        partially_ordered_preds_indices = np.argpartition(values, -local_k)[-local_k:]
        real_values = values[partially_ordered_preds_indices]
        real_indices = indices[partially_ordered_preds_indices]
        local_top_k = real_values.argsort()[::-1]
        return [(real_indices[item], real_values[item]) for item in local_top_k]
    def get_model_state(self):
        # the learned item-item weight matrix is the only state worth persisting
        saving_dict = {}
        saving_dict['_w_sparse'] = self._w_sparse
        return saving_dict
    def set_model_state(self, saving_dict):
        self._w_sparse = saving_dict['_w_sparse']
    def load_weights(self, path):
        with open(path, "rb") as f:
            self.set_model_state(pickle.load(f))
    def save_weights(self, path):
        with open(path, "wb") as f:
            pickle.dump(self.get_model_state(), f)
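
# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the Elliot source): it shows how
# SlimModel could be driven end to end outside the framework. The `_ToyData`
# class below is a hypothetical stand-in for Elliot's dataset object and only
# exposes the attributes this model actually reads (`sp_i_train_ratings`,
# `public_users`, `private_items`, `num_items`); all hyperparameter values are
# arbitrary, not tuned.
# ---------------------------------------------------------------------------
class _ToyData(object):
    def __init__(self, urm):
        self.sp_i_train_ratings = urm                             # users x items sparse ratings
        self.num_items = urm.shape[1]
        self.public_users = {u: u for u in range(urm.shape[0])}   # external -> internal user ids
        self.private_items = {i: i for i in range(urm.shape[1])}  # internal -> external item ids


if __name__ == "__main__":
    rng = np.random.default_rng(0)
    # two blocks of co-consumed items, so the per-column ElasticNet fits find
    # informative positive neighbours
    dense = (rng.random((60, 20)) < 0.2).astype(np.float32)
    dense[:30, :10] += (rng.random((30, 10)) < 0.5)
    dense[30:, 10:] += (rng.random((30, 10)) < 0.5)
    urm = sp.csr_matrix(np.clip(dense, 0.0, 1.0))

    data = _ToyData(urm)
    model = SlimModel(data, num_users=60, num_items=20,
                      l1_ratio=0.1, alpha=0.001, epochs=1,
                      neighborhood=10, random_seed=42)
    model.train(verbose=False)
    model.prepare_predictions()

    # recommend only items the user has not interacted with yet
    mask = (urm.toarray() == 0)
    print(model.get_user_recs(user=0, mask=mask, k=5))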