"""
Module description:
"""
__version__ = '0.3.1'
__author__ = 'Vito Walter Anelli, Claudio Pomo'
__email__ = 'vitowalter.anelli@poliba.it, claudio.pomo@poliba.it'
import numpy as np
import pickle
import time
import typing as t
import scipy.sparse as sp
from elliot.recommender.recommender_utils_mixin import RecMixin
from elliot.utils.write import store_recommendation
from elliot.recommender.base_recommender_model import BaseRecommenderModel
from elliot.recommender.knn.attribute_user_knn.attribute_user_knn_similarity import Similarity
from elliot.recommender.knn.attribute_user_knn.tfidf_utils import TFIDF
from elliot.recommender.base_recommender_model import init_charger
[docs]class AttributeUserKNN(RecMixin, BaseRecommenderModel):
r"""
Attribute User-kNN proposed in MyMediaLite Recommender System Library
For further details, please refer to the `paper <https://www.researchgate.net/publication/221141162_MyMediaLite_A_free_recommender_system_library>`_
Args:
neighbors: Number of item neighbors
similarity: Similarity function
profile: Profile type ('binary', 'tfidf')
To include the recommendation model, add it to the config file adopting the following pattern:
.. code:: yaml
models:
AttributeUserKNN:
meta:
save_recs: True
neighbors: 40
similarity: cosine
profile: binary
"""
@init_charger
def __init__(self, data, config, params, *args, **kwargs):
self._params_list = [
("_num_neighbors", "neighbors", "nn", 40, int, None),
("_similarity", "similarity", "sim", "cosine", None, None),
("_profile_type", "profile", "profile", "binary", None, None),
("_implicit", "implicit", "bin", False, None, None),
("_loader", "loader", "load", "ItemAttributes", None, None),
]
self.autoset_params()
self._ratings = self._data.train_dict
self._side = getattr(self._data.side_information, self._loader, None)
if self._profile_type == "tfidf":
self._tfidf_obj = TFIDF(self._side.feature_map)
self._tfidf = self._tfidf_obj.tfidf()
self._user_profiles = self._tfidf_obj.get_profiles(self._ratings)
else:
self._user_profiles = {user: self.compute_binary_profile(user_items) for user, user_items in self._ratings.items()}
self._i_feature_dict = {self._data.public_users[user]: {self._side.public_features[feature]: value
for feature, value in user_features.items()}
for user, user_features in self._user_profiles.items()}
self._sp_i_features = self.build_feature_sparse_values()
self._model = Similarity(data=self._data, attribute_matrix=self._sp_i_features, num_neighbors=self._num_neighbors, similarity=self._similarity, implicit=self._implicit)
[docs] def get_single_recommendation(self, mask, k, *args):
return {u: self._model.get_user_recs(u, mask, k) for u in self._ratings.keys()}
[docs] def get_recommendations(self, k: int = 10):
predictions_top_k_val = {}
predictions_top_k_test = {}
recs_val, recs_test = self.process_protocol(k)
predictions_top_k_val.update(recs_val)
predictions_top_k_test.update(recs_test)
return predictions_top_k_val, predictions_top_k_test
@property
def name(self):
return f"AttributeUserKNN_{self.get_params_shortcut()}"
[docs] def train(self):
if self._restore:
return self.restore_weights()
start = time.time()
self._model.initialize()
end = time.time()
print(f"The similarity computation has taken: {end - start}")
print(f"Transactions: {self._data.transactions}")
self.evaluate()
# best_metric_value = 0
#
# recs = self.get_recommendations(self.evaluator.get_needed_recommendations())
# result_dict = self.evaluator.eval(recs)
# self._results.append(result_dict)
# print(f'Finished')
#
# if self._results[-1][self._validation_k]["val_results"][self._validation_metric] > best_metric_value:
# print("******************************************")
# if self._save_weights:
# with open(self._saving_filepath, "wb") as f:
# pickle.dump(self._model.get_model_state(), f)
# if self._save_recs:
# store_recommendation(recs, self._config.path_output_rec_result + f"{self.name}.tsv")
[docs] def compute_binary_profile(self, user_items_dict: t.Dict):
user_features = {}
partial = 1/len(user_items_dict)
for item in user_items_dict.keys():
for feature in self._side.feature_map.get(item,[]):
user_features[feature] = user_features.get(feature, 0) + partial
return user_features
[docs] def build_feature_sparse(self):
rows_cols = [(i, f) for i, features in self._i_feature_dict.items() for f in features]
rows = [u for u, _ in rows_cols]
cols = [i for _, i in rows_cols]
data = sp.csr_matrix((np.ones_like(rows), (rows, cols)), dtype='float32',
shape=(self._num_items, len(self._side.public_features)))
return data
[docs] def build_feature_sparse_values(self):
rows_cols_values = [(u, f, v) for u, features in self._i_feature_dict.items() for f, v in features.items()]
rows = [u for u, _, _ in rows_cols_values]
cols = [i for _, i, _ in rows_cols_values]
values = [r for _, _, r in rows_cols_values]
data = sp.csr_matrix((values, (rows, cols)), dtype='float32',
shape=(self._num_users, len(self._side.public_features)))
return data