"""
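Module description:
Vector Space Model (VSM) content-based recommender. Builds sparse user and item
feature profiles (TF-IDF weighted or binary) from item side information and
delegates scoring to a ``Similarity`` model.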
"""
__version__ = '0.3.1'
__author__ = 'Vito Walter Anelli, Claudio Pomo'
__email__ = 'vitowalter.anelli@poliba.it, claudio.pomo@poliba.it'
import numpy as np
import pickle
import time
import typing as t
import scipy.sparse as sp
from elliot.recommender.recommender_utils_mixin import RecMixin
from elliot.utils.write import store_recommendation
from elliot.recommender.base_recommender_model import BaseRecommenderModel
from elliot.recommender.content_based.VSM.vector_space_model_similarity import Similarity
from elliot.recommender.content_based.VSM.tfidf_utils import TFIDF
from elliot.recommender.base_recommender_model import init_charger
class VSM(RecMixin, BaseRecommenderModel):
    r"""
    Vector Space Model

    For further details, please refer to the `paper <https://dl.acm.org/doi/10.1145/2362499.2362501>`_ and the `paper <https://ieeexplore.ieee.org/document/9143460>`_

    Args:
        similarity: Similarity metric
        user_profile: User profile weighting; ``tfidf`` builds TF-IDF profiles, any other value builds binary profiles
        item_profile: Item profile weighting; ``tfidf`` builds TF-IDF profiles, any other value builds binary profiles
        loader: Name of the side-information loader, looked up as an attribute of ``data.side_information``

    To include the recommendation model, add it to the config file adopting the following pattern:

    .. code:: yaml

      models:
        VSM:
          meta:
            save_recs: True
          similarity: cosine
          user_profile: binary
          item_profile: binary
    """

    @init_charger
    def __init__(self, data, config, params, *args, **kwargs):
        """Build the sparse user/item feature matrices and the similarity model.

        Args:
            data: Dataset object. It is only read through ``self._data`` here,
                presumably bound by ``init_charger`` -- confirm.
            config: Experiment configuration (not read directly in this method).
            params: Model hyper-parameters (not read directly in this method;
                presumably consumed by ``autoset_params`` -- confirm).

        Raises:
            AttributeError: If the configured side-information loader is not
                available on ``data.side_information``.
        """
        # Each tuple starts with the attribute to set and the config key it is
        # read from. The remaining fields are presumably (shortcut, default,
        # type cast, formatter) -- confirm against ``autoset_params``.
        self._params_list = [
            ("_similarity", "similarity", "sim", "cosine", None, None),
            ("_user_profile_type", "user_profile", "up", "tfidf", None, None),
            ("_item_profile_type", "item_profile", "ip", "tfidf", None, None),
            ("_loader", "loader", "load", "ItemAttributes", None, None),
        ]
        self.autoset_params()

        self._ratings = self._data.train_dict

        self._side = getattr(self._data.side_information, self._loader, None)
        if self._side is None:
            # Without this guard the failure surfaces later as an opaque
            # "'NoneType' object has no attribute ..." error. The exception
            # type is kept as AttributeError so existing handlers still match.
            raise AttributeError(
                f"VSM requires the side-information loader '{self._loader}', "
                "but it is not available in the loaded data."
            )

        feature_map = self._side.feature_map
        public_features = self._side.public_features

        # Fit TF-IDF once and share it between the user and item branches.
        # NOTE(review): assumes ``TFIDF.tfidf()`` is deterministic for a given
        # feature map, so one shared instance equals the two separate ones
        # that were built before.
        if "tfidf" in (self._user_profile_type, self._item_profile_type):
            self._tfidf_obj = TFIDF(feature_map)
            self._tfidf = self._tfidf_obj.tfidf()

        # ---- user profiles: {user: {feature: weight}} ----
        if self._user_profile_type == "tfidf":
            self._user_profiles = self._tfidf_obj.get_profiles(self._ratings)
        else:
            self._user_profiles = {
                user: self.compute_binary_profile(user_items)
                for user, user_items in self._ratings.items()
            }

        # Re-key users and features through the ``public_*`` mappings, whose
        # values are used as row/column indices of the sparse matrices.
        self._i_user_feature_dict = {
            self._data.public_users[user]: {
                public_features[feature]: value
                for feature, value in user_features.items()
            }
            for user, user_features in self._user_profiles.items()
        }
        self._sp_i_user_features = self.build_feature_sparse_values(
            self._i_user_feature_dict, self._num_users
        )

        # ---- item profiles ----
        # NOTE(review): ``feature_map[item]`` (and ``self._tfidf[item]``) raise
        # KeyError for an item without side information, whereas
        # ``compute_binary_profile`` tolerates that case -- confirm that every
        # item in ``public_items`` is guaranteed to have features.
        if self._item_profile_type == "tfidf":
            self._i_item_feature_dict = {
                i_item: {
                    public_features[feature]: self._tfidf[item].get(feature, 0)
                    for feature in feature_map[item]
                }
                for item, i_item in self._data.public_items.items()
            }
            self._sp_i_item_features = self.build_feature_sparse_values(
                self._i_item_feature_dict, self._num_items
            )
        else:
            self._i_item_feature_dict = {
                i_item: [public_features[feature] for feature in feature_map[item]]
                for item, i_item in self._data.public_items.items()
            }
            self._sp_i_item_features = self.build_feature_sparse(
                self._i_item_feature_dict, self._num_items
            )

        self._model = Similarity(
            self._data, self._sp_i_user_features, self._sp_i_item_features, self._similarity
        )

    def get_single_recommendation(self, mask, k, *args):
        """Return ``{user: recommendations}`` for every user in the training data.

        Args:
            mask: Forwarded unchanged to ``self._model.get_user_recs``.
            k: Forwarded unchanged to ``self._model.get_user_recs``
                (presumably the number of items to recommend -- confirm).
            *args: Accepted for interface compatibility; unused.
        """
        return {u: self._model.get_user_recs(u, mask, k) for u in self._ratings}

    def get_recommendations(self, k: int = 10):
        """Return the ``(validation, test)`` recommendation dictionaries.

        Both come from ``self.process_protocol(k)`` and are returned as
        shallow copies, so callers may mutate them freely.
        """
        recs_val, recs_test = self.process_protocol(k)
        return dict(recs_val), dict(recs_test)

    @property
    def name(self):
        """Model identifier: ``VSM_`` followed by the parameter shortcut string."""
        return f"VSM_{self.get_params_shortcut()}"

    def train(self):
        """Initialise the similarity model, log the elapsed time and evaluate.

        When ``self._restore`` is truthy, nothing is computed and the result of
        ``self.restore_weights()`` is returned instead.
        """
        if self._restore:
            return self.restore_weights()

        start = time.time()
        self._model.initialize()
        end = time.time()
        self.logger.info(f"The similarity computation has taken: {end - start}")

        self.evaluate()

    def compute_binary_profile(self, user_items_dict: t.Dict) -> t.Dict:
        """Build a binary feature profile from the items a user interacted with.

        Args:
            user_items_dict: Mapping whose keys are item identifiers.

        Returns:
            ``{feature: 1}`` for every feature attached to at least one of the
            items. Items missing from the feature map contribute nothing.
        """
        feature_map = self._side.feature_map
        user_features = {}
        for item in user_items_dict:
            for feature in feature_map.get(item, []):
                # Presence only: a feature seen many times still weighs 1.
                user_features[feature] = 1
        return user_features

    def build_feature_sparse(self, feature_dict, num_entities):
        """Build a binary entity-by-feature CSR matrix.

        Args:
            feature_dict: ``{row index: iterable of column (feature) indices}``.
            num_entities: Number of rows of the resulting matrix.

        Returns:
            ``float32`` CSR matrix of shape
            ``(num_entities, len(self._side.public_features))`` with 1 at every
            listed (row, feature) pair. Repeated pairs are summed by SciPy.
        """
        rows, cols = [], []
        for entity, features in feature_dict.items():
            for feature in features:
                rows.append(entity)
                cols.append(feature)

        ones = np.ones(len(rows), dtype='float32')
        return sp.csr_matrix((ones, (rows, cols)), dtype='float32',
                             shape=(num_entities, len(self._side.public_features)))

    def build_feature_sparse_values(self, feature_dict, num_entities):
        """Build a weighted entity-by-feature CSR matrix.

        Args:
            feature_dict: ``{row index: {column (feature) index: weight}}``.
            num_entities: Number of rows of the resulting matrix.

        Returns:
            ``float32`` CSR matrix of shape
            ``(num_entities, len(self._side.public_features))`` holding the
            given weights.
        """
        rows, cols, values = [], [], []
        for entity, features in feature_dict.items():
            for feature, value in features.items():
                rows.append(entity)
                cols.append(feature)
                values.append(value)

        return sp.csr_matrix((values, (rows, cols)), dtype='float32',
                             shape=(num_entities, len(self._side.public_features)))