Source code for elliot.recommender.content_based.VSM.tfidf_utils

import typing as t
from collections import Counter
import math
import numpy as np


class TFIDF:
    """Compute L2-normalised IDF weights for item features and user profiles.

    Each item is described by a list of feature ids. A feature's inverse
    document frequency is ``log(n_items / count)``, where ``count`` is the
    number of times the feature occurs across all feature lists. An item's
    weight for a feature is that IDF divided by the Euclidean norm of the
    IDFs of the item's feature list.

    Note:
        Occurrences are counted per list entry, so a feature repeated inside
        one item's list is counted (and enters the normalisation sum) once
        per repetition.
    """

    def __init__(self, map: t.Dict[int, t.List[int]]):
        """Pre-compute the per-item feature weights.

        Args:
            map: Mapping ``item id -> list of feature ids``. An empty mapping
                is accepted and yields an empty weight table.
        """
        self.__map = map
        # Occurrence count of every feature over all items.
        self.__o = Counter(
            feature
            for feature_list in self.__map.values()
            for feature in feature_list
        )
        # ``default=0`` keeps construction from failing on an empty corpus.
        self.__maxi = max(self.__o.values(), default=0)
        self.__total_documents = len(self.__map)
        self.__idfo = {
            k: math.log(self.__total_documents / v)
            for k, v in self.__o.items()
        }
        self.__tfidf = {}
        for k, v in self.__map.items():
            normalization = math.sqrt(sum(self.__idfo[i] ** 2 for i in v))
            if normalization:
                self.__tfidf[k] = {
                    i: self.__idfo[i] / normalization for i in v
                }
            else:
                # Every IDF of this item is 0 (or the item has no features):
                # the vector has zero norm, so keep it as all zeros instead
                # of dividing by zero.
                self.__tfidf[k] = {i: 0.0 for i in v}

    def tfidf(self) -> t.Dict[int, t.Dict[int, float]]:
        """Return the mapping ``item id -> {feature id: weight}``."""
        return self.__tfidf

    def get_profiles(
        self, ratings: t.Dict[int, t.Dict[int, float]]
    ) -> t.Dict[int, t.Dict[int, float]]:
        """Build one feature profile per user from the items they rated.

        For every user, the weight of a feature is the mean of that feature's
        weights over all rated items that carry it. The rating values
        themselves are not used; only the rated item ids matter. Items that
        are absent from the weight table are ignored.

        Args:
            ratings: Mapping ``user id -> {item id: rating}``.

        Returns:
            Mapping ``user id -> {feature id: mean weight}``. A user with no
            known rated items gets an empty profile.
        """
        profiles = {}
        for user, items in ratings.items():
            # Collect every weight seen for a feature before averaging, so
            # that a feature shared by several rated items is really averaged
            # rather than overwritten by the last item.
            collected: t.Dict[int, t.List[float]] = {}
            for item in items:
                for feature, weight in self.__tfidf.get(item, {}).items():
                    collected.setdefault(feature, []).append(weight)
            profiles[user] = {
                feature: np.average(weights)
                for feature, weights in collected.items()
            }
        return profiles