# Source code for elliot.result_handler.result_handler

"""
Module description:

"""

__version__ = '0.3.1'
__author__ = 'Vito Walter Anelli, Claudio Pomo'
__email__ = 'vitowalter.anelli@poliba.it, claudio.pomo@poliba.it'

import os

import pandas as pd
from datetime import datetime
import json
from enum import Enum

from elliot.evaluation.statistical_significance import PairedTTest, WilcoxonTest

_eval_results = "test_results"
_eval_statistical_results = "test_statistical_results"


class StatTest(Enum):
    """Statistical significance tests supported by the result handler.

    Each member's value pairs the test implementation class with the short
    name embedded in the output file name.
    """
    PairedTTest = [PairedTTest, "paired_ttest"]
    WilcoxonTest = [WilcoxonTest, "wilcoxon_test"]
class ResultHandler:
    """Aggregates one-shot recommender evaluation results and writes reports.

    Results are registered via :meth:`add_oneshot_recommender` and persisted
    as TSV (wide or triplet form), JSON (best-model configurations), or TSV
    of pairwise statistical-significance p-values.
    """

    def __init__(self, rel_threshold=1):
        """
        :param rel_threshold: relevance threshold, recorded in output file names.
        """
        self.oneshot_recommenders = {}
        self.ks = list()
        self.rel_threshold = rel_threshold

    def add_oneshot_recommender(self, **kwargs):
        """Register one recommender's evaluation payload.

        ``kwargs`` must contain at least ``name`` (used as registry key) and
        ``test_results`` (a dict keyed by cutoff k). Previously unseen cutoffs
        are appended to ``self.ks``, preserving insertion order.
        """
        for k in set(kwargs["test_results"].keys()):
            if k not in self.ks:
                self.ks.append(k)
        self.oneshot_recommenders[kwargs["name"]] = [kwargs]

    def _results_frame(self, k):
        """Build a DataFrame at cutoff ``k``: one row per model, one column per metric.

        A leading ``model`` column duplicates the index so the frame can be
        written without the index.
        """
        results = {}
        for rec in self.oneshot_recommenders:
            for result in self.oneshot_recommenders[rec]:
                results[result['params']['name']] = result[_eval_results][k]
        info = pd.DataFrame.from_dict(results, orient='index')
        info.insert(0, 'model', info.index)
        return info

    def save_best_results(self, output=''):
        """Write one wide-format TSV per cutoff (row = model, column = metric)."""
        for k in self.ks:
            info = self._results_frame(k)
            info.to_csv(os.path.abspath(os.sep.join([
                output,
                f'rec_cutoff_{k}_relthreshold_{self.rel_threshold}_'
                f'{datetime.now().strftime("%Y_%m_%d_%H_%M_%S")}.tsv'])),
                sep='\t', index=False)

    def save_best_results_as_triplets(self, output='../results/'):
        """Write one long-format TSV per cutoff with columns (model, metric, value)."""
        for k in self.ks:
            info = self._results_frame(k)
            # stack() melts the wide frame into (model, metric, value) rows
            triplets = info.set_index("model").stack().reset_index()
            triplets.to_csv(os.path.abspath(os.sep.join([
                output,
                f'triplets_rec_cutoff_{k}_relthreshold_{self.rel_threshold}_'
                f'{datetime.now().strftime("%Y_%m_%d_%H_%M_%S")}.tsv'])),
                sep='\t', index=False, header=["model", "metric", "value"])

    def save_best_models(self, output='../results/', default_metric="nDCG", default_k=[10]):
        """Dump every registered model's configuration to a single JSON file.

        The first JSON entry records the validation metric/cutoff and the
        relevance threshold; each following entry holds one model's ``meta``
        and its remaining configuration parameters.

        NOTE: ``default_k`` keeps its original mutable-list default for
        interface compatibility; it is only read (first element), never mutated.
        """
        k = default_k[0]
        models = [{"default_validation_metric": default_metric,
                   "default_validation_cutoff": k,
                   "rel_threshold": self.rel_threshold}]
        for rec in self.oneshot_recommenders:
            for model in self.oneshot_recommenders[rec]:
                models.append({
                    "meta": model["params"]["meta"].__dict__,
                    "recommender": rec,
                    # 'meta' is exported separately above, so drop it here
                    "configuration": {key: value
                                      for key, value in model["params"].items()
                                      if key != 'meta'}})
        with open(os.path.abspath(os.sep.join([
                output,
                f'bestmodelparams_cutoff_{k}_relthreshold_{self.rel_threshold}_'
                f'{datetime.now().strftime("%Y_%m_%d_%H_%M_%S")}.json'])), mode='w') as f:
            json.dump(models, f, indent=4)

    def save_best_statistical_results(self, stat_test, output='../results/'):
        """Run ``stat_test`` on every unordered pair of recommenders and write a TSV.

        :param stat_test: a :class:`StatTest` member; ``value[0]`` is the test
            implementation, ``value[1]`` the short name used in the file name.

        For each pair and each metric, both orderings (a, b) and (b, a) are
        emitted with the same p-value, one line per tuple.
        """
        for k in self.ks:
            results = []
            paired_list = []
            for rec_0, rec_0_model in self.oneshot_recommenders.items():
                for rec_1, rec_1_model in self.oneshot_recommenders.items():
                    # skip self-pairs and pairs already evaluated in either order
                    if rec_0 != rec_1 and (rec_0, rec_1) not in paired_list:
                        paired_list.append((rec_0, rec_1))
                        paired_list.append((rec_1, rec_0))
                        metrics = rec_0_model[0][_eval_statistical_results][k].keys()
                        for metric_name in metrics:
                            array_0 = rec_0_model[0][_eval_statistical_results][k][metric_name]
                            array_1 = rec_1_model[0][_eval_statistical_results][k][metric_name]
                            # compare only on the users present in both result sets
                            common_users = stat_test.value[0].common_users(array_0, array_1)
                            p_value = stat_test.value[0].compare(array_0, array_1, common_users)
                            results.append((rec_0_model[0]['params']['name'],
                                            rec_1_model[0]['params']['name'],
                                            metric_name, p_value))
                            results.append((rec_1_model[0]['params']['name'],
                                            rec_0_model[0]['params']['name'],
                                            metric_name, p_value))
            with open(os.path.abspath(os.sep.join([
                    output,
                    f'stat_{stat_test.value[1]}_cutoff_{k}_relthreshold_{self.rel_threshold}_'
                    f'{datetime.now().strftime("%Y_%m_%d_%H_%M_%S")}.tsv'])), "w") as f:
                for tup in results:
                    f.write(f"{tup[0]}\t{tup[1]}\t{tup[2]}\t{tup[3]}\n")
class HyperParameterStudy:
    """Aggregates per-trial evaluation results of a hyper-parameter search.

    Trials are registered per recommender family via :meth:`add_trials` and
    persisted as one TSV per (recommender, cutoff), in wide or triplet form.
    """

    def __init__(self, rel_threshold=1):
        """
        :param rel_threshold: relevance threshold, recorded in output file names.
        """
        self.trials = {}
        self.ks = set()
        self.rel_threshold = rel_threshold

    def add_trials(self, obj):
        """Register the trial list of a search object.

        ``obj.results`` is a list of per-trial result dicts. The recommender
        family name is the prefix of the first trial's ``params.name``
        (text before the first underscore); cutoffs are taken from the first
        trial's ``test_results`` keys.
        """
        self.ks.update(set(obj.results[0]["test_results"].keys()))
        name = obj.results[0]["params"]["name"].split("_")[0]
        self.trials[name] = obj.results

    def _trials_frame(self, performance, k):
        """Build a DataFrame at cutoff ``k``: one row per trial, one column per metric.

        A leading ``model`` column duplicates the index so the frame can be
        written without the index.
        """
        results = {}
        for result in performance:
            results[result['params']['name']] = result[_eval_results][k]
        info = pd.DataFrame.from_dict(results, orient='index')
        info.insert(0, 'model', info.index)
        return info

    def save_trials(self, output='../results/'):
        """Write one wide-format TSV per (recommender, cutoff)."""
        for k in self.ks:
            for rec, performance in self.trials.items():
                info = self._trials_frame(performance, k)
                info.to_csv(os.path.abspath(os.sep.join([
                    output,
                    f'rec_{rec}_cutoff_{k}_relthreshold_{self.rel_threshold}_'
                    f'{datetime.now().strftime("%Y_%m_%d_%H_%M_%S")}.tsv'])),
                    sep='\t', index=False)

    def save_trials_as_triplets(self, output='../results/'):
        """Write one long-format TSV per (recommender, cutoff) with (model, metric, value) rows."""
        for k in self.ks:
            for rec, performance in self.trials.items():
                info = self._trials_frame(performance, k)
                # stack() melts the wide frame into (model, metric, value) rows
                triplets = info.set_index("model").stack().reset_index()
                triplets.to_csv(os.path.abspath(os.sep.join([
                    output,
                    f'triplets_rec_{rec}_cutoff_{k}_relthreshold_{self.rel_threshold}_'
                    f'{datetime.now().strftime("%Y_%m_%d_%H_%M_%S")}.tsv'])),
                    sep='\t', index=False, header=["model", "metric", "value"])