"""
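Module description:
Parse an Elliot YAML experiment configuration and expose it as namespaces.

``NameSpaceModel`` loads the configuration file, fills in default values,
resolves relative paths against the configuration folder, and builds the
hyperopt search space for every configured model.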
"""
__version__ = '0.3.1'
__author__ = 'Vito Walter Anelli, Claudio Pomo'
__email__ = 'vitowalter.anelli@poliba.it, claudio.pomo@poliba.it'
import copy
import os
import re
from ast import literal_eval
from collections import OrderedDict
from functools import reduce
from os.path import isfile, join
from types import SimpleNamespace
from hyperopt import hp
from yaml import FullLoader as FullLoader
from yaml import load
import elliot.hyperoptimization as ho
from elliot.utils.folder import manage_directories
# Heuristic used by NameSpaceModel._set_path / _safe_set_path to decide whether
# a relative string is a path: one non-digit character, one or more word
# characters or dashes, a dot, then more word characters or dashes
# (e.g. "train.tsv"). Strings that do not match are left untouched.
regexp = re.compile(r"[\D][\w-]+\.[\w-]+")

# Root key of the loaded configuration mapping (self.config[_experiment]).
_experiment = "experiment"

# Scalar options read directly under the experiment section.
_version = "version"
_dataset = "dataset"
_dataloader = "dataloader"
_top_k = "top_k"
_gpu = "gpu"
_random_seed = "random_seed"
_binarize = "binarize"
_config_test = "config_test"
_print_triplets = "print_results_as_triplets"
_align_side_with_train = "align_side_with_train"

# Nested sections of the experiment that fill_base converts to namespaces.
_data_config = "data_config"
_splitting = "splitting"
_prefiltering = "prefiltering"
_negative_sampling = "negative_sampling"
_evaluation = "evaluation"

# Path-valued options; fill_base supplies defaults for the three output
# folders and for the logger configuration / log folder.
_recs = "path_output_rec_result"
_weights = "path_output_rec_weight"
_performance = "path_output_rec_performance"
_logger_config = "path_logger_config"
_log_folder = "path_log_folder"
_external_models_path = "external_models_path"

# Model section and the per-model "meta" options read by fill_model.
_models = "models"
_meta = "meta"
_hyper_max_evals = "hyper_max_evals"
_hyper_opt_alg = "hyper_opt_alg"

# Keys not referenced by the code in this part of the module; presumably
# consumed elsewhere in the project (TODO confirm before removing).
_verbose = "verbose"
_metrics = "metrics"
_relevance_threshold = "relevance_threshold"
_paired_ttest = "paired_ttest"
_wilcoxon_test = "wilcoxon_test"
_recommender = "recommender"
_data_paths = "data_paths"
class NameSpaceModel:
    """Turn a YAML experiment configuration into ``SimpleNamespace`` objects.

    The constructor loads the configuration file. ``fill_base`` then fills in
    defaults, resolves relative paths against the configuration folder and
    publishes the experiment-level options on ``self.base_namespace``.
    ``fill_model`` is a generator yielding one entry per configured model.

    Attributes:
        base_namespace: ``SimpleNamespace`` populated by ``fill_base``.
        config: The mapping returned by the YAML loader; ``fill_base``
            mutates it in place.
        config_file: The file object the configuration was read from. It is
            closed once loading has finished.
    """

    def __init__(self, config_path, base_folder_path_elliot, base_folder_path_config):
        """Load the configuration and export ``CUDA_VISIBLE_DEVICES``.

        Args:
            config_path: Path of the YAML configuration file to load.
            base_folder_path_elliot: Folder used to build the default logger
                configuration path and the default log folder.
            base_folder_path_config: Folder against which relative paths
                found in the configuration are resolved.
        """
        self.base_namespace = SimpleNamespace()
        self._base_folder_path_elliot = base_folder_path_elliot
        self._base_folder_path_config = base_folder_path_config
        # Close the handle as soon as parsing is done instead of leaking it.
        # NOTE(review): FullLoader is not meant for untrusted input; consider
        # SafeLoader if configuration files can come from outside.
        with open(config_path) as config_file:
            self.config_file = config_file
            self.config = load(config_file, Loader=FullLoader)
        # The configured gpu value is exported as a string; -1 is used when
        # the experiment does not define the key.
        os.environ['CUDA_VISIBLE_DEVICES'] = str(self.config[_experiment].get(_gpu, -1))

    @staticmethod
    def _set_path(config_path, local_path):
        """Return ``local_path`` as an absolute path when it looks like a path.

        Args:
            config_path: Folder used as the base for relative paths.
            local_path: String taken from the configuration.

        Returns:
            The normalised absolute path if ``local_path`` is absolute, starts
            with a dot, or matches the module-level ``regexp`` heuristic;
            otherwise ``local_path`` unchanged (it is a plain attribute value).
        """
        if os.path.isabs(local_path):
            return os.path.abspath(local_path)
        # startswith(".") also covers the ".." prefix.
        if local_path.startswith(".") or regexp.search(local_path):
            return os.path.abspath(os.sep.join([config_path, local_path]))
        # The string is an attribute but not a path.
        return local_path

    @staticmethod
    def _safe_set_path(config_path, raw_local_path, dataset_name):
        """Format and resolve a configuration value if it is a string.

        Args:
            config_path: Folder used as the base for relative paths.
            raw_local_path: Any configuration value. Strings are formatted
                with ``dataset_name`` (``str.format``) and then resolved with
                ``_set_path``.
            dataset_name: Value substituted for ``{0}`` / ``{}`` placeholders.

        Returns:
            The resolved string, or ``raw_local_path`` untouched when it is
            not a string.
        """
        if not isinstance(raw_local_path, str):
            return raw_local_path
        return NameSpaceModel._set_path(config_path, raw_local_path.format(dataset_name))

    def _resolve_paths(self, mapping):
        """Return a copy of ``mapping`` with every value passed through ``_safe_set_path``."""
        dataset = self.config[_experiment][_dataset]
        return {k: self._safe_set_path(self._base_folder_path_config, v, dataset)
                for k, v in mapping.items()}

    def _fill_data_config(self):
        """Resolve the data_config section and publish it on the base namespace."""
        section = self.config[_experiment][_data_config]
        raw_side = section.get("side_information", None)
        force_default_loader = False
        if not raw_side:
            side_information = []
        elif isinstance(raw_side, list):
            side_information = [SimpleNamespace(**self._resolve_paths(side)) for side in raw_side]
            # With a list of side-information sources the top-level loader is
            # always "DataSetLoader", whatever the configuration says.
            force_default_loader = True
        elif isinstance(raw_side, dict):
            raw_side.update(self._resolve_paths(raw_side))
            side_information = SimpleNamespace(**raw_side)
        else:
            raise Exception("Side information is neither a list nor a dict. No other options are allowed.")

        section.update(self._resolve_paths(section))
        section["side_information"] = side_information
        if force_default_loader:
            section[_dataloader] = "DataSetLoader"
        else:
            section[_dataloader] = section.get(_dataloader, "DataSetLoader")
        setattr(self.base_namespace, _data_config, SimpleNamespace(**section))

    def _fill_splitting(self):
        """Resolve the splitting section; nested test/validation dicts become namespaces."""
        section = self.config[_experiment][_splitting]
        section.update(self._resolve_paths(section))
        for nested_key in ("test_splitting", "validation_splitting"):
            nested = section.get(nested_key, {})
            # Absent or empty sub-sections are left exactly as they are.
            if nested:
                section[nested_key] = SimpleNamespace(**nested)
        setattr(self.base_namespace, _splitting, SimpleNamespace(**section))

    def _fill_prefiltering(self):
        """Normalise prefiltering to a list of namespaces (a single strategy is wrapped)."""
        experiment = self.config[_experiment]
        if not isinstance(experiment[_prefiltering], list):
            experiment[_prefiltering] = [experiment[_prefiltering]]
        experiment[_prefiltering] = [SimpleNamespace(**strategy) for strategy in experiment[_prefiltering]]
        setattr(self.base_namespace, _prefiltering, experiment[_prefiltering])

    def _fill_negative_sampling(self):
        """Resolve negative_sampling; the 'random' strategy gets a fixed ``file_path``."""
        experiment = self.config[_experiment]
        section = experiment[_negative_sampling]
        section.update(self._resolve_paths(section))
        sampling = SimpleNamespace(**section)
        experiment[_negative_sampling] = sampling
        if getattr(sampling, 'strategy', '') == 'random':
            path = os.path.abspath(os.sep.join([self._base_folder_path_config, "..", "data",
                                                experiment[_dataset], "negative.tsv"]))
            setattr(sampling, 'file_path', path)
        setattr(self.base_namespace, _negative_sampling, sampling)

    def _fill_evaluation(self):
        """Resolve paths inside complex metrics and default the statistical-test options."""
        section = self.config[_experiment][_evaluation]
        complex_metrics = section.get("complex_metrics", {})
        paired_ttest = section.get("paired_ttest", {})
        wilcoxon_test = section.get("wilcoxon_test", {})
        for complex_metric in complex_metrics:
            complex_metric.update(self._resolve_paths(complex_metric))
        section["complex_metrics"] = complex_metrics
        section["paired_ttest"] = paired_ttest
        section["wilcoxon_test"] = wilcoxon_test
        setattr(self.base_namespace, _evaluation, SimpleNamespace(**section))

    def _fill_path_option(self, key, default_parts):
        """Publish a path option, defaulting to a location under the elliot base folder."""
        experiment = self.config[_experiment]
        if not experiment.get(key, False):
            value = os.path.abspath(os.sep.join([self._base_folder_path_elliot] + list(default_parts)))
        else:
            value = self._safe_set_path(self._base_folder_path_config, experiment[key],
                                        experiment[_dataset])
        setattr(self.base_namespace, key, value)

    def fill_base(self):
        """Fill defaults into the experiment section and populate ``base_namespace``.

        The three output folders (recommendations, weights, performance) are
        made absolute, defaulting to ``../results/<dataset>/...`` relative to
        the configuration folder, and handed to ``manage_directories``. Each
        experiment-level option is then copied to ``self.base_namespace``,
        with nested sections converted to ``SimpleNamespace`` objects.

        Raises:
            Exception: If ``data_config.side_information`` is set but is
                neither a list nor a dict.
        """
        experiment = self.config[_experiment]
        dataset = experiment[_dataset]

        for key, leaf in ((_recs, "recs"), (_weights, "weights"), (_performance, "performance")):
            default = self._set_path(self._base_folder_path_config,
                                     os.sep.join(["..", "results", "{0}", leaf]))
            # User-supplied values are only formatted and made absolute here;
            # they are not resolved against the configuration folder.
            experiment[key] = os.path.abspath(experiment.get(key, default).format(dataset))

        experiment[_dataloader] = experiment.get(_dataloader, "DataSetLoader")
        experiment[_version] = experiment.get(_version, __version__)

        manage_directories(experiment[_recs], experiment[_weights], experiment[_performance])

        # Options that are always published, falling back to these defaults.
        simple_defaults = {_config_test: False, _random_seed: 42,
                           _binarize: False, _align_side_with_train: True}

        for p in [_data_config, _weights, _recs, _dataset, _top_k, _performance, _logger_config,
                  _log_folder, _dataloader, _splitting, _prefiltering, _evaluation, _external_models_path,
                  _print_triplets, _config_test, _negative_sampling, _binarize, _random_seed,
                  _align_side_with_train, _version]:
            if p == _data_config:
                self._fill_data_config()
            elif p == _splitting and experiment.get(p, {}):
                self._fill_splitting()
            elif p == _prefiltering and experiment.get(p, {}):
                self._fill_prefiltering()
            elif p == _negative_sampling and experiment.get(p, {}):
                self._fill_negative_sampling()
            elif p == _evaluation and experiment.get(p, {}):
                self._fill_evaluation()
            elif p == _logger_config:
                self._fill_path_option(p, ["config", "logger_config.yml"])
            elif p == _log_folder:
                self._fill_path_option(p, ["..", "log"])
            elif p == _external_models_path and experiment.get(p, False):
                # No dataset name is substituted into this path.
                experiment[p] = self._safe_set_path(self._base_folder_path_config, experiment[p], "")
                setattr(self.base_namespace, p, experiment[p])
            elif p in simple_defaults:
                setattr(self.base_namespace, p, experiment.get(p, simple_defaults[p]))
            elif experiment.get(p):
                # Everything else (including empty optional sections) is
                # published only when it has a truthy value.
                setattr(self.base_namespace, p, experiment[p])

    def fill_model(self):
        """Yield one entry per configured model.

        Yields:
            * ``(key, (namespace, space, max_evals, opt_alg))`` when at least
              one top-level model parameter is a list. ``space`` is an
              ``OrderedDict`` of hyperopt expressions, one per list-valued
              parameter.
            * ``("ProxyRecommender", namespace)`` once per regular file in
              the ``folder`` of a ``RecommendationFolder`` model, with
              ``namespace.path`` set to that file.
            * ``(key, namespace)`` for every other model.

        Raises:
            Exception: If the resolved number of evaluations is not positive,
                or if a ``RecommendationFolder`` model has no ``folder``.
        """
        # Names of the hyperopt ``hp`` functions that may appear as the first
        # element of a parameter list.
        valid_functions = ("choice", "randint", "uniform", "quniform", "loguniform",
                           "qloguniform", "normal", "qnormal", "lognormal", "qlognormal")

        models = self.config[_experiment][_models]
        for key in models:
            model_config = models[key]
            meta_model = model_config.get(_meta, {})
            model_name_space = SimpleNamespace(**model_config)
            setattr(model_name_space, _meta, SimpleNamespace(**meta_model))

            if any(isinstance(value, list) for value in model_config.values()):
                space_list = []
                for k, value in model_config.items():
                    if not isinstance(value, list):
                        continue
                    if isinstance(value[0], str) and value[0] in valid_functions:
                        # [function_name, arg1, arg2, ...]: the remaining
                        # items are the arguments of the hp function.
                        parts = value[0].replace(" ", "").split("(")
                        func_ = getattr(hp, parts[0])
                        val_string = parts[1].split(")")[0] if len(parts) > 1 else None
                        args = [literal_eval(val_string) if val_string else None]
                        args.extend(
                            literal_eval(item.replace(" ", "").replace(")", ""))
                            if isinstance(item, str) else item
                            for item in value[1:]
                        )
                        args = [arg for arg in args if arg is not None]
                        space_list.append((k, func_(k, *args)))
                    elif all(isinstance(item, str) for item in value):
                        space_list.append((k, hp.choice(k, value)))
                    else:
                        # Round-trip through the textual form so that string
                        # items holding literals are evaluated.
                        space_list.append((k, hp.choice(k, literal_eval(
                            "[" + ",".join([str(v) for v in value]) + "]"))))

                _SPACE = OrderedDict(space_list)
                # Default number of evaluations: product over the parameters
                # of (number of positional arguments of the expression - 1).
                _estimated_evals = reduce(lambda x, y: x * y,
                                          [len(param.pos_args) - 1 for _, param in _SPACE.items()], 1)
                _max_evals = meta_model.get(_hyper_max_evals, _estimated_evals)
                if _max_evals <= 0:
                    raise Exception("Only pure value lists can be used without hyper_max_evals option. Please define hyper_max_evals in model/meta configuration.")
                _opt_alg = ho.parse_algorithms(meta_model.get(_hyper_opt_alg, "grid"))
                yield key, (model_name_space, _SPACE, _max_evals, _opt_alg)
            elif key == "RecommendationFolder":
                folder_path = getattr(model_name_space, "folder", None)
                if not folder_path:
                    raise Exception("RecommendationFolder meta-model must expose the folder field.")
                onlyfiles = [f for f in os.listdir(folder_path) if isfile(join(folder_path, f))]
                for file_ in onlyfiles:
                    # Shallow copy so that each yielded namespace has its own path.
                    local_model_name_space = copy.copy(model_name_space)
                    local_model_name_space.path = os.path.join(folder_path, file_)
                    yield "ProxyRecommender", local_model_name_space
            else:
                yield key, model_name_space