Source code for surprise.model_selection.validation

"""
The validation module contains the cross_validate function, inspired by
the mighty scikit-learn.
"""

import time

import numpy as np
from joblib import delayed, Parallel

from .. import accuracy

from .split import get_cv


def cross_validate(
    algo,
    data,
    measures=["rmse", "mae"],
    cv=None,
    return_train_measures=False,
    n_jobs=1,
    pre_dispatch="2*n_jobs",
    verbose=False,
):
    """
    Run a cross validation procedure for a given algorithm, reporting accuracy
    measures and computation times.

    See an example in the :ref:`User Guide <cross_validate_example>`.

    Args:
        algo(:obj:`AlgoBase \
            <surprise.prediction_algorithms.algo_base.AlgoBase>`):
            The algorithm to evaluate.
        data(:obj:`Dataset <surprise.dataset.Dataset>`): The dataset on which
            to evaluate the algorithm.
        measures(list of string): The performance measures to compute. Allowed
            names are function names as defined in the :mod:`accuracy
            <surprise.accuracy>` module. Default is ``['rmse', 'mae']``.
        cv(cross-validation iterator, int or ``None``): Determines how the
            ``data`` parameter will be split (i.e. how trainsets and testsets
            will be defined). If an int is passed, :class:`KFold
            <surprise.model_selection.split.KFold>` is used with the
            appropriate ``n_splits`` parameter. If ``None``, :class:`KFold
            <surprise.model_selection.split.KFold>` is used with
            ``n_splits=5``.
        return_train_measures(bool): Whether to compute performance measures
            on the trainsets. Default is ``False``.
        n_jobs(int): The maximum number of folds evaluated in parallel.

            - If ``-1``, all CPUs are used.
            - If ``1`` is given, no parallel computing code is used at all,\
              which is useful for debugging.
            - For ``n_jobs`` below ``-1``, ``(n_cpus + n_jobs + 1)`` are\
              used. For example, with ``n_jobs = -2`` all CPUs but one are\
              used.

            Default is ``1``.
        pre_dispatch(int or string): Controls the number of jobs that get
            dispatched during parallel execution. Reducing this number can be
            useful to avoid an explosion of memory consumption when more jobs
            get dispatched than CPUs can process. This parameter can be:

            - ``None``, in which case all the jobs are immediately created\
              and spawned. Use this for lightweight and fast-running jobs,\
              to avoid delays due to on-demand spawning of the jobs.
            - An int, giving the exact number of total jobs that are\
              spawned.
            - A string, giving an expression as a function of ``n_jobs``,\
              as in ``'2*n_jobs'``.

            Default is ``'2*n_jobs'``.
        verbose(bool): If ``True``, accuracy measures for each split are
            printed, as well as train and test times. Averages and standard
            deviations over all splits are also reported. Default is
            ``False``: nothing is printed.

    Returns:
        dict: A dict with the following keys:

            - ``'test_*'`` where ``*`` corresponds to a lower-case accuracy
              measure, e.g. ``'test_rmse'``: numpy array with accuracy values
              for each testset.

            - ``'train_*'`` where ``*`` corresponds to a lower-case accuracy
              measure, e.g. ``'train_rmse'``: numpy array with accuracy values
              for each trainset. Only available if ``return_train_measures``
              is ``True``.

            - ``'fit_time'``: numpy array with the training time in seconds
              for each split.

            - ``'test_time'``: numpy array with the testing time in seconds
              for each split.
""" measures = [m.lower() for m in measures] cv = get_cv(cv) delayed_list = ( delayed(fit_and_score)(algo, trainset, testset, measures, return_train_measures) for (trainset, testset) in cv.split(data) ) out = Parallel(n_jobs=n_jobs, pre_dispatch=pre_dispatch)(delayed_list) (test_measures_dicts, train_measures_dicts, fit_times, test_times) = zip(*out) test_measures = dict() train_measures = dict() ret = dict() for m in measures: # transform list of dicts into dict of lists # Same as in GridSearchCV.fit() test_measures[m] = np.asarray([d[m] for d in test_measures_dicts]) ret["test_" + m] = test_measures[m] if return_train_measures: train_measures[m] = np.asarray([d[m] for d in train_measures_dicts]) ret["train_" + m] = train_measures[m] ret["fit_time"] = fit_times ret["test_time"] = test_times if verbose: print_summary( algo, measures, test_measures, train_measures, fit_times, test_times, cv.n_splits, ) return ret
def fit_and_score(algo, trainset, testset, measures, return_train_measures=False):
    """Helper method that trains an algorithm and computes accuracy measures
    on a testset. Also reports train and test times.

    Args:
        algo(:obj:`AlgoBase \
            <surprise.prediction_algorithms.algo_base.AlgoBase>`): The
            algorithm to use.
        trainset(:obj:`Trainset <surprise.trainset.Trainset>`): The trainset.
        testset(:obj:`testset`): The testset.
        measures(list of string): The performance measures to compute. Allowed
            names are function names as defined in the :mod:`accuracy
            <surprise.accuracy>` module.
        return_train_measures(bool): Whether to compute performance measures
            on the trainset. Default is ``False``.

    Returns:
        tuple: A tuple containing:

            - A dictionary mapping each accuracy metric to its value on the
              testset (keys are lower case).
            - A dictionary mapping each accuracy metric to its value on the
              trainset (keys are lower case). This dict is empty if
              ``return_train_measures`` is ``False``.
            - The fit time in seconds.
            - The testing time in seconds.
    """

    start_fit = time.time()
    algo.fit(trainset)
    fit_time = time.time() - start_fit
    start_test = time.time()
    predictions = algo.test(testset)
    test_time = time.time() - start_test

    if return_train_measures:
        train_predictions = algo.test(trainset.build_testset())

    test_measures = dict()
    train_measures = dict()
    for m in measures:
        f = getattr(accuracy, m.lower())
        test_measures[m] = f(predictions, verbose=0)
        if return_train_measures:
            train_measures[m] = f(train_predictions, verbose=0)

    return test_measures, train_measures, fit_time, test_time


def print_summary(
    algo, measures, test_measures, train_measures, fit_times, test_times, n_splits
):
    """Helper for printing the result of cross_validate."""

    print(
        "Evaluating {} of algorithm {} on {} split(s).".format(
            ", ".join(m.upper() for m in measures), algo.__class__.__name__, n_splits
        )
    )
    print()

    row_format = "{:<18}" + "{:<8}" * (n_splits + 2)
    s = row_format.format(
        "", *[f"Fold {i + 1}" for i in range(n_splits)] + ["Mean"] + ["Std"]
    )
    s += "\n"
    s += "\n".join(
        row_format.format(
            key.upper() + " (testset)",
            *[f"{v:1.4f}" for v in vals]
            + [f"{np.mean(vals):1.4f}"]
            + [f"{np.std(vals):1.4f}"],
        )
        for (key, vals) in test_measures.items()
    )
    if train_measures:
        s += "\n"
        s += "\n".join(
            row_format.format(
                key.upper() + " (trainset)",
                *[f"{v:1.4f}" for v in vals]
                + [f"{np.mean(vals):1.4f}"]
                + [f"{np.std(vals):1.4f}"],
            )
            for (key, vals) in train_measures.items()
        )
    s += "\n"
    s += row_format.format(
        "Fit time",
        *[f"{t:.2f}" for t in fit_times]
        + [f"{np.mean(fit_times):.2f}"]
        + [f"{np.std(fit_times):.2f}"],
    )
    s += "\n"
    s += row_format.format(
        "Test time",
        *[f"{t:.2f}" for t in test_times]
        + [f"{np.mean(test_times):.2f}"]
        + [f"{np.std(test_times):.2f}"],
    )
    print(s)
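
# ---------------------------------------------------------------------------
# Illustration only (not part of the original module): a minimal, hedged demo
# of the fit_and_score helper on a single train/test split. It assumes the
# built-in "ml-100k" dataset has been downloaded and uses SVD and
# train_test_split from the library. Run this file directly to execute it.
if __name__ == "__main__":
    from surprise import Dataset, SVD
    from surprise.model_selection import train_test_split

    # Build one explicit trainset/testset pair instead of letting
    # cross_validate iterate over folds.
    data = Dataset.load_builtin("ml-100k")
    trainset, testset = train_test_split(data, test_size=0.25, random_state=0)

    test_m, train_m, fit_t, test_t = fit_and_score(
        SVD(),
        trainset,
        testset,
        measures=["rmse", "mae"],
        return_train_measures=True,
    )
    print("test measures: ", test_m)
    print("train measures:", train_m)
    print(f"fit time: {fit_t:.2f}s, test time: {test_t:.2f}s")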