"""
The validation module contains the cross_validate function, inspired by
the mighty scikit-learn.
"""
import time
import numpy as np
from joblib import delayed, Parallel
from .. import accuracy
from .split import get_cv
def cross_validate(
    algo,
    data,
    measures=["rmse", "mae"],
    cv=None,
    return_train_measures=False,
    n_jobs=1,
    pre_dispatch="2*n_jobs",
    verbose=False,
):
    """
    Run a cross validation procedure for a given algorithm, reporting accuracy
    measures and computation times.

    See an example in the :ref:`User Guide <cross_validate_example>`.

    Args:
        algo(:obj:`AlgoBase \
            <surprise.prediction_algorithms.algo_base.AlgoBase>`):
            The algorithm to evaluate.
        data(:obj:`Dataset <surprise.dataset.Dataset>`): The dataset on which
            to evaluate the algorithm.
        measures(list of string): The performance measures to compute. Allowed
            names are function names as defined in the :mod:`accuracy
            <surprise.accuracy>` module. Names are lower-cased before use.
            Default is ``['rmse', 'mae']``.
        cv(cross-validation iterator, int or ``None``): Determines how the
            ``data`` parameter will be split (i.e. how trainsets and testsets
            will be defined). If an int is passed, :class:`KFold
            <surprise.model_selection.split.KFold>` is used with the
            appropriate ``n_splits`` parameter. If ``None``, :class:`KFold
            <surprise.model_selection.split.KFold>` is used with
            ``n_splits=5``.
        return_train_measures(bool): Whether to compute performance measures on
            the trainsets. Default is ``False``.
        n_jobs(int): The maximum number of folds evaluated in parallel.

            - If ``-1``, all CPUs are used.
            - If ``1`` is given, no parallel computing code is used at all,\
                which is useful for debugging.
            - For ``n_jobs`` below ``-1``, ``(n_cpus + n_jobs + 1)`` are\
                used.  For example, with ``n_jobs = -2`` all CPUs but one are\
                used.

            Default is ``1``.
        pre_dispatch(int or string): Controls the number of jobs that get
            dispatched during parallel execution. Reducing this number can be
            useful to avoid an explosion of memory consumption when more jobs
            get dispatched than CPUs can process. This parameter can be:

            - ``None``, in which case all the jobs are immediately created\
                and spawned. Use this for lightweight and fast-running\
                jobs, to avoid delays due to on-demand spawning of the\
                jobs.
            - An int, giving the exact number of total jobs that are\
                spawned.
            - A string, giving an expression as a function of ``n_jobs``,\
                as in ``'2*n_jobs'``.

            Default is ``'2*n_jobs'``.
        verbose(bool): If ``True`` accuracy measures for each split are
            printed, as well as train and test times. Averages and standard
            deviations over all splits are also reported. Default is
            ``False``: nothing is printed.

    Returns:
        dict: A dict with the following keys:

            - ``'test_*'`` where ``*`` corresponds to a lower-case accuracy
              measure, e.g. ``'test_rmse'``: numpy array with accuracy values
              for each testset.

            - ``'train_*'`` where ``*`` corresponds to a lower-case accuracy
              measure, e.g. ``'train_rmse'``: numpy array with accuracy values
              for each trainset. Only available if ``return_train_measures`` is
              ``True``.

            - ``'fit_time'``: tuple with the training time in seconds for
              each split.

            - ``'test_time'``: tuple with the testing time in seconds for
              each split.
    """
    # The default list is never mutated: ``measures`` is rebound to a new,
    # lower-cased list here.
    measures = [m.lower() for m in measures]

    cv = get_cv(cv)

    # One delayed fit_and_score call per (trainset, testset) pair; the
    # generator is handed to Parallel as-is.
    delayed_list = (
        delayed(fit_and_score)(algo, trainset, testset, measures, return_train_measures)
        for (trainset, testset) in cv.split(data)
    )
    out = Parallel(n_jobs=n_jobs, pre_dispatch=pre_dispatch)(delayed_list)

    # Each job returned a 4-tuple; transpose into four per-fold tuples.
    (test_measures_dicts, train_measures_dicts, fit_times, test_times) = zip(*out)

    test_measures = dict()
    train_measures = dict()
    ret = dict()
    for m in measures:
        # transform list of dicts into dict of lists
        # Same as in GridSearchCV.fit()
        test_measures[m] = np.asarray([d[m] for d in test_measures_dicts])
        ret["test_" + m] = test_measures[m]
        if return_train_measures:
            train_measures[m] = np.asarray([d[m] for d in train_measures_dicts])
            ret["train_" + m] = train_measures[m]

    # Timings are returned as the tuples produced by zip() above.
    ret["fit_time"] = fit_times
    ret["test_time"] = test_times

    if verbose:
        print_summary(
            algo,
            measures,
            test_measures,
            train_measures,
            fit_times,
            test_times,
            cv.n_splits,
        )

    return ret
def fit_and_score(algo, trainset, testset, measures, return_train_measures=False):
    """Helper method that trains an algorithm and computes accuracy measures
    on a testset. Also reports train and test times.

    Args:
        algo(:obj:`AlgoBase \
            <surprise.prediction_algorithms.algo_base.AlgoBase>`):
            The algorithm to use.
        trainset(:obj:`Trainset <surprise.trainset.Trainset>`): The trainset.
        testset(:obj:`testset`): The testset.
        measures(list of string): The performance measures to compute. Allowed
            names are function names as defined in the :mod:`accuracy
            <surprise.accuracy>` module. Names are matched case-insensitively.
        return_train_measures(bool): Whether to compute performance measures on
            the trainset. Default is ``False``.

    Returns:
        tuple: A tuple containing:

            - A dictionary mapping each accuracy metric to its value on the
              testset (keys are lower case).

            - A dictionary mapping each accuracy metric to its value on the
              trainset (keys are lower case). This dict is empty if
              return_train_measures is False.

            - The fit time in seconds.

            - The testing time in seconds.

    Raises:
        AttributeError: If a measure name does not exist in the accuracy
            module. This is raised before the algorithm is fitted.
    """
    # Resolve every measure up front: an unknown name then fails before the
    # fit is run, and result keys are always lower case as documented.
    scorers = dict()
    for m in measures:
        name = m.lower()
        scorers[name] = getattr(accuracy, name)

    start_fit = time.time()
    algo.fit(trainset)
    fit_time = time.time() - start_fit

    start_test = time.time()
    predictions = algo.test(testset)
    test_time = time.time() - start_test

    test_measures = {name: f(predictions, verbose=0) for name, f in scorers.items()}

    train_measures = dict()
    if return_train_measures:
        # Predicting on the trainset happens after test_time is measured, so
        # it is not included in the reported timings.
        train_predictions = algo.test(trainset.build_testset())
        train_measures = {
            name: f(train_predictions, verbose=0) for name, f in scorers.items()
        }

    return test_measures, train_measures, fit_time, test_time
def print_summary(
    algo, measures, test_measures, train_measures, fit_times, test_times, n_splits
):
    """Print the per-fold results table produced by cross_validate.

    Args:
        algo: The evaluated algorithm; only its class name is printed.
        measures(list of string): Measure names, shown upper-cased in the
            heading line.
        test_measures(dict): Maps a measure name to its per-fold testset
            values.
        train_measures(dict): Maps a measure name to its per-fold trainset
            values. Trainset rows are skipped when this is empty.
        fit_times: Per-fold fit times.
        test_times: Per-fold test times.
        n_splits(int): Number of folds; sets the number of table columns.
    """
    measure_names = ", ".join(name.upper() for name in measures)
    print(
        "Evaluating {} of algorithm {} on {} split(s).".format(
            measure_names, algo.__class__.__name__, n_splits
        )
    )
    print()

    # An 18-wide label column, then one 8-wide column per fold plus the
    # trailing Mean and Std columns.
    template = "{:<18}" + "{:<8}" * (n_splits + 2)

    def stats_row(label, values, spec):
        # One table row: every value, then their mean and standard deviation,
        # all rendered with the same format spec.
        cells = [format(value, spec) for value in values]
        return template.format(
            label, *cells, format(np.mean(values), spec), format(np.std(values), spec)
        )

    fold_titles = [f"Fold {fold}" for fold in range(1, n_splits + 1)]

    # Each entry is one newline-separated chunk of the table. The testset
    # chunk is always present, even when empty, to keep the layout unchanged.
    chunks = [template.format("", *fold_titles, "Mean", "Std")]
    chunks.append(
        "\n".join(
            stats_row(name.upper() + " (testset)", scores, "1.4f")
            for name, scores in test_measures.items()
        )
    )
    if train_measures:
        chunks.append(
            "\n".join(
                stats_row(name.upper() + " (trainset)", scores, "1.4f")
                for name, scores in train_measures.items()
            )
        )
    chunks.append(stats_row("Fit time", fit_times, ".2f"))
    chunks.append(stats_row("Test time", test_times, ".2f"))

    print("\n".join(chunks))