Source code for cyclic_boosting.observers

from __future__ import absolute_import, division, print_function

import copy

import numpy as np

from cyclic_boosting import utils


class BaseObserver(object):
    """
    Base class for observers

    Observers are used to extract information from cyclic boosting estimators
    that might be of further interest, but are not needed by the estimator in
    order to make predictions.

    Both hooks are no-ops here; subclasses override the ones they need.
    """

    def observe_iterations(
        self, iteration, X, y, prediction, weights, estimator_state, delta=None, quantile=None
    ):
        """
        Called after each iteration of the algorithm.

        Parameters
        ----------
        iteration : int
            number of the iteration
        X : :class:`pandas.DataFrame` or :class:`numpy.ndarray`
            feature matrix
        y : numpy.ndarray
            target array
        prediction : numpy.ndarray
            current target prediction
        weights : numpy.ndarray
            target weights
        estimator_state : dict
            state of the estimator. See
            :meth:`cyclic_boosting.base.CyclicBoostingBase.get_state` for
            information on what will be passed here
        delta : optional
            Accepted so that the signature matches
            :meth:`PlottingObserver.observe_iterations`, which records this
            value as the factor change of the iteration. Ignored here.
        quantile : optional
            Accepted so that the signature matches
            :meth:`PlottingObserver.observe_iterations`, which forwards this
            value to :func:`calc_in_sample_histograms`. Ignored here.
        """

    def observe_feature_iterations(self, iteration, feature_i, X, y, prediction, weights, estimator_state):
        """
        Called after each feature has been processed as part of a full
        iteration of the algorithm.

        Parameters
        ----------
        iteration : int
            number of the iteration
        feature_i : int
            number of the feature that was processed
        X : :class:`pandas.DataFrame` or :class:`numpy.ndarray`
            feature matrix
        y : numpy.ndarray
            target array
        prediction : numpy.ndarray
            current target prediction
        weights : numpy.ndarray
            target weights
        estimator_state : dict
            state of the estimator. See
            :meth:`~cyclic_boosting.base.CyclicBoostingBase.get_state` for
            information on what will be passed here
        """
class PlottingObserver(BaseObserver):
    """
    Observer that collects what the analysis plots of a cyclic boosting
    training need.

    Instances are meant to be handed to a cyclic boosting estimator through
    its ``observers`` parameter, each one gathering information on a specific
    iteration. Afterwards they can be passed to
    :func:`~cyclic_boosting.plots.plot_analysis`.

    Parameters
    ----------
    iteration : int
        Iteration whose estimator state is stored for the analysis plots.
        Default is `-1`, which signifies the last iteration.

    Raises
    ------
    ValueError
        If ``iteration`` is ``0``.
    """

    def __init__(self, iteration=-1):
        if iteration == 0:
            raise ValueError("This plotting observer only makes sense with iterations >= 1.")

        self.iteration = iteration
        # Everything below is filled in by ``observe_iterations``.
        self.features = None
        self.link_function = None
        self.n_feature_bins = None
        self.loss = []
        self.factor_change = []
        self.histograms = None
        self._fitted = False

    def observe_iterations(self, iteration, X, y, prediction, weights, estimator_state, delta=None, quantile=None):
        """Record per-iteration information of a cyclic boosting estimator.

        This function is called in each major loop and once in the end.

        Parameters
        ----------
        iteration: int
            current major iteration of cyclic boosting loop
        X: pd.DataFrame or numpy.ndarray shape(n, k)
            feature matrix (not used by this observer)
        y: np.ndarray
            target array
        prediction: np.ndarray
            array of current cyclic boosting prediction
        weights: np.ndarray
            array of event weights
        estimator_state: dict
            state object of cyclic_boosting estimator; the keys ``"features"``,
            ``"insample_loss"`` and ``"link_function"`` are read
        delta: optional
            value appended to ``factor_change`` for every recorded iteration
            other than iteration 0
        quantile: optional
            forwarded to :func:`calc_in_sample_histograms`
        """
        # Looked up unconditionally, so a missing key fails on every call.
        current_features = estimator_state["features"]

        up_to_target = iteration <= self.iteration and iteration != -1
        if up_to_target or self.iteration == -1:
            self.loss.append(estimator_state["insample_loss"])
            # Iteration 0 has no previous factors to compare with.
            if iteration != 0:
                self.factor_change.append(delta)

        if iteration != self.iteration:
            return

        # This is the iteration the observer was configured for: snapshot it.
        self._fitted = True
        self.features = copy.deepcopy(current_features)

        bins_by_group = {}
        for feat in self.features:
            bins_by_group[feat.feature_group] = feat.n_multi_bins_finite
        self.n_feature_bins = bins_by_group

        self.link_function = estimator_state["link_function"]
        self.histograms = calc_in_sample_histograms(y, prediction, weights, quantile)

    def observe_feature_iterations(self, iteration, feature_i, X, y, prediction, weights, estimator_state):
        """Hook called in each feature/minor loop; this observer does nothing here.

        Parameters
        ----------
        iteration: int
            current major iteration number of cyclic boosting loop
        feature_i: int
            current minor iteration number of cyclic boosting loop
        X: pd.DataFrame or numpy.ndarray shape(n, k)
            feature matrix
        y: np.ndarray
            target array
        prediction: np.ndarray
            array of current cyclic boosting prediction
        weights: np.ndarray
            array of event weights
        estimator_state: dict
            state object of cyclic_boosting estimator
        """

    def check_fitted(self):
        """Raise ``ValueError`` unless the configured iteration has been observed."""
        if self._fitted:
            return
        raise ValueError("Observer not filled.")
def calc_in_sample_histograms(y, pred, weights, quantile=None):
    """
    Calculates histograms for use with diagonal plot.

    The predictions are split into 100 linear bins and the truth is
    summarized per bin.

    Parameters
    ----------
    y : numpy.ndarray
        truth
    pred : numpy.ndarray
        prediction
    weights: np.ndarray
        array of event weights
    quantile : optional
        If not ``None``, the per-bin means are replaced by the result of
        ``utils.calc_weighted_quantile`` for this quantile and no errors
        are returned.

    Returns
    -------
    result : tuple
        Tuple consisting of:

        * means
        * bin_centers (only for bins whose mean is not NaN)
        * errors (``None`` if ``quantile`` is given or if ``y`` contains
          only the values 0 and 1)
        * counts
    """
    n_bins = 100
    edges, centers = utils.calc_linear_bins(pred, n_bins)
    bin_ids = utils.digitize(pred, edges)
    means, _, counts, errors = utils.calc_means_medians(bin_ids, y, weights)

    if quantile is not None:
        means = utils.calc_weighted_quantile(bin_ids, y, weights, quantile)
        errors = None

    # Align the means with bin numbers 1..n_bins and keep only the centers
    # of bins that actually have a (non-NaN) value.
    aligned_means = means.reindex(np.arange(1, n_bins + 1))
    centers = centers[np.where(~np.isnan(aligned_means))]

    # quantiles do not work for classification mode
    # A target consisting only of 0 and 1 is returned without errors.
    only_zeros_and_ones = np.isin(y, [0, 1]).all()
    reported_errors = None if only_zeros_and_ones else errors
    return means, centers, reported_errors, counts
# Names exported by ``from cyclic_boosting.observers import *``.
__all__ = [
    "PlottingObserver",
    "BaseObserver",
    "calc_in_sample_histograms",
]