"""Convenient smoother mappings from feature property values (and tuples) to
typical smoothers used for the Cyclic Boosting regressor"""
from __future__ import absolute_import, division, print_function
import logging
import six
from cyclic_boosting import flags, smoothing
from cyclic_boosting.smoothing.base import AbstractBinSmoother
from cyclic_boosting.smoothing.meta_smoother import (
NormalizationRegressionTypeSmoother,
NormalizationSmoother,
RegressionType,
RegressionTypeSmoother,
)
from typing import Optional
_logger = logging.getLogger(__name__)
def _simplify_flags(feature_property: int, feature_group: Optional[str] = None):
    """
    Reduce a general feature property to the basic flag used to pick a smoother.

    The property is tested in a fixed priority order: linear, seasonal,
    monotonic (optionally combined with a direction), continuous, ordered,
    unordered. The first match determines the result. A property matching
    none of these falls back to ``flags.IS_UNORDERED`` and a log message is
    emitted.

    Parameters
    ----------
    feature_property:
        A single feature property.
    feature_group:
        Optional name of the feature/feature_group the property belongs to.
        It is only used in the warning logged for an unknown property.

    Returns
    -------
    The simplified flag; for monotonic features with a direction this is the
    combination ``IS_MONOTONIC | INCREASING`` or ``IS_MONOTONIC | DECREASING``.
    """
    if flags.is_linear_set(feature_property):
        return flags.IS_LINEAR

    if flags.is_seasonal_set(feature_property):
        return flags.IS_SEASONAL

    if flags.is_monotonic_set(feature_property):
        # Keep the direction if one is given, otherwise plain monotonic.
        if flags.increasing_set(feature_property):
            return flags.IS_MONOTONIC | flags.INCREASING
        if flags.decreasing_set(feature_property):
            return flags.IS_MONOTONIC | flags.DECREASING
        return flags.IS_MONOTONIC

    if flags.is_continuous_set(feature_property):
        return flags.IS_CONTINUOUS

    if flags.is_ordered_set(feature_property):
        return flags.IS_ORDERED

    if flags.is_unordered_set(feature_property):
        return flags.IS_UNORDERED

    # Nothing matched: default to IS_UNORDERED and report why.
    features = "" if feature_group is None else "for feature {}".format(feature_group)
    if flags.has_missing_set(feature_property):
        _logger.info("No feature property set. Thus it is set to default IS_UNORDERED!")
    else:
        _logger.warning(
            "Feature property {0} is not known {1}."
            " Thus it is converted to IS_UNORDERED!".format(flags.flags_to_string(feature_property), features)
        )
    return flags.IS_UNORDERED
def _default_smoother_types(neutral_factor_link=0, use_normalization=True):
    """Build the default mapping from simplified feature flags to 1-d smoothers.

    Parameters
    ----------
    neutral_factor_link:
        Passed as ``prior_prediction`` to the weighted mean smoother used for
        unordered features.
    use_normalization: bool
        Whether normalization is done within the smoothers. The seasonal
        smoother is created with ``offset_tozero`` set to the negation of
        this flag.

    Returns
    -------
    dict
        Freshly created smoother instances keyed by the simplified flags.
    """
    onedim = smoothing.onedim
    monotonic = flags.IS_MONOTONIC

    return {
        flags.IS_UNORDERED: onedim.WeightedMeanSmoother(prior_prediction=neutral_factor_link),
        flags.IS_ORDERED: onedim.WeightedMeanSmootherNeighbors(),
        flags.IS_CONTINUOUS: onedim.OrthogonalPolynomialSmoother(),
        flags.IS_LINEAR: smoothing.extrapolate.LinearExtrapolator(),
        # the seasonal smoother does not work with offset_tozero
        # when the normalization is done within the smoother
        flags.IS_SEASONAL: onedim.SeasonalSmoother(offset_tozero=not use_normalization),
        monotonic: onedim.IsotonicRegressor(increasing="auto"),
        monotonic | flags.INCREASING: onedim.IsotonicRegressor(increasing=True),
        monotonic | flags.DECREASING: onedim.IsotonicRegressor(increasing=False),
    }
def determine_reg_type(feature_group, feature_property, feature_type):
    """Determine the ``RegressionType`` of a feature or feature group.

    For a single (non-tuple) property the result is ``extrapolating`` for
    linear features, ``interpolating`` for seasonal or continuous features
    and ``discontinuous`` otherwise.

    For a tuple of properties the type of every element is determined
    recursively and combined: ``discontinuous`` takes precedence over
    ``interpolating``, which takes precedence over ``extrapolating``.

    Parameters
    ----------
    feature_group: tuple
        Name of the feature_group. It is only iterated alongside
        ``feature_property`` in the tuple case.
    feature_property: tuple
        Tuple of feature properties, or a single feature property.
    feature_type:
        Type of the feature. It is only handed on to the recursive calls.

    Returns
    -------
    RegressionType
    """
    if isinstance(feature_property, tuple):
        per_feature = [
            determine_reg_type(name, prop, feature_type) for name, prop in zip(feature_group, feature_property)
        ]
        for candidate in (RegressionType.discontinuous, RegressionType.interpolating):
            if candidate in per_feature:
                return candidate
        return RegressionType.extrapolating

    if flags.is_linear_set(feature_property):
        return RegressionType.extrapolating
    if flags.is_seasonal_set(feature_property) or flags.is_continuous_set(feature_property):
        return RegressionType.interpolating
    return RegressionType.discontinuous
def determine_meta_smoother(smoother, use_normalization, reg_type=None):
    """Wrap a smoother in the meta smoother matching the requested options.

    ================= ============ ===================================
    use_normalization reg_type     result
    ================= ============ ===================================
    False             None         ``smoother`` itself (unwrapped)
    True              None         NormalizationSmoother
    False             given        RegressionTypeSmoother
    True              given        NormalizationRegressionTypeSmoother
    ================= ============ ===================================

    Parameters
    ----------
    smoother: :class:`AbstractBinSmoother`
        smoother that should be wrapped.
    use_normalization: bool
        Flag to decide if normalization should be used for the smoothers.
    reg_type: :class:`RegressionType`
        If a ``RegressionType`` is set the RegressionTypeSmoother is used.
    """
    if reg_type is None:
        if use_normalization:
            return NormalizationSmoother(smoother)
        return smoother

    if use_normalization:
        return NormalizationRegressionTypeSmoother(smoother, reg_type)
    return RegressionTypeSmoother(smoother, reg_type)
class SmootherChoice(object):
    r"""Base class for selecting smoothers for cyclic boosting.

    Maps feature property tuples to smoothers in 1D/2D/3D.

    Subclasses have to implement :meth:`get_raw_smoother`.

    Parameters
    ----------
    use_regression_type: bool
        Flag to decide if ``RegressionType`` regularization should be used
        for the smoothers.
        (default = True)
    use_normalization: bool
        Flag to decide if normalization should be used for the smoothers.
        (default = True)
    explicit_smoothers: dict
        A dictionary with custom 1-d smoothers that override the default
        one-dimensional smoothers chosen by the feature property.
        Needs to be of the format {feature_group : smoother},
        where feature_group is a tuple of strings and smoother is
        an instance of AbstractBinSmoother.
        (default = None)
    """

    # Handed to the default smoothers as ``prior_prediction``.
    neutral_factor_link = 0

    def __init__(self, use_regression_type=True, use_normalization=True, explicit_smoothers=None):
        self.use_regression_type = use_regression_type
        self.use_normalization = use_normalization
        self.explicit_smoothers = self._validate_explicit_smoothers(explicit_smoothers)
        self.onedim_smoothers = _default_smoother_types(self.neutral_factor_link, use_normalization)

    @staticmethod
    def _validate_explicit_smoothers(explicit_smoothers):
        """Check the format of the explicit smoothers and return them.

        Parameters
        ----------
        explicit_smoothers: dict or None
            Mapping of feature group (tuple of strings) to smoother.

        Returns
        -------
        dict
            An empty dict if ``None`` was passed, otherwise the dict itself.

        Raises
        ------
        ValueError
            If a key is not a tuple of strings or a value is not an instance
            of ``AbstractBinSmoother``.
        """
        if explicit_smoothers is None:
            return {}

        def is_tuple_of_strings(x):
            return isinstance(x, tuple) and all(isinstance(s, six.string_types) for s in x)

        if not all(is_tuple_of_strings(feature_group) for feature_group in explicit_smoothers):
            raise ValueError(
                "All explicit smoothers passed to the SmootherChoice"
                " need to have a tuple of strings as a feature group key."
            )
        if not all(isinstance(sm, AbstractBinSmoother) for sm in explicit_smoothers.values()):
            raise ValueError(
                "All explicit smoothers passed to the SmootherChoice" " need to be instances of AbstractBinSmoother."
            )
        return explicit_smoothers

    def choice_fct(self, feature_group, feature_property, feature_type=None):
        """
        Returns the smoother specified by the `get_raw_smoother` method

        If an explicit smoother is defined for the feature group,
        the explicit smoother is used instead.
        The result is wrapped with a meta_smoother using the `wrap_smoother` method.

        Parameters
        ----------
        feature_group: tuple
            Name of the feature_group
        feature_property: tuple
            Tuple of feature properties.
        feature_type:
            Type of the feature.
        """
        explicit_smoother = self.explicit_smoothers.get(feature_group)
        if explicit_smoother is not None:
            smoother = explicit_smoother
        else:
            smoother = self.get_raw_smoother(feature_group, feature_property, feature_type)
        return self.wrap_smoother(smoother, feature_group, feature_property, feature_type)

    def get_onedim_smoother(self, feature_property, feature_name=None):
        """
        Returns the standard one-dimensional smoother to be used for a
        specific feature.

        If an explicit 1D-smoother for the feature is defined, it is returned,
        otherwise the default smoother for the feature property is chosen.

        Parameters
        ----------
        feature_property: int
            Feature property defined as flag.
        feature_name: str
            Name of the feature
        """
        feature_group = (feature_name,)
        explicit_smoother = self.explicit_smoothers.get(feature_group)
        if explicit_smoother is not None:
            return explicit_smoother
        # Hand the feature name on so that the warning about an unknown
        # feature property can name the affected feature.
        return self.onedim_smoothers[_simplify_flags(feature_property, feature_name)]

    def get_raw_smoother(self, feature_group, feature_property, feature_type=None):
        """Method returning the raw smoother for the `feature_group`,
        `feature_property` and `feature_type` specified.

        This smoother is not yet wrapped with a `meta_smoother` from the
        `wrap_smoother` method.

        Parameters
        ----------
        feature_group: tuple
            Name of the feature_group
        feature_property: tuple
            Tuple of feature properties.
        feature_type:
            Type of the feature.

        Raises
        ------
        NotImplementedError
            Always, in this base class; subclasses provide the implementation.
        """
        raise NotImplementedError("Please implement this method.")

    def wrap_smoother(self, smoother, feature_group, feature_property, feature_type):
        """Wrapper method that chooses the correct meta_estimators
        for the smoothers in the SmootherChoice class.

        The regression type is only determined if ``use_regression_type``
        is set; otherwise ``None`` is passed on to `determine_meta_smoother`.

        Parameters
        ----------
        smoother: :class:`AbstractBinSmoother`
            smoother that should be wrapped.
        feature_group: tuple
            Name of the feature_group
        feature_property: tuple
            Tuple of feature properties.
        feature_type:
            Type of the feature.
        """
        reg_type = None
        if self.use_regression_type:
            reg_type = determine_reg_type(feature_group, feature_property, feature_type)
        return determine_meta_smoother(smoother, self.use_normalization, reg_type)
class SmootherChoiceWeightedMean(SmootherChoice):
    r"""Weighted mean smoothing for multi-dimensional feature groups.

    This defines a set of common smoothers where
    choose_smoothers_for_factor_model selects from.
    """

    def get_raw_smoother(self, feature_group, feature_prop, feature_type=None):
        """Return the unwrapped smoother for a feature group.

        Feature groups with more than one feature get a multi-dimensional
        weighted mean smoother whose ``prior_prediction`` is
        ``neutral_factor_link``. Otherwise the one-dimensional smoother for
        the first feature and its property is looked up.

        Parameters
        ----------
        feature_group: tuple
            Name of the feature_group
        feature_prop: tuple
            Tuple of feature properties.
        feature_type:
            Type of the feature. Not used by this method.
        """
        if len(feature_group) <= 1:
            return self.get_onedim_smoother(feature_prop[0], feature_group[0])
        return smoothing.multidim.WeightedMeanSmoother(prior_prediction=self.neutral_factor_link)
class SmootherChoiceGroupBy(SmootherChoice):
    """
    Groupby smoothing for multi-dimensional feature groups.
    """

    def wrap_smoother(self, smoother, feature_group, feature_property, feature_type=None):
        """Wrap ``smoother`` with a meta smoother unless it is a groupby smoother.

        A ``GroupBySmootherCB`` is returned unchanged. Any other smoother is
        wrapped by the base class implementation, using only the property of
        the innermost (last) feature.

        Parameters
        ----------
        smoother: :class:`AbstractBinSmoother`
            smoother that should be wrapped.
        feature_group: tuple
            Name of the feature_group
        feature_property: tuple
            Tuple of feature properties; only the last element is used.
        feature_type:
            Type of the feature.
        """
        if isinstance(smoother, smoothing.multidim.GroupBySmootherCB):
            return smoother
        # only the properties of the innermost feature should
        # determine the regression type of the groupby smoother.
        # The class is named explicitly: ``super(self.__class__, self)``
        # would resolve to this very method again for any subclass and
        # recurse without end.
        return super(SmootherChoiceGroupBy, self).wrap_smoother(
            smoother, feature_group, feature_property[-1], feature_type
        )

    def get_raw_smoother(self, feature_group, feature_prop, feature_type=None):
        """Return the raw smoother for a feature group.

        For a single feature this is its one-dimensional smoother. For
        several features the wrapped one-dimensional smoother of the
        innermost (last) feature is put into a ``GroupBySmootherCB`` with
        ``n_dim`` set to the number of features.

        Parameters
        ----------
        feature_group: tuple
            Name of the feature_group
        feature_prop: tuple
            Tuple of feature properties.
        feature_type:
            Type of the feature. Not used by this method.
        """
        innermost_smoother = self.get_onedim_smoother(feature_prop[-1], feature_group[-1])
        if len(feature_group) > 1:
            return smoothing.multidim.GroupBySmootherCB(
                self.wrap_smoother(innermost_smoother, feature_group, feature_prop),
                n_dim=len(feature_group),
            )
        return innermost_smoother
class NoSmootherChoice(SmootherChoice):
    """SmootherChoice that uses a BinValuesSmoother for every feature group,
    i.e. no smoothing is done. It is meant for experimental use only.
    """

    def choice_fct(self, feature_group, feature_prop, feature_type=None):
        """Return a wrapped BinValuesSmoother for the feature group.

        The one-dimensional variant is used for feature groups of length one,
        the multi-dimensional variant otherwise. Explicit smoothers are not
        consulted by this method.

        Parameters
        ----------
        feature_group: tuple
            Name of the feature_group
        feature_prop: tuple
            Tuple of feature properties.
        feature_type:
            Type of the feature.
        """
        is_onedim = len(feature_group) == 1
        raw = smoothing.onedim.BinValuesSmoother() if is_onedim else smoothing.multidim.BinValuesSmoother()
        return self.wrap_smoother(raw, feature_group, feature_prop, feature_type)
# Names exported by a star-import of this module. ``NoSmootherChoice`` is not
# listed here; its docstring describes it as meant for experimental use only.
__all__ = [
"SmootherChoice",
"SmootherChoiceWeightedMean",
"SmootherChoiceGroupBy",
]