Source code for cyclic_boosting.binning._utils
from __future__ import absolute_import, division, print_function
import logging
import numpy as np
import pandas as pd
from cyclic_boosting import flags
_logger = logging.getLogger(__name__)
def _read_feature_property(col, feature_properties=None):
"""
Get the feature property for a specific column
Parameters
----------
col: int or str
column index specifier, integer for :class:`numpy.ndarray` or
:class:`str` for :class:`pandas.DataFrame`
feature_properties: dict
Dictionary listing the names of all features as keys and their
preprocessing flags as values. When using a numpy feature matrix X with
no column names the keys of the feature properties are the column
indices.
Returns
-------
int
feature property
"""
if feature_properties is None:
return flags.IS_CONTINUOUS
else:
try:
fprop = feature_properties[col]
flags.check_flags_consistency(fprop)
except KeyError:
_logger.warning("Column '%s' not found in " "feature_properties dict." % col)
fprop = None
return fprop
[docs]
def minimal_difference(values):
"""Minimal difference of consecutive array values
excluding zero differences.
:param values: Array values
:type values: :class:`numpy.ndarray` with dim=1.
"""
bin_widths = values[1:] - values[:-1]
bin_widths = bin_widths[bin_widths > 0]
if len(bin_widths) > 0:
return np.min(bin_widths)
else:
return 1
[docs]
def get_column_index(X, column_name_or_index):
"""Integer column index of pandas.Dataframe or numpy.ndarray.
:param X: input matrix
:type X: numpy.ndarray(dim=2) or pandas.DataFrame
:param column_name_or_index: column name or index
:type column_name_or_index: string or int
:rtype: int
"""
if isinstance(X, pd.DataFrame):
return list(X.columns).index(column_name_or_index)
else:
return column_name_or_index
[docs]
def get_bin_bounds(binners, feat_group):
"""
Gets the bin boundaries for each feature group.
Parameters
----------
binners: list
List of binners.
feat_group: str or tuple of str
A feature property for which the bin boundaries should be extracted
from the binners.
"""
if binners is None:
return None
bin_bounds = {}
for binner in binners:
bin_bounds.update(binner.get_feature_bin_boundaries())
if feat_group in bin_bounds and bin_bounds[feat_group] is not None:
return bin_bounds[feat_group][:, 0]
else:
return None
def check_frame_empty(X):
"""Check if a :class:`pd.DataFrame` or a :class:`numpy.ndarray`
is empty.
:param X: input matrix
:type X: :class:`pd.DataFrame` or a :class:`numpy.ndarray`
"""
if isinstance(X, pd.DataFrame):
return X.empty
else:
return X.size == 0