Skip to content

Class CategoricalEncoding

Methods

fit(self, dataframe, y=None, columns_to_encode=None, operations=None, payload=None)

Parameters:

Name Type Description Default
dataframe DataFrame

dataframe containing column values

required
y Union[NoneType, pandas.core.series.Series, pandas.core.frame.DataFrame]

Target column(s) to use when computing the encoding values, by default None

None
columns_to_encode Union[NoneType, int, str, List[Union[str, int]]]

Column names to encode, by default None

None
operations Optional[List[Callable]]

Encoding operations (aggregation functions) to perform; when None, [np.mean] is used

None
payload dict

Alternative way to specify all encodings in a single call. The payload is passed as a dict of the form {'name of column to encode':{'name of column to encode over':['operation function one','operation function two']}}, by default None

None
Source code in nitrofe\encoding\encoding_features.py
def fit(
    self,
    dataframe: pd.DataFrame,
    y: Union[None, pd.Series, pd.DataFrame] = None,
    columns_to_encode: Union[None, int, str, List[Union[str, int]]] = None,
    operations: Union[None, List[Callable]] = None,
    payload: Union[None, dict] = None,
) -> dict:
    """
    Compute group-wise aggregation tables used to encode categorical columns.

    For every column to encode, ``self.concatenated_dataframe`` is grouped by
    that column and the requested aggregations are applied to the columns to
    encode over. Each resulting frame is stored in ``self.encoding_dict``
    under the name of the encoded column.

    Parameters
    ----------
    dataframe : pd.DataFrame
        dataframe containing column values
    y : Union[None, pd.Series, pd.DataFrame], optional
        target column to use for encoding value creation, by default None
        (an empty DataFrame is substituted)
    columns_to_encode : Union[None, int, str, List[Union[str, int]]], optional
        Column names to encode, by default None. Only used when ``payload``
        is None.
    operations : Union[None, List[Callable]], optional
        encoding operations to perform, by default None, in which case
        ``[np.mean]`` is used. Only used when ``payload`` is None.
    payload : dict, optional
        Alternate method to calculate values at a single go.
        The payload can be sent in the form of a dict as
        {'name of column to encode':{'name of column to encode over':['operation function one','operation function two']}}, by default None

    Returns
    -------
    dict
        ``self.encoding_dict``: maps each encoded column to a DataFrame
        indexed by that column's values, with one column per
        (column encoded over, operation) pair named
        ``<encoded>_groupby_<encoded over>_<operation>``.

    """

    self.encoding_dict = {}
    self.payload = payload
    # Identity check: `== None` is dispatched element-wise for array-likes.
    if operations is None:
        operations = [np.mean]
    if y is None:
        y = pd.DataFrame()

    # NOTE(review): presumably this helper builds self.concatenated_dataframe
    # from `y` and `dataframe` -- it is defined outside this block; confirm.
    self._handle_concatenated_dataframe_column_names(y, dataframe)

    if self.payload is None:
        # NOTE(review): these helpers are expected to populate
        # self.columns_to_encode / self.target_columns / self.operations,
        # which are read just below -- confirm against their definitions.
        self._check_fit_columns_to_encode(columns_to_encode, dataframe)
        self._check_operations(operations)

        # Default payload: apply every operation to every target column,
        # for each column to encode.
        self.payload = {
            _col: {
                target_items: self.operations
                for target_items in self.target_columns
            }
            for _col in self.columns_to_encode
        }

    for _col, aggregations in self.payload.items():
        _col_frame = self.concatenated_dataframe.groupby([_col]).agg(
            aggregations
        )
        # Flatten the (column encoded over, operation) column MultiIndex into
        # plain names. f-string formatting keeps this working for non-string
        # (e.g. integer) column labels, where `+` concatenation would raise.
        _col_frame.columns = [
            f"{_col}_groupby_{lvlzero}_{lvlone}"
            for lvlzero, lvlone in zip(
                _col_frame.columns.get_level_values(0),
                _col_frame.columns.get_level_values(1),
            )
        ]

        self.encoding_dict[_col] = _col_frame

    return self.encoding_dict