Class CategoricalEncoding
Methods
fit(self, dataframe, y=None, columns_to_encode=None, operations=None, payload=None)
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dataframe | DataFrame | Dataframe containing the column values. | required |
y | Union[NoneType, pandas.core.series.Series, pandas.core.frame.DataFrame] | Target column(s) used to compute the encoding values, by default None. | None |
columns_to_encode | Union[NoneType, int, str, List[Union[str, int]]] | Names of the columns to encode, by default None. | None |
operations | Optional[List[Callable]] | Encoding operations to perform; np.mean is used when None is given. | None |
payload | dict | Alternative way to specify every encoding in a single pass. The payload is a dict of the form {'name of column to encode':{'name of column to encode over':['operation function one','operation function two']}}, by default None. | None |
Source code in nitrofe\encoding\encoding_features.py
def fit(
    self,
    dataframe: pd.DataFrame,
    y: Union[None, pd.Series, pd.DataFrame] = None,
    columns_to_encode: Union[None, int, str, List[Union[str, int]]] = None,
    operations: Union[None, List[Callable]] = None,
    payload: Union[None, dict] = None,
):
    """
    Compute group-by aggregation tables used to encode categorical columns.

    For every column to encode, the concatenated dataframe is grouped by
    that column and the requested operations are aggregated over each
    target column. The resulting columns are renamed to
    ``<encoded column>_groupby_<target column>_<operation>``.

    Parameters
    ----------
    dataframe : pd.DataFrame
        dataframe containing column values
    y : Union[None, pd.Series, pd.DataFrame], optional
        target column to use for encoding value creation, by default None
        (an empty DataFrame is substituted)
    columns_to_encode : Union[None, int, str, List[Union[str, int]]], optional
        Column names to encode, by default None. Only used when
        ``payload`` is None.
    operations : Union[None, List[Callable]], optional
        encoding operations to perform, by default ``[np.mean]``. Only
        used when ``payload`` is None.
    payload : dict, optional
        Alternate method to calculate values in a single pass.
        The payload can be sent in the form of a dict as
        {'name of column to encode':{'name of column to encode over':['operation function one','operation function two']}}, by default None

    Returns
    -------
    dict
        ``self.encoding_dict``: maps each encoded column to the DataFrame
        of aggregated values, indexed by that column's categories.
    """
    self.encoding_dict = {}
    self.payload = payload
    # Identity check: ``== None`` would be evaluated element-wise on
    # array-like inputs and is not the idiomatic None test.
    if operations is None:
        operations = [np.mean]
    if y is None:
        y = pd.DataFrame()
    # NOTE(review): presumably this helper stores the combined frame as
    # ``self.concatenated_dataframe`` (read below) -- confirm in the class.
    self._handle_concatenated_dataframe_column_names(y, dataframe)
    if self.payload is None:
        # NOTE(review): these validators are expected to populate
        # ``self.columns_to_encode``, ``self.target_columns`` and
        # ``self.operations`` -- confirm in the class.
        self._check_fit_columns_to_encode(columns_to_encode, dataframe)
        self._check_operations(operations)
        self.payload = {
            _col: {
                target_items: self.operations
                for target_items in self.target_columns
            }
            for _col in self.columns_to_encode
        }
    for _col, aggregations in self.payload.items():
        _col_frame = self.concatenated_dataframe.groupby([_col]).agg(
            aggregations
        )
        # ``agg`` with a dict of lists yields two-level columns
        # (target column, operation name); flatten them into one label.
        # f-strings keep this working for non-string (e.g. int) labels.
        target_labels = _col_frame.columns.get_level_values(0)
        operation_labels = _col_frame.columns.get_level_values(1)
        _col_frame.columns = [
            f"{_col}_groupby_{target}_{operation}"
            for target, operation in zip(target_labels, operation_labels)
        ]
        self.encoding_dict[_col] = _col_frame
    return self.encoding_dict