Class SmoothedEncoding
Methods
fit(self, dataframe, y=None, columns_to_encode=None, operations=None, weight_of_overall=0.3, payload=None)
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dataframe |
DataFrame |
dataframe containing column values |
required |
y |
Union[NoneType, pandas.core.series.Series, pandas.core.frame.DataFrame] |
target column(s) to use for encoding value creation, by default None |
None |
columns_to_encode |
Union[NoneType, int, str, List[Union[str, int]]] |
Column names to encode, by default None |
None |
operations |
Optional[List[Callable]] |
encoding operation to perform, by default [np.mean] |
None |
weight_of_overall |
Union[float, int] |
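Weight given to the overall (ungrouped) statistic when smoothing each category's value; larger values pull small categories closer to the overall statistic, by default 0.3 |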
0.3 |
payload |
dict |
Alternative way to specify all encodings in a single call. The payload is a dict of the form {'name of column to encode':{'name of column to encode over':['operation function one','operation function two']}}, by default None |
None |
Source code in nitrofe\encoding\encoding_features.py
def fit(
    self,
    dataframe: pd.DataFrame,
    y: Union[None, pd.Series, pd.DataFrame] = None,
    columns_to_encode: Union[None, int, str, List[Union[str, int]]] = None,
    operations: Union[None, List[Callable]] = None,
    weight_of_overall: Union[float, int] = 0.3,
    payload: dict = None,
):
    """
    Compute smoothed target-encoding tables for the requested columns.

    For every column to encode, each category's aggregated target value is
    blended with the same aggregate taken over the whole dataframe:

        smoothed = (group_value * n + weight_of_overall * overall_value)
                   / (weight_of_overall + n)

    where ``n`` is the number of rows in the category.

    Parameters
    ----------
    dataframe : pd.DataFrame
        dataframe containing column values
    y : Union[None, pd.Series, pd.DataFrame], optional
        target column(s) to use for encoding value creation, by default
        None (an empty DataFrame is passed on to the helper instead)
    columns_to_encode : Union[None, int, str, List[Union[str, int]]], optional
        Column names to encode, by default None. Only used when
        ``payload`` is None.
    operations : Union[None, List[Callable]], optional
        encoding operations to perform, by default [np.mean]. Only used
        when ``payload`` is None.
    weight_of_overall : Union[float, int], optional
        Weight given to the overall (ungrouped) statistic in the smoothing
        formula above, by default 0.3
    payload : dict, optional
        Alternative way to specify all encodings in a single call, as
        {'name of column to encode': {'name of column to encode over':
        ['operation function one', 'operation function two']}},
        by default None

    Returns
    -------
    dict
        ``self.encoding_dict``: maps each encoded column name to a
        DataFrame indexed by category, with one column per
        (target, operation) pair named
        ``<col>_groupby_target_smoothed_<target>_<operation>``.
    """
    self.encoding_dict = {}
    self.payload = payload
    # Identity check: `== None` is ambiguous for array-like arguments.
    operations = [np.mean] if operations is None else operations
    self.weight_of_overall = weight_of_overall
    if y is None:
        y = pd.DataFrame()
    # NOTE(review): presumably populates self.concatenated_dataframe and
    # self.target_columns, both read below -- confirm against the helper.
    self._handle_concatenated_dataframe_column_names(y, dataframe)
    if self.payload is None:
        # NOTE(review): these helpers presumably set self.columns_to_encode
        # and self.operations, which are read just below -- confirm.
        self._check_fit_columns_to_encode(columns_to_encode, dataframe)
        self._check_operations(operations)
        self.payload = {
            _col: {
                target_items: self.operations
                for target_items in self.target_columns
            }
            for _col in self.columns_to_encode
        }
    self._check_weight_of_overall()

    # Iterate self.payload, not the `payload` argument: the argument is None
    # whenever the payload was derived from columns_to_encode/operations.
    for _col, col_payload in self.payload.items():
        grouped = self.concatenated_dataframe.groupby([_col])
        group_sizes = grouped[_col].count()
        # Overall (ungrouped) statistic for each (target, operation) pair,
        # flattened in the same order as the columns of grouped.agg(...).
        overall_values = np.concatenate(
            [
                self.concatenated_dataframe.agg(
                    {target: target_operations}
                ).values.flatten()
                for target, target_operations in col_payload.items()
            ]
        ).ravel()
        _col_frame = (
            grouped.agg(col_payload).multiply(group_sizes, axis=0)
            + self.weight_of_overall * overall_values
        ).div(self.weight_of_overall + group_sizes, axis=0)
        # Flatten the (target, operation) column levels into single names;
        # f-strings keep this working for non-string column labels too.
        _col_frame.columns = [
            f"{_col}_groupby_target_smoothed_{lvlzero}_{lvlone}"
            for lvlzero, lvlone in zip(
                _col_frame.columns.get_level_values(0),
                _col_frame.columns.get_level_values(1),
            )
        ]
        self.encoding_dict[_col] = _col_frame
    return self.encoding_dict