MinMaxScaler
ADLStream.data.preprocessing.MinMaxScaler
Transform features by scaling each feature between zero and one.
This estimator scales and translates each feature (column) individually such that it is in the range [0, 1].
The transformation is given by x_scaled = (x - min_x) / (max_x - min_x)
where min_x is the minimum value seen until now for the feature x and max_x represents the maximum value seen until now for the feature x.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
share_params |
bool |
Whether to share scaler parameters among columns. Defaults to False. |
False |
Source code in ADLStream/data/preprocessing/min_max_scaler.py
class MinMaxScaler(BasePreprocessor):
    """Transform features by scaling each feature between zero and one.

    This estimator scales and translates each feature (column) individually
    such that it is in the range [0, 1].

    The transformation is given by

        x_scaled = (x - min_x) / (max_x - min_x)

    where min_x is the minimum value seen until now for the feature x and
    max_x represents the maximum value seen until now for the feature x.
    A feature whose observed minimum and maximum are equal is scaled to 0.

    Arguments:
        share_params (bool): Whether to share scaler parameters among columns.
            Defaults to False.
    """

    def __init__(self, share_params=False):
        self.share_params = share_params
        # Running per-feature bounds; both stay None until the first learn_one.
        self.data_min = None
        self.data_max = None

    def _combine(self, a, b, reducer):
        """Reduces `a` and `b` element-wise with `reducer` (`min` or `max`).

        Args:
            a (list): first sequence of feature values.
            b (list): second sequence of feature values, same length as `a`.
            reducer (callable): builtin `min` or `max`.

        Returns:
            list: element-wise reduction; when `share_params` is set every
                position holds the single reduction over all features.

        Raises:
            AssertionError: if `a` and `b` have different lengths.
        """
        # Raised explicitly (instead of `assert`) so the check survives
        # `python -O` while keeping the exception type unchanged.
        if len(a) != len(b):
            raise AssertionError(
                "Expected sequences of equal length, got {} and {}.".format(
                    len(a), len(b)
                )
            )
        values = [reducer(u, v) for u, v in zip(a, b)]
        if self.share_params and values:
            # Compute the shared bound once rather than once per feature.
            shared = reducer(values)
            values = [shared] * len(values)
        return values

    def _minimum(self, a, b):
        """Returns the element-wise minimum of `a` and `b`.

        With `share_params` enabled, every position holds the overall minimum.
        """
        return self._combine(a, b, min)

    def _maximum(self, a, b):
        """Returns the element-wise maximum of `a` and `b`.

        With `share_params` enabled, every position holds the overall maximum.
        """
        return self._combine(a, b, max)

    def learn_one(self, x):
        """Updates `min` and `max` parameters for each feature

        Args:
            x (list): input data from stream generator.

        Returns:
            BasePreprocessor: self updated scaler.
        """
        if self.data_min is None:
            # The first instance seeds both bounds; they are still passed
            # through the helpers below so `share_params` is honoured.
            self.data_min = self.data_max = x
        self.data_min = self._minimum(x, self.data_min)
        self.data_max = self._maximum(x, self.data_max)
        return self

    def _min_max(self, val, min_val, max_val):
        """Scales `val` given the bounds `min_val` and `max_val`.

        Returns 0 when both bounds are equal, avoiding a division by zero.
        """
        offset = val - min_val
        span = max_val - min_val
        return 0 if span == 0 else offset / span

    def transform_one(self, x):
        """Scales one instance data

        Args:
            x (list): input data from stream generator.

        Returns:
            scaled_x (list): minmax scaled data.

        Raises:
            AssertionError: if `learn_one` has not been called yet.
        """
        # Raised explicitly (instead of `assert`) so the check survives
        # `python -O` while keeping the exception type and message unchanged.
        if self.data_min is None:
            raise AssertionError(
                "Parameters not initialized - learn_one before must be called before transform_one."
            )
        return [
            self._min_max(value, low, high)
            for value, low, high in zip(x, self.data_min, self.data_max)
        ]
learn_one(self, x)
¶
Updates min
and max
parameters for each feature
Parameters:
Name | Type | Description | Default |
---|---|---|---|
x |
list |
input data from stream generator. |
required |
Returns:
Type | Description |
---|---|
BasePreprocessor |
self updated scaler. |
Source code in ADLStream/data/preprocessing/min_max_scaler.py
def learn_one(self, x):
    """Updates the running `min` and `max` parameters with one instance

    Args:
        x (list): input data from stream generator.

    Returns:
        BasePreprocessor: self updated scaler.
    """
    if self.data_min is None:
        # The first instance seeds both bounds; they are still passed
        # through the helpers below.
        self.data_min = self.data_max = x
    updated_min = self._minimum(x, self.data_min)
    self.data_min = updated_min
    updated_max = self._maximum(x, self.data_max)
    self.data_max = updated_max
    return self
transform_one(self, x)
¶
Scales one instance data
Parameters:
Name | Type | Description | Default |
---|---|---|---|
x |
list |
input data from stream generator. |
required |
Returns:
Type | Description |
---|---|
scaled_x (list) |
minmax scaled data. |
Source code in ADLStream/data/preprocessing/min_max_scaler.py
def transform_one(self, x):
    """Scales one instance data

    Args:
        x (list): input data from stream generator.

    Returns:
        scaled_x (list): minmax scaled data.

    Raises:
        AssertionError: if `learn_one` has not been called yet.
    """
    # Raised explicitly (instead of `assert`) so the check survives
    # `python -O` while keeping the exception type and message unchanged.
    if self.data_min is None:
        raise AssertionError(
            "Parameters not initialized - learn_one before must be called before transform_one."
        )
    return [
        self._min_max(value, low, high)
        for value, low, high in zip(x, self.data_min, self.data_max)
    ]