MeanNormalizationScaler
ADLStream.data.preprocessing.MeanNormalizationScaler
The transformation is given by x_scaled = (x - avg_x) / (max_x - min_x)
where min_x is the minimum value seen until now for the feature x, max_x is the maximum value seen until now for the feature x, and avg_x is the mean seen until now for the feature x.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
share_params |
bool |
Whether to share scaler parameters among columns. Defaults to False. |
False |
Source code in ADLStream/data/preprocessing/mean_normalization_scaler.py
class MeanNormalizationScaler(BasePreprocessor):
    """Incremental mean-normalization scaler.

    The transformation is given by

        x_scaled = (x - avg_x) / (max_x - min_x)

    where min_x is the minimum value seen until now for the feature x,
    max_x is the maximum value seen until now for the feature x
    and avg_x is the mean seen until now for the feature x.

    Arguments:
        share_params (bool): Whether to share scaler parameters among columns.
            Defaults to False.
    """

    def __init__(self, share_params=False):
        self.share_params = share_params
        self.data_min = None
        self.data_max = None
        self.data_sum = None
        # Starts at 1: `learn_one` increments the counter only after the mean
        # has been computed, so it must already include the incoming instance.
        self.data_count = 1
        self.data_avg = None

    def _minimum(self, a, b):
        """Return the element-wise minimum of `a` and `b`.

        When parameters are shared, every position holds the overall minimum.
        """
        assert len(a) == len(b)
        min_values = [min(u, v) for u, v in zip(a, b)]
        # The emptiness guard keeps `min()` from being called on an empty list.
        if self.share_params and min_values:
            min_values = [min(min_values)] * len(min_values)
        return min_values

    def _maximum(self, a, b):
        """Return the element-wise maximum of `a` and `b`.

        When parameters are shared, every position holds the overall maximum.
        """
        assert len(a) == len(b)
        max_values = [max(u, v) for u, v in zip(a, b)]
        # The emptiness guard keeps `max()` from being called on an empty list.
        if self.share_params and max_values:
            max_values = [max(max_values)] * len(max_values)
        return max_values

    def _mean(self, a):
        """Add `a` to the running sum and return the updated mean per feature.

        Side effect: `self.data_sum` is updated. It is a list with one entry
        per feature, or a single scalar when parameters are shared.
        """
        # Plain truthiness, exactly like `_minimum`, `_maximum` and
        # `learn_one`, so all methods always agree on the layout of `data_sum`.
        if self.share_params:
            self.data_sum += sum(a)
            # One global mean over every value seen, repeated for each feature.
            return [self.data_sum / (self.data_count * len(a))] * len(a)

        assert len(a) == len(self.data_sum)
        self.data_sum = [total + value for total, value in zip(self.data_sum, a)]
        return [total / self.data_count for total in self.data_sum]

    def learn_one(self, x):
        """Updates `min`, `max`, `avg` and `count` parameters for each feature.

        Args:
            x (list): input data from stream generator.

        Returns:
            BasePreprocessor: self updated scaler.
        """
        if self.data_min is None:
            # First instance: seed the statistics with the instance itself.
            self.data_min = x
            self.data_max = x
            self.data_avg = x
            # Shared mode accumulates one scalar sum, otherwise one per feature.
            self.data_sum = 0.0 if self.share_params else [0.0] * len(x)

        self.data_min = self._minimum(x, self.data_min)
        self.data_max = self._maximum(x, self.data_max)
        self.data_avg = self._mean(x)
        self.data_count += 1
        return self

    def _mean_normalization(self, val, min_val, max_val, avg_val):
        """Return (val - avg_val) / (max_val - min_val), or 0 for a zero range."""
        value_range = max_val - min_val
        if value_range == 0:
            # Constant feature so far: avoid dividing by zero.
            return 0
        return (val - avg_val) / value_range

    def transform_one(self, x):
        """Scales one instance data.

        Args:
            x (list): input data from stream generator.

        Returns:
            scaled_x (list): mean-normalized data.
        """
        # `learn_one` must have been called at least once before transforming.
        assert self.data_min is not None
        scaled_x = [
            self._mean_normalization(v, m, M, a)
            for v, m, M, a in zip(x, self.data_min, self.data_max, self.data_avg)
        ]
        return scaled_x
learn_one(self, x)
¶
Updates the `min`, `max`, `avg` and `count` parameters for each feature.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
x |
list |
input data from stream generator. |
required |
Returns:
Type | Description |
---|---|
BasePreprocessor |
self updated scaler. |
Source code in ADLStream/data/preprocessing/mean_normalization_scaler.py
def learn_one(self, x):
    """Updates `min`, `max`, `avg` and `count` parameters for each feature.

    Args:
        x (list): input data from stream generator.

    Returns:
        BasePreprocessor: self updated scaler.
    """
    if self.data_min is None:
        # First instance: seed the statistics with the instance itself.
        self.data_min = x
        self.data_max = x
        self.data_avg = x
        # Shared mode accumulates one scalar sum, otherwise one per feature.
        # Plain truthiness (not `== True`) matches the test used by
        # `_minimum` and `_maximum`.
        self.data_sum = 0.0 if self.share_params else [0.0] * len(x)

    self.data_min = self._minimum(x, self.data_min)
    self.data_max = self._maximum(x, self.data_max)
    self.data_avg = self._mean(x)
    self.data_count += 1
    return self
transform_one(self, x)
¶
Scales one instance data
Parameters:
Name | Type | Description | Default |
---|---|---|---|
x |
list |
input data from stream generator. |
required |
Returns:
Type | Description |
---|---|
scaled_x (list) |
mean-normalized data. |
Source code in ADLStream/data/preprocessing/mean_normalization_scaler.py
def transform_one(self, x):
    """Apply mean normalization to a single instance.

    Args:
        x (list): input data from stream generator.

    Returns:
        scaled_x (list): mean-normalized data, one value per feature.
    """
    # The scaler needs statistics from at least one learned instance.
    assert self.data_min is not None

    per_feature = zip(x, self.data_min, self.data_max, self.data_avg)
    scaled_x = []
    for value, lowest, highest, average in per_feature:
        scaled_x.append(
            self._mean_normalization(value, lowest, highest, average)
        )
    return scaled_x