Source code for equadratures.scalers

"""
Classes to scale data. 

Some of these classes are called internally by other modules, but they can also be used independently as a pre-processing stage.

Scalers can be fitted to one set of data and then used to transform other data sets with the same number of dimensions.

Examples
--------
Fitting scaler implicitly during transform
    >>> # Define some 1D sample data
    >>> X = np.random.RandomState(0).normal(2,0.5,200)
    >>> (X.mean(),X.std())
    (2.0354552465705806, 0.5107113843479977)
    >>>
    >>> # Scale to zero mean and unit variance
    >>> X = eq.scalers.scaler_meanvar().transform(X)
    >>> (X.mean(),X.std())
    (2.886579864025407e-17, 1.0)

Using the same scaling to transform train and test data
    >>> # Define some 5D example data
    >>> X = np.random.RandomState(0).uniform(-10,10,size=(50,5))
    >>> y = X[:,0]**2 - X[:,4]
    >>> # Split into train/test
    >>> X_train, X_test,y_train,y_test = eq.datasets.train_test_split(X,y,train=0.7,random_seed=0)
    >>> (X_train.min(),X_train.max())
    (-9.906090476149059, 9.767476761184525)
    >>>
    >>> # Define a scaler and fit to training split
    >>> scaler = eq.scalers.scaler_minmax()
    >>> scaler.fit(X_train)
    >>>
    >>> # Transform train and test data with same scaler
    >>> X_train = scaler.transform(X_train)
    >>> X_test = scaler.transform(X_test)
    >>> (X_train.min(),X_train.max())
    (-1.0, 1.0)
    >>>
    >>> # Finally, e.g. of transforming data back again
    >>> X_train = scaler.untransform(X_train)
    >>> (X_train.min(),X_train.max())
    (-9.906090476149059, 9.767476761184525)
"""
import numpy as np

class scaler_minmax(object):
    """
    Scale the data to have a min/max of -1 to 1.

    Each dimension (column) is scaled independently, using the per-column
    minimum and maximum stored by
    :meth:`~equadratures.scalers.scaler_minmax.fit`.
    """
    def __init__(self):
        self.fitted = False

    @staticmethod
    def _coerce(X):
        """Return ``X`` as an array, with 1D input reshaped to one column."""
        # asanyarray lets lists through while leaving ndarray subclasses as-is.
        X = np.asanyarray(X)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        return X

    def fit(self, X):
        """
        Fit scaler to data.

        Parameters
        ----------
        X : numpy.ndarray
            Array with shape (number_of_samples, number_of_dimensions) containing data to fit scaler to.
            A 1D array (or array-like) is treated as a single dimension.
        """
        X = self._coerce(X)
        self.Xmin = np.min(X, axis=0)
        self.Xmax = np.max(X, axis=0)
        self.fitted = True

    def transform(self, X):
        """
        Transforms data. Calls :meth:`~equadratures.scalers.scaler_minmax.fit` internally if scaler not already fitted.

        A dimension whose fitted minimum and maximum are equal has zero range;
        it is divided by 1 instead, so data equal to that constant maps to -1
        rather than NaN.

        Parameters
        ----------
        X : numpy.ndarray
            Array with shape (number_of_samples, number_of_dimensions) containing data to transform.

        Returns
        -------
        numpy.ndarray
            Array with shape (number_of_samples, number_of_dimensions) containing transformed data.
        """
        X = self._coerce(X)
        if not self.fitted:
            self.fit(X)
        span = self.Xmax - self.Xmin
        # Guard only the zero-range columns; all other columns are divided by
        # exactly the same value as before.
        safe_span = np.where(span == 0, 1, span)
        Xtrans = 2.0 * ((X - self.Xmin) / safe_span) - 1.0
        return Xtrans

    def untransform(self, X):
        """
        Untransforms data.

        Parameters
        ----------
        X : numpy.ndarray
            Array with shape (number_of_samples, number_of_dimensions) containing data to untransform.

        Returns
        -------
        numpy.ndarray
            Array with shape (number_of_samples, number_of_dimensions) containing untransformed data.

        Raises
        ------
        Exception
            scaler has not been fitted
        """
        X = self._coerce(X)
        if not self.fitted:
            raise Exception('scaler has not been fitted')
        # A zero-range column collapses back onto its constant value (Xmin).
        Xuntrans = 0.5 * (X + 1) * (self.Xmax - self.Xmin) + self.Xmin
        return Xuntrans
class scaler_meanvar(object):
    """
    Scale the data to have a mean of 0 and variance of 1.

    Each dimension (column) is scaled independently, using the per-column
    mean and standard deviation stored by
    :meth:`~equadratures.scalers.scaler_meanvar.fit`.
    """
    def __init__(self):
        self.fitted = False

    @staticmethod
    def _coerce(X):
        """Return ``X`` as an array, with 1D input reshaped to one column."""
        # asanyarray lets lists through while leaving ndarray subclasses as-is.
        X = np.asanyarray(X)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        return X

    def fit(self, X):
        """
        Fit scaler to data.

        Parameters
        ----------
        X : numpy.ndarray
            Array with shape (number_of_samples, number_of_dimensions) containing data to fit scaler to.
            A 1D array (or array-like) is treated as a single dimension.
        """
        X = self._coerce(X)
        self.Xmean = np.mean(X, axis=0)
        self.Xstd = np.std(X, axis=0)
        self.fitted = True

    def transform(self, X):
        """
        Transforms data. Calls :meth:`~equadratures.scalers.scaler_meanvar.fit` internally if scaler not already fitted.

        Parameters
        ----------
        X : numpy.ndarray
            Array with shape (number_of_samples, number_of_dimensions) containing data to transform.

        Returns
        -------
        numpy.ndarray
            Array with shape (number_of_samples, number_of_dimensions) containing transformed data.
        """
        X = self._coerce(X)
        if not self.fitted:
            self.fit(X)
        # Smallest positive normal float64: keeps the divisor non-zero for a
        # constant column, so values equal to the mean map to 0 instead of NaN.
        eps = np.finfo(np.float64).tiny
        Xtrans = (X - self.Xmean) / (self.Xstd + eps)
        return Xtrans

    def untransform(self, X):
        """
        Untransforms data.

        Parameters
        ----------
        X : numpy.ndarray
            Array with shape (number_of_samples, number_of_dimensions) containing data to untransform.

        Returns
        -------
        numpy.ndarray
            Array with shape (number_of_samples, number_of_dimensions) containing untransformed data.

        Raises
        ------
        Exception
            scaler has not been fitted
        """
        X = self._coerce(X)
        if not self.fitted:
            raise Exception('scaler has not been fitted')
        # Use the same eps-shifted divisor as transform so the two are inverses.
        eps = np.finfo(np.float64).tiny
        Xuntrans = X * (self.Xstd + eps) + self.Xmean
        return Xuntrans
class scaler_custom(object):
    """
    Scale the data by the provided offset and divisor.

    Parameters
    ----------
    offset : float, numpy.ndarray
        Offset to subtract from data. Either a float, or array with shape (number_of_samples, number_of_dimensions).
        Array-likes such as lists are also accepted.
    div : float, numpy.ndarray
        Divisor to divide data with. Either a float, or array with shape (number_of_samples, number_of_dimensions).
        Array-likes such as lists are also accepted.
    """
    def __init__(self, offset, div):
        # Stored exactly as given; conversion to arrays happens at use.
        self.offset = offset
        self.div = div
        self.fitted = True

    @staticmethod
    def _coerce(X):
        """Return ``X`` as an array, with 1D input reshaped to one column."""
        # asanyarray lets lists through while leaving ndarray subclasses as-is.
        X = np.asanyarray(X)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        return X

    def _divisor(self):
        """Return the divisor as an array, shifted by a tiny positive constant."""
        # Converting first means a list-valued div no longer fails on `+ eps`.
        eps = np.finfo(np.float64).tiny
        return np.asanyarray(self.div) + eps

    def transform(self, X):
        """
        Transforms data.

        Parameters
        ----------
        X : numpy.ndarray
            Array with shape (number_of_samples, number_of_dimensions) containing data to transform.

        Returns
        -------
        numpy.ndarray
            Array with shape (number_of_samples, number_of_dimensions) containing transformed data.
        """
        X = self._coerce(X)
        Xtrans = (X - np.asanyarray(self.offset)) / self._divisor()
        return Xtrans

    def untransform(self, X):
        """
        Untransforms data.

        Parameters
        ----------
        X : numpy.ndarray
            Array with shape (number_of_samples, number_of_dimensions) containing data to untransform.

        Returns
        -------
        numpy.ndarray
            Array with shape (number_of_samples, number_of_dimensions) containing untransformed data.
        """
        X = self._coerce(X)
        Xuntrans = X * self._divisor() + np.asanyarray(self.offset)
        return Xuntrans