"""
Classes to scale data.
Some of these classes are called internally by other modules, but they can also be used independently as a pre-processing stage.
Scalers can fit to one set of data, and used to transform other data sets with the same number of dimensions.
Examples
--------
Fitting scaler implicitly during transform
>>> # Define some 1D sample data
>>> X = np.random.RandomState(0).normal(2,0.5,200)
>>> (X.mean(),X.std())
>>> (2.0354552465705806, 0.5107113843479977)
>>>
>>> # Scale to zero mean and unit variance
>>> X = eq.scalers.scaler_meanvar().transform(X)
>>> (X.mean(),X.std())
>>> (2.886579864025407e-17, 1.0)
Using the same scaling to transform train and test data
>>> # Define some 5D example data
>>> X = np.random.RandomState(0).uniform(-10,10,size=(50,5))
>>> y = X[:,0]**2 - X[:,4]
>>> # Split into train/test
>>> X_train, X_test,y_train,y_test = eq.datasets.train_test_split(X,y,train=0.7,random_seed=0)
>>> (X_train.min(),X_train.max())
>>> (-9.906090476149059, 9.767476761184525)
>>>
>>> # Define a scaler and fit to training split
>>> scaler = eq.scalers.scaler_minmax()
>>> scaler.fit(X_train)
>>>
>>> # Transform train and test data with same scaler
>>> X_train = scaler.transform(X_train)
>>> X_test = scaler.transform(X_test)
>>> (X_train.min(),X_train.max())
>>> (-1.0, 1.0)
>>>
>>> # Finally, e.g. of transforming data back again
>>> X_train = scaler.untransform(X_train)
>>> (X_train.min(),X_train.max())
>>> (-9.906090476149059, 9.767476761184525)
"""
import numpy as np
class scaler_minmax(object):
    """ Scale the data to have a min/max of -1 to 1.
    """
    def __init__(self):
        self.fitted = False
    def fit(self, X):
        """ Fit scaler to data.

        Parameters
        ----------
        X : numpy.ndarray
            Array with shape (number_of_samples, number_of_dimensions) containing data to fit scaler to.
        """
        if X.ndim == 1: X = X.reshape(-1,1)
        self.Xmin = np.min(X,axis=0)
        self.Xmax = np.max(X,axis=0)
        self.fitted = True
    def transform(self, X):
        """ Transforms data. Calls :meth:`~equadratures.scalers.scaler_minmax.fit` internally if the scaler has not already been fitted.

        Parameters
        ----------
        X : numpy.ndarray
            Array with shape (number_of_samples, number_of_dimensions) containing data to transform.

        Returns
        -------
        numpy.ndarray
            Array with shape (number_of_samples, number_of_dimensions) containing transformed data.
        """
        if X.ndim == 1: X = X.reshape(-1,1)
        if not self.fitted: self.fit(X)
        # Scale each dimension to the range [-1, 1] using the fitted min/max
        Xtrans = 2.0 * ( (X[:,:]-self.Xmin)/(self.Xmax - self.Xmin) ) - 1.0
        return Xtrans
    def untransform(self, X):
        """ Untransforms data.

        Parameters
        ----------
        X : numpy.ndarray
            Array with shape (number_of_samples, number_of_dimensions) containing data to untransform.

        Returns
        -------
        numpy.ndarray
            Array with shape (number_of_samples, number_of_dimensions) containing untransformed data.

        Raises
        ------
        Exception
            scaler has not been fitted
        """
        if X.ndim == 1: X = X.reshape(-1,1)
        if not self.fitted:
            raise Exception('scaler has not been fitted')
        Xuntrans = 0.5*(X[:,:]+1)*(self.Xmax - self.Xmin) + self.Xmin
        return Xuntrans
class scaler_meanvar(object):
    """ Scale the data to have a mean of 0 and variance of 1.
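
    Examples
    --------
    A minimal sketch of standardising a small sample (assuming ``numpy`` is imported as ``np``, as in the module-level examples):

    >>> X = np.array([2.0, 4.0, 2.0, 4.0])
    >>> scaler = scaler_meanvar()
    >>> Xt = scaler.transform(X)
    >>> (Xt.mean(), Xt.std())
    (0.0, 1.0)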
"""
def __init__(self):
self.fitted = False
    def fit(self, X):
        """ Fit scaler to data.

        Parameters
        ----------
        X : numpy.ndarray
            Array with shape (number_of_samples, number_of_dimensions) containing data to fit scaler to.
        """
        if X.ndim == 1: X = X.reshape(-1,1)
        self.Xmean = np.mean(X,axis=0)
        self.Xstd = np.std(X,axis=0)
        self.fitted = True
    def transform(self, X):
        """ Transforms data. Calls :meth:`~equadratures.scalers.scaler_meanvar.fit` internally if the scaler has not already been fitted.

        Parameters
        ----------
        X : numpy.ndarray
            Array with shape (number_of_samples, number_of_dimensions) containing data to transform.

        Returns
        -------
        numpy.ndarray
            Array with shape (number_of_samples, number_of_dimensions) containing transformed data.
        """
        if X.ndim == 1: X = X.reshape(-1,1)
        if not self.fitted: self.fit(X)
        # Tiny constant guards against division by zero for constant dimensions
        eps = np.finfo(np.float64).tiny
        Xtrans = (X[:,:]-self.Xmean)/(self.Xstd+eps)
        return Xtrans
    def untransform(self, X):
        """ Untransforms data.

        Parameters
        ----------
        X : numpy.ndarray
            Array with shape (number_of_samples, number_of_dimensions) containing data to untransform.

        Returns
        -------
        numpy.ndarray
            Array with shape (number_of_samples, number_of_dimensions) containing untransformed data.

        Raises
        ------
        Exception
            scaler has not been fitted
        """
        if X.ndim == 1: X = X.reshape(-1,1)
        if not self.fitted:
            raise Exception('scaler has not been fitted')
        eps = np.finfo(np.float64).tiny
        Xuntrans = X[:,:]*(self.Xstd+eps) + self.Xmean
        return Xuntrans
class scaler_custom(object):
    """ Scale the data by the provided offset and divisor.

    Parameters
    ----------
    offset : float, numpy.ndarray
        Offset to subtract from data. Either a float, or array with shape (number_of_samples, number_of_dimensions).
    div : float, numpy.ndarray
        Divisor to divide data with. Either a float, or array with shape (number_of_samples, number_of_dimensions).
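
    Examples
    --------
    A minimal sketch with hand-picked values (assuming ``numpy`` is imported as ``np``, as in the module-level examples):

    >>> # Shift by 10 and divide by 5
    >>> scaler = scaler_custom(offset=10.0, div=5.0)
    >>> Xt = scaler.transform(np.array([10.0, 15.0, 20.0]))
    >>> (Xt.min(), Xt.max())
    (0.0, 2.0)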
"""
def __init__(self, offset, div):
self.offset = offset
self.div = div
self.fitted = True
    def transform(self, X):
        """ Transforms data.

        Parameters
        ----------
        X : numpy.ndarray
            Array with shape (number_of_samples, number_of_dimensions) containing data to transform.

        Returns
        -------
        numpy.ndarray
            Array with shape (number_of_samples, number_of_dimensions) containing transformed data.
        """
        if X.ndim == 1: X = X.reshape(-1,1)
        # Tiny constant guards against division by zero
        eps = np.finfo(np.float64).tiny
        Xtrans = (X - self.offset)/(self.div + eps)
        return Xtrans
    def untransform(self, X):
        """ Untransforms data.

        Parameters
        ----------
        X : numpy.ndarray
            Array with shape (number_of_samples, number_of_dimensions) containing data to untransform.

        Returns
        -------
        numpy.ndarray
            Array with shape (number_of_samples, number_of_dimensions) containing untransformed data.
        """
        if X.ndim == 1: X = X.reshape(-1,1)
        eps = np.finfo(np.float64).tiny
        Xuntrans = X * (self.div + eps) + self.offset
        return Xuntrans