Source code for equadratures.logistic_poly

from equadratures import *
import numpy as np
from scipy.optimize import minimize
try:
    import pymanopt
    manopt = True
except ImportError:
    manopt = False
if manopt:
    from pymanopt.manifolds import Stiefel
    from pymanopt import Problem
    from pymanopt.solvers import ConjugateGradient
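# Note: the pymanopt imports above follow the pre-2.0 pymanopt API. Pymanopt 2.x
# renamed the `solvers` subpackage to `optimizers` and changed how costs and
# gradients are attached to a Problem, so this module assumes pymanopt < 2.0.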

class LogisticPoly(object):
    """ Class for defining a logistic subspace polynomial, used for classification tasks.

    Parameters
    ----------
    n : optional, int
        Dimension of subspace (should be smaller than ambient input dimension d). Defaults to 2.
    M_init : optional, numpy array of dimensions (d, n)
        Initial guess for subspace matrix. Defaults to a random projection.
    tol : optional, float
        Optimisation terminates when cost function on training data falls below ``tol``. Defaults to 1e-7.
    cauchy_tol : optional, float
        Optimisation terminates when the difference between the average of the last ``cauchy_length`` cost function evaluations and the current cost is below ``cauchy_tol`` times the current evaluation. Defaults to 1e-5.
    cauchy_length : optional, int
        Length of comparison history for Cauchy convergence. Defaults to 3.
    verbosity : optional, one of (0, 1, 2)
        Print debug messages during optimisation. 0 for no messages, 1 for printing final residual every restart, 2 for printing residuals at every iteration. Defaults to 2.
    order : optional, int
        Maximum order of subspace polynomial used. Defaults to 2.
    C : optional, float
        L2 penalty on coefficients. Defaults to 1.0.
    max_M_iters : optional, int
        Maximum optimisation iterations per restart. Defaults to 10.
    restarts : optional, int
        Number of times to restart optimisation. The result with lowest training error is taken at the end. Defaults to 1.

    Examples
    --------
    Fitting and testing a logistic polynomial on a dataset.

    >>> log_quad = eq.LogisticPoly(n=1, cauchy_tol=1e-5, verbosity=0, order=p_order, max_M_iters=100, C=0.001)
    >>> log_quad.fit(X_train, y_train)
    >>> prediction = log_quad.predict(X_test)
    >>> error_rate = np.sum(np.abs(np.round(prediction) - y_test)) / y_test.shape[0]
    >>> print(error_rate)
    """
    def __init__(self, n=2, M_init=None, tol=1e-7, cauchy_tol=1e-5, cauchy_length=3,
                 verbosity=2, order=2, C=1.0, max_M_iters=10, restarts=1):
        if not manopt:
            raise ModuleNotFoundError('pymanopt is required for logistic_poly module.')
        self.n = n
        self.tol = tol
        self.cauchy_tol = cauchy_tol
        self.verbosity = verbosity
        self.cauchy_length = cauchy_length
        self.C = C
        self.max_M_iters = max_M_iters
        self.restarts = restarts
        self.order = order
        self.M_init = M_init
        self.fitted = False

    @staticmethod
    def _sigmoid(U):
        # Logistic link function, applied elementwise.
        return 1.0 / (1.0 + np.exp(-U))

    def _p(self, X, M, c):
        # Evaluate the subspace polynomial at the projected points X @ M.
        self.dummy_poly.coefficients = c
        return self.dummy_poly.get_polyfit(X @ M).reshape(-1)

    def _phi(self, X, M, c):
        # Predicted class-1 probabilities: sigmoid of the subspace polynomial.
        pW = self._p(X, M, c)
        return self._sigmoid(pW)

    def _cost(self, f, X, M, c):
        # Negative log-likelihood of the Bernoulli model with an L2 penalty:
        # cost = -sum_i [ f_i log(phi_i) + (1 - f_i) log(1 - phi_i) ] + 0.5 * C * ||c||^2
        this_phi = self._phi(X, M, c)
        return -np.sum(f * np.log(this_phi + 1e-15) + (1.0 - f) * np.log(1 - this_phi + 1e-15)) \
               + 0.5 * self.C * np.linalg.norm(c)**2

    def _dcostdc(self, f, X, M, c):
        # Gradient of the cost with respect to the polynomial coefficients c.
        W = X @ M
        self.dummy_poly.coefficients = c
        V = self.dummy_poly.get_poly(W)
        diff = f - self._phi(X, M, c)
        return -np.dot(V, diff) + self.C * c

    def _dcostdM(self, f, X, M, c):
        # Euclidean gradient of the cost with respect to the subspace matrix M;
        # pymanopt converts this to a Riemannian gradient on the Stiefel manifold.
        self.dummy_poly.coefficients = c
        W = X @ M
        J = np.array(self.dummy_poly.get_poly_grad(W))
        if len(J.shape) == 2:
            # Single subspace dimension: promote the gradient to a 3-d array.
            J = J[np.newaxis, :, :]
        diff = f - self._phi(X, M, c)
        Jt = J.transpose((2, 0, 1))
        XJ = X[:, :, np.newaxis] * np.dot(Jt[:, np.newaxis, :, :], c)
        result = -np.dot(XJ.transpose((1, 2, 0)), diff)
        return result
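    # A minimal finite-difference check of the analytic coefficient gradient
    # above (a debugging sketch added for illustration; `_check_dcostdc` is not
    # part of the original API). Only meaningful after fit() has built
    # self.dummy_poly. Returns the largest absolute deviation between the
    # analytic and central-difference gradients.
    def _check_dcostdc(self, f, X, M, c, h=1e-6):
        g = self._dcostdc(f, X, M, c)
        g_fd = np.zeros_like(g)
        for i in range(len(c)):
            e = np.zeros_like(c)
            e[i] = h
            g_fd[i] = (self._cost(f, X, M, c + e) - self._cost(f, X, M, c - e)) / (2.0 * h)
        return np.max(np.abs(g - g_fd))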
    def fit(self, X_train, f_train):
        """ Method to fit logistic polynomial.

        Parameters
        ----------
        X_train : numpy array, shape (N, d)
            Training input points.
        f_train : numpy array, shape (N)
            Training output targets.
        """
        f = f_train
        X = X_train
        tol = self.tol
        d = X_train.shape[1]
        n = self.n
        current_best_residual = np.inf
        my_params = [Parameter(order=self.order, distribution='uniform',
                               lower=-np.sqrt(d), upper=np.sqrt(d)) for _ in range(n)]
        my_basis = Basis('total-order')
        self.dummy_poly = Poly(parameters=my_params, basis=my_basis, method='least-squares')

        for r in range(self.restarts):
            if self.M_init is None:
                M0 = np.linalg.qr(np.random.randn(d, self.n))[0]
            else:
                M0 = self.M_init.copy()
            # Initialise coefficients by a least-squares fit on the projected data.
            my_poly_init = Poly(parameters=my_params, basis=my_basis, method='least-squares',
                                sampling_args={'mesh': 'user-defined',
                                               'sample-points': X @ M0,
                                               'sample-outputs': f})
            my_poly_init.set_model()
            c0 = my_poly_init.coefficients.copy()

            residual = self._cost(f, X, M0, c0)
            cauchy_length = self.cauchy_length
            residual_history = []
            iter_ind = 0
            M = M0.copy()
            c = c0.copy()
            # Alternate between optimising M on the Stiefel manifold and c by CG.
            while residual > tol:
                if self.verbosity == 2:
                    print('residual = %f' % residual)
                residual_history.append(residual)

                # Minimise over M with c fixed.
                func_M = lambda M_var: self._cost(f, X, M_var, c)
                grad_M = lambda M_var: self._dcostdM(f, X, M_var, c)
                manifold = Stiefel(d, n)
                solver = ConjugateGradient(maxiter=self.max_M_iters)
                problem = Problem(manifold=manifold, cost=func_M, egrad=grad_M, verbosity=0)
                M = solver.solve(problem, x=M)

                # Minimise over c with M fixed.
                func_c = lambda c_var: self._cost(f, X, M, c_var)
                grad_c = lambda c_var: self._dcostdc(f, X, M, c_var)
                res = minimize(func_c, x0=c, method='CG', jac=grad_c)
                c = res.x
                residual = self._cost(f, X, M, c)

                # Cauchy convergence: stop once the recent cost history has flattened.
                if iter_ind < cauchy_length:
                    iter_ind += 1
                elif np.abs(np.mean(residual_history[-cauchy_length:]) - residual) / residual < self.cauchy_tol:
                    break

            if self.verbosity > 0:
                print('Final residual on training data: %f' % self._cost(f, X, M, c))
            # Keep the restart with the lowest training cost.
            if residual < current_best_residual:
                self.M = M
                self.c = c
                current_best_residual = residual
        self.fitted = True
    def predict(self, X):
        """ Method to predict from input test points.

        Parameters
        ----------
        X : numpy array, shape (N, d)
            Test input points.

        Returns
        -------
        numpy array, shape (N)
            Predictions at specified test points.
        """
        if not self.fitted:
            raise ValueError('Call fit() to fit logistic polynomial first.')
        return self._phi(X, self.M, self.c)
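# ---------------------------------------------------------------------------
# Example usage: a minimal sketch on synthetic data (everything below is an
# illustration, not part of the library; the labels depend on the inputs only
# through a one-dimensional subspace, so n=1 should recover it).
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    np.random.seed(0)
    d = 5                                         # ambient input dimension
    w = np.ones((d, 1)) / np.sqrt(d)              # true 1-d discriminative direction
    X_train = np.random.uniform(-1.0, 1.0, (200, d))
    X_test = np.random.uniform(-1.0, 1.0, (50, d))
    y_train = (X_train @ w > 0).astype(float).reshape(-1)
    y_test = (X_test @ w > 0).astype(float).reshape(-1)

    log_quad = LogisticPoly(n=1, cauchy_tol=1e-5, verbosity=0, order=2,
                            max_M_iters=100, C=0.001)
    log_quad.fit(X_train, y_train)
    prediction = log_quad.predict(X_test)         # class-1 probabilities in [0, 1]
    error_rate = np.sum(np.abs(np.round(prediction) - y_test)) / y_test.shape[0]
    print('test misclassification rate: %f' % error_rate)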