"""Perform unconstrained or constrained optimisation."""
from equadratures.basis import Basis
from equadratures.poly import Poly
from equadratures.subspaces import Subspaces
from equadratures.parameter import Parameter
from scipy import optimize
import numpy as np
from scipy.special import factorial
from scipy.stats import linregress
import warnings
warnings.filterwarnings('ignore')
[docs]class Optimisation:
""" This class performs unconstrained or constrained optimisation of poly objects or custom functions using scipy.optimize.minimize or an in-house trust-region method.
Parameters
----------
method : str
A string specifying the method that will be used for optimisation. Any of the methods available from :obj:`scipy.optimize.minimize` can be chosen. In the case of general constrained optimisation, the options are ``COBYLA``, ``SLSQP``, and ``trust-constr``. The in-house options ``trust-region`` and ``omorf`` are also available.
"""
def __init__(self, method):
self.method = method
self.objective = {'function': None, 'gradient': None, 'hessian': None}
self.maximise = False
self.bounds = None
self.constraints = []
self.num_evals = 0
# np.random.seed(42)
if self.method in ['trust-region', 'omorf']:
self.num_evals = 0
self.S = np.array([])
self.f = np.array([])
self.g = np.array([])
[docs] def add_objective(self, poly=None, custom=None, maximise=False):
""" Adds objective function to be optimised.
Parameters
----------
poly : Poly
A Poly object.
custom : dict, optional
Dictionary containing optional arguments:
- **function** (Callable): The objective function to be called.
- **jac_function** (Callable, *optional*): The gradient (or derivative) of the objective.
- **hess_function** (Callable, *optional*): The Hessian of the objective function.
maximise : bool, optional
A flag to specify if the user would like to maximise the function instead of minimising it.
"""
assert poly is not None or custom is not None
if self.method == 'trust-region':
assert poly is None
assert custom is not None
self.maximise = maximise
k = 1.0
if self.maximise:
k = -1.0
if poly is not None:
f = poly.get_polyfit_function()
jac = poly.get_polyfit_grad_function()
hess = poly.get_polyfit_hess_function()
objective = lambda x: k*np.asscalar(f(x))
objective_deriv = lambda x: k*jac(x)[:,0]
objective_hess = lambda x: k*hess(x)[:,:,0]
elif custom is not None:
assert 'function' in custom
objective = lambda s: k*custom['function'](s)
if 'jac_function' in custom:
objective_deriv = lambda s: k*custom['jac_function'](s)
else:
objective_deriv = '2-point'
if 'hess_function' in custom:
objective_hess = lambda s: k*custom['hess_function'](s)
else:
objective_hess = optimize.BFGS()
self.objective = {'function': objective, 'gradient': objective_deriv, 'hessian': objective_hess}
[docs] def add_bounds(self, lb, ub):
""" Adds bounds :math:`lb <= x <=ub` to the optimisation problem. Only ``L-BFGS-B``, ``TNC``, ``SLSQP``, ``trust-constr``, ``trust-region``, and ``COBYLA`` methods can handle bounds.
Parameters
----------
lb : numpy.ndarray
1-by-n matrix that contains lower bounds of x.
ub : numpy.ndarray
1-by-n matrix that contains upper bounds of x.
"""
assert lb.size == ub.size
assert self.method in ['L-BFGS-B', 'TNC', 'SLSQP', 'trust-constr', 'COBYLA', 'trust-region', 'omorf']
if self.method in ['trust-region', 'omorf']:
self.bounds = [lb, ub]
elif self.method != 'COBYLA':
self.bounds = []
for i in range(lb.size):
self.bounds.append((lb[i], ub[i]))
self.bounds = tuple(self.bounds)
else:
for factor in range(lb.size):
if not np.isinf(lb[factor]):
l = {'type': 'ineq',
'fun': lambda x, i=factor: x[i] - lb[i]}
self.constraints.append(l)
if not np.isinf(ub[factor]):
u = {'type': 'ineq',
'fun': lambda x, i=factor: ub[i] - x[i]}
self.constraints.append(u)
[docs] def add_linear_ineq_con(self, A, b_l, b_u):
""" Adds linear inequality constraints :math:`b_l <= A x <= b_u` to the optimisation problem.
Only ``trust-constr``, ``COBYLA``, and ``SLSQP`` methods can handle general constraints.
Parameters
----------
A : numpy.ndarray
An (M,n) matrix that contains coefficients of the linear inequality constraints.
b_l : numpy.ndarray
An (M,1) matrix that specifies lower bounds of the linear inequality constraints. If there is no lower bound, set ``b_l = -np.inf * np.ones(M)``.
b_u : numpy.ndarray
An (M,1) matrix that specifies upper bounds of the linear inequality constraints. If there is no upper bound, set ``b_u = np.inf * np.ones(M)``.
"""
# trust-constr method has its own linear constraint handler
assert self.method in ['SLSQP', 'trust-constr', 'COBYLA']
if self.method == 'trust-constr':
self.constraints.append(optimize.LinearConstraint(A,b_l,b_u))
# other methods add inequality constraints using dictionary files
else:
if not np.any(np.isinf(b_l)):
self.constraints.append({'type':'ineq', 'fun': lambda x: np.dot(A,x) - b_l, 'jac': lambda x: A})
if not np.any(np.isinf(b_u)):
self.constraints.append({'type':'ineq', 'fun': lambda x: -np.dot(A,x) + b_u, 'jac': lambda x: -A})
[docs] def add_nonlinear_ineq_con(self, poly=None, custom=None):
""" Adds nonlinear inequality constraints :math:`lb <= g(x) <= ub` (for poly option) with :math:`lb`, :math:`ub = bounds` or :math:`g(x) >= 0` (for function option) to the optimisation problem.
Only ``trust-constr``, ``COBYLA``, and ``SLSQP`` methods can handle general constraints.
If Poly object is provided in the poly dictionary, gradients and Hessians will be computed automatically. If a lambda function is provided via ``function`` dictionary, the user may also provide ``jac_function`` for gradients and ``hess_function`` for Hessians; otherwise, a 2-point differentiation rule
will be used to approximate the derivative and a BFGS update will be used to approximate the Hessian.
Parameters
----------
poly : dict, optional
Dictionary containing a Poly and bounds for constraints:
- **poly** (Poly): An instance of the Poly class.
- **bounds** (numpy.ndarray): An array with two entries specifying the lower and upper bounds of the inequality. If there is no lower bound, set ``bounds[0] = -np.inf``. If there is no upper bound, set ``bounds[1] = np.inf``.
custom : dict, optional
Dictionary containing additional custom callable arguments:
- **function** (Callable): The constraint function to be called.
- **jac_function** (Callable, *optional*): The gradient (or derivative) of the constraint.
- **hess_function** (Callable, *optional*): The Hessian of the constraint function.
"""
assert self.method in ['SLSQP', 'trust-constr', 'COBYLA']
assert poly is not None or custom is not None
if poly is not None:
assert 'bounds' in poly
bounds = poly['bounds']
assert 'poly' in poly
gpoly = poly['poly']
# Get lambda functions for function, gradient, and Hessians from poly object
g = gpoly.get_polyfit_function()
jac = gpoly.get_polyfit_grad_function()
hess = gpoly.get_polyfit_hess_function()
constraint = lambda x: g(x)[0]
constraint_deriv = lambda x: jac(x)[:,0]
constraint_hess = lambda x, v: hess(x)[:,:,0]
if self.method == 'trust-constr':
self.constraints.append(optimize.NonlinearConstraint(constraint, bounds[0], bounds[1], \
jac = constraint_deriv, hess = constraint_hess))
# other methods add inequality constraints using dictionary files
elif self.method == 'SLSQP':
if not np.isinf(bounds[0]):
self.constraints.append({'type':'ineq', 'fun': lambda x: constraint(x) - bounds[0], \
'jac': constraint_deriv})
if not np.isinf(bounds[1]):
self.constraints.append({'type':'ineq', 'fun': lambda x: -constraint(x) + bounds[1], \
'jac': lambda x: -constraint_deriv(x)})
else:
if not np.isinf(bounds[0]):
self.constraints.append({'type':'ineq', 'fun': lambda x: constraint(x) - bounds[0]})
if not np.isinf(bounds[1]):
self.constraints.append({'type':'ineq', 'fun': lambda x: -constraint(x) + bounds[1]})
elif custom is not None:
assert 'function' in custom
constraint = custom['function']
if 'jac_function' in custom:
constraint_deriv = custom['jac_function']
else:
constraint_deriv = '2-point'
if 'hess_function' in custom:
constraint_hess = lambda x, v: custom['hess_function'](x)
else:
constraint_hess = optimize.BFGS()
if self.method == 'trust-constr':
self.constraints.append(optimize.NonlinearConstraint(constraint, 0.0, np.inf, jac = constraint_deriv, \
hess = constraint_hess))
elif self.method == 'SLSQP':
if 'jac_function' in custom:
self.constraints.append({'type': 'ineq', 'fun': constraint, 'jac': constraint_deriv})
else:
self.constraints.append({'type': 'ineq', 'fun': constraint})
else:
self.constraints.append({'type': 'ineq', 'fun': constraint})
[docs] def add_linear_eq_con(self, A, b):
""" Adds linear equality constraints :math:`Ax = b` to the optimisation routine. Only ``trust-constr`` and ``SLSQP`` methods can handle equality constraints.
Parameters
----------
A : numpy.ndarray
An (M, n) matrix that contains coefficients of the linear equality constraints.
b : numpy.ndarray
An (M, 1) matrix that specifies right hand side of the linear equality constraints.
"""
assert self.method == 'trust-constr' or 'SLSQP'
if self.method == 'trust-constr':
self.constraints.append(optimize.LinearConstraint(A,b,b))
else:
self.constraints.append({'type':'eq', 'fun': lambda x: A.dot(x) - b, 'jac': lambda x: A})
[docs] def add_nonlinear_eq_con(self, poly=None, custom=None):
""" Adds nonlinear inequality constraints :math:`g(x) = value` (for poly option) or :math:`g(x) = 0` (for function option) to the optimisation routine.
Only ``trust-constr`` and ``SLSQP`` methods can handle equality constraints. If poly object is provided in the poly dictionary, gradients and Hessians will be computed automatically.
Parameters
----------
poly : dict, optional
Dictionary containing a Poly and value for constraints:
- **poly** (Poly): An instance of the Poly class.
- **value** (float): Value of the nonlinear constraint.
custom : dict, optional
Dictionary containing additional custom callable arguments:
- **function** (Callable): The constraint function to be called.
- **jac_function** (Callable, *optional*): The gradient (or derivative) of the constraint.
- **hess_function** (Callable, *optional*): The Hessian of the constraint function.
"""
assert self.method == 'trust-constr' or 'SLSQP'
assert poly is not None or custom is not None
if poly is not None:
assert 'value' in poly
value = poly['value']
g = poly.get_polyfit_function()
jac = poly.get_polyfit_grad_function()
hess = poly.get_polyfit_hess_function()
constraint = lambda x: np.asscalar(g(x))
constraint_deriv = lambda x: jac(x)[:,0]
constraint_hess = lambda x, v: hess(x)[:,:,0]
if self.method == 'trust-constr':
self.constraints.append(optimize.NonlinearConstraint(constraint, value, value, jac=constraint_deriv, \
hess=constraint_hess))
else:
self.constraints.append({'type':'eq', 'fun': lambda x: constraint(x) - value, \
'jac': constraint_deriv})
elif custom is not None:
assert 'function' in custom
constraint = custom['function']
if 'jac_function' in custom:
constraint_deriv = custom['jac_function']
else:
constraint_deriv = '2-point'
if 'hess_function' in custom:
constraint_hess = lambda x, v: custom['hess_function'](x)
else:
constraint_hess = optimize.BFGS()
if self.method == 'trust-constr':
self.constraints.append(optimize.NonlinearConstraint(constraint, 0.0, 0.0, jac=constraint_deriv, \
hess=constraint_hess))
else:
if 'jac_function' in custom:
self.constraints.append({'type':'eq', 'fun': constraint, 'jac': constraint_deriv})
else:
self.constraints.append({'type':'eq', 'fun': constraint})
[docs] def optimise(self, x0, *args, **kwargs):
""" Performs optimisation on a specified function, provided the objective has been added using :meth:'~equadratures.optimisation.add_objective'
and constraints have been added using the relevant method.
Parameters
----------
x0 : numpy.ndarray
Starting point for optimiser.
del_k : float
Initial trust-region radius for ``trust-region`` or ``omorf`` methods
delmin : float
Minimum allowable trust-region radius for ``trust-region`` or ``omorf`` methods
delmax : float
Maximum allowable trust-region radius for ``trust-region`` or ``omorf`` methods
d : int
Reduced dimension for ``omorf`` method
subspace_method : str
Subspace method for ``omorf`` method with options ``variable-projection`` or ``active-subspaces``
Returns
-------
dict
A dictionary containing the optimisation result. Important attributes are: the solution array ``x``, and a Boolean flag ``success`` indicating
if the optimiser exited successfully.
"""
assert self.objective['function'] is not None
if self.method in ['Newton-CG', 'dogleg', 'trust-ncg', 'trust-krylov', 'trust-exact', 'trust-constr']:
sol = optimize.minimize(self.objective['function'], x0, method=self.method, bounds = self.bounds, \
jac=self.objective['gradient'], hess=self.objective['hessian'], \
constraints=self.constraints, options={'disp': False, 'maxiter': 10000})
sol = {'x': sol['x'], 'fun': sol['fun'], 'nfev': self.num_evals, 'status': sol['status']}
elif self.method in ['CG', 'BFGS', 'L-BFGS-B', 'TNC', 'SLSQP']:
sol = optimize.minimize(self.objective['function'], x0, method=self.method, bounds = self.bounds, \
jac=self.objective['gradient'], constraints=self.constraints, \
options={'disp': False, 'maxiter': 10000})
sol = {'x': sol['x'], 'fun': sol['fun'], 'nfev': self.num_evals, 'status': sol['status']}
elif self.method in ['trust-region']:
self._trust_region(x0, del_k=kwargs.get('del_k', None), rho_min=kwargs.get('rho_min', 1.0e-6), \
eta_1=kwargs.get('eta_1', 0.1), eta_2=kwargs.get('eta_2', 0.7), \
gam_dec=kwargs.get('gam_dec', 0.5), gam_inc=kwargs.get('gam_inc', 2.0), \
gam_inc_overline=kwargs.get('gam_inc_overline', 2.5), alpha_1=kwargs.get('alpha_1', 0.1), \
alpha_2=kwargs.get('alpha_2', 0.5), omega_s=kwargs.get('omega_s', 0.5), \
max_evals=kwargs.get('max_evals', 10000), random_initial=kwargs.get('random_initial', False), \
scale_bounds=kwargs.get('scale_bounds', False))
sol = {'x': self.s_old, 'fun': self.f_old, 'nfev': self.num_evals}
elif self.method in ['omorf']:
self._omorf(x0, d=kwargs.get('d', 1), subspace_method=kwargs.get('subspace_method', 'active-subspaces'), \
del_k=kwargs.get('del_k', None), rho_min=kwargs.get('rho_min', 1.0e-8), eta_1=kwargs.get('eta_1', 0.1), \
eta_2=kwargs.get('eta_2', 0.7), gam_dec=kwargs.get('gam_dec', 0.98), gam_inc=kwargs.get('gam_inc', 2.0), \
gam_inc_overline=kwargs.get('gam_inc_overline', 2.5), alpha_1=kwargs.get('alpha_1', 0.9), \
alpha_2=kwargs.get('alpha_2', 0.95), omega_s=kwargs.get('omega_s', 0.5), \
max_evals=kwargs.get('max_evals', 1000), random_initial=kwargs.get('random_initial', False), \
scale_bounds=kwargs.get('scale_bounds', False))
sol = {'x': self.s_old, 'fun': self.f_old, 'nfev': self.num_evals}
else:
sol = optimize.minimize(self.objective['function'], x0, method=self.method, bounds=self.bounds, \
constraints=self.constraints, options={'disp': False, 'maxiter': 10000})
sol = {'x': sol['x'], 'fun': sol['fun'], 'nfev': self.num_evals, 'status': sol['status']}
if self.maximise:
sol['fun'] *= -1.0
return sol
def _set_iterate(self):
ind_min = np.argmin(self.f)
self.s_old = self.S[ind_min,:]
self.f_old = np.asscalar(self.f[ind_min])
self._update_bounds()
def _set_del_k(self, value):
self.del_k = value
self._update_bounds()
def _set_rho_k(self, value):
self.rho_k = value
def _set_unsuccessful_iterate_counter(self, count):
self.count = count
def _set_ratio(self, r_k):
self.r_k = r_k
def _calculate_subspace(self, S, f):
parameters = [Parameter(distribution='uniform', lower=self.bounds_l[i], \
upper=self.bounds_u[i], order=1) for i in range(0, self.n)]
poly = Poly(parameters, basis=Basis('total-order'), method='least-squares', \
sampling_args={'sample-points': S, 'sample-outputs': f})
poly.set_model()
Subs = Subspaces(full_space_poly=poly, method='active-subspace', subspace_dimension=self.d)
U0 = Subs.get_subspace()[:,0].reshape(-1,1)
U1 = Subs.get_subspace()[:,1:]
for i in range(self.d-1):
R = []
for j in range(U1.shape[1]):
U = np.hstack((U0, U1[:, j].reshape(-1,1)))
Y = np.dot(S, U)
myParameters = [Parameter(distribution='uniform', lower=np.min(Y[:,k]), upper=np.max(Y[:,k]), \
order=2) for k in range(Y.shape[1])]
myBasis = Basis('total-order')
poly = Poly(myParameters, myBasis, method='least-squares', \
sampling_args={'sample-points':Y, 'sample-outputs':f})
poly.set_model()
_,_,r,_,_ = linregress(poly.get_polyfit(Y).flatten(),f.flatten())
R.append(r**2)
index = np.argmax(R)
U0 = np.hstack((U0, U1[:, index].reshape(-1,1)))
U1 = np.delete(U1, index, 1)
if self.subspace_method == 'variable-projection':
vp_args = {'U0': U0, 'maxiter': 2*self.d*self.n}
Subs = Subspaces(method='variable-projection', sample_points=S, sample_outputs=f, dr_args=vp_args)
self.U = Subs.get_subspace()[:, :self.d]
elif self.subspace_method == 'active-subspaces':
self.U = U0
def _blackbox_evaluation(self, s):
"""
Evaluates the point s for ``trust-region`` or ``omorf`` methods
"""
s = s.reshape(1,-1)
if self.S.size > 0 and np.unique(np.vstack((self.S, s)), axis=0).shape[0] == self.S.shape[0]:
ind_repeat = np.argmin(np.linalg.norm(self.S - s, ord=np.inf, axis=1))
f = self.f[ind_repeat]
else:
f = np.array([[self.objective['function'](self._remove_scaling(s.flatten()))]])
self.num_evals += 1
if self.f.size == 0:
self.S = s
self.f = f
else:
self.S = np.vstack((self.S, s))
self.f = np.vstack((self.f, f))
return np.asscalar(f)
def _update_bounds(self):
if self.bounds is not None:
if self.scale_bounds:
self.bounds_l = np.maximum(np.zeros(self.n), self.s_old-self.del_k)
self.bounds_u = np.minimum(np.ones(self.n), self.s_old+self.del_k)
else:
self.bounds_l = np.maximum(self.bounds[0], self.s_old-self.del_k)
self.bounds_u = np.minimum(self.bounds[1], self.s_old+self.del_k)
else:
self.bounds_l = self.s_old-self.del_k
self.bounds_u = self.s_old+self.del_k
return None
def _generate_set(self, num):
"""
Generates an initial set of samples using either coordinate directions or orthogonal, random directions
"""
if self.random_initial:
direcs = self._random_directions(num, self.bounds_l-self.s_old, self.bounds_u-self.s_old)
else:
direcs = self._coordinate_directions(num, self.bounds_l-self.s_old, self.bounds_u-self.s_old)
S = np.zeros((num, self.n))
S[0, :] = self.s_old
for i in range(1, num):
S[i, :] = self.s_old + np.minimum(np.maximum(self.bounds_l-self.s_old, direcs[i, :]), self.bounds_u-self.s_old)
return S
def _coordinate_directions(self, num_pnts, lower, upper):
"""
Generates coordinate directions
"""
at_lower_boundary = (lower > -1.e-8 * self.del_k)
at_upper_boundary = (upper < 1.e-8 * self.del_k)
direcs = np.zeros((num_pnts, self.n))
for i in range(1, num_pnts):
if 1 <= i < self.n + 1:
dirn = i - 1
step = self.del_k if not at_upper_boundary[dirn] else -self.del_k
direcs[i, dirn] = step
elif self.n + 1 <= i < 2*self.n + 1:
dirn = i - self.n - 1
step = -self.del_k
if at_lower_boundary[dirn]:
step = min(2.0*self.del_k, upper[dirn])
if at_upper_boundary[dirn]:
step = max(-2.0*self.del_k, lower[dirn])
direcs[i, dirn] = step
else:
itemp = (i - self.n - 1) // self.n
q = i - itemp*self.n - self.n
p = q + itemp
if p > self.n:
p, q = q, p - self.n
direcs[i, p-1] = direcs[p, p-1]
direcs[i, q-1] = direcs[q, q-1]
return direcs
def _random_directions(self, num_pnts, lower, upper):
"""
Generates orthogonal, random directions
"""
direcs = np.zeros((self.n, max(2*self.n+1, num_pnts)))
idx_l = (lower == 0)
idx_u = (upper == 0)
active = np.logical_or(idx_l, idx_u)
inactive = np.logical_not(active)
nactive = np.sum(active)
ninactive = self.n - nactive
if ninactive > 0:
A = np.random.normal(size=(ninactive, ninactive))
Qred = np.linalg.qr(A)[0]
Q = np.zeros((self.n, ninactive))
Q[inactive, :] = Qred
for i in range(ninactive):
scale = self._get_scale(Q[:,i], self.del_k, lower, upper)
direcs[:, i] = scale * Q[:,i]
scale = self._get_scale(-Q[:,i], self.del_k, lower, upper)
direcs[:, self.n+i] = -scale * Q[:,i]
idx_active = np.where(active)[0]
for i in range(nactive):
idx = idx_active[i]
direcs[idx, ninactive+i] = 1.0 if idx_l[idx] else -1.0
direcs[:, ninactive+i] = self._get_scale(direcs[:, ninactive+i], self.del_k, lower, upper) * direcs[:, ninactive+i]
sign = 1.0 if idx_l[idx] else -1.0
if upper[idx] - lower[idx] > self.del_k:
direcs[idx, self.n+ninactive+i] = 2.0*sign*self.del_k
else:
direcs[idx, self.n+ninactive+i] = 0.5*sign*(upper[idx] - lower[idx])
direcs[:, self.n+ninactive+i] = self._get_scale(direcs[:, self.n+ninactive+i], 1.0, lower, upper)*direcs[:, self.n+ninactive+i]
for i in range(num_pnts - 2*self.n):
dirn = np.random.normal(size=(self.n,))
for j in range(nactive):
idx = idx_active[j]
sign = 1.0 if idx_l[idx] else -1.0
if dirn[idx]*sign < 0.0:
dirn[idx] *= -1.0
dirn = dirn / np.linalg.norm(dirn)
scale = self._get_scale(dirn, self.del_k, lower, upper)
direcs[:, 2*self.n+i] = dirn * scale
return np.vstack((np.zeros(self.n), direcs[:, :num_pnts].T))
@staticmethod
def _get_scale(dirn, delta, lower, upper):
scale = delta
for j in range(len(dirn)):
if dirn[j] < 0.0:
scale = min(scale, lower[j] / dirn[j])
elif dirn[j] > 0.0:
scale = min(scale, upper[j] / dirn[j])
return scale
def _apply_scaling(self, S):
if self.bounds is not None and self.scale_bounds:
shift = self.bounds[0].copy()
scale = self.bounds[1] - self.bounds[0]
return np.divide((S - shift), scale)
else:
return S
def _remove_scaling(self, S):
if self.bounds is not None and self.scale_bounds:
shift = self.bounds[0].copy()
scale = self.bounds[1] - self.bounds[0]
return shift + np.multiply(S, scale)
else:
return S
def _update_geometry_trust_region(self, S, f):
if max(np.linalg.norm(S-self.s_old, axis=1, ord=np.inf)) > max(self.epsilon_1*self.del_k, self.epsilon_2*self.rho_k):
S, f = self._sample_set('improve', S, f)
elif self.del_k == self.rho_k:
self._set_del_k(self.alpha_2*self.rho_k)
if self.count >= 3 and self.r_k < 0:
if self.rho_k >= 250*self.rho_min:
self._set_rho_k(self.alpha_1*self.rho_k)
elif 16*self.rho_min < self.rho_k < 250*self.rho_min:
self._set_rho_k(np.sqrt(self.rho_k*self.rho_min))
else:
self._set_rho_k(self.rho_min)
return S, f
def _update_geometry_omorf(self, S_full, f_full, S_red, f_red):
dist = max(self.epsilon_1*self.del_k, self.epsilon_2*self.rho_k)
if max(np.linalg.norm(S_full-self.s_old, axis=1, ord=np.inf)) > dist:
S_full, f_full = self._sample_set('improve', S_full, f_full)
try:
self._calculate_subspace(S_full, f_full)
except:
pass
elif max(np.linalg.norm(S_red-self.s_old, axis=1, ord=np.inf)) > dist:
S_red, f_red = self._sample_set('improve', S_red, f_red, full_space=False)
elif self.del_k == self.rho_k:
self._set_del_k(self.alpha_2*self.rho_k)
if self.count >= 3 and self.r_k < 0:
if self.rho_k >= 250*self.rho_min:
self._set_rho_k(self.alpha_1*self.rho_k)
elif 16*self.rho_min < self.rho_k < 250*self.rho_min:
self._set_rho_k(np.sqrt(self.rho_k*self.rho_min))
else:
self._set_rho_k(self.rho_min)
return S_full, f_full, S_red, f_red
def _sample_set(self, method, S=None, f=None, s_new=None, f_new=None, full_space=True):
if full_space:
q = self.p
else:
q = self.q
dist = max(self.epsilon_1*self.del_k, self.epsilon_2*self.rho_k)
if method == 'replace':
S_hat = np.vstack((S, s_new))
f_hat = np.vstack((f, f_new))
if S_hat.shape != np.unique(S_hat, axis=0).shape:
S_hat, indices = np.unique(S_hat, axis=0, return_index=True)
f_hat = f_hat[indices]
elif f_hat.size > q and max(np.linalg.norm(S_hat-self.s_old, axis=1, ord=np.inf)) > dist:
S_hat, f_hat = self._remove_furthest_point(S_hat, f_hat, self.s_old)
S_hat, f_hat = self._remove_point_from_set(S_hat, f_hat, self.s_old)
S = np.zeros((q, self.n))
f = np.zeros((q, 1))
S[0, :] = self.s_old
f[0, :] = self.f_old
S, f = self._LU_pivoting(S, f, S_hat, f_hat, full_space)
elif method == 'improve':
S_hat = np.copy(S)
f_hat = np.copy(f)
if max(np.linalg.norm(S_hat-self.s_old, axis=1, ord=np.inf)) > dist:
S_hat, f_hat = self._remove_furthest_point(S_hat, f_hat, self.s_old)
S_hat, f_hat = self._remove_point_from_set(S_hat, f_hat, self.s_old)
S = np.zeros((q, self.n))
f = np.zeros((q, 1))
S[0, :] = self.s_old
f[0, :] = self.f_old
S, f = self._LU_pivoting(S, f, S_hat, f_hat, full_space, 'improve')
elif method == 'new':
S_hat = f_hat = np.array([])
S = np.zeros((q, self.n))
f = np.zeros((q, 1))
S[0, :] = self.s_old
f[0, :] = self.f_old
S, f = self._LU_pivoting(S, f, S_hat, f_hat, full_space, 'new')
return S, f
def _LU_pivoting(self, S, f, S_hat, f_hat, full_space, method=None):
psi_1 = 1.0e-4
if self.method == 'omorf' and full_space:
psi_2 = 1.0
else:
psi_2 = 0.25
phi_function, phi_function_deriv = self._get_phi_function_and_derivative(S_hat, full_space)
if full_space:
q = self.p
else:
q = self.q
# Initialise U matrix of LU factorisation of M matrix (see Conn et al.)
U = np.zeros((q,q))
U[0,:] = phi_function(self.s_old)
# Perform the LU factorisation algorithm for the rest of the points
for k in range(1, q):
flag = True
v = np.zeros(q)
for j in range(k):
v[j] = -U[j,k] / U[j,j]
v[k] = 1.0
# If there are still points to choose from, find if points meet criterion. If so, use the index to choose
# point with given index to be next point in regression/interpolation set
if f_hat.size > 0:
M = np.absolute(np.dot(phi_function(S_hat),v).flatten())
index = np.argmax(M)
if M[index] < psi_1:
flag = False
elif method == 'improve' and (k == q - 1 and M[index] < psi_2):
flag = False
elif method == 'new' and M[index] < psi_2:
flag = False
else:
flag = False
# If index exists, choose the point with that index and delete it from possible choices
if flag:
s = S_hat[index,:]
S[k, :] = s
f[k, :] = f_hat[index]
S_hat = np.delete(S_hat, index, 0)
f_hat = np.delete(f_hat, index, 0)
# If index doesn't exist, solve an optimisation problem to find the point in the range which best satisfies criterion
else:
try:
s = self._find_new_point(v, phi_function, phi_function_deriv, full_space)
if np.unique(np.vstack((S[:k, :], s)), axis=0).shape[0] != k+1:
s = self._find_new_point_alternative(v, phi_function, S[:k, :])
except:
s = self._find_new_point_alternative(v, phi_function, S[:k, :])
if f_hat.size > 0 and M[index] >= abs(np.dot(v, phi_function(s))):
s = S_hat[index,:]
S[k, :] = s
f[k, :] = f_hat[index]
S_hat = np.delete(S_hat, index, 0)
f_hat = np.delete(f_hat, index, 0)
else:
S[k, :] = s
f[k, :] = self._blackbox_evaluation(s)
# Update U factorisation in LU algorithm
phi = phi_function(s)
U[k,k] = np.dot(v, phi)
for i in range(k+1,q):
U[k,i] += phi[i]
for j in range(k):
U[k,i] -= (phi[j]*U[j,i]) / U[j,j]
return S, f
def _get_phi_function_and_derivative(self, S_hat, full_space):
Del_S = self.del_k
if self.method == 'trust-region':
if S_hat.size > 0:
Del_S = max(np.linalg.norm(S_hat-self.s_old, axis=1, ord=np.inf))
def phi_function(s):
s_tilde = np.divide((s - self.s_old), Del_S)
try:
m,n = s_tilde.shape
except:
m = 1
s_tilde = s_tilde.reshape(1,-1)
phi = np.zeros((m, self.q))
for k in range(self.q):
phi[:,k] = np.prod(np.divide(np.power(s_tilde, self.basis[k,:]), factorial(self.basis[k,:])), axis=1)
if m == 1:
return phi.flatten()
else:
return phi
def phi_function_deriv(s):
s_tilde = np.divide((s - self.s_old), Del_S)
phi_deriv = np.zeros((self.n, self.q))
for i in range(self.n):
for k in range(1, self.q):
if self.basis[k, i] != 0.0:
tmp = np.zeros(self.n)
tmp[i] = 1
phi_deriv[i,k] = self.basis[k, i] * np.prod(np.divide(np.power(s_tilde, self.basis[k,:]-tmp), \
factorial(self.basis[k,:])))
return np.divide(phi_deriv.T, Del_S).T
elif self.method == 'omorf' and full_space:
if S_hat.size > 0:
Del_S = max(np.linalg.norm(S_hat-self.s_old, axis=1, ord=np.inf))
def phi_function(s):
s_tilde = np.divide((s - self.s_old), Del_S)
try:
m,n = s_tilde.shape
except:
m = 1
s_tilde = s_tilde.reshape(1,-1)
phi = np.zeros((m, self.p))
phi[:, 0] = 1.0
phi[:, 1:] = s_tilde
if m == 1:
return phi.flatten()
else:
return phi
phi_function_deriv = None
elif self.method == 'omorf':
if S_hat.size > 0:
Del_S = max(np.linalg.norm(np.dot(S_hat-self.s_old,self.U), axis=1))
def phi_function(s):
u = np.divide(np.dot((s - self.s_old), self.U), Del_S)
try:
m,n = u.shape
except:
m = 1
u = u.reshape(1,-1)
phi = np.zeros((m, self.q))
for k in range(self.q):
phi[:,k] = np.prod(np.divide(np.power(u, self.basis[k,:]), factorial(self.basis[k,:])), axis=1)
if m == 1:
return phi.flatten()
else:
return phi
def phi_function_deriv(s):
u = np.divide(np.dot((s - self.s_old), self.U), Del_S)
phi_deriv = np.zeros((self.d, self.q))
for i in range(self.d):
for k in range(1, self.q):
if self.basis[k, i] != 0.0:
tmp = np.zeros(self.d)
tmp[i] = 1
phi_deriv[i,k] = self.basis[k, i] * np.prod(np.divide(np.power(u, self.basis[k,:]-tmp), \
factorial(self.basis[k,:])))
phi_deriv = np.divide(phi_deriv.T, Del_S).T
return np.dot(self.U, phi_deriv)
return phi_function, phi_function_deriv
def _find_new_point(self, v, phi_function, phi_function_deriv, full_space=False):
bounds = []
for i in range(self.n):
bounds.append((self.bounds_l[i], self.bounds_u[i]))
if self.method == 'omorf' and full_space:
c = v[1:]
res1 = optimize.linprog(c, bounds=bounds)
res2 = optimize.linprog(-c, bounds=bounds)
if abs(np.dot(v, phi_function(res1['x']))) > abs(np.dot(v, phi_function(res2['x']))):
s = res1['x']
else:
s = res2['x']
else:
obj1 = lambda s: np.dot(v, phi_function(s))
jac1 = lambda s: np.dot(phi_function_deriv(s), v)
obj2 = lambda s: -np.dot(v, phi_function(s))
jac2 = lambda s: -np.dot(phi_function_deriv(s), v)
res1 = optimize.minimize(obj1, self.s_old, method='TNC', jac=jac1, \
bounds=bounds, options={'disp': False})
res2 = optimize.minimize(obj2, self.s_old, method='TNC', jac=jac2, \
bounds=bounds, options={'disp': False})
if abs(res1['fun']) > abs(res2['fun']):
s = res1['x']
else:
s = res2['x']
return s
def _find_new_point_alternative(self, v, phi_function, S):
S_tmp = self._generate_set(int(0.5*(self.n+1)*(self.n+2)))
M = np.absolute(np.dot(phi_function(S_tmp), v).flatten())
indices = np.argsort(M)[::-1][:len(M)]
for index in indices:
s = S_tmp[index,:]
if np.unique(np.vstack((S, s)), axis=0).shape[0] == S.shape[0]+1:
return s
return S_tmp[indices[0], :]
@staticmethod
def _remove_point_from_set(S, f, s):
ind_current = np.where(np.linalg.norm(S-s, axis=1, ord=np.inf) == 0.0)[0]
S = np.delete(S, ind_current, 0)
f = np.delete(f, ind_current, 0)
return S, f
@staticmethod
def _remove_furthest_point(S, f, s):
ind_distant = np.argmax(np.linalg.norm(S-s, axis=1, ord=np.inf))
S = np.delete(S, ind_distant, 0)
f = np.delete(f, ind_distant, 0)
return S, f
def _remove_points_outside_limits(self):
ind_inside = np.where(np.linalg.norm(self.S-self.s_old, axis=1, ord=np.inf) <= max(self.epsilon_1*self.del_k, \
self.epsilon_2*self.rho_k))[0]
S = self.S[ind_inside, :]
f = self.f[ind_inside]
return S, f
def _build_model(self, S, f):
"""
Constructs quadratic model for ``trust-region`` or ``omorf`` methods
"""
if self.method == 'trust-region':
myParameters = [Parameter(distribution='uniform', lower=self.bounds_l[i], \
upper=self.bounds_u[i], order=2) for i in range(self.n)]
myBasis = Basis('total-order')
my_poly = Poly(myParameters, myBasis, method='least-squares', \
sampling_args={'sample-points':S, 'sample-outputs':f})
elif self.method == 'omorf':
Y = np.dot(S, self.U)
myParameters = [Parameter(distribution='uniform', lower=np.min(Y[:,i]), \
upper=np.max(Y[:,i]), order=2) for i in range(self.d)]
myBasis = Basis('total-order')
my_poly = Poly(myParameters, myBasis, method='least-squares', \
sampling_args={'sample-points':Y, 'sample-outputs':f})
my_poly.set_model()
return my_poly
def _compute_step(self, my_poly):
"""
Solves the trust-region subproblem for ``trust-region`` or ``omorf`` methods
"""
bounds = []
for i in range(self.n):
bounds.append((self.bounds_l[i], self.bounds_u[i]))
if self.method == 'trust-region':
res = optimize.minimize(lambda x: np.asscalar(my_poly.get_polyfit(x)), self.s_old, method='TNC', \
jac=lambda x: my_poly.get_polyfit_grad(x).flatten(), bounds=bounds, options={'disp': False})
elif self.method == 'omorf':
res = optimize.minimize(lambda x: np.asscalar(my_poly.get_polyfit(np.dot(x,self.U))), self.s_old, \
method='TNC', jac=lambda x: np.dot(self.U, my_poly.get_polyfit_grad(np.dot(x,self.U))).flatten(), \
bounds=bounds, options={'disp': False})
s_new = res.x
m_new = res.fun
return s_new, m_new
def _start(self, s_old, del_k, rho_min, random_initial, scale_bounds, alpha_1, alpha_2, d=None, subspace_method=None):
self.n = s_old.size
self.random_initial = random_initial
self.scale_bounds = scale_bounds
self.epsilon_1 = 2.0
self.epsilon_2 = 10.0
self.alpha_1 = alpha_1
self.alpha_2 = alpha_2
self.rho_min = rho_min
if self.method == 'trust-region':
self.q = int(0.5*(self.n+1)*(self.n+2))
self.p = int(0.5*(self.n+1)*(self.n+2))
Base = Basis('total-order', orders=np.tile([2], self.n))
self.basis = Base.get_basis()[:,range(self.n-1, -1, -1)]
elif self.method == 'omorf':
self.d = d
self.subspace_method = subspace_method
self.q = int(0.5*(self.d+1)*(self.d+2))
self.p = self.n+1
Base = Basis('total-order', orders=np.tile([2], self.d))
self.basis = Base.get_basis()[:,range(self.d-1, -1, -1)]
self.s_old = self._apply_scaling(s_old)
self.f_old = self._blackbox_evaluation(self.s_old)
if del_k is None:
if self.bounds is None:
self._set_del_k(max(0.1*np.linalg.norm(self.s_old, ord=np.inf), 1.0))
elif self.scale_bounds:
self._set_del_k(0.1)
else:
self._set_del_k(min(0.1*np.linalg.norm(self.bounds[1]-self.bounds[0], ord=np.inf), 1.0))
else:
self._set_del_k(del_k)
self._set_unsuccessful_iterate_counter(0)
self._set_rho_k(self.del_k)
def _finish(self):
self.S = self._remove_scaling(self.S)
self._set_iterate()
def _trust_region(self, s_old, del_k, rho_min, eta_1, eta_2, gam_dec, gam_inc, gam_inc_overline, alpha_1, alpha_2, \
omega_s, max_evals, random_initial, scale_bounds):
"""
Computes optimum using the ``trust-region`` method
"""
itermax = 10000
self._start(s_old, del_k, rho_min, random_initial, scale_bounds, alpha_1, alpha_2)
# Construct the sample set
S = self._generate_set(self.p)
f = np.zeros((self.p, 1))
f[0, :] = self.f_old
for i in range(1, self.p):
f[i, :] = self._blackbox_evaluation(S[i, :])
if self.num_evals >= max_evals:
self._finish()
return
for i in range(itermax):
if self.num_evals >= max_evals or self.rho_k <= rho_min:
break
try:
my_poly = self._build_model(S, f)
except:
S, f = self._sample_set('improve', S, f)
continue
s_new, m_new = self._compute_step(my_poly)
step_dist = np.linalg.norm(s_new - self.s_old, ord=np.inf)
# Safety step implemented in BOBYQA
if step_dist < omega_s*self.rho_k:
self._set_ratio(-0.1)
self._set_unsuccessful_iterate_counter(3)
self._set_del_k(max(gam_dec*self.del_k, self.rho_k))
S, f = self._update_geometry_trust_region(S, f)
continue
f_new = self._blackbox_evaluation(s_new)
if self.num_evals >= max_evals or self.rho_k <= self.rho_min:
self._finish()
return
S, f = self._sample_set('replace', S, f, s_new, f_new)
# Calculate trust-region factor
del_f = self.f_old - f_new
del_m = np.asscalar(my_poly.get_polyfit(self.s_old)) - m_new
if abs(del_m) < 100*np.finfo(float).eps:
self._set_ratio(1.0)
else:
self._set_ratio(del_f / del_m)
self._set_iterate()
if self.r_k >= eta_2:
self._set_unsuccessful_iterate_counter(0)
self._set_del_k(max(gam_inc*self.del_k, gam_inc_overline*step_dist))
elif self.r_k >= eta_1:
self._set_unsuccessful_iterate_counter(0)
self._set_del_k(max(gam_dec*self.del_k, step_dist, self.rho_k))
else:
self._set_unsuccessful_iterate_counter(self.count+1)
self._set_del_k(max(min(gam_dec*self.del_k, step_dist), self.rho_k))
S, f = self._update_geometry_trust_region(S, f)
self._finish()
return
def _omorf(self, s_old, d, subspace_method, del_k, rho_min, eta_1, eta_2, gam_dec, gam_inc, gam_inc_overline, \
alpha_1, alpha_2, omega_s, max_evals, random_initial, scale_bounds):
"""
Computes optimum using the ``omorf`` method
"""
itermax = 10000
self._start(s_old, del_k, rho_min, random_initial, scale_bounds, alpha_1, alpha_2, d, subspace_method)
# Construct the sample set
S_full = self._generate_set(self.p)
f_full = np.zeros((self.p, 1))
f_full[0, :] = self.f_old
for i in range(1, self.p):
f_full[i, :] = self._blackbox_evaluation(S_full[i, :])
if self.num_evals >= max_evals:
self._finish()
return
self._calculate_subspace(S_full, f_full)
S_red, f_red = self._sample_set('new', full_space=False)
for i in range(itermax):
if self.num_evals >= max_evals or self.rho_k <= self.rho_min:
self._finish()
return
try:
my_poly = self._build_model(S_red, f_red)
except:
S_red, f_red = self._sample_set('improve', S_red, f_red, full_space=False)
continue
s_new, m_new = self._compute_step(my_poly)
step_dist = np.linalg.norm(s_new - self.s_old, ord=np.inf)
# Safety step implemented in BOBYQA
if step_dist < omega_s*self.rho_k:
self._set_ratio(-0.1)
self._set_unsuccessful_iterate_counter(3)
self._set_del_k(max(0.5*self.del_k, self.rho_k))
S_full, f_full, S_red, f_red = self._update_geometry_omorf(S_full, f_full, S_red, f_red)
continue
f_new = self._blackbox_evaluation(s_new)
if self.num_evals >= max_evals or self.rho_k <= self.rho_min:
self._finish()
return
# Calculate trust-region factor
del_f = self.f_old - f_new
del_m = np.asscalar(my_poly.get_polyfit(np.dot(self.s_old,self.U))) - m_new
if abs(del_m) < 100*np.finfo(float).eps:
self._set_ratio(1.0)
else:
self._set_ratio(del_f / del_m)
self._set_iterate()
S_red, f_red = self._sample_set('replace', S_red, f_red, s_new, f_new, full_space=False)
S_full, f_full = self._sample_set('replace', S_full, f_full, s_new, f_new)
if self.r_k >= eta_2:
self._set_unsuccessful_iterate_counter(0)
self._set_del_k(max(gam_inc*self.del_k, gam_inc_overline*step_dist))
elif self.r_k >= eta_1:
self._set_unsuccessful_iterate_counter(0)
self._set_del_k(max(gam_dec*self.del_k, step_dist, self.rho_k))
else:
self._set_unsuccessful_iterate_counter(self.count+1)
self._set_del_k(max(min(gam_dec*self.del_k, step_dist), self.rho_k))
S_full, f_full, S_red, f_red = self._update_geometry_omorf(S_full, f_full, S_red, f_red)
self._finish()
return