torchzero 0.3.15__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/test_identical.py +2 -2
- tests/test_module_autograd.py +586 -0
- tests/test_objective.py +188 -0
- tests/test_opts.py +43 -33
- tests/test_tensorlist.py +0 -8
- tests/test_utils_optimizer.py +0 -1
- torchzero/__init__.py +1 -1
- torchzero/core/__init__.py +7 -4
- torchzero/core/chain.py +20 -23
- torchzero/core/functional.py +90 -24
- torchzero/core/modular.py +48 -52
- torchzero/core/module.py +130 -50
- torchzero/core/objective.py +948 -0
- torchzero/core/reformulation.py +55 -24
- torchzero/core/transform.py +261 -367
- torchzero/linalg/__init__.py +10 -0
- torchzero/linalg/eigh.py +34 -0
- torchzero/linalg/linalg_utils.py +14 -0
- torchzero/{utils/linalg → linalg}/linear_operator.py +99 -49
- torchzero/linalg/matrix_power.py +28 -0
- torchzero/linalg/orthogonalize.py +95 -0
- torchzero/{utils/linalg → linalg}/qr.py +4 -2
- torchzero/{utils/linalg → linalg}/solve.py +76 -88
- torchzero/linalg/svd.py +20 -0
- torchzero/linalg/torch_linalg.py +168 -0
- torchzero/modules/adaptive/__init__.py +1 -1
- torchzero/modules/adaptive/adagrad.py +163 -213
- torchzero/modules/adaptive/adahessian.py +74 -103
- torchzero/modules/adaptive/adam.py +53 -76
- torchzero/modules/adaptive/adan.py +49 -30
- torchzero/modules/adaptive/adaptive_heavyball.py +11 -6
- torchzero/modules/adaptive/aegd.py +12 -12
- torchzero/modules/adaptive/esgd.py +98 -119
- torchzero/modules/adaptive/lion.py +5 -10
- torchzero/modules/adaptive/lmadagrad.py +87 -32
- torchzero/modules/adaptive/mars.py +5 -5
- torchzero/modules/adaptive/matrix_momentum.py +47 -51
- torchzero/modules/adaptive/msam.py +70 -52
- torchzero/modules/adaptive/muon.py +59 -124
- torchzero/modules/adaptive/natural_gradient.py +33 -28
- torchzero/modules/adaptive/orthograd.py +11 -15
- torchzero/modules/adaptive/rmsprop.py +83 -75
- torchzero/modules/adaptive/rprop.py +48 -47
- torchzero/modules/adaptive/sam.py +55 -45
- torchzero/modules/adaptive/shampoo.py +123 -129
- torchzero/modules/adaptive/soap.py +207 -143
- torchzero/modules/adaptive/sophia_h.py +106 -130
- torchzero/modules/clipping/clipping.py +15 -18
- torchzero/modules/clipping/ema_clipping.py +31 -25
- torchzero/modules/clipping/growth_clipping.py +14 -17
- torchzero/modules/conjugate_gradient/cg.py +26 -37
- torchzero/modules/experimental/__init__.py +2 -6
- torchzero/modules/experimental/coordinate_momentum.py +36 -0
- torchzero/modules/experimental/curveball.py +25 -41
- torchzero/modules/experimental/gradmin.py +2 -2
- torchzero/modules/experimental/higher_order_newton.py +14 -40
- torchzero/modules/experimental/newton_solver.py +22 -53
- torchzero/modules/experimental/newtonnewton.py +15 -12
- torchzero/modules/experimental/reduce_outward_lr.py +7 -7
- torchzero/modules/experimental/scipy_newton_cg.py +21 -24
- torchzero/modules/experimental/spsa1.py +3 -3
- torchzero/modules/experimental/structural_projections.py +1 -4
- torchzero/modules/functional.py +1 -1
- torchzero/modules/grad_approximation/forward_gradient.py +7 -7
- torchzero/modules/grad_approximation/grad_approximator.py +23 -16
- torchzero/modules/grad_approximation/rfdm.py +20 -17
- torchzero/modules/least_squares/gn.py +90 -42
- torchzero/modules/line_search/backtracking.py +2 -2
- torchzero/modules/line_search/line_search.py +32 -32
- torchzero/modules/line_search/strong_wolfe.py +2 -2
- torchzero/modules/misc/debug.py +12 -12
- torchzero/modules/misc/escape.py +10 -10
- torchzero/modules/misc/gradient_accumulation.py +10 -78
- torchzero/modules/misc/homotopy.py +16 -8
- torchzero/modules/misc/misc.py +120 -122
- torchzero/modules/misc/multistep.py +50 -48
- torchzero/modules/misc/regularization.py +49 -44
- torchzero/modules/misc/split.py +30 -28
- torchzero/modules/misc/switch.py +37 -32
- torchzero/modules/momentum/averaging.py +14 -14
- torchzero/modules/momentum/cautious.py +34 -28
- torchzero/modules/momentum/momentum.py +11 -11
- torchzero/modules/ops/__init__.py +4 -4
- torchzero/modules/ops/accumulate.py +21 -21
- torchzero/modules/ops/binary.py +67 -66
- torchzero/modules/ops/higher_level.py +19 -19
- torchzero/modules/ops/multi.py +44 -41
- torchzero/modules/ops/reduce.py +26 -23
- torchzero/modules/ops/unary.py +53 -53
- torchzero/modules/ops/utility.py +47 -46
- torchzero/modules/projections/galore.py +1 -1
- torchzero/modules/projections/projection.py +43 -43
- torchzero/modules/quasi_newton/damping.py +1 -1
- torchzero/modules/quasi_newton/lbfgs.py +7 -7
- torchzero/modules/quasi_newton/lsr1.py +7 -7
- torchzero/modules/quasi_newton/quasi_newton.py +10 -10
- torchzero/modules/quasi_newton/sg2.py +19 -19
- torchzero/modules/restarts/restars.py +26 -24
- torchzero/modules/second_order/__init__.py +2 -2
- torchzero/modules/second_order/ifn.py +31 -62
- torchzero/modules/second_order/inm.py +49 -53
- torchzero/modules/second_order/multipoint.py +40 -80
- torchzero/modules/second_order/newton.py +57 -90
- torchzero/modules/second_order/newton_cg.py +102 -154
- torchzero/modules/second_order/nystrom.py +157 -177
- torchzero/modules/second_order/rsn.py +106 -96
- torchzero/modules/smoothing/laplacian.py +13 -12
- torchzero/modules/smoothing/sampling.py +11 -10
- torchzero/modules/step_size/adaptive.py +23 -23
- torchzero/modules/step_size/lr.py +15 -15
- torchzero/modules/termination/termination.py +32 -30
- torchzero/modules/trust_region/cubic_regularization.py +2 -2
- torchzero/modules/trust_region/levenberg_marquardt.py +25 -28
- torchzero/modules/trust_region/trust_cg.py +1 -1
- torchzero/modules/trust_region/trust_region.py +27 -22
- torchzero/modules/variance_reduction/svrg.py +21 -18
- torchzero/modules/weight_decay/__init__.py +2 -1
- torchzero/modules/weight_decay/reinit.py +83 -0
- torchzero/modules/weight_decay/weight_decay.py +12 -13
- torchzero/modules/wrappers/optim_wrapper.py +10 -10
- torchzero/modules/zeroth_order/cd.py +9 -6
- torchzero/optim/root.py +3 -3
- torchzero/optim/utility/split.py +2 -1
- torchzero/optim/wrappers/directsearch.py +27 -63
- torchzero/optim/wrappers/fcmaes.py +14 -35
- torchzero/optim/wrappers/mads.py +11 -31
- torchzero/optim/wrappers/moors.py +66 -0
- torchzero/optim/wrappers/nevergrad.py +4 -4
- torchzero/optim/wrappers/nlopt.py +31 -25
- torchzero/optim/wrappers/optuna.py +6 -13
- torchzero/optim/wrappers/pybobyqa.py +124 -0
- torchzero/optim/wrappers/scipy/__init__.py +7 -0
- torchzero/optim/wrappers/scipy/basin_hopping.py +117 -0
- torchzero/optim/wrappers/scipy/brute.py +48 -0
- torchzero/optim/wrappers/scipy/differential_evolution.py +80 -0
- torchzero/optim/wrappers/scipy/direct.py +69 -0
- torchzero/optim/wrappers/scipy/dual_annealing.py +115 -0
- torchzero/optim/wrappers/scipy/experimental.py +141 -0
- torchzero/optim/wrappers/scipy/minimize.py +151 -0
- torchzero/optim/wrappers/scipy/sgho.py +111 -0
- torchzero/optim/wrappers/wrapper.py +121 -0
- torchzero/utils/__init__.py +7 -25
- torchzero/utils/compile.py +2 -2
- torchzero/utils/derivatives.py +93 -69
- torchzero/utils/optimizer.py +4 -77
- torchzero/utils/python_tools.py +31 -0
- torchzero/utils/tensorlist.py +11 -5
- torchzero/utils/thoad_tools.py +68 -0
- {torchzero-0.3.15.dist-info → torchzero-0.4.0.dist-info}/METADATA +1 -1
- torchzero-0.4.0.dist-info/RECORD +191 -0
- tests/test_vars.py +0 -185
- torchzero/core/var.py +0 -376
- torchzero/modules/experimental/momentum.py +0 -160
- torchzero/optim/wrappers/scipy.py +0 -572
- torchzero/utils/linalg/__init__.py +0 -12
- torchzero/utils/linalg/matrix_funcs.py +0 -87
- torchzero/utils/linalg/orthogonalize.py +0 -12
- torchzero/utils/linalg/svd.py +0 -20
- torchzero/utils/ops.py +0 -10
- torchzero-0.3.15.dist-info/RECORD +0 -175
- /torchzero/{utils/linalg → linalg}/benchmark.py +0 -0
- {torchzero-0.3.15.dist-info → torchzero-0.4.0.dist-info}/WHEEL +0 -0
- {torchzero-0.3.15.dist-info → torchzero-0.4.0.dist-info}/top_level.txt +0 -0
torchzero/optim/wrappers/nlopt.py

@@ -1,3 +1,4 @@
+import warnings
 from typing import Literal, Any
 from collections.abc import Mapping, Callable
 from functools import partial
@@ -5,7 +6,8 @@ import numpy as np
 import torch
 
 import nlopt
-from ...utils import
+from ...utils import TensorList
+from .wrapper import WrapperBase
 
 _ALGOS_LITERAL = Literal[
     "GN_DIRECT", # = _nlopt.GN_DIRECT
@@ -69,14 +71,14 @@ def _ensure_tensor(x):
 inf = float('inf')
 Closure = Callable[[bool], Any]
 
-class NLOptWrapper(
+class NLOptWrapper(WrapperBase):
     """Use nlopt as pytorch optimizer, with gradient supplied by pytorch autograd.
     Note that this performs full minimization on each step,
     so usually you would want to perform a single step, although performing multiple steps will refine the
     solution.
 
     Args:
-        params: iterable of parameters to optimize or dicts defining parameter groups.
+        params (Iterable): iterable of parameters to optimize or dicts defining parameter groups.
         algorithm (int | _ALGOS_LITERAL): optimization algorithm from https://nlopt.readthedocs.io/en/latest/NLopt_Algorithms/
         maxeval (int | None):
             maximum allowed function evaluations, set to None to disable. But some stopping criterion
@@ -96,21 +98,30 @@ class NLOptWrapper(Optimizer):
         algorithm: int | _ALGOS_LITERAL,
         lb: float | None = None,
         ub: float | None = None,
-        maxeval: int | None =
+        maxeval: int | None = None, # None can stall on some algos and because they are threaded C you can't even interrupt them
        stopval: float | None = None,
         ftol_rel: float | None = None,
         ftol_abs: float | None = None,
         xtol_rel: float | None = None,
         xtol_abs: float | None = None,
         maxtime: float | None = None,
+        require_criterion: bool = True,
     ):
+        if require_criterion:
+            if all(i is None for i in (maxeval, stopval, ftol_abs, ftol_rel, xtol_abs, xtol_rel)):
+                raise RuntimeError(
+                    "Specify at least one stopping criterion out of "
+                    "(maxeval, stopval, ftol_rel, ftol_abs, xtol_rel, xtol_abs, maxtime). "
+                    "Pass `require_criterion=False` to suppress this error."
+                )
+
         defaults = dict(lb=lb, ub=ub)
         super().__init__(params, defaults)
 
         self.opt: nlopt.opt | None = None
+        self.algorithm_name: str | int = algorithm
         if isinstance(algorithm, str): algorithm = getattr(nlopt, algorithm.upper())
         self.algorithm: int = algorithm # type:ignore
-        self.algorithm_name: str | None = None
 
         self.maxeval = maxeval; self.stopval = stopval
         self.ftol_rel = ftol_rel; self.ftol_abs = ftol_abs
@@ -119,7 +130,7 @@ class NLOptWrapper(Optimizer):
 
         self._last_loss = None
 
-    def
+    def _objective(self, x: np.ndarray, grad: np.ndarray, closure, params: TensorList):
         if self.raised:
             if self.opt is not None: self.opt.force_stop()
             return np.inf
@@ -132,7 +143,7 @@ class NLOptWrapper(Optimizer):
         if grad.size > 0:
             with torch.enable_grad(): loss = closure()
             self._last_loss = _ensure_float(loss)
-            grad[:] = params.
+            grad[:] = params.grad.fill_none_(reference=params).to_vec().reshape(grad.shape).numpy(force=True)
             return self._last_loss
 
         self._last_loss = _ensure_float(closure(False))
@@ -147,25 +158,20 @@
     def step(self, closure: Closure): # pylint: disable = signature-differs # pyright:ignore[reportIncompatibleMethodOverride]
         self.e = None
         self.raised = False
-        params = self.
-
-        # make bounds
-        lb, ub = self.group_vals('lb', 'ub', cls=list)
-        lower = []
-        upper = []
-        for p, l, u in zip(params, lb, ub):
-            if l is None: l = -inf
-            if u is None: u = inf
-            lower.extend([l] * p.numel())
-            upper.extend([u] * p.numel())
+        params = TensorList(self._get_params())
+        x0 = params.to_vec().numpy(force=True)
 
-
+        plb, pub = self._get_per_parameter_lb_ub()
+        if all(i is None for i in plb) and all(i is None for i in pub):
+            lb = ub = None
+        else:
+            lb, ub = self._get_lb_ub(ld = {None: -np.inf}, ud = {None: np.inf})
 
         self.opt = nlopt.opt(self.algorithm, x0.size)
         self.opt.set_exceptions_enabled(False) # required
-        self.opt.set_min_objective(partial(self.
-        self.opt.set_lower_bounds(
-        self.opt.set_upper_bounds(
+        self.opt.set_min_objective(partial(self._objective, closure = closure, params = params))
+        if lb is not None: self.opt.set_lower_bounds(np.asarray(lb, dtype=x0.dtype))
+        if ub is not None: self.opt.set_upper_bounds(np.asarray(ub, dtype=x0.dtype))
 
         if self.maxeval is not None: self.opt.set_maxeval(self.maxeval)
         if self.stopval is not None: self.opt.set_stopval(self.stopval)
@@ -179,12 +185,12 @@
         x = None
         try:
             x = self.opt.optimize(x0)
-        except SystemError:
-
+        # except SystemError as s:
+        #     warnings.warn(f"{self.algorithm_name} raised {s}")
         except Exception as e:
             raise e from None
 
-        if x is not None: params.from_vec_(torch.
+        if x is not None: params.from_vec_(torch.as_tensor(x, device = params[0].device, dtype=params[0].dtype))
         if self.e is not None: raise self.e from None
 
         if self._last_loss is None or x is None: return closure(False)
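For orientation, a minimal usage sketch of the reworked NLOptWrapper (illustrative only, not taken from the package; it assumes nothing beyond what the diff shows: the constructor arguments, the new requirement to pass at least one stopping criterion, and the closure convention where closure(False) evaluates the loss without a backward pass):

import torch
from torchzero.optim.wrappers.nlopt import NLOptWrapper

model = torch.nn.Linear(10, 1)
X, y = torch.randn(64, 10), torch.randn(64, 1)

# a gradient-based nlopt algorithm; maxeval satisfies the new stopping-criterion check
opt = NLOptWrapper(model.parameters(), algorithm="LD_LBFGS", maxeval=1000)

def closure(backward=True):
    loss = torch.nn.functional.mse_loss(model(X), y)
    if backward:
        model.zero_grad()
        loss.backward()
    return loss

final_loss = opt.step(closure)  # a single step runs the full nlopt optimization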
torchzero/optim/wrappers/optuna.py

@@ -1,23 +1,16 @@
-import
-from collections import abc
-
-import numpy as np
+import optuna
 import torch
 
-import
+from ...utils import TensorList, tofloat, totensor
+from .wrapper import WrapperBase
 
-from ...utils import Optimizer, totensor, tofloat
 
 def silence_optuna():
     optuna.logging.set_verbosity(optuna.logging.WARNING)
 
-def _ensure_float(x) -> float:
-    if isinstance(x, torch.Tensor): return x.detach().cpu().item()
-    if isinstance(x, np.ndarray): return float(x.item())
-    return float(x)
 
 
-class OptunaSampler(
+class OptunaSampler(WrapperBase):
     """Optimize your next SOTA model using hyperparameter optimization.
 
     Note - optuna is surprisingly scalable to large number of parameters (up to 10,000), despite literally requiring a for-loop because it only supports scalars. Default TPESampler is good for BBO. Maybe not for NNs...
@@ -38,7 +31,7 @@ class OptunaSampler(Optimizer):
         silence: bool = True,
     ):
         if silence: silence_optuna()
-        super().__init__(params, lb=lb, ub=ub)
+        super().__init__(params, dict(lb=lb, ub=ub))
 
         if isinstance(sampler, type): sampler = sampler()
         self.sampler = sampler
@@ -47,7 +40,7 @@ class OptunaSampler(Optimizer):
     @torch.no_grad
     def step(self, closure):
 
-        params = self.
+        params = TensorList(self._get_params())
         if self.study is None:
             self.study = optuna.create_study(sampler=self.sampler)
 
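A hypothetical way to drive the refactored OptunaSampler (not from the package; it assumes the lb/ub arguments shown in the constructor and a value-only closure, since the sampler consumes losses rather than gradients):

import torch
from torchzero.optim.wrappers.optuna import OptunaSampler

x = torch.nn.Parameter(torch.randn(8))
opt = OptunaSampler([x], lb=-5.0, ub=5.0)

def closure(backward=True):
    # no backward pass needed for a sampler-based optimizer
    return (x ** 2).sum()

for _ in range(200):
    opt.step(closure)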
torchzero/optim/wrappers/pybobyqa.py (new file)

@@ -0,0 +1,124 @@
+from collections.abc import Callable
+from functools import partial
+from typing import Any, Literal
+
+import numpy as np
+import torch
+import pybobyqa
+
+from ...utils import TensorList
+from .wrapper import WrapperBase
+
+Closure = Callable[[bool], Any]
+
+
+class PyBobyqaWrapper(WrapperBase):
+    """Use Py-BOBYQA is PyTorch optimizer.
+
+    Note that this performs full minimization on each step,
+    so usually you would want to perform a single step, although performing multiple steps will refine the
+    solution.
+
+    See https://numericalalgorithmsgroup.github.io/pybobyqa/build/html/userguide.html for detailed descriptions of arguments.
+
+    Args:
+        params (Iterable): iterable of parameters to optimize or dicts defining parameter groups.
+        lb (float | None, optional): optional lower bounds. Defaults to None.
+        ub (float | None, optional): optional upper bounds. Defaults to None.
+        projections (list[Callable] | None, optional):
+            a list of functions defining the Euclidean projections for each general convex constraint C_i.
+            Each element of the list projections is a function that takes an input vector x (numpy array)
+            and returns the closest point to that is in C_i. Defaults to None.
+        npt (int | None, optional): the number of interpolation points to use. Defaults to None.
+        rhobeg (float | None, optional):
+            the initial value of the trust region radius. Defaults to None.
+        rhoend (float | None, optional):
+            minimum allowed value of trust region radius, which determines when a successful
+            termination occurs. Defaults to 1e-8.
+        maxfun (int | None, optional):
+            the maximum number of objective evaluations the algorithm may request,
+            default is min(100(n+1), 1000). Defaults to None.
+        nsamples (Callable | None, optional):
+            a Python function nsamples(delta, rho, iter, nrestarts)
+            which returns the number of times to evaluate objfun at a given point.
+            This is only applicable for objectives with stochastic noise,
+            when averaging multiple evaluations at the same point produces a more accurate value.
+            The input parameters are the trust region radius (delta),
+            the lower bound on the trust region radius (rho),
+            how many iterations the algorithm has been running for (iter),
+            and how many restarts have been performed (nrestarts).
+            Default is no averaging (i.e. nsamples(delta, rho, iter, nrestarts)=1).
+            Defaults to None.
+        user_params (dict | None, optional):
+            dictionary of advanced parameters,
+            see https://numericalalgorithmsgroup.github.io/pybobyqa/build/html/advanced.html).
+            Defaults to None.
+        objfun_has_noise (bool, optional):
+            a flag to indicate whether or not objfun has stochastic noise;
+            i.e. will calling objfun(x) multiple times at the same value of x give different results?
+            This is used to set some sensible default parameters (including using multiple restarts),
+            all of which can be overridden by the values provided in user_params. Defaults to False.
+        seek_global_minimum (bool, optional):
+            a flag to indicate whether to search for a global minimum, rather than a local minimum.
+            This is used to set some sensible default parameters,
+            all of which can be overridden by the values provided in user_params.
+            If True, both upper and lower bounds must be set.
+            Note that Py-BOBYQA only implements a heuristic method,
+            so there are no guarantees it will find a global minimum.
+            However, by using this flag, it is more likely to escape local minima
+            if there are better values nearby. The method used is a multiple restart mechanism,
+            where we repeatedly re-initialize Py-BOBYQA from the best point found so far,
+            but where we use a larger trust reigon radius each time
+            (note: this is different to more common multi-start approach to global optimization).
+            Defaults to False.
+        scaling_within_bounds (bool, optional):
+            a flag to indicate whether the algorithm should internally shift and scale the entries of x
+            so that the bounds become 0 <= x <= 1. This is useful is you are setting bounds and the
+            bounds have different orders of magnitude. If scaling_within_bounds=True,
+            the values of rhobeg and rhoend apply to the shifted variables. Defaults to False.
+        do_logging (bool, optional):
+            a flag to indicate whether logging output should be produced.
+            This is not automatically visible unless you use the Python logging module. Defaults to True.
+        print_progress (bool, optional):
+            a flag to indicate whether to print a per-iteration progress log to terminal. Defaults to False.
+    """
+    def __init__(
+        self,
+        params,
+        lb: float | None = None,
+        ub: float | None = None,
+        projections = None,
+        npt: int | None = None,
+        rhobeg: float | None = None,
+        rhoend: float = 1e-8,
+        maxfun: int | None = None,
+        nsamples: Callable | None | None = None,
+        user_params: dict[str, Any] | None = None,
+        objfun_has_noise: bool = False,
+        seek_global_minimum: bool = False,
+        scaling_within_bounds: bool = False,
+        do_logging: bool = True,
+        print_progress: bool = False,
+    ):
+        super().__init__(params, dict(lb=lb, ub=ub))
+        kwargs = locals().copy()
+        for k in ["self", "__class__", "params", "lb", "ub"]:
+            del kwargs[k]
+        self._kwargs = kwargs
+
+    @torch.no_grad
+    def step(self, closure: Closure):
+        params = TensorList(self._get_params())
+        x0 = params.to_vec().numpy(force=True)
+        bounds = self._get_bounds()
+
+        soln: pybobyqa.solver.OptimResults = pybobyqa.solve(
+            objfun=partial(self._f, closure=closure, params=params),
+            x0=x0,
+            bounds=bounds,
+            **self._kwargs
+        )
+
+        params.from_vec_(torch.as_tensor(soln.x, device = params[0].device, dtype=params[0].dtype,))
+        return soln.f
+
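A minimal usage sketch for the new PyBobyqaWrapper (illustrative; it assumes only the constructor and step(closure) interface shown above, with a derivative-free closure because Py-BOBYQA needs function values only):

import torch
from torchzero.optim.wrappers.pybobyqa import PyBobyqaWrapper

x = torch.nn.Parameter(torch.randn(16))
# seek_global_minimum requires both bounds to be set, per the docstring above
opt = PyBobyqaWrapper([x], lb=-5.0, ub=5.0, maxfun=2000, seek_global_minimum=True)

def closure(backward=True):
    return ((x - 3.0) ** 2).sum()

best = opt.step(closure)  # a single step runs the full Py-BOBYQA solve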
torchzero/optim/wrappers/scipy/__init__.py (new file)

@@ -0,0 +1,7 @@
+from .basin_hopping import ScipyBasinHopping
+from .brute import ScipyBrute
+from .differential_evolution import ScipyDE
+from .direct import ScipyDIRECT
+from .dual_annealing import ScipyDualAnnealing
+from .minimize import ScipyMinimize
+from .sgho import ScipySHGO
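This subpackage replaces the former single-module torchzero/optim/wrappers/scipy.py (removed in this release, see the file list above), so the scipy-backed wrappers are now imported from the package path, for example:

from torchzero.optim.wrappers.scipy import ScipyDE, ScipyMinimize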
torchzero/optim/wrappers/scipy/basin_hopping.py (new file)

@@ -0,0 +1,117 @@
+from collections.abc import Callable
+from functools import partial
+from typing import Any, Literal
+
+import numpy as np
+import scipy.optimize
+import torch
+
+from ....utils import TensorList
+from ..wrapper import WrapperBase
+from .minimize import _use_jac_hess_hessp
+
+Closure = Callable[[bool], Any]
+
+
+class ScipyBasinHopping(WrapperBase):
+    def __init__(
+        self,
+        params,
+        niter: int = 100,
+        T: float = 1,
+        stepsize: float = 0.5,
+        minimizer_kwargs: dict | None = None,
+        take_step: Callable | None = None,
+        accept_test: Callable | None = None,
+        callback: Callable | None = None,
+        interval: int = 50,
+        disp: bool = False,
+        niter_success: int | None = None,
+        rng: int | np.random.Generator | None = None,
+        lb:float | None = None,
+        ub:float | None = None,
+        method: Literal['nelder-mead', 'powell', 'cg', 'bfgs', 'newton-cg',
+                        'l-bfgs-b', 'tnc', 'cobyla', 'cobyqa', 'slsqp',
+                        'trust-constr', 'dogleg', 'trust-ncg', 'trust-exact',
+                        'trust-krylov'] | str | None = None,
+        jac: Literal['2-point', '3-point', 'cs', 'autograd'] = 'autograd',
+        hess: Literal['2-point', '3-point', 'cs', 'autograd'] | scipy.optimize.HessianUpdateStrategy = 'autograd',
+        use_hessp: bool = True,
+
+        *,
+        target_accept_rate: float = 0.5,
+        stepwise_factor: float = 0.9
+    ):
+        super().__init__(params, dict(lb=lb, ub=ub))
+
+        kwargs = locals().copy()
+        del kwargs['self'], kwargs['params'], kwargs['__class__'], kwargs["minimizer_kwargs"]
+        del kwargs['method'], kwargs["jac"], kwargs['hess'], kwargs['use_hessp']
+        del kwargs["lb"], kwargs["ub"]
+        self._kwargs = kwargs
+
+        self._minimizer_kwargs = minimizer_kwargs
+        self.method = method
+        self.hess = hess
+        self.jac, self.use_jac_autograd, self.use_hess_autograd, self.use_hessp = _use_jac_hess_hessp(method, jac, hess, use_hessp)
+
+    def _jac(self, x: np.ndarray, params: list[torch.Tensor], closure):
+        f,g = self._f_g(x, params, closure)
+        return g
+
+    def _objective(self, x: np.ndarray, params: list[torch.Tensor], closure):
+        if self.use_jac_autograd:
+            f, g = self._f_g(x, params, closure)
+            if self.method is not None and self.method.lower() == 'slsqp': g = g.astype(np.float64) # slsqp requires float64
+            return f, g
+
+        return self._f(x, params, closure)
+
+    def _hess(self, x: np.ndarray, params: list[torch.Tensor], closure):
+        f,g,H = self._f_g_H(x, params, closure)
+        return H
+
+    def _hessp(self, x: np.ndarray, p:np.ndarray, params: list[torch.Tensor], closure):
+        f,g,Hvp = self._f_g_Hvp(x, p, params, closure)
+        return Hvp
+
+    @torch.no_grad
+    def step(self, closure: Closure):
+        params = TensorList(self._get_params())
+        x0 = params.to_vec().numpy(force=True)
+        bounds = self._get_bounds()
+
+        # determine hess argument
+        hess = self.hess
+        hessp = None
+        if hess == 'autograd':
+            if self.use_hess_autograd:
+                if self.use_hessp:
+                    hessp = partial(self._hessp, params=params, closure=closure)
+                    hess = None
+                else:
+                    hess = partial(self._hess, params=params, closure=closure)
+            # hess = 'autograd' but method doesn't use hess
+            else:
+                hess = None
+
+
+        if self.method is not None and (self.method.lower() == 'tnc' or self.method.lower() == 'slsqp'):
+            x0 = x0.astype(np.float64) # those methods error without this
+
+        minimizer_kwargs = self._minimizer_kwargs.copy() if self._minimizer_kwargs is not None else {}
+        minimizer_kwargs.setdefault("method", self.method)
+        minimizer_kwargs.setdefault("jac", self.jac)
+        minimizer_kwargs.setdefault("hess", hess)
+        minimizer_kwargs.setdefault("hessp", hessp)
+        minimizer_kwargs.setdefault("bounds", bounds)
+
+        res = scipy.optimize.basinhopping(
+            partial(self._objective, params = params, closure = closure),
+            x0 = params.to_vec().numpy(force=True),
+            minimizer_kwargs=minimizer_kwargs,
+            **self._kwargs
+        )
+
+        params.from_vec_(torch.as_tensor(res.x, device = params[0].device, dtype=params[0].dtype))
+        return res.fun
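A usage sketch for ScipyBasinHopping (illustrative; it keeps the jac='autograd' default shown above, so the closure is expected to populate gradients when asked, as in the other gradient-capable wrappers):

import torch
from torchzero.optim.wrappers.scipy import ScipyBasinHopping

x = torch.nn.Parameter(torch.randn(8))
opt = ScipyBasinHopping([x], niter=50, method='l-bfgs-b')

def closure(backward=True):
    loss = (x ** 2 - torch.sin(5 * x)).sum()
    if backward:
        x.grad = None
        loss.backward()
    return loss

loss = opt.step(closure)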
torchzero/optim/wrappers/scipy/brute.py (new file)

@@ -0,0 +1,48 @@
+from collections.abc import Callable
+from functools import partial
+from typing import Any, Literal
+
+import numpy as np
+import scipy.optimize
+import torch
+
+from ....utils import TensorList
+from ..wrapper import WrapperBase
+
+Closure = Callable[[bool], Any]
+
+
+
+class ScipyBrute(WrapperBase):
+    def __init__(
+        self,
+        params,
+        lb: float,
+        ub: float,
+        Ns: int = 20,
+        finish = scipy.optimize.fmin,
+        disp: bool = False,
+        workers: int = 1
+    ):
+        super().__init__(params, dict(lb=lb, ub=ub))
+
+        kwargs = locals().copy()
+        del kwargs['self'], kwargs['params'], kwargs['lb'], kwargs['ub'], kwargs['__class__']
+        self._kwargs = kwargs
+
+    @torch.no_grad
+    def step(self, closure: Closure):
+        params = TensorList(self._get_params())
+        bounds = self._get_bounds()
+        assert bounds is not None
+
+        res,fval,grid,Jout = scipy.optimize.brute(
+            partial(self._f, params = params, closure = closure),
+            ranges=bounds,
+            full_output=True,
+            **self._kwargs
+        )
+
+        params.from_vec_(torch.as_tensor(res, device = params[0].device, dtype=params[0].dtype))
+
+        return fval
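A sketch for ScipyBrute (illustrative; since scipy.optimize.brute evaluates a grid of Ns points per dimension, it is only practical for a very small number of parameters):

import torch
from torchzero.optim.wrappers.scipy import ScipyBrute

x = torch.nn.Parameter(torch.zeros(2))
opt = ScipyBrute([x], lb=-3.0, ub=3.0, Ns=25)

def closure(backward=True):
    a, b = x
    return (a - 1.0) ** 2 + (b + 2.0) ** 2

fval = opt.step(closure)  # grid search over [-3, 3]^2, refined by the default scipy.optimize.fmin finisher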
torchzero/optim/wrappers/scipy/differential_evolution.py (new file)

@@ -0,0 +1,80 @@
+from collections.abc import Callable
+from functools import partial
+from typing import Any, Literal
+
+import numpy as np
+import scipy.optimize
+import torch
+
+from ....utils import TensorList
+from ..wrapper import WrapperBase
+
+Closure = Callable[[bool], Any]
+
+
+
+
+
+class ScipyDE(WrapperBase):
+    """Use scipy.minimize.differential_evolution as pytorch optimizer. Note that this performs full minimization on each step,
+    so usually you would want to perform a single step. This also requires bounds to be specified.
+
+    Please refer to https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.differential_evolution.html
+    for all other args.
+
+    Args:
+        params: iterable of parameters to optimize or dicts defining parameter groups.
+        bounds (tuple[float,float], optional): tuple with lower and upper bounds.
+            DE requires bounds to be specified. Defaults to None.
+
+    other args:
+        refer to https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.differential_evolution.html
+    """
+    def __init__(
+        self,
+        params,
+        lb: float,
+        ub: float,
+        strategy: Literal['best1bin', 'best1exp', 'rand1bin', 'rand1exp', 'rand2bin', 'rand2exp',
+                          'randtobest1bin', 'randtobest1exp', 'currenttobest1bin', 'currenttobest1exp',
+                          'best2exp', 'best2bin'] = 'best1bin',
+        maxiter: int = 1000,
+        popsize: int = 15,
+        tol: float = 0.01,
+        mutation = (0.5, 1),
+        recombination: float = 0.7,
+        seed = None,
+        callback = None,
+        disp: bool = False,
+        polish: bool = True,
+        init: str = 'latinhypercube',
+        atol: int = 0,
+        updating: str = 'immediate',
+        workers: int = 1,
+        constraints = (),
+        *,
+        integrality = None,
+
+    ):
+        super().__init__(params, dict(lb=lb, ub=ub))
+
+        kwargs = locals().copy()
+        del kwargs['self'], kwargs['params'], kwargs['lb'], kwargs['ub'], kwargs['__class__']
+        self._kwargs = kwargs
+
+    @torch.no_grad
+    def step(self, closure: Closure): # pylint:disable = signature-differs # pyright:ignore[reportIncompatibleMethodOverride]
+        params = TensorList(self._get_params())
+        x0 = params.to_vec().numpy(force=True)
+        bounds = self._get_bounds()
+        assert bounds is not None
+
+        res = scipy.optimize.differential_evolution(
+            partial(self._f, params = params, closure = closure),
+            x0 = x0,
+            bounds=bounds,
+            **self._kwargs
+        )
+
+        params.from_vec_(torch.as_tensor(res.x, device = params[0].device, dtype=params[0].dtype))
+        return res.fun
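A usage sketch for ScipyDE (illustrative; as the docstring notes, bounds are required and one step runs the whole differential evolution search):

import torch
from torchzero.optim.wrappers.scipy import ScipyDE

x = torch.nn.Parameter(torch.randn(10))
opt = ScipyDE([x], lb=-5.0, ub=5.0, maxiter=300, popsize=20)

def closure(backward=True):
    return (x ** 2).sum() - 4 * x.abs().sum()

fun = opt.step(closure)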
torchzero/optim/wrappers/scipy/direct.py (new file)

@@ -0,0 +1,69 @@
+from collections.abc import Callable
+from functools import partial
+from typing import Any, Literal
+
+import numpy as np
+import scipy.optimize
+import torch
+
+from ....utils import TensorList
+from ..wrapper import WrapperBase
+
+Closure = Callable[[bool], Any]
+
+
+
+
+class ScipyDIRECT(WrapperBase):
+    def __init__(
+        self,
+        params,
+        lb: float,
+        ub: float,
+        maxfun: int | None = 1000,
+        maxiter: int = 1000,
+        eps: float = 0.0001,
+        locally_biased: bool = True,
+        f_min: float = -np.inf,
+        f_min_rtol: float = 0.0001,
+        vol_tol: float = 1e-16,
+        len_tol: float = 0.000001,
+        callback = None,
+    ):
+        super().__init__(params, dict(lb=lb, ub=ub))
+
+        kwargs = locals().copy()
+        del kwargs['self'], kwargs['params'], kwargs['lb'], kwargs['ub'], kwargs['__class__']
+        self._kwargs = kwargs
+
+    def _objective(self, x: np.ndarray, params: TensorList, closure) -> float:
+        if self.raised: return np.inf
+        try:
+            return self._f(x, params, closure)
+
+        except Exception as e:
+            # this makes exceptions work in fcmaes and scipy direct
+            self.e = e
+            self.raised = True
+            return np.inf
+
+    @torch.no_grad
+    def step(self, closure: Closure):
+        self.raised = False
+        self.e = None
+
+        params = TensorList(self._get_params())
+        bounds = self._get_bounds()
+        assert bounds is not None
+
+        res = scipy.optimize.direct(
+            partial(self._objective, params=params, closure=closure),
+            bounds=bounds,
+            **self._kwargs
+        )
+
+        params.from_vec_(torch.as_tensor(res.x, device = params[0].device, dtype=params[0].dtype))
+
+        if self.e is not None: raise self.e from None
+        return res.fun
+
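Finally, a sketch for ScipyDIRECT (illustrative; bounds are required, and as _objective above shows, exceptions raised inside the closure are recorded during the scipy.optimize.direct run and re-raised after it returns):

import torch
from torchzero.optim.wrappers.scipy import ScipyDIRECT

x = torch.nn.Parameter(torch.zeros(4))
opt = ScipyDIRECT([x], lb=-10.0, ub=10.0, maxfun=5000)

def closure(backward=True):
    target = torch.tensor([1.0, -2.0, 3.0, 0.5])
    return ((x - target) ** 2).sum()

fun = opt.step(closure)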