torchzero 0.3.8__py3-none-any.whl → 0.3.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108)
  1. tests/test_opts.py +55 -22
  2. tests/test_tensorlist.py +3 -3
  3. tests/test_vars.py +61 -61
  4. torchzero/core/__init__.py +2 -3
  5. torchzero/core/module.py +49 -49
  6. torchzero/core/transform.py +219 -158
  7. torchzero/modules/__init__.py +1 -0
  8. torchzero/modules/clipping/clipping.py +10 -10
  9. torchzero/modules/clipping/ema_clipping.py +14 -13
  10. torchzero/modules/clipping/growth_clipping.py +16 -18
  11. torchzero/modules/experimental/__init__.py +12 -3
  12. torchzero/modules/experimental/absoap.py +50 -156
  13. torchzero/modules/experimental/adadam.py +15 -14
  14. torchzero/modules/experimental/adamY.py +17 -27
  15. torchzero/modules/experimental/adasoap.py +20 -130
  16. torchzero/modules/experimental/curveball.py +12 -12
  17. torchzero/modules/experimental/diagonal_higher_order_newton.py +225 -0
  18. torchzero/modules/experimental/eigendescent.py +117 -0
  19. torchzero/modules/experimental/etf.py +172 -0
  20. torchzero/modules/experimental/gradmin.py +2 -2
  21. torchzero/modules/experimental/newton_solver.py +11 -11
  22. torchzero/modules/experimental/newtonnewton.py +88 -0
  23. torchzero/modules/experimental/reduce_outward_lr.py +8 -5
  24. torchzero/modules/experimental/soapy.py +19 -146
  25. torchzero/modules/experimental/spectral.py +79 -204
  26. torchzero/modules/experimental/structured_newton.py +111 -0
  27. torchzero/modules/experimental/subspace_preconditioners.py +13 -10
  28. torchzero/modules/experimental/tada.py +38 -0
  29. torchzero/modules/grad_approximation/fdm.py +2 -2
  30. torchzero/modules/grad_approximation/forward_gradient.py +5 -5
  31. torchzero/modules/grad_approximation/grad_approximator.py +21 -21
  32. torchzero/modules/grad_approximation/rfdm.py +28 -15
  33. torchzero/modules/higher_order/__init__.py +1 -0
  34. torchzero/modules/higher_order/higher_order_newton.py +256 -0
  35. torchzero/modules/line_search/backtracking.py +42 -23
  36. torchzero/modules/line_search/line_search.py +40 -40
  37. torchzero/modules/line_search/scipy.py +18 -3
  38. torchzero/modules/line_search/strong_wolfe.py +21 -32
  39. torchzero/modules/line_search/trust_region.py +18 -6
  40. torchzero/modules/lr/__init__.py +1 -1
  41. torchzero/modules/lr/{step_size.py → adaptive.py} +22 -26
  42. torchzero/modules/lr/lr.py +20 -16
  43. torchzero/modules/momentum/averaging.py +25 -10
  44. torchzero/modules/momentum/cautious.py +73 -35
  45. torchzero/modules/momentum/ema.py +92 -41
  46. torchzero/modules/momentum/experimental.py +21 -13
  47. torchzero/modules/momentum/matrix_momentum.py +96 -54
  48. torchzero/modules/momentum/momentum.py +24 -4
  49. torchzero/modules/ops/accumulate.py +51 -21
  50. torchzero/modules/ops/binary.py +36 -36
  51. torchzero/modules/ops/debug.py +7 -7
  52. torchzero/modules/ops/misc.py +128 -129
  53. torchzero/modules/ops/multi.py +19 -19
  54. torchzero/modules/ops/reduce.py +16 -16
  55. torchzero/modules/ops/split.py +26 -26
  56. torchzero/modules/ops/switch.py +4 -4
  57. torchzero/modules/ops/unary.py +20 -20
  58. torchzero/modules/ops/utility.py +37 -37
  59. torchzero/modules/optimizers/adagrad.py +33 -24
  60. torchzero/modules/optimizers/adam.py +31 -34
  61. torchzero/modules/optimizers/lion.py +4 -4
  62. torchzero/modules/optimizers/muon.py +6 -6
  63. torchzero/modules/optimizers/orthograd.py +4 -5
  64. torchzero/modules/optimizers/rmsprop.py +13 -16
  65. torchzero/modules/optimizers/rprop.py +52 -49
  66. torchzero/modules/optimizers/shampoo.py +17 -23
  67. torchzero/modules/optimizers/soap.py +12 -19
  68. torchzero/modules/optimizers/sophia_h.py +13 -13
  69. torchzero/modules/projections/dct.py +4 -4
  70. torchzero/modules/projections/fft.py +6 -6
  71. torchzero/modules/projections/galore.py +1 -1
  72. torchzero/modules/projections/projection.py +57 -57
  73. torchzero/modules/projections/structural.py +17 -17
  74. torchzero/modules/quasi_newton/__init__.py +33 -4
  75. torchzero/modules/quasi_newton/cg.py +76 -26
  76. torchzero/modules/quasi_newton/experimental/modular_lbfgs.py +24 -24
  77. torchzero/modules/quasi_newton/lbfgs.py +15 -15
  78. torchzero/modules/quasi_newton/lsr1.py +18 -17
  79. torchzero/modules/quasi_newton/olbfgs.py +19 -19
  80. torchzero/modules/quasi_newton/quasi_newton.py +257 -48
  81. torchzero/modules/second_order/newton.py +38 -21
  82. torchzero/modules/second_order/newton_cg.py +13 -12
  83. torchzero/modules/second_order/nystrom.py +19 -19
  84. torchzero/modules/smoothing/gaussian.py +21 -21
  85. torchzero/modules/smoothing/laplacian.py +7 -9
  86. torchzero/modules/weight_decay/__init__.py +1 -1
  87. torchzero/modules/weight_decay/weight_decay.py +43 -9
  88. torchzero/modules/wrappers/optim_wrapper.py +11 -11
  89. torchzero/optim/wrappers/directsearch.py +244 -0
  90. torchzero/optim/wrappers/fcmaes.py +97 -0
  91. torchzero/optim/wrappers/mads.py +90 -0
  92. torchzero/optim/wrappers/nevergrad.py +4 -4
  93. torchzero/optim/wrappers/nlopt.py +28 -14
  94. torchzero/optim/wrappers/optuna.py +70 -0
  95. torchzero/optim/wrappers/scipy.py +162 -13
  96. torchzero/utils/__init__.py +2 -6
  97. torchzero/utils/derivatives.py +2 -1
  98. torchzero/utils/optimizer.py +55 -74
  99. torchzero/utils/python_tools.py +17 -4
  100. {torchzero-0.3.8.dist-info → torchzero-0.3.10.dist-info}/METADATA +14 -14
  101. torchzero-0.3.10.dist-info/RECORD +139 -0
  102. {torchzero-0.3.8.dist-info → torchzero-0.3.10.dist-info}/WHEEL +1 -1
  103. torchzero/core/preconditioner.py +0 -138
  104. torchzero/modules/experimental/algebraic_newton.py +0 -145
  105. torchzero/modules/experimental/tropical_newton.py +0 -136
  106. torchzero-0.3.8.dist-info/RECORD +0 -130
  107. {torchzero-0.3.8.dist-info → torchzero-0.3.10.dist-info}/licenses/LICENSE +0 -0
  108. {torchzero-0.3.8.dist-info → torchzero-0.3.10.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,97 @@
+ from collections.abc import Callable
+ from functools import partial
+ from typing import Any, Literal
+
+ import fcmaes
+ import fcmaes.optimizer
+ import fcmaes.retry
+ import numpy as np
+ import torch
+
+ from ...utils import Optimizer, TensorList
+
+ Closure = Callable[[bool], Any]
+
+
+ def _ensure_float(x) -> float:
+     if isinstance(x, torch.Tensor): return x.detach().cpu().item()
+     if isinstance(x, np.ndarray): return float(x.item())
+     return float(x)
+
+ def silence_fcmaes():
+     fcmaes.retry.logger.disable('fcmaes')
+
+ class FcmaesWrapper(Optimizer):
+     """Use fcmaes as pytorch optimizer. Particularly fcmaes has BITEOPT which appears to win in many benchmarks.
+
+     Note that this performs full minimization on each step, so only perform one step with this.
+
+     Args:
+         params (_type_): _description_
+         lb (float): _description_
+         ub (float): _description_
+         optimizer (fcmaes.optimizer.Optimizer | None, optional): _description_. Defaults to None.
+         max_evaluations (int | None, optional): _description_. Defaults to 50000.
+         value_limit (float | None, optional): _description_. Defaults to np.inf.
+         num_retries (int | None, optional): _description_. Defaults to 1.
+         workers (int, optional): _description_. Defaults to 1.
+         popsize (int | None, optional): _description_. Defaults to 31.
+         capacity (int | None, optional): _description_. Defaults to 500.
+         stop_fitness (float | None, optional): _description_. Defaults to -np.inf.
+         statistic_num (int | None, optional): _description_. Defaults to 0.
+     """
+     def __init__(
+         self,
+         params,
+         lb: float,
+         ub: float,
+         optimizer: fcmaes.optimizer.Optimizer | None = None,
+         max_evaluations: int | None = 50000,
+         value_limit: float | None = np.inf,
+         num_retries: int | None = 1,
+         workers: int = 1,
+         popsize: int | None = 31,
+         capacity: int | None = 500,
+         stop_fitness: float | None = -np.inf,
+         statistic_num: int | None = 0
+     ):
+         super().__init__(params, lb=lb, ub=ub)
+         silence_fcmaes()
+         kwargs = locals().copy()
+         del kwargs['self'], kwargs['params'], kwargs['lb'], kwargs['ub'], kwargs['__class__']
+         self._kwargs = kwargs
+
+     def _objective(self, x: np.ndarray, params: TensorList, closure) -> float:
+         if self.raised: return np.inf
+         try:
+             params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+             return _ensure_float(closure(False))
+         except Exception as e:
+             # ha ha, I found a way to make exceptions work in fcmaes and scipy direct
+             self.e = e
+             self.raised = True
+             return np.inf
+
+     @torch.no_grad
+     def step(self, closure: Closure):
+         self.raised = False
+         self.e = None
+
+         params = self.get_params()
+
+         lb, ub = self.group_vals('lb', 'ub', cls=list)
+         bounds = []
+         for p, l, u in zip(params, lb, ub):
+             bounds.extend([[l, u]] * p.numel())
+
+         res = fcmaes.retry.minimize(
+             partial(self._objective, params=params, closure=closure), # pyright:ignore[reportArgumentType]
+             bounds=bounds, # pyright:ignore[reportArgumentType]
+             **self._kwargs
+         )
+
+         params.from_vec_(torch.from_numpy(res.x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+
+         if self.e is not None: raise self.e from None
+         return res.fun
+
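For reference, a minimal usage sketch of the new wrapper. This is an assumption-laden example, not part of the diff: it assumes `FcmaesWrapper` is importable from `torchzero.optim.wrappers.fcmaes` (the file path above), that plain tensors are accepted as params, and it follows the closure convention of the code above, where `closure(False)` simply returns the loss without a backward pass.

import torch
from torchzero.optim.wrappers.fcmaes import FcmaesWrapper  # import path assumed from this diff

x = torch.zeros(10)                      # parameters being optimized

def closure(backward: bool = True):
    # gradient-free wrapper: it always calls closure(False), so no backward pass is needed
    return ((x - 3.0) ** 2).sum()

opt = FcmaesWrapper([x], lb=-10.0, ub=10.0, max_evaluations=20_000)
loss = opt.step(closure)                 # one step runs a full fcmaes minimization
print(loss)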
@@ -0,0 +1,90 @@
+ from collections.abc import Callable
+ from functools import partial
+ from typing import Any, Literal
+
+ import numpy as np
+ import torch
+ from mads.mads import orthomads
+
+ from ...utils import Optimizer, TensorList
+
+
+ def _ensure_float(x):
+     if isinstance(x, torch.Tensor): return x.detach().cpu().item()
+     if isinstance(x, np.ndarray): return x.item()
+     return float(x)
+
+ def _ensure_numpy(x):
+     if isinstance(x, torch.Tensor): return x.detach().cpu()
+     if isinstance(x, np.ndarray): return x
+     return np.array(x)
+
+
+ Closure = Callable[[bool], Any]
+
+
+ class MADS(Optimizer):
+     """Use mads.orthomads as pytorch optimizer.
+
+     Note that this performs full minimization on each step,
+     so usually you would want to perform a single step, although performing multiple steps will refine the
+     solution.
+
+     Args:
+         params (params): params
+         lb (float): lower bounds
+         ub (float): upper bounds
+         dp (float, optional): Initial poll size as percent of bounds. Defaults to 0.1.
+         dm (float, optional): Initial mesh size as percent of bounds. Defaults to 0.01.
+         dp_tol (_type_, optional): Minimum poll size stopping criteria. Defaults to -float('inf').
+         nitermax (_type_, optional): Maximum objective function evaluations. Defaults to float('inf').
+         displog (bool, optional): whether to show log. Defaults to False.
+         savelog (bool, optional): whether to save log. Defaults to False.
+
+     """
+     def __init__(
+         self,
+         params,
+         lb: float,
+         ub: float,
+         dp = 0.1,
+         dm = 0.01,
+         dp_tol = -float('inf'),
+         nitermax = float('inf'),
+         displog = False,
+         savelog = False,
+     ):
+         super().__init__(params, lb=lb, ub=ub)
+
+         kwargs = locals().copy()
+         del kwargs['self'], kwargs['params'], kwargs['lb'], kwargs['ub'], kwargs['__class__']
+         self._kwargs = kwargs
+
+     def _objective(self, x: np.ndarray, params: TensorList, closure):
+         params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+         return _ensure_float(closure(False))
+
+     @torch.no_grad
+     def step(self, closure: Closure):
+         params = self.get_params()
+
+         x0 = params.to_vec().detach().cpu().numpy()
+
+         lb, ub = self.group_vals('lb', 'ub', cls=list)
+         bounds_lower = []
+         bounds_upper = []
+         for p, l, u in zip(params, lb, ub):
+             bounds_lower.extend([l] * p.numel())
+             bounds_upper.extend([u] * p.numel())
+
+         f, x = orthomads(
+             design_variables=x0,
+             bounds_upper=np.asarray(bounds_upper),
+             bounds_lower=np.asarray(bounds_lower),
+             objective_function=partial(self._objective, params = params, closure = closure),
+             **self._kwargs
+         )
+
+         params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+         return f
+
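Because `lb` and `ub` are now stored per parameter group and read back with `self.group_vals('lb', 'ub', ...)`, different groups can get different box bounds. A hedged sketch, assuming the usual torch-style param-group dicts and that group-level keys override the defaults passed to `__init__` (not shown in this diff):

import torch
from torchzero.optim.wrappers.mads import MADS  # import path assumed from this diff

w = torch.zeros(5)
b = torch.zeros(1)

opt = MADS(
    [{'params': [w], 'lb': -1.0, 'ub': 1.0},    # tight box for the weights
     {'params': [b]}],                           # falls back to the defaults below
    lb=-10.0, ub=10.0,
)

def closure(backward: bool = True):
    return (w.sum() - b).abs()

loss = opt.step(closure)                         # one step = one full orthomads run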
@@ -9,12 +9,12 @@ import nevergrad as ng
  from ...utils import Optimizer

 
- def _ensure_float(x):
+ def _ensure_float(x) -> float:
      if isinstance(x, torch.Tensor): return x.detach().cpu().item()
-     if isinstance(x, np.ndarray): return x.item()
+     if isinstance(x, np.ndarray): return float(x.item())
      return float(x)

- class NevergradOptimizer(Optimizer):
+ class NevergradWrapper(Optimizer):
      """Use nevergrad optimizer as pytorch optimizer.
      Note that it is recommended to specify `budget` to the number of iterations you expect to run,
      as some nevergrad optimizers will error without it.
@@ -56,7 +56,7 @@ class NevergradOptimizer(Optimizer):
          self.budget = budget

      @torch.no_grad
-     def step(self, closure): # type:ignore # pylint:disable=signature-differs
+     def step(self, closure): # pylint:disable=signature-differs # pyright:ignore[reportIncompatibleMethodOverride]
          params = self.get_params()
          if self.opt is None:
              ng_params = []
@@ -69,7 +69,7 @@ def _ensure_tensor(x):
  inf = float('inf')
  Closure = Callable[[bool], Any]

- class NLOptOptimizer(Optimizer):
+ class NLOptWrapper(Optimizer):
      """Use nlopt as pytorch optimizer, with gradient supplied by pytorch autograd.
      Note that this performs full minimization on each step,
      so usually you would want to perform a single step, although performing multiple steps will refine the
@@ -96,9 +96,9 @@ class NLOptOptimizer(Optimizer):
          self,
          params,
          algorithm: int | _ALGOS_LITERAL,
-         maxeval: int | None,
          lb: float | None = None,
          ub: float | None = None,
+         maxeval: int | None = 10000, # None can stall on some algos and because they are threaded C you can't even interrupt them
          stopval: float | None = None,
          ftol_rel: float | None = None,
          ftol_abs: float | None = None,
@@ -122,22 +122,33 @@ class NLOptOptimizer(Optimizer):
          self._last_loss = None

      def _f(self, x: np.ndarray, grad: np.ndarray, closure, params: TensorList):
-         t = _ensure_tensor(x)
-         if t is None:
+         if self.raised:
              if self.opt is not None: self.opt.force_stop()
-             return None
-         params.from_vec_(t.to(params[0], copy=False))
-         if grad.size > 0:
-             with torch.enable_grad(): loss = closure()
-             self._last_loss = _ensure_float(loss)
-             grad[:] = params.ensure_grad_().grad.to_vec().reshape(grad.shape).detach().cpu().numpy()
+             return np.inf
+         try:
+             t = _ensure_tensor(x)
+             if t is None:
+                 if self.opt is not None: self.opt.force_stop()
+                 return None
+             params.from_vec_(t.to(params[0], copy=False))
+             if grad.size > 0:
+                 with torch.enable_grad(): loss = closure()
+                 self._last_loss = _ensure_float(loss)
+                 grad[:] = params.ensure_grad_().grad.to_vec().reshape(grad.shape).detach().cpu().numpy()
+                 return self._last_loss
+
+             self._last_loss = _ensure_float(closure(False))
              return self._last_loss
-
-         self._last_loss = _ensure_float(closure(False))
-         return self._last_loss
+         except Exception as e:
+             self.e = e
+             self.raised = True
+             if self.opt is not None: self.opt.force_stop()
+             return np.inf

      @torch.no_grad
      def step(self, closure: Closure): # pylint: disable = signature-differs # pyright:ignore[reportIncompatibleMethodOverride]
+         self.e = None
+         self.raised = False
          params = self.get_params()

          # make bounds
@@ -175,6 +186,9 @@ class NLOptOptimizer(Optimizer):
          except Exception as e:
              raise e from None

+         if x is not None: params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+         if self.e is not None: raise self.e from None
+
          if self._last_loss is None or x is None: return closure(False)
-         params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+
          return self._last_loss
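The pattern added to `_f` above (and reused in the fcmaes and scipy DIRECT wrappers later in this diff) deserves a note: an exception raised inside a callback driven by C code is swallowed or aborts the process, so the callback records it, returns `np.inf`, asks the optimizer to stop, and the Python-side `step` re-raises afterwards. A stripped-down, library-free sketch of the same idea; the class name here is illustrative, not part of torchzero:

import numpy as np

class ExceptionTrampoline:
    """Record an exception raised inside a foreign-driven callback, re-raise it later."""
    def __init__(self, objective):
        self.objective = objective
        self.e = None
        self.raised = False

    def __call__(self, x):
        if self.raised:               # already failed: keep returning a harmless value
            return np.inf
        try:
            return self.objective(x)
        except Exception as e:        # deliberately broad, like the wrappers above
            self.e = e
            self.raised = True
            return np.inf             # the driver just sees a very bad objective value

    def reraise(self):
        if self.e is not None:
            raise self.e from None

# usage: wrap the objective, run the (foreign) optimization loop, then re-raise
trampoline = ExceptionTrampoline(lambda x: 1.0 / x)   # raises ZeroDivisionError at x == 0
for x in (2.0, 1.0, 0.0, -1.0):                       # stand-in for the C-driven loop
    trampoline(x)
trampoline.reraise()                                   # the ZeroDivisionError surfaces here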
@@ -0,0 +1,70 @@
+ import typing
+ from collections import abc
+
+ import numpy as np
+ import torch
+
+ import optuna
+
+ from ...utils import Optimizer
+
+ def silence_optuna():
+     optuna.logging.set_verbosity(optuna.logging.WARNING)
+
+ def _ensure_float(x) -> float:
+     if isinstance(x, torch.Tensor): return x.detach().cpu().item()
+     if isinstance(x, np.ndarray): return float(x.item())
+     return float(x)
+
+
+ class OptunaSampler(Optimizer):
+     """Optimize your next SOTA model using hyperparameter optimization.
+
+     Note - optuna is surprisingly scalable to large number of parameters (up to 10,000), despite literally requiring a for-loop because it only supports scalars. Default TPESampler is good for BBO. Maybe not for NNs...
+
+     Args:
+         params (_type_): parameters
+         lb (float): lower bounds.
+         ub (float): upper bounds.
+         sampler (optuna.samplers.BaseSampler | type[optuna.samplers.BaseSampler] | None, optional): sampler. Defaults to None.
+         silence (bool, optional): makes optuna not write a lot of very useful information to console. Defaults to True.
+     """
+     def __init__(
+         self,
+         params,
+         lb: float,
+         ub: float,
+         sampler: "optuna.samplers.BaseSampler | type[optuna.samplers.BaseSampler] | None" = None,
+         silence: bool = True,
+     ):
+         if silence: silence_optuna()
+         super().__init__(params, lb=lb, ub=ub)
+
+         if isinstance(sampler, type): sampler = sampler()
+         self.sampler = sampler
+         self.study = None
+
+     @torch.no_grad
+     def step(self, closure):
+
+         params = self.get_params()
+         if self.study is None:
+             self.study = optuna.create_study(sampler=self.sampler)
+
+         # some optuna samplers use torch
+         with torch.enable_grad():
+             trial = self.study.ask()
+
+         suggested = []
+         for gi,g in enumerate(self.param_groups):
+             for pi,p in enumerate(g['params']):
+                 lb, ub = g['lb'], g['ub']
+                 suggested.extend(trial.suggest_float(f'g{gi}_p{pi}_w{i}', lb, ub) for i in range(p.numel()))
+
+         vec = torch.as_tensor(suggested).to(params[0])
+         params.from_vec_(vec)
+
+         loss = closure()
+         with torch.enable_grad(): self.study.tell(trial, loss)
+
+         return loss
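Unlike the other wrappers added in this diff, `OptunaSampler.step` runs a single ask/tell trial rather than a full minimization, so it is meant to be stepped repeatedly. A minimal sketch under the same assumptions as before (import path taken from the file name, plain tensors accepted as params):

import torch
from torchzero.optim.wrappers.optuna import OptunaSampler  # import path assumed from this diff

x = torch.zeros(8)

def closure(backward: bool = True):
    return ((x - 0.5) ** 2).sum()

opt = OptunaSampler([x], lb=-1.0, ub=1.0)   # default TPE sampler, one float suggestion per element
for _ in range(100):                        # each step is one ask/tell trial
    loss = opt.step(closure)
print(loss)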
@@ -11,9 +11,9 @@ from ...utils import Optimizer, TensorList
  from ...utils.derivatives import jacobian_and_hessian_mat_wrt, jacobian_wrt
  from ...modules.second_order.newton import tikhonov_

- def _ensure_float(x):
+ def _ensure_float(x) -> float:
      if isinstance(x, torch.Tensor): return x.detach().cpu().item()
-     if isinstance(x, np.ndarray): return x.item()
+     if isinstance(x, np.ndarray): return float(x.item())
      return float(x)

  def _ensure_numpy(x):
@@ -265,7 +265,8 @@ class ScipyDE(Optimizer):
      def __init__(
          self,
          params,
-         bounds: tuple[float,float],
+         lb: float,
+         ub: float,
          strategy: Literal['best1bin', 'best1exp', 'rand1bin', 'rand1exp', 'rand2bin', 'rand2exp',
                            'randtobest1bin', 'randtobest1exp', 'currenttobest1bin', 'currenttobest1exp',
                            'best2exp', 'best2bin'] = 'best1bin',
@@ -287,12 +288,11 @@ class ScipyDE(Optimizer):
          integrality = None,

      ):
-         super().__init__(params, {})
+         super().__init__(params, lb=lb, ub=ub)

          kwargs = locals().copy()
-         del kwargs['self'], kwargs['params'], kwargs['bounds'], kwargs['__class__']
+         del kwargs['self'], kwargs['params'], kwargs['lb'], kwargs['ub'], kwargs['__class__']
          self._kwargs = kwargs
-         self._lb, self._ub = bounds

      def _objective(self, x: np.ndarray, params: TensorList, closure):
          params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
@@ -303,7 +303,11 @@ class ScipyDE(Optimizer):
          params = self.get_params()

          x0 = params.to_vec().detach().cpu().numpy()
-         bounds = [(self._lb, self._ub)] * len(x0)
+
+         lb, ub = self.group_vals('lb', 'ub', cls=list)
+         bounds = []
+         for p, l, u in zip(params, lb, ub):
+             bounds.extend([(l, u)] * p.numel())

          res = scipy.optimize.differential_evolution(
              partial(self._objective, params = params, closure = closure),
@@ -321,7 +325,8 @@ class ScipyDualAnnealing(Optimizer):
      def __init__(
          self,
          params,
-         bounds: tuple[float, float],
+         lb: float,
+         ub: float,
          maxiter=1000,
          minimizer_kwargs=None,
          initial_temp=5230.0,
@@ -332,23 +337,25 @@ class ScipyDualAnnealing(Optimizer):
          rng=None,
          no_local_search=False,
      ):
-         super().__init__(params, {})
+         super().__init__(params, lb=lb, ub=ub)

          kwargs = locals().copy()
-         del kwargs['self'], kwargs['params'], kwargs['bounds'], kwargs['__class__']
+         del kwargs['self'], kwargs['params'], kwargs['lb'], kwargs['ub'], kwargs['__class__']
          self._kwargs = kwargs
-         self._lb, self._ub = bounds

      def _objective(self, x: np.ndarray, params: TensorList, closure):
          params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
          return _ensure_float(closure(False))

      @torch.no_grad
-     def step(self, closure: Closure):# pylint:disable = signature-differs # pyright:ignore[reportIncompatibleMethodOverride]
+     def step(self, closure: Closure):
          params = self.get_params()

          x0 = params.to_vec().detach().cpu().numpy()
-         bounds = [(self._lb, self._ub)] * len(x0)
+         lb, ub = self.group_vals('lb', 'ub', cls=list)
+         bounds = []
+         for p, l, u in zip(params, lb, ub):
+             bounds.extend([(l, u)] * p.numel())

          res = scipy.optimize.dual_annealing(
              partial(self._objective, params = params, closure = closure),
@@ -360,3 +367,145 @@ class ScipyDualAnnealing(Optimizer):
          params.from_vec_(torch.from_numpy(res.x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
          return res.fun

+
+
+ class ScipySHGO(Optimizer):
+     def __init__(
+         self,
+         params,
+         lb: float,
+         ub: float,
+         constraints = None,
+         n: int = 100,
+         iters: int = 1,
+         callback = None,
+         minimizer_kwargs = None,
+         options = None,
+         sampling_method: str = 'simplicial',
+     ):
+         super().__init__(params, lb=lb, ub=ub)
+
+         kwargs = locals().copy()
+         del kwargs['self'], kwargs['params'], kwargs['lb'], kwargs['ub'], kwargs['__class__']
+         self._kwargs = kwargs
+
+     def _objective(self, x: np.ndarray, params: TensorList, closure):
+         params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+         return _ensure_float(closure(False))
+
+     @torch.no_grad
+     def step(self, closure: Closure):
+         params = self.get_params()
+
+         lb, ub = self.group_vals('lb', 'ub', cls=list)
+         bounds = []
+         for p, l, u in zip(params, lb, ub):
+             bounds.extend([(l, u)] * p.numel())
+
+         res = scipy.optimize.shgo(
+             partial(self._objective, params = params, closure = closure),
+             bounds=bounds,
+             **self._kwargs
+         )
+
+         params.from_vec_(torch.from_numpy(res.x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+         return res.fun
+
+
+ class ScipyDIRECT(Optimizer):
+     def __init__(
+         self,
+         params,
+         lb: float,
+         ub: float,
+         maxfun: int | None = 1000,
+         maxiter: int = 1000,
+         eps: float = 0.0001,
+         locally_biased: bool = True,
+         f_min: float = -np.inf,
+         f_min_rtol: float = 0.0001,
+         vol_tol: float = 1e-16,
+         len_tol: float = 0.000001,
+         callback = None,
+     ):
+         super().__init__(params, lb=lb, ub=ub)
+
+         kwargs = locals().copy()
+         del kwargs['self'], kwargs['params'], kwargs['lb'], kwargs['ub'], kwargs['__class__']
+         self._kwargs = kwargs
+
+     def _objective(self, x: np.ndarray, params: TensorList, closure) -> float:
+         if self.raised: return np.inf
+         try:
+             params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+             return _ensure_float(closure(False))
+         except Exception as e:
+             # he he he ha, I found a way to make exceptions work in fcmaes and scipy direct
+             self.e = e
+             self.raised = True
+             return np.inf
+
+     @torch.no_grad
+     def step(self, closure: Closure):
+         self.raised = False
+         self.e = None
+
+         params = self.get_params()
+
+         lb, ub = self.group_vals('lb', 'ub', cls=list)
+         bounds = []
+         for p, l, u in zip(params, lb, ub):
+             bounds.extend([(l, u)] * p.numel())
+
+         res = scipy.optimize.direct(
+             partial(self._objective, params=params, closure=closure),
+             bounds=bounds,
+             **self._kwargs
+         )
+
+         params.from_vec_(torch.from_numpy(res.x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+
+         if self.e is not None: raise self.e from None
+         return res.fun
+
+
+
+
+ class ScipyBrute(Optimizer):
+     def __init__(
+         self,
+         params,
+         lb: float,
+         ub: float,
+         Ns: int = 20,
+         full_output: int = 0,
+         finish = scipy.optimize.fmin,
+         disp: bool = False,
+         workers: int = 1
+     ):
+         super().__init__(params, lb=lb, ub=ub)
+
+         kwargs = locals().copy()
+         del kwargs['self'], kwargs['params'], kwargs['lb'], kwargs['ub'], kwargs['__class__']
+         self._kwargs = kwargs
+
+     def _objective(self, x: np.ndarray, params: TensorList, closure):
+         params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+         return _ensure_float(closure(False))
+
+     @torch.no_grad
+     def step(self, closure: Closure):
+         params = self.get_params()
+
+         lb, ub = self.group_vals('lb', 'ub', cls=list)
+         bounds = []
+         for p, l, u in zip(params, lb, ub):
+             bounds.extend([(l, u)] * p.numel())
+
+         x0 = scipy.optimize.brute(
+             partial(self._objective, params = params, closure = closure),
+             ranges=bounds,
+             **self._kwargs
+         )
+         params.from_vec_(torch.from_numpy(x0).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+         return None
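All of the scipy wrappers above share one mechanic: flatten the parameters into a single numpy vector, hand scipy per-element `(lb, ub)` bounds, and write the optimizer's best vector back into the tensors. A hedged, torchzero-free sketch of that round trip using plain `torch.nn.utils` helpers and `scipy.optimize.differential_evolution`; nothing here is torchzero API, it only mirrors the structure of the `step` methods above:

import numpy as np
import scipy.optimize
import torch
from torch.nn.utils import parameters_to_vector, vector_to_parameters

params = [torch.zeros(3), torch.zeros(2)]              # two parameter tensors
lb, ub = -5.0, 5.0

def objective(x: np.ndarray) -> float:
    # write the candidate vector back into the tensors, then evaluate the loss
    vector_to_parameters(torch.from_numpy(x).to(params[0].dtype), params)
    return float(sum(((p - 1.0) ** 2).sum() for p in params))

bounds = [(lb, ub)] * sum(p.numel() for p in params)   # one (lb, ub) pair per element
res = scipy.optimize.differential_evolution(objective, bounds=bounds, maxiter=50, seed=0)
vector_to_parameters(torch.from_numpy(res.x).to(params[0].dtype), params)  # adopt the best point
print(res.fun, parameters_to_vector(params))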
@@ -9,11 +9,7 @@ from .optimizer import (
      get_group_vals,
      get_params,
      get_state_vals,
-     grad_at_params,
-     grad_vec_at_params,
-     loss_at_params,
-     loss_grad_at_params,
-     loss_grad_vec_at_params,
+     unpack_states,
  )
  from .params import (
      Params,
@@ -22,6 +18,6 @@ from .params import (
      _copy_param_groups,
      _make_param_groups,
  )
- from .python_tools import flatten, generic_eq, reduce_dim
+ from .python_tools import flatten, generic_eq, reduce_dim, unpack_dicts
  from .tensorlist import TensorList, as_tensorlist, Distributions, generic_clamp, generic_numel, generic_vector_norm, generic_zeros_like, generic_randn_like
  from .torch_tools import tofloat, tolist, tonumpy, totensor, vec_to_tensors, vec_to_tensors_, set_storage_
@@ -2,6 +2,7 @@ from collections.abc import Iterable, Sequence

  import torch
  import torch.autograd.forward_ad as fwAD
+ from typing import Literal

  from .torch_tools import swap_tensors_no_use_count_check, vec_to_tensors

@@ -510,4 +511,4 @@ def hvp_fd_forward(
      torch._foreach_div_(hvp_, h)

      if normalize: torch._foreach_mul_(hvp_, vec_norm)
-     return loss, hvp_
+     return loss, hvp_