torchzero 0.1.8__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs/source/conf.py +57 -0
- tests/test_identical.py +230 -0
- tests/test_module.py +50 -0
- tests/test_opts.py +884 -0
- tests/test_tensorlist.py +1787 -0
- tests/test_utils_optimizer.py +170 -0
- tests/test_vars.py +184 -0
- torchzero/__init__.py +4 -4
- torchzero/core/__init__.py +3 -13
- torchzero/core/module.py +629 -510
- torchzero/core/preconditioner.py +137 -0
- torchzero/core/transform.py +252 -0
- torchzero/modules/__init__.py +13 -21
- torchzero/modules/clipping/__init__.py +3 -0
- torchzero/modules/clipping/clipping.py +320 -0
- torchzero/modules/clipping/ema_clipping.py +135 -0
- torchzero/modules/clipping/growth_clipping.py +187 -0
- torchzero/modules/experimental/__init__.py +13 -18
- torchzero/modules/experimental/absoap.py +350 -0
- torchzero/modules/experimental/adadam.py +111 -0
- torchzero/modules/experimental/adamY.py +135 -0
- torchzero/modules/experimental/adasoap.py +282 -0
- torchzero/modules/experimental/algebraic_newton.py +145 -0
- torchzero/modules/experimental/curveball.py +89 -0
- torchzero/modules/experimental/dsoap.py +290 -0
- torchzero/modules/experimental/gradmin.py +85 -0
- torchzero/modules/experimental/reduce_outward_lr.py +35 -0
- torchzero/modules/experimental/spectral.py +286 -0
- torchzero/modules/experimental/subspace_preconditioners.py +128 -0
- torchzero/modules/experimental/tropical_newton.py +136 -0
- torchzero/modules/functional.py +209 -0
- torchzero/modules/grad_approximation/__init__.py +4 -0
- torchzero/modules/grad_approximation/fdm.py +120 -0
- torchzero/modules/grad_approximation/forward_gradient.py +81 -0
- torchzero/modules/grad_approximation/grad_approximator.py +66 -0
- torchzero/modules/grad_approximation/rfdm.py +259 -0
- torchzero/modules/line_search/__init__.py +5 -30
- torchzero/modules/line_search/backtracking.py +186 -0
- torchzero/modules/line_search/line_search.py +181 -0
- torchzero/modules/line_search/scipy.py +37 -0
- torchzero/modules/line_search/strong_wolfe.py +260 -0
- torchzero/modules/line_search/trust_region.py +61 -0
- torchzero/modules/lr/__init__.py +2 -0
- torchzero/modules/lr/lr.py +59 -0
- torchzero/modules/lr/step_size.py +97 -0
- torchzero/modules/momentum/__init__.py +14 -4
- torchzero/modules/momentum/averaging.py +78 -0
- torchzero/modules/momentum/cautious.py +181 -0
- torchzero/modules/momentum/ema.py +173 -0
- torchzero/modules/momentum/experimental.py +189 -0
- torchzero/modules/momentum/matrix_momentum.py +124 -0
- torchzero/modules/momentum/momentum.py +43 -106
- torchzero/modules/ops/__init__.py +103 -0
- torchzero/modules/ops/accumulate.py +65 -0
- torchzero/modules/ops/binary.py +240 -0
- torchzero/modules/ops/debug.py +25 -0
- torchzero/modules/ops/misc.py +419 -0
- torchzero/modules/ops/multi.py +137 -0
- torchzero/modules/ops/reduce.py +149 -0
- torchzero/modules/ops/split.py +75 -0
- torchzero/modules/ops/switch.py +68 -0
- torchzero/modules/ops/unary.py +115 -0
- torchzero/modules/ops/utility.py +112 -0
- torchzero/modules/optimizers/__init__.py +18 -10
- torchzero/modules/optimizers/adagrad.py +146 -49
- torchzero/modules/optimizers/adam.py +112 -118
- torchzero/modules/optimizers/lion.py +18 -11
- torchzero/modules/optimizers/muon.py +222 -0
- torchzero/modules/optimizers/orthograd.py +55 -0
- torchzero/modules/optimizers/rmsprop.py +103 -51
- torchzero/modules/optimizers/rprop.py +342 -99
- torchzero/modules/optimizers/shampoo.py +197 -0
- torchzero/modules/optimizers/soap.py +286 -0
- torchzero/modules/optimizers/sophia_h.py +129 -0
- torchzero/modules/projections/__init__.py +5 -0
- torchzero/modules/projections/dct.py +73 -0
- torchzero/modules/projections/fft.py +73 -0
- torchzero/modules/projections/galore.py +10 -0
- torchzero/modules/projections/projection.py +218 -0
- torchzero/modules/projections/structural.py +151 -0
- torchzero/modules/quasi_newton/__init__.py +7 -4
- torchzero/modules/quasi_newton/cg.py +218 -0
- torchzero/modules/quasi_newton/experimental/__init__.py +1 -0
- torchzero/modules/quasi_newton/experimental/modular_lbfgs.py +265 -0
- torchzero/modules/quasi_newton/lbfgs.py +228 -0
- torchzero/modules/quasi_newton/lsr1.py +170 -0
- torchzero/modules/quasi_newton/olbfgs.py +196 -0
- torchzero/modules/quasi_newton/quasi_newton.py +475 -0
- torchzero/modules/second_order/__init__.py +3 -4
- torchzero/modules/second_order/newton.py +142 -165
- torchzero/modules/second_order/newton_cg.py +84 -0
- torchzero/modules/second_order/nystrom.py +168 -0
- torchzero/modules/smoothing/__init__.py +2 -5
- torchzero/modules/smoothing/gaussian.py +164 -0
- torchzero/modules/smoothing/{laplacian_smoothing.py → laplacian.py} +115 -128
- torchzero/modules/weight_decay/__init__.py +1 -0
- torchzero/modules/weight_decay/weight_decay.py +52 -0
- torchzero/modules/wrappers/__init__.py +1 -0
- torchzero/modules/wrappers/optim_wrapper.py +91 -0
- torchzero/optim/__init__.py +2 -10
- torchzero/optim/utility/__init__.py +1 -0
- torchzero/optim/utility/split.py +45 -0
- torchzero/optim/wrappers/nevergrad.py +2 -28
- torchzero/optim/wrappers/nlopt.py +31 -16
- torchzero/optim/wrappers/scipy.py +79 -156
- torchzero/utils/__init__.py +27 -0
- torchzero/utils/compile.py +175 -37
- torchzero/utils/derivatives.py +513 -99
- torchzero/utils/linalg/__init__.py +5 -0
- torchzero/utils/linalg/matrix_funcs.py +87 -0
- torchzero/utils/linalg/orthogonalize.py +11 -0
- torchzero/utils/linalg/qr.py +71 -0
- torchzero/utils/linalg/solve.py +168 -0
- torchzero/utils/linalg/svd.py +20 -0
- torchzero/utils/numberlist.py +132 -0
- torchzero/utils/ops.py +10 -0
- torchzero/utils/optimizer.py +284 -0
- torchzero/utils/optuna_tools.py +40 -0
- torchzero/utils/params.py +149 -0
- torchzero/utils/python_tools.py +40 -25
- torchzero/utils/tensorlist.py +1081 -0
- torchzero/utils/torch_tools.py +48 -12
- torchzero-0.3.2.dist-info/METADATA +379 -0
- torchzero-0.3.2.dist-info/RECORD +128 -0
- {torchzero-0.1.8.dist-info → torchzero-0.3.2.dist-info}/WHEEL +1 -1
- {torchzero-0.1.8.dist-info → torchzero-0.3.2.dist-info/licenses}/LICENSE +0 -0
- torchzero-0.3.2.dist-info/top_level.txt +3 -0
- torchzero/core/tensorlist_optimizer.py +0 -219
- torchzero/modules/adaptive/__init__.py +0 -4
- torchzero/modules/adaptive/adaptive.py +0 -192
- torchzero/modules/experimental/experimental.py +0 -294
- torchzero/modules/experimental/quad_interp.py +0 -104
- torchzero/modules/experimental/subspace.py +0 -259
- torchzero/modules/gradient_approximation/__init__.py +0 -7
- torchzero/modules/gradient_approximation/_fd_formulas.py +0 -3
- torchzero/modules/gradient_approximation/base_approximator.py +0 -105
- torchzero/modules/gradient_approximation/fdm.py +0 -125
- torchzero/modules/gradient_approximation/forward_gradient.py +0 -163
- torchzero/modules/gradient_approximation/newton_fdm.py +0 -198
- torchzero/modules/gradient_approximation/rfdm.py +0 -125
- torchzero/modules/line_search/armijo.py +0 -56
- torchzero/modules/line_search/base_ls.py +0 -139
- torchzero/modules/line_search/directional_newton.py +0 -217
- torchzero/modules/line_search/grid_ls.py +0 -158
- torchzero/modules/line_search/scipy_minimize_scalar.py +0 -62
- torchzero/modules/meta/__init__.py +0 -12
- torchzero/modules/meta/alternate.py +0 -65
- torchzero/modules/meta/grafting.py +0 -195
- torchzero/modules/meta/optimizer_wrapper.py +0 -173
- torchzero/modules/meta/return_overrides.py +0 -46
- torchzero/modules/misc/__init__.py +0 -10
- torchzero/modules/misc/accumulate.py +0 -43
- torchzero/modules/misc/basic.py +0 -115
- torchzero/modules/misc/lr.py +0 -96
- torchzero/modules/misc/multistep.py +0 -51
- torchzero/modules/misc/on_increase.py +0 -53
- torchzero/modules/operations/__init__.py +0 -29
- torchzero/modules/operations/multi.py +0 -298
- torchzero/modules/operations/reduction.py +0 -134
- torchzero/modules/operations/singular.py +0 -113
- torchzero/modules/optimizers/sgd.py +0 -54
- torchzero/modules/orthogonalization/__init__.py +0 -2
- torchzero/modules/orthogonalization/newtonschulz.py +0 -159
- torchzero/modules/orthogonalization/svd.py +0 -86
- torchzero/modules/regularization/__init__.py +0 -22
- torchzero/modules/regularization/dropout.py +0 -34
- torchzero/modules/regularization/noise.py +0 -77
- torchzero/modules/regularization/normalization.py +0 -328
- torchzero/modules/regularization/ortho_grad.py +0 -78
- torchzero/modules/regularization/weight_decay.py +0 -92
- torchzero/modules/scheduling/__init__.py +0 -2
- torchzero/modules/scheduling/lr_schedulers.py +0 -131
- torchzero/modules/scheduling/step_size.py +0 -80
- torchzero/modules/smoothing/gaussian_smoothing.py +0 -90
- torchzero/modules/weight_averaging/__init__.py +0 -2
- torchzero/modules/weight_averaging/ema.py +0 -72
- torchzero/modules/weight_averaging/swa.py +0 -171
- torchzero/optim/experimental/__init__.py +0 -20
- torchzero/optim/experimental/experimental.py +0 -343
- torchzero/optim/experimental/ray_search.py +0 -83
- torchzero/optim/first_order/__init__.py +0 -18
- torchzero/optim/first_order/cautious.py +0 -158
- torchzero/optim/first_order/forward_gradient.py +0 -70
- torchzero/optim/first_order/optimizers.py +0 -570
- torchzero/optim/modular.py +0 -148
- torchzero/optim/quasi_newton/__init__.py +0 -1
- torchzero/optim/quasi_newton/directional_newton.py +0 -58
- torchzero/optim/second_order/__init__.py +0 -1
- torchzero/optim/second_order/newton.py +0 -94
- torchzero/optim/zeroth_order/__init__.py +0 -4
- torchzero/optim/zeroth_order/fdm.py +0 -87
- torchzero/optim/zeroth_order/newton_fdm.py +0 -146
- torchzero/optim/zeroth_order/rfdm.py +0 -217
- torchzero/optim/zeroth_order/rs.py +0 -85
- torchzero/random/__init__.py +0 -1
- torchzero/random/random.py +0 -46
- torchzero/tensorlist.py +0 -826
- torchzero-0.1.8.dist-info/METADATA +0 -130
- torchzero-0.1.8.dist-info/RECORD +0 -104
- torchzero-0.1.8.dist-info/top_level.txt +0 -1
torchzero/optim/wrappers/nlopt.py CHANGED

@@ -1,12 +1,11 @@
-from typing import Literal
+from typing import Literal, Any
 from collections.abc import Mapping, Callable
 from functools import partial
 import numpy as np
 import torch

 import nlopt
-from ...
-from ...tensorlist import TensorList
+from ...utils import Optimizer, TensorList

 _ALGOS_LITERAL = Literal[
     "GN_DIRECT", # = _nlopt.GN_DIRECT
@@ -56,18 +55,21 @@ _ALGOS_LITERAL = Literal[
 ]

 def _ensure_float(x):
-    if isinstance(x, torch.Tensor): return x.detach().cpu().item()
-    if isinstance(x, np.ndarray): return x.item()
+    if isinstance(x, torch.Tensor): return float(x.detach().cpu().item())
+    if isinstance(x, np.ndarray): return float(x.item())
     return float(x)

 def _ensure_tensor(x):
-
-    x.
-
+    try:
+        if isinstance(x, np.ndarray): return torch.as_tensor(x.copy())
+    except SystemError:
+        return None
     return torch.tensor(x, dtype=torch.float32)

 inf = float('inf')
-
+Closure = Callable[[bool], Any]
+
+class NLOptOptimizer(Optimizer):
     """Use nlopt as pytorch optimizer, with gradient supplied by pytorch autograd.
     Note that this performs full minimization on each step,
     so usually you would want to perform a single step, although performing multiple steps will refine the
@@ -119,8 +121,12 @@ class NLOptOptimizer(TensorListOptimizer):

         self._last_loss = None

-    def _f(self, x: np.ndarray, grad: np.ndarray, closure
-
+    def _f(self, x: np.ndarray, grad: np.ndarray, closure, params: TensorList):
+        t = _ensure_tensor(x)
+        if t is None:
+            if self.opt is not None: self.opt.force_stop()
+            return None
+        params.from_vec_(t.to(params[0], copy=False))
         if grad.size > 0:
             with torch.enable_grad(): loss = closure()
             self._last_loss = _ensure_float(loss)
@@ -131,12 +137,11 @@ class NLOptOptimizer(TensorListOptimizer):
         return self._last_loss

     @torch.no_grad
-    def step(self, closure:
-
+    def step(self, closure: Closure): # pylint: disable = signature-differs # pyright:ignore[reportIncompatibleMethodOverride]
         params = self.get_params()

         # make bounds
-        lb, ub = self.
+        lb, ub = self.group_vals('lb', 'ub', cls=list)
         lower = []
         upper = []
         for p, l, u in zip(params, lb, ub):
@@ -145,9 +150,10 @@ class NLOptOptimizer(TensorListOptimizer):
             lower.extend([l] * p.numel())
             upper.extend([u] * p.numel())

-        x0 = params.to_vec().detach().cpu().numpy()
+        x0 = params.to_vec().detach().cpu().numpy().astype(np.float64)

         self.opt = nlopt.opt(self.algorithm, x0.size)
+        self.opt.set_exceptions_enabled(False) # required
         self.opt.set_min_objective(partial(self._f, closure = closure, params = params))
         self.opt.set_lower_bounds(lower)
         self.opt.set_upper_bounds(upper)
@@ -160,6 +166,15 @@ class NLOptOptimizer(TensorListOptimizer):
         if self.xtol_abs is not None: self.opt.set_xtol_abs(self.xtol_abs)
         if self.maxtime is not None: self.opt.set_maxtime(self.maxtime)

-
+        self._last_loss = None
+        x = None
+        try:
+            x = self.opt.optimize(x0)
+        except SystemError:
+            pass
+        except Exception as e:
+            raise e from None
+
+        if self._last_loss is None or x is None: return closure(False)
         params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
         return self._last_loss
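The rewritten `_f`/`step` pair adapts a PyTorch closure to nlopt's `(x, grad)` objective convention: the candidate vector is written back into the parameters, the closure re-evaluates the loss (computing gradients only when the algorithm supplies a non-empty `grad` buffer), and a plain Python float is returned. Below is a minimal standalone sketch of that same convention, assuming a toy quadratic objective and using only plain PyTorch and nlopt; none of the names come from torchzero.

    # Hypothetical illustration of the (x, grad) callback that NLOptOptimizer._f implements.
    import nlopt
    import numpy as np
    import torch

    params = [torch.zeros(3, requires_grad=True)]
    target = torch.tensor([1.0, 2.0, 3.0])

    def closure():
        loss = ((params[0] - target) ** 2).sum()
        if params[0].grad is not None:
            params[0].grad = None
        loss.backward()
        return loss

    def f(x: np.ndarray, grad: np.ndarray) -> float:
        with torch.no_grad():                     # write x back into the parameters,
            params[0].copy_(torch.from_numpy(x))  # like params.from_vec_ in the wrapper
        loss = closure()
        if grad.size > 0:                         # gradient-based algorithms pass a writable buffer
            grad[:] = params[0].grad.detach().cpu().double().numpy()
        return float(loss)

    opt = nlopt.opt(nlopt.LD_LBFGS, 3)
    opt.set_min_objective(f)
    opt.set_maxeval(100)
    x_opt = opt.optimize(np.zeros(3))
    print(x_opt)  # ~ [1, 2, 3]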
torchzero/optim/wrappers/scipy.py CHANGED

@@ -1,20 +1,15 @@
-from typing import Literal, Any
 from collections import abc
+from collections.abc import Callable
 from functools import partial
+from typing import Any, Literal

 import numpy as np
-import torch
-
 import scipy.optimize
+import torch

-from ...
-from ...utils.derivatives import
-from ...modules import
-from ...modules.experimental.subspace import Projection, Proj2Masks, ProjGrad, ProjNormalize, Subspace
-from ...modules.second_order.newton import regularize_hessian_
-from ...tensorlist import TensorList
-from ..modular import Modular
-
+from ...utils import Optimizer, TensorList
+from ...utils.derivatives import jacobian_and_hessian_mat_wrt, jacobian_wrt
+from ...modules.second_order.newton import tikhonov_

 def _ensure_float(x):
     if isinstance(x, torch.Tensor): return x.detach().cpu().item()
@@ -26,7 +21,17 @@ def _ensure_numpy(x):
     if isinstance(x, np.ndarray): return x
     return np.array(x)

-
+def matrix_clamp(H: torch.Tensor, reg: float):
+    try:
+        eigvals, eigvecs = torch.linalg.eigh(H) # pylint:disable=not-callable
+        eigvals.clamp_(min=reg)
+        return eigvecs @ torch.diag(eigvals) @ eigvecs.mH
+    except Exception:
+        return H
+
+Closure = Callable[[bool], Any]
+
+class ScipyMinimize(Optimizer):
     """Use scipy.minimize.optimize as pytorch optimizer. Note that this performs full minimization on each step,
     so usually you would want to perform a single step, although performing multiple steps will refine the
     solution.
@@ -71,7 +76,8 @@ class ScipyMinimize(TensorListOptimizer):
         options = None,
         jac: Literal['2-point', '3-point', 'cs', 'autograd'] = 'autograd',
         hess: Literal['2-point', '3-point', 'cs', 'autograd'] | scipy.optimize.HessianUpdateStrategy = 'autograd',
-        tikhonov: float |
+        tikhonov: float | None = 0,
+        min_eigval: float | None = None,
     ):
         defaults = dict(lb=lb, ub=ub)
         super().__init__(params, defaults)
@@ -79,11 +85,12 @@ class ScipyMinimize(TensorListOptimizer):
         self.constraints = constraints
         self.tol = tol
         self.callback = callback
+        self.min_eigval = min_eigval
         self.options = options

         self.jac = jac
         self.hess = hess
-        self.tikhonov: float |
+        self.tikhonov: float | None = tikhonov

         self.use_jac_autograd = jac.lower() == 'autograd' and (method is None or method.lower() in [
             'cg', 'bfgs', 'newton-cg', 'l-bfgs-b', 'tnc', 'slsqp', 'dogleg',
@@ -93,21 +100,22 @@ class ScipyMinimize(TensorListOptimizer):
             'newton-cg', 'dogleg', 'trust-ncg', 'trust-krylov', 'trust-exact'
         ]

+        # jac in scipy is '2-point', '3-point', 'cs', True or None.
         if self.jac == 'autograd':
             if self.use_jac_autograd: self.jac = True
             else: self.jac = None


-    def _hess(self, x: np.ndarray, params: TensorList, closure
+    def _hess(self, x: np.ndarray, params: TensorList, closure):
         params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
         with torch.enable_grad():
             value = closure(False)
-            H =
-
-
-        return
+            _, H = jacobian_and_hessian_mat_wrt([value], wrt = params)
+        if self.tikhonov is not None: H = tikhonov_(H, self.tikhonov)
+        if self.min_eigval is not None: H = matrix_clamp(H, self.min_eigval)
+        return H.detach().cpu().numpy()

-    def _objective(self, x: np.ndarray, params: TensorList, closure
+    def _objective(self, x: np.ndarray, params: TensorList, closure):
         # set params to x
         params.from_vec_(torch.from_numpy(x).to(params[0], copy=False))

@@ -118,7 +126,7 @@ class ScipyMinimize(TensorListOptimizer):
         return _ensure_float(closure(False))

     @torch.no_grad
-    def step(self, closure:
+    def step(self, closure: Closure):# pylint:disable = signature-differs # pyright:ignore[reportIncompatibleMethodOverride]
         params = self.get_params()

         # determine hess argument
@@ -130,7 +138,7 @@ class ScipyMinimize(TensorListOptimizer):
         x0 = params.to_vec().detach().cpu().numpy()

         # make bounds
-        lb, ub = self.
+        lb, ub = self.group_vals('lb', 'ub', cls=list)
         bounds = []
         for p, l, u in zip(params, lb, ub):
             bounds.extend([(l, u)] * p.numel())
@@ -156,8 +164,8 @@ class ScipyMinimize(TensorListOptimizer):



-class
-    """
+class ScipyRootOptimization(Optimizer):
+    """Optimization via using scipy.root on gradients, mainly for experimenting!

     Args:
         params: iterable of parameters to optimize or dicts defining parameter groups.
@@ -196,94 +204,11 @@ class ScipyRoot(TensorListOptimizer):
         self.jac = jac
         if self.jac == 'autograd': self.jac = True

-    def _objective(self, x: np.ndarray, params: TensorList, closure: _ClosureType):
-        # set params to x
-        params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
-
-        # return value and maybe gradients
-        if self.jac:
-            with torch.enable_grad():
-                value = closure(False)
-                if not isinstance(value, torch.Tensor):
-                    raise TypeError(f"Autograd jacobian requires closure to return torch.Tensor, got {type(value)}")
-                jac = jacobian_list_to_vec(jacobian([value], wrt=params))
-                return _ensure_numpy(value), jac.detach().cpu().numpy()
-        return _ensure_numpy(closure(False))
-
-    @torch.no_grad
-    def step(self, closure: _ClosureType): # type:ignore # pylint:disable = signature-differs
-        params = self.get_params()
-
-        x0 = params.to_vec().detach().cpu().numpy()
-
-        res = scipy.optimize.root(
-            partial(self._objective, params = params, closure = closure),
-            x0 = x0,
-            method=self.method,
-            tol=self.tol,
-            callback=self.callback,
-            options=self.options,
-            jac = self.jac,
-        )
-
-        params.from_vec_(torch.from_numpy(res.x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
-        return res.fun
-
-
-class ScipyRootOptimization(TensorListOptimizer):
-    """Optimization via finding roots of the gradient with `scipy.optimize.root` (for experiments, won't work well on most problems).
-
-    Args:
-        params: iterable of parameters to optimize or dicts defining parameter groups.
-        method (str, optional): one of methods from https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.root.html#scipy.optimize.root. Defaults to 'hybr'.
-        tol (float | None, optional): tolerance. Defaults to None.
-        callback (_type_, optional): callback. Defaults to None.
-        options (_type_, optional): options for optimizer. Defaults to None.
-        jac (Literal['2, optional): jacobian calculation method. Defaults to 'autograd'.
-        tikhonov (float | Literal['eig'], optional): tikhonov regularization (only for 'hybr' and 'lm'). Defaults to 0.
-        add_loss (float, optional): adds loss value to jacobian multiplied by this to try to avoid finding maxima. Defaults to 0.
-        mul_loss (float, optional): multiplies jacobian by loss value multiplied by this to try to avoid finding maxima. Defaults to 0.
-    """
-    def __init__(
-        self,
-        params,
-        method: Literal[
-            "hybr",
-            "lm",
-            "broyden1",
-            "broyden2",
-            "anderson",
-            "linearmixing",
-            "diagbroyden",
-            "excitingmixing",
-            "krylov",
-            "df-sane",
-        ] = 'hybr',
-        tol: float | None = None,
-        callback = None,
-        options = None,
-        jac: Literal['2-point', '3-point', 'cs', 'autograd'] = 'autograd',
-        tikhonov: float | Literal['eig'] = 0,
-        add_loss: float = 0,
-        mul_loss: float = 0,
-    ):
-        super().__init__(params, {})
-        self.method = method
-        self.tol = tol
-        self.callback = callback
-        self.options = options
-        self.value = None
-        self.tikhonov: float | Literal['eig'] = tikhonov
-        self.add_loss = add_loss
-        self.mul_loss = mul_loss
-
-        self.jac = jac == 'autograd'
-
         # those don't require jacobian
         if self.method.lower() in ('broyden1', 'broyden2', 'anderson', 'linearmixing', 'diagbroyden', 'excitingmixing', 'krylov', 'df-sane'):
             self.jac = None

-    def _objective(self, x: np.ndarray, params: TensorList, closure
+    def _objective(self, x: np.ndarray, params: TensorList, closure):
         # set params to x
         params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))

@@ -293,23 +218,16 @@ class ScipyRootOptimization(TensorListOptimizer):
                 self.value = closure(False)
                 if not isinstance(self.value, torch.Tensor):
                     raise TypeError(f"Autograd jacobian requires closure to return torch.Tensor, got {type(self.value)}")
-
-
-                hess = hessian_list_to_mat(hess_list)
-                regularize_hessian_(hess, self.tikhonov)
-                if self.mul_loss != 0: jac *= self.value * self.mul_loss
-                if self.add_loss != 0: jac += self.value * self.add_loss
-                return jac.detach().cpu().numpy(), hess.detach().cpu().numpy()
+                g, H = jacobian_and_hessian_mat_wrt([self.value], wrt=params)
+                return g.detach().cpu().numpy(), H.detach().cpu().numpy()

         # return the gradients
         with torch.enable_grad(): self.value = closure()
         jac = params.ensure_grad_().grad.to_vec()
-        if self.mul_loss != 0: jac *= self.value * self.mul_loss
-        if self.add_loss != 0: jac += self.value * self.add_loss
         return jac.detach().cpu().numpy()

     @torch.no_grad
-    def step(self, closure:
+    def step(self, closure: Closure): # pylint:disable = signature-differs # pyright:ignore[reportIncompatibleMethodOverride]
         params = self.get_params()

         x0 = params.to_vec().detach().cpu().numpy()
@@ -325,9 +243,11 @@ class ScipyRootOptimization(TensorListOptimizer):
         )

         params.from_vec_(torch.from_numpy(res.x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
-        return
+        return res.fun
+

-
+
+class ScipyDE(Optimizer):
     """Use scipy.minimize.differential_evolution as pytorch optimizer. Note that this performs full minimization on each step,
     so usually you would want to perform a single step. This also requires bounds to be specified.

@@ -374,12 +294,12 @@ class ScipyDE(TensorListOptimizer):
         self._kwargs = kwargs
         self._lb, self._ub = bounds

-    def _objective(self, x: np.ndarray, params: TensorList, closure
+    def _objective(self, x: np.ndarray, params: TensorList, closure):
         params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
         return _ensure_float(closure(False))

     @torch.no_grad
-    def step(self, closure:
+    def step(self, closure: Closure): # pylint:disable = signature-differs # pyright:ignore[reportIncompatibleMethodOverride]
         params = self.get_params()

         x0 = params.to_vec().detach().cpu().numpy()
@@ -396,44 +316,47 @@ class ScipyDE(TensorListOptimizer):
         return res.fun


-class ScipyMinimizeSubspace(Modular):
-    """for experiments and won't work well on most problems.

-
+class ScipyDualAnnealing(Optimizer):
     def __init__(
         self,
         params,
-
-
-
-
-
-
-
-
-
-
-        tol=None,
-        callback=None,
-        options=None,
-        jac: Literal['2-point', '3-point', 'cs', 'autograd'] = 'autograd',
-        hess: Literal['2-point', '3-point', 'cs', 'autograd'] | scipy.optimize.HessianUpdateStrategy = '2-point',
+        bounds: tuple[float, float],
+        maxiter=1000,
+        minimizer_kwargs=None,
+        initial_temp=5230.0,
+        restart_temp_ratio=2.0e-5,
+        visit=2.62,
+        accept=-5.0,
+        maxfun=1e7,
+        rng=None,
+        no_local_search=False,
     ):
+        super().__init__(params, {})

-
-
-
-
-
-
-
-
-
-
-
-
-
-        ]
+        kwargs = locals().copy()
+        del kwargs['self'], kwargs['params'], kwargs['bounds'], kwargs['__class__']
+        self._kwargs = kwargs
+        self._lb, self._ub = bounds
+
+    def _objective(self, x: np.ndarray, params: TensorList, closure):
+        params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+        return _ensure_float(closure(False))
+
+    @torch.no_grad
+    def step(self, closure: Closure):# pylint:disable = signature-differs # pyright:ignore[reportIncompatibleMethodOverride]
+        params = self.get_params()
+
+        x0 = params.to_vec().detach().cpu().numpy()
+        bounds = [(self._lb, self._ub)] * len(x0)
+
+        res = scipy.optimize.dual_annealing(
+            partial(self._objective, params = params, closure = closure),
+            x0 = x0,
+            bounds=bounds,
+            **self._kwargs
+        )
+
+        params.from_vec_(torch.from_numpy(res.x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+        return res.fun

-        super().__init__(params, modules)
torchzero/utils/__init__.py CHANGED

@@ -0,0 +1,27 @@
+from . import tensorlist as tl
+from .compile import _optional_compiler, benchmark_compile_cpu, benchmark_compile_cuda, set_compilation, enable_compilation
+from .numberlist import NumberList
+from .optimizer import (
+    Init,
+    ListLike,
+    Optimizer,
+    ParamFilter,
+    get_group_vals,
+    get_params,
+    get_state_vals,
+    grad_at_params,
+    grad_vec_at_params,
+    loss_at_params,
+    loss_grad_at_params,
+    loss_grad_vec_at_params,
+)
+from .params import (
+    Params,
+    _add_defaults_to_param_groups_,
+    _add_updates_grads_to_param_groups_,
+    _copy_param_groups,
+    _make_param_groups,
+)
+from .python_tools import flatten, generic_eq, reduce_dim
+from .tensorlist import TensorList, as_tensorlist, Distributions, generic_clamp, generic_numel, generic_vector_norm, generic_zeros_like, generic_randn_like
+from .torch_tools import tofloat, tolist, tonumpy, totensor, vec_to_tensors, vec_to_tensors_, set_storage_