torchzero 0.3.9__py3-none-any.whl → 0.3.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs/source/conf.py +6 -4
- docs/source/docstring template.py +46 -0
- tests/test_identical.py +2 -3
- tests/test_opts.py +115 -68
- tests/test_tensorlist.py +2 -2
- tests/test_vars.py +62 -61
- torchzero/core/__init__.py +2 -3
- torchzero/core/module.py +185 -53
- torchzero/core/transform.py +327 -159
- torchzero/modules/__init__.py +3 -1
- torchzero/modules/clipping/clipping.py +120 -23
- torchzero/modules/clipping/ema_clipping.py +37 -22
- torchzero/modules/clipping/growth_clipping.py +20 -21
- torchzero/modules/experimental/__init__.py +30 -4
- torchzero/modules/experimental/absoap.py +53 -156
- torchzero/modules/experimental/adadam.py +22 -15
- torchzero/modules/experimental/adamY.py +21 -25
- torchzero/modules/experimental/adam_lambertw.py +149 -0
- torchzero/modules/{line_search/trust_region.py → experimental/adaptive_step_size.py} +37 -8
- torchzero/modules/experimental/adasoap.py +24 -129
- torchzero/modules/experimental/cosine.py +214 -0
- torchzero/modules/experimental/cubic_adam.py +97 -0
- torchzero/modules/experimental/curveball.py +12 -12
- torchzero/modules/{projections → experimental}/dct.py +11 -11
- torchzero/modules/experimental/eigendescent.py +120 -0
- torchzero/modules/experimental/etf.py +195 -0
- torchzero/modules/experimental/exp_adam.py +113 -0
- torchzero/modules/experimental/expanded_lbfgs.py +141 -0
- torchzero/modules/{projections → experimental}/fft.py +10 -10
- torchzero/modules/experimental/gradmin.py +2 -2
- torchzero/modules/experimental/hnewton.py +85 -0
- torchzero/modules/{quasi_newton/experimental → experimental}/modular_lbfgs.py +49 -50
- torchzero/modules/experimental/newton_solver.py +11 -11
- torchzero/modules/experimental/newtonnewton.py +92 -0
- torchzero/modules/experimental/parabolic_search.py +220 -0
- torchzero/modules/experimental/reduce_outward_lr.py +10 -7
- torchzero/modules/{projections/structural.py → experimental/structural_projections.py} +12 -54
- torchzero/modules/experimental/subspace_preconditioners.py +20 -10
- torchzero/modules/experimental/tensor_adagrad.py +42 -0
- torchzero/modules/functional.py +12 -2
- torchzero/modules/grad_approximation/fdm.py +31 -4
- torchzero/modules/grad_approximation/forward_gradient.py +17 -7
- torchzero/modules/grad_approximation/grad_approximator.py +69 -24
- torchzero/modules/grad_approximation/rfdm.py +310 -50
- torchzero/modules/higher_order/__init__.py +1 -0
- torchzero/modules/higher_order/higher_order_newton.py +319 -0
- torchzero/modules/line_search/__init__.py +4 -4
- torchzero/modules/line_search/adaptive.py +99 -0
- torchzero/modules/line_search/backtracking.py +75 -31
- torchzero/modules/line_search/line_search.py +107 -49
- torchzero/modules/line_search/polynomial.py +233 -0
- torchzero/modules/line_search/scipy.py +20 -5
- torchzero/modules/line_search/strong_wolfe.py +52 -36
- torchzero/modules/misc/__init__.py +27 -0
- torchzero/modules/misc/debug.py +48 -0
- torchzero/modules/misc/escape.py +60 -0
- torchzero/modules/misc/gradient_accumulation.py +70 -0
- torchzero/modules/misc/misc.py +316 -0
- torchzero/modules/misc/multistep.py +158 -0
- torchzero/modules/misc/regularization.py +171 -0
- torchzero/modules/misc/split.py +103 -0
- torchzero/modules/{ops → misc}/switch.py +48 -7
- torchzero/modules/momentum/__init__.py +1 -1
- torchzero/modules/momentum/averaging.py +25 -10
- torchzero/modules/momentum/cautious.py +115 -40
- torchzero/modules/momentum/ema.py +92 -41
- torchzero/modules/momentum/experimental.py +21 -13
- torchzero/modules/momentum/matrix_momentum.py +145 -76
- torchzero/modules/momentum/momentum.py +25 -4
- torchzero/modules/ops/__init__.py +3 -31
- torchzero/modules/ops/accumulate.py +51 -25
- torchzero/modules/ops/binary.py +108 -62
- torchzero/modules/ops/multi.py +95 -34
- torchzero/modules/ops/reduce.py +31 -23
- torchzero/modules/ops/unary.py +37 -21
- torchzero/modules/ops/utility.py +53 -45
- torchzero/modules/optimizers/__init__.py +12 -3
- torchzero/modules/optimizers/adagrad.py +48 -29
- torchzero/modules/optimizers/adahessian.py +223 -0
- torchzero/modules/optimizers/adam.py +35 -37
- torchzero/modules/optimizers/adan.py +110 -0
- torchzero/modules/optimizers/adaptive_heavyball.py +57 -0
- torchzero/modules/optimizers/esgd.py +171 -0
- torchzero/modules/optimizers/ladagrad.py +183 -0
- torchzero/modules/optimizers/lion.py +4 -4
- torchzero/modules/optimizers/mars.py +91 -0
- torchzero/modules/optimizers/msam.py +186 -0
- torchzero/modules/optimizers/muon.py +32 -7
- torchzero/modules/optimizers/orthograd.py +4 -5
- torchzero/modules/optimizers/rmsprop.py +19 -19
- torchzero/modules/optimizers/rprop.py +89 -52
- torchzero/modules/optimizers/sam.py +163 -0
- torchzero/modules/optimizers/shampoo.py +55 -27
- torchzero/modules/optimizers/soap.py +40 -37
- torchzero/modules/optimizers/sophia_h.py +82 -25
- torchzero/modules/projections/__init__.py +2 -4
- torchzero/modules/projections/cast.py +51 -0
- torchzero/modules/projections/galore.py +4 -2
- torchzero/modules/projections/projection.py +212 -118
- torchzero/modules/quasi_newton/__init__.py +44 -5
- torchzero/modules/quasi_newton/cg.py +190 -39
- torchzero/modules/quasi_newton/diagonal_quasi_newton.py +163 -0
- torchzero/modules/quasi_newton/lbfgs.py +154 -97
- torchzero/modules/quasi_newton/lsr1.py +102 -58
- torchzero/modules/quasi_newton/quasi_newton.py +1032 -177
- torchzero/modules/quasi_newton/trust_region.py +397 -0
- torchzero/modules/second_order/__init__.py +2 -2
- torchzero/modules/second_order/newton.py +245 -54
- torchzero/modules/second_order/newton_cg.py +311 -21
- torchzero/modules/second_order/nystrom.py +124 -21
- torchzero/modules/smoothing/gaussian.py +55 -21
- torchzero/modules/smoothing/laplacian.py +20 -12
- torchzero/modules/step_size/__init__.py +2 -0
- torchzero/modules/step_size/adaptive.py +122 -0
- torchzero/modules/step_size/lr.py +154 -0
- torchzero/modules/weight_decay/__init__.py +1 -1
- torchzero/modules/weight_decay/weight_decay.py +126 -10
- torchzero/modules/wrappers/optim_wrapper.py +40 -12
- torchzero/optim/wrappers/directsearch.py +281 -0
- torchzero/optim/wrappers/fcmaes.py +105 -0
- torchzero/optim/wrappers/mads.py +89 -0
- torchzero/optim/wrappers/nevergrad.py +20 -5
- torchzero/optim/wrappers/nlopt.py +28 -14
- torchzero/optim/wrappers/optuna.py +70 -0
- torchzero/optim/wrappers/scipy.py +167 -16
- torchzero/utils/__init__.py +3 -7
- torchzero/utils/derivatives.py +5 -4
- torchzero/utils/linalg/__init__.py +1 -1
- torchzero/utils/linalg/solve.py +251 -12
- torchzero/utils/numberlist.py +2 -0
- torchzero/utils/optimizer.py +55 -74
- torchzero/utils/python_tools.py +27 -4
- torchzero/utils/tensorlist.py +40 -28
- {torchzero-0.3.9.dist-info → torchzero-0.3.11.dist-info}/METADATA +76 -51
- torchzero-0.3.11.dist-info/RECORD +159 -0
- {torchzero-0.3.9.dist-info → torchzero-0.3.11.dist-info}/WHEEL +1 -1
- torchzero/core/preconditioner.py +0 -138
- torchzero/modules/experimental/algebraic_newton.py +0 -145
- torchzero/modules/experimental/soapy.py +0 -290
- torchzero/modules/experimental/spectral.py +0 -288
- torchzero/modules/experimental/structured_newton.py +0 -111
- torchzero/modules/experimental/tropical_newton.py +0 -136
- torchzero/modules/lr/__init__.py +0 -2
- torchzero/modules/lr/lr.py +0 -59
- torchzero/modules/lr/step_size.py +0 -97
- torchzero/modules/ops/debug.py +0 -25
- torchzero/modules/ops/misc.py +0 -419
- torchzero/modules/ops/split.py +0 -75
- torchzero/modules/quasi_newton/experimental/__init__.py +0 -1
- torchzero/modules/quasi_newton/olbfgs.py +0 -196
- torchzero-0.3.9.dist-info/RECORD +0 -131
- {torchzero-0.3.9.dist-info → torchzero-0.3.11.dist-info}/licenses/LICENSE +0 -0
- {torchzero-0.3.9.dist-info → torchzero-0.3.11.dist-info}/top_level.txt +0 -0
torchzero/modules/wrappers/optim_wrapper.py

@@ -7,7 +7,35 @@ from ...utils import Params, _copy_param_groups, _make_param_groups
 
 
 class Wrap(Module):
-    """
+    """
+    Wraps a pytorch optimizer to use it as a module.
+
+    .. note::
+        Custom param groups are supported only by `set_param_groups`, settings passed to Modular will be ignored.
+
+    Args:
+        opt_fn (Callable[..., torch.optim.Optimizer] | torch.optim.Optimizer):
+            function that takes in parameters and returns the optimizer, for example :code:`torch.optim.Adam`
+            or :code:`lambda parameters: torch.optim.Adam(parameters, lr=1e-3)`
+        *args:
+        **kwargs:
+            Extra args to be passed to opt_fn. The function is called as :code:`opt_fn(parameters, *args, **kwargs)`.
+
+    Example:
+        wrapping pytorch_optimizer.StableAdamW
+
+        .. code-block:: py
+
+            from pytorch_optimizer import StableAdamW
+            opt = tz.Modular(
+                model.parameters(),
+                tz.m.Wrap(StableAdamW, lr=1),
+                tz.m.Cautious(),
+                tz.m.LR(1e-2)
+            )
+
+
+    """
     def __init__(self, opt_fn: Callable[..., torch.optim.Optimizer] | torch.optim.Optimizer, *args, **kwargs):
         super().__init__()
         self._opt_fn = opt_fn
@@ -24,8 +52,8 @@ class Wrap(Module):
         return super().set_param_groups(param_groups)
 
     @torch.no_grad
-    def step(self,
-        params =
+    def step(self, var):
+        params = var.params
 
         # initialize opt on 1st step
         if self.optimizer is None:
@@ -35,18 +63,18 @@ class Wrap(Module):
 
         # set grad to update
         orig_grad = [p.grad for p in params]
-        for p, u in zip(params,
+        for p, u in zip(params, var.get_update()):
             p.grad = u
 
         # if this module is last, can step with _opt directly
         # direct step can't be applied if next module is LR but _opt doesn't support lr,
         # and if there are multiple different per-parameter lrs (would be annoying to support)
-        if
-            (
+        if var.is_last and (
+            (var.last_module_lrs is None)
             or
-            (('lr' in self.optimizer.defaults) and (len(set(
+            (('lr' in self.optimizer.defaults) and (len(set(var.last_module_lrs)) == 1))
         ):
-            lr = 1 if
+            lr = 1 if var.last_module_lrs is None else var.last_module_lrs[0]
 
             # update optimizer lr with desired lr
             if lr != 1:
@@ -68,19 +96,19 @@ class Wrap(Module):
             for p, g in zip(params, orig_grad):
                 p.grad = g
 
-
-            return
+            var.stop = True; var.skip_update = True
+            return var
 
         # this is not the last module, meaning update is difference in parameters
         params_before_step = [p.clone() for p in params]
         self.optimizer.step() # step and update params
         for p, g in zip(params, orig_grad):
             p.grad = g
-
+        var.update = list(torch._foreach_sub(params_before_step, params)) # set update to difference between params
        for p, o in zip(params, params_before_step):
             p.set_(o) # pyright: ignore[reportArgumentType]
 
-        return
+        return var
 
     def reset(self):
         super().reset()
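The last hunk recovers the wrapped optimizer's step as an explicit update: clone the parameters, let the inner optimizer step, take `old - new` as the update, then roll the parameters back so later modules can process it. A minimal standalone sketch of that technique in plain torch (the helper name and structure are illustrative, not torchzero API):

```python
import torch

def update_from_wrapped_step(params, wrapped_optimizer):
    """Return the wrapped optimizer's update as `p_old - p_new`,
    leaving the parameters unchanged. Illustrative helper, not torchzero API;
    assumes .grad is already populated for `wrapped_optimizer`."""
    before = [p.detach().clone() for p in params]
    wrapped_optimizer.step()                            # moves params in place
    update = list(torch._foreach_sub(before, params))   # update = p_old - p_new
    with torch.no_grad():
        for p, old in zip(params, before):
            p.copy_(old)                                # restore original parameters
    return update
```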
torchzero/optim/wrappers/directsearch.py

@@ -0,0 +1,281 @@
+from collections.abc import Callable
+from functools import partial
+from typing import Any, Literal
+
+import directsearch
+import numpy as np
+import torch
+from directsearch.ds import DEFAULT_PARAMS
+
+from ...modules.second_order.newton import tikhonov_
+from ...utils import Optimizer, TensorList
+
+
+def _ensure_float(x):
+    if isinstance(x, torch.Tensor): return x.detach().cpu().item()
+    if isinstance(x, np.ndarray): return x.item()
+    return float(x)
+
+def _ensure_numpy(x):
+    if isinstance(x, torch.Tensor): return x.detach().cpu()
+    if isinstance(x, np.ndarray): return x
+    return np.array(x)
+
+
+Closure = Callable[[bool], Any]
+
+
+class DirectSearch(Optimizer):
+    """Use directsearch as pytorch optimizer.
+
+    Note that this performs full minimization on each step,
+    so usually you would want to perform a single step, although performing multiple steps will refine the
+    solution.
+
+    Args:
+        params: iterable of parameters to optimize or dicts defining parameter groups.
+
+        rho: Choice of the forcing function.
+
+        sketch_dim: Reduced dimension to generate polling directions in.
+
+        sketch_type: Sketching technique to be used.
+
+        maxevals: Maximum number of calls to f performed by the algorithm.
+
+        poll_type: Type of polling directions generated in the reduced spaces.
+
+        alpha0: Initial value for the stepsize parameter.
+
+        alpha_max: Maximum value for the stepsize parameter.
+
+        alpha_min: Minimum value for the stepsize parameter.
+
+        gamma_inc: Increase factor for the stepsize update.
+
+        gamma_dec: Decrease factor for the stepsize update.
+
+        verbose:
+            Boolean indicating whether information should be displayed during an algorithmic run.
+
+        print_freq:
+            Value indicating how frequently information should be displayed.
+
+        use_stochastic_three_points:
+            Boolean indicating whether the specific stochastic three points method should be used.
+
+        poll_scale_prob: Probability of scaling the polling directions.
+
+        poll_scale_factor: Factor used to scale the polling directions.
+
+        rho_uses_normd:
+            Boolean indicating whether the forcing function should account for the norm of the direction.
+
+
+    """
+    def __init__(
+        self,
+        params,
+        maxevals = DEFAULT_PARAMS['maxevals'], # Maximum number of function evaluations
+        rho = DEFAULT_PARAMS['rho'], # Forcing function
+        sketch_dim = DEFAULT_PARAMS['sketch_dim'], # Target dimension for sketching
+        sketch_type = DEFAULT_PARAMS['sketch_type'], # Sketching technique
+        poll_type = DEFAULT_PARAMS['poll_type'], # Polling direction type
+        alpha0 = DEFAULT_PARAMS['alpha0'], # Original stepsize value
+        alpha_max = DEFAULT_PARAMS['alpha_max'], # Maximum value for the stepsize
+        alpha_min = DEFAULT_PARAMS['alpha_min'], # Minimum value for the stepsize
+        gamma_inc = DEFAULT_PARAMS['gamma_inc'], # Increasing factor for the stepsize
+        gamma_dec = DEFAULT_PARAMS['gamma_dec'], # Decreasing factor for the stepsize
+        verbose = DEFAULT_PARAMS['verbose'], # Display information about the method
+        print_freq = DEFAULT_PARAMS['print_freq'], # How frequently to display information
+        use_stochastic_three_points = DEFAULT_PARAMS['use_stochastic_three_points'], # Boolean for a specific method
+        rho_uses_normd = DEFAULT_PARAMS['rho_uses_normd'], # Forcing function based on direction norm
+    ):
+        super().__init__(params, {})
+
+        kwargs = locals().copy()
+        del kwargs['self'], kwargs['params'], kwargs['__class__']
+        self._kwargs = kwargs
+
+    def _objective(self, x: np.ndarray, params: TensorList, closure):
+        params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+        return _ensure_float(closure(False))
+
+    @torch.no_grad
+    def step(self, closure: Closure):
+        params = self.get_params()
+
+        x0 = params.to_vec().detach().cpu().numpy()
+
+        res = directsearch.solve(
+            partial(self._objective, params = params, closure = closure),
+            x0 = x0,
+            **self._kwargs
+        )
+
+        params.from_vec_(torch.from_numpy(res.x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+        return res.f
+
+
+
+class DirectSearchDS(Optimizer):
+    def __init__(
+        self,
+        params,
+        maxevals = DEFAULT_PARAMS['maxevals'], # Maximum number of function evaluations
+        rho = DEFAULT_PARAMS['rho'], # Forcing function
+        poll_type = DEFAULT_PARAMS['poll_type'], # Polling direction type
+        alpha0 = DEFAULT_PARAMS['alpha0'], # Original stepsize value
+        alpha_max = DEFAULT_PARAMS['alpha_max'], # Maximum value for the stepsize
+        alpha_min = DEFAULT_PARAMS['alpha_min'], # Minimum value for the stepsize
+        gamma_inc = DEFAULT_PARAMS['gamma_inc'], # Increasing factor for the stepsize
+        gamma_dec = DEFAULT_PARAMS['gamma_dec'], # Decreasing factor for the stepsize
+        verbose = DEFAULT_PARAMS['verbose'], # Display information about the method
+        print_freq = DEFAULT_PARAMS['print_freq'], # How frequently to display information
+        rho_uses_normd = DEFAULT_PARAMS['rho_uses_normd'], # Forcing function based on direction norm
+    ):
+        super().__init__(params, {})
+
+        kwargs = locals().copy()
+        del kwargs['self'], kwargs['params'], kwargs['__class__']
+        self._kwargs = kwargs
+
+    def _objective(self, x: np.ndarray, params: TensorList, closure):
+        params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+        return _ensure_float(closure(False))
+
+    @torch.no_grad
+    def step(self, closure: Closure):
+        params = self.get_params()
+
+        x0 = params.to_vec().detach().cpu().numpy()
+
+        res = directsearch.solve_directsearch(
+            partial(self._objective, params = params, closure = closure),
+            x0 = x0,
+            **self._kwargs
+        )
+
+        params.from_vec_(torch.from_numpy(res.x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+        return res.f
+
+class DirectSearchProbabilistic(Optimizer):
+    def __init__(
+        self,
+        params,
+        maxevals = DEFAULT_PARAMS['maxevals'], # Maximum number of function evaluations
+        rho = DEFAULT_PARAMS['rho'], # Forcing function
+        alpha0 = DEFAULT_PARAMS['alpha0'], # Original stepsize value
+        alpha_max = DEFAULT_PARAMS['alpha_max'], # Maximum value for the stepsize
+        alpha_min = DEFAULT_PARAMS['alpha_min'], # Minimum value for the stepsize
+        gamma_inc = DEFAULT_PARAMS['gamma_inc'], # Increasing factor for the stepsize
+        gamma_dec = DEFAULT_PARAMS['gamma_dec'], # Decreasing factor for the stepsize
+        verbose = DEFAULT_PARAMS['verbose'], # Display information about the method
+        print_freq = DEFAULT_PARAMS['print_freq'], # How frequently to display information
+        rho_uses_normd = DEFAULT_PARAMS['rho_uses_normd'], # Forcing function based on direction norm
+    ):
+        super().__init__(params, {})
+
+        kwargs = locals().copy()
+        del kwargs['self'], kwargs['params'], kwargs['__class__']
+        self._kwargs = kwargs
+
+    def _objective(self, x: np.ndarray, params: TensorList, closure):
+        params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+        return _ensure_float(closure(False))
+
+    @torch.no_grad
+    def step(self, closure: Closure):
+        params = self.get_params()
+
+        x0 = params.to_vec().detach().cpu().numpy()
+
+        res = directsearch.solve_probabilistic_directsearch(
+            partial(self._objective, params = params, closure = closure),
+            x0 = x0,
+            **self._kwargs
+        )
+
+        params.from_vec_(torch.from_numpy(res.x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+        return res.f
+
+
+class DirectSearchSubspace(Optimizer):
+    def __init__(
+        self,
+        params,
+        maxevals = DEFAULT_PARAMS['maxevals'], # Maximum number of function evaluations
+        rho = DEFAULT_PARAMS['rho'], # Forcing function
+        sketch_dim = DEFAULT_PARAMS['sketch_dim'], # Target dimension for sketching
+        sketch_type = DEFAULT_PARAMS['sketch_type'], # Sketching technique
+        poll_type = DEFAULT_PARAMS['poll_type'], # Polling direction type
+        alpha0 = DEFAULT_PARAMS['alpha0'], # Original stepsize value
+        alpha_max = DEFAULT_PARAMS['alpha_max'], # Maximum value for the stepsize
+        alpha_min = DEFAULT_PARAMS['alpha_min'], # Minimum value for the stepsize
+        gamma_inc = DEFAULT_PARAMS['gamma_inc'], # Increasing factor for the stepsize
+        gamma_dec = DEFAULT_PARAMS['gamma_dec'], # Decreasing factor for the stepsize
+        verbose = DEFAULT_PARAMS['verbose'], # Display information about the method
+        print_freq = DEFAULT_PARAMS['print_freq'], # How frequently to display information
+        rho_uses_normd = DEFAULT_PARAMS['rho_uses_normd'], # Forcing function based on direction norm
+    ):
+        super().__init__(params, {})
+
+        kwargs = locals().copy()
+        del kwargs['self'], kwargs['params'], kwargs['__class__']
+        self._kwargs = kwargs
+
+    def _objective(self, x: np.ndarray, params: TensorList, closure):
+        params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+        return _ensure_float(closure(False))
+
+    @torch.no_grad
+    def step(self, closure: Closure):
+        params = self.get_params()
+
+        x0 = params.to_vec().detach().cpu().numpy()
+
+        res = directsearch.solve_subspace_directsearch(
+            partial(self._objective, params = params, closure = closure),
+            x0 = x0,
+            **self._kwargs
+        )
+
+        params.from_vec_(torch.from_numpy(res.x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+        return res.f
+
+
+
+class DirectSearchSTP(Optimizer):
+    def __init__(
+        self,
+        params,
+        maxevals = DEFAULT_PARAMS['maxevals'], # Maximum number of function evaluations
+        alpha0 = DEFAULT_PARAMS['alpha0'], # Original stepsize value
+        alpha_min = DEFAULT_PARAMS['alpha_min'], # Minimum value for the stepsize
+        verbose = DEFAULT_PARAMS['verbose'], # Display information about the method
+        print_freq = DEFAULT_PARAMS['print_freq'], # How frequently to display information
+    ):
+        super().__init__(params, {})
+
+        kwargs = locals().copy()
+        del kwargs['self'], kwargs['params'], kwargs['__class__']
+        self._kwargs = kwargs
+
+    def _objective(self, x: np.ndarray, params: TensorList, closure):
+        params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+        return _ensure_float(closure(False))
+
+    @torch.no_grad
+    def step(self, closure: Closure):
+        params = self.get_params()
+
+        x0 = params.to_vec().detach().cpu().numpy()
+
+        res = directsearch.solve_stp(
+            partial(self._objective, params = params, closure = closure),
+            x0 = x0,
+            **self._kwargs
+        )
+
+        params.from_vec_(torch.from_numpy(res.x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+        return res.f
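For context, a hedged usage sketch of the new wrapper: the import path follows the file location in this diff, the closure signature follows the `Closure = Callable[[bool], Any]` alias above, and the quadratic objective is invented for illustration. Since `step` runs a full `directsearch.solve`, a single call is normally enough.

```python
import torch
from torchzero.optim.wrappers.directsearch import DirectSearch  # path as added in this diff

x = torch.nn.Parameter(torch.randn(10))
opt = DirectSearch([x], maxevals=2000)

def closure(backward=True):
    # derivative-free: the wrapper only ever calls closure(False), so no .backward() is needed
    return (x ** 2).sum()

loss = opt.step(closure)  # one full direct-search run; the best point is written back into x
```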
torchzero/optim/wrappers/fcmaes.py

@@ -0,0 +1,105 @@
+from collections.abc import Callable
+from functools import partial
+from typing import Any, Literal
+
+import fcmaes
+import fcmaes.optimizer
+import fcmaes.retry
+import numpy as np
+import torch
+
+from ...utils import Optimizer, TensorList
+
+Closure = Callable[[bool], Any]
+
+
+def _ensure_float(x) -> float:
+    if isinstance(x, torch.Tensor): return x.detach().cpu().item()
+    if isinstance(x, np.ndarray): return float(x.item())
+    return float(x)
+
+def silence_fcmaes():
+    fcmaes.retry.logger.disable('fcmaes')
+
+class FcmaesWrapper(Optimizer):
+    """Use fcmaes as pytorch optimizer. Particularly fcmaes has BITEOPT which appears to win in many benchmarks.
+
+    Note that this performs full minimization on each step, so only perform one step with this.
+
+    Args:
+        params: iterable of parameters to optimize or dicts defining parameter groups.
+        lb (float): lower bounds, this can also be specified in param_groups.
+        ub (float): upper bounds, this can also be specified in param_groups.
+        optimizer (fcmaes.optimizer.Optimizer | None, optional):
+            optimizer to use. Default is a sequence of differential evolution and CMA-ES.
+        max_evaluations (int | None, optional):
+            Forced termination of all optimization runs after `max_evaluations` function evaluations.
+            Only used if optimizer is undefined, otherwise this setting is defined in the optimizer. Defaults to 50000.
+        value_limit (float | None, optional): Upper limit for optimized function values to be stored. Defaults to np.inf.
+        num_retries (int | None, optional): Number of optimization retries. Defaults to 1.
+        popsize (int | None, optional):
+            CMA-ES population size used for all CMA-ES runs.
+            Not used for differential evolution.
+            Ignored if parameter optimizer is defined. Defaults to 31.
+        capacity (int | None, optional): capacity of the evaluation store.. Defaults to 500.
+        stop_fitness (float | None, optional):
+            Limit for fitness value. optimization runs terminate if this value is reached. Defaults to -np.inf.
+        statistic_num (int | None, optional):
+            if > 0 stores the progress of the optimization. Defines the size of this store. Defaults to 0.
+    """
+    def __init__(
+        self,
+        params,
+        lb: float,
+        ub: float,
+        optimizer: fcmaes.optimizer.Optimizer | None = None,
+        max_evaluations: int | None = 50000,
+        value_limit: float | None = np.inf,
+        num_retries: int | None = 1,
+        # workers: int = 1,
+        popsize: int | None = 31,
+        capacity: int | None = 500,
+        stop_fitness: float | None = -np.inf,
+        statistic_num: int | None = 0
+    ):
+        super().__init__(params, lb=lb, ub=ub)
+        silence_fcmaes()
+        kwargs = locals().copy()
+        del kwargs['self'], kwargs['params'], kwargs['lb'], kwargs['ub'], kwargs['__class__']
+        self._kwargs = kwargs
+        self._kwargs['workers'] = 1
+
+    def _objective(self, x: np.ndarray, params: TensorList, closure) -> float:
+        if self.raised: return np.inf
+        try:
+            params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+            return _ensure_float(closure(False))
+        except Exception as e:
+            # ha ha, I found a way to make exceptions work in fcmaes and scipy direct
+            self.e = e
+            self.raised = True
+            return np.inf
+
+    @torch.no_grad
+    def step(self, closure: Closure):
+        self.raised = False
+        self.e = None
+
+        params = self.get_params()
+
+        lb, ub = self.group_vals('lb', 'ub', cls=list)
+        bounds = []
+        for p, l, u in zip(params, lb, ub):
+            bounds.extend([[l, u]] * p.numel())
+
+        res = fcmaes.retry.minimize(
+            partial(self._objective, params=params, closure=closure), # pyright:ignore[reportArgumentType]
+            bounds=bounds, # pyright:ignore[reportArgumentType]
+            **self._kwargs
+        )
+
+        params.from_vec_(torch.from_numpy(res.x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+
+        if self.e is not None: raise self.e from None
+        return res.fun
+
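A similar hedged sketch for the fcmaes wrapper: `lb`/`ub` are required because `fcmaes.retry.minimize` runs a bounded global search, and the toy objective is invented for illustration.

```python
import torch
from torchzero.optim.wrappers.fcmaes import FcmaesWrapper  # path as added in this diff

x = torch.nn.Parameter(torch.zeros(5))
opt = FcmaesWrapper([x], lb=-5.0, ub=5.0, max_evaluations=10_000)

def closure(backward=True):
    # only closure(False) is ever called, so no gradients are needed
    return (x ** 2 - x).sum()

best = opt.step(closure)  # one call performs the whole bounded search and writes the best x back
```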
torchzero/optim/wrappers/mads.py

@@ -0,0 +1,89 @@
+from collections.abc import Callable
+from functools import partial
+from typing import Any, Literal
+
+import numpy as np
+import torch
+from mads.mads import orthomads
+
+from ...utils import Optimizer, TensorList
+
+
+def _ensure_float(x):
+    if isinstance(x, torch.Tensor): return x.detach().cpu().item()
+    if isinstance(x, np.ndarray): return x.item()
+    return float(x)
+
+def _ensure_numpy(x):
+    if isinstance(x, torch.Tensor): return x.detach().cpu()
+    if isinstance(x, np.ndarray): return x
+    return np.array(x)
+
+
+Closure = Callable[[bool], Any]
+
+
+class MADS(Optimizer):
+    """Use mads.orthomads as pytorch optimizer.
+
+    Note that this performs full minimization on each step,
+    so usually you would want to perform a single step, although performing multiple steps will refine the
+    solution.
+
+    Args:
+        params: iterable of parameters to optimize or dicts defining parameter groups.
+        lb (float): lower bounds, this can also be specified in param_groups.
+        ub (float): upper bounds, this can also be specified in param_groups.
+        dp (float, optional): Initial poll size as percent of bounds. Defaults to 0.1.
+        dm (float, optional): Initial mesh size as percent of bounds. Defaults to 0.01.
+        dp_tol (float, optional): Minimum poll size stopping criteria. Defaults to -float('inf').
+        nitermax (float, optional): Maximum objective function evaluations. Defaults to float('inf').
+        displog (bool, optional): whether to show log. Defaults to False.
+        savelog (bool, optional): whether to save log. Defaults to False.
+    """
+    def __init__(
+        self,
+        params,
+        lb: float,
+        ub: float,
+        dp = 0.1,
+        dm = 0.01,
+        dp_tol = -float('inf'),
+        nitermax = float('inf'),
+        displog = False,
+        savelog = False,
+    ):
+        super().__init__(params, lb=lb, ub=ub)
+
+        kwargs = locals().copy()
+        del kwargs['self'], kwargs['params'], kwargs['lb'], kwargs['ub'], kwargs['__class__']
+        self._kwargs = kwargs
+
+    def _objective(self, x: np.ndarray, params: TensorList, closure):
+        params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+        return _ensure_float(closure(False))
+
+    @torch.no_grad
+    def step(self, closure: Closure):
+        params = self.get_params()
+
+        x0 = params.to_vec().detach().cpu().numpy()
+
+        lb, ub = self.group_vals('lb', 'ub', cls=list)
+        bounds_lower = []
+        bounds_upper = []
+        for p, l, u in zip(params, lb, ub):
+            bounds_lower.extend([l] * p.numel())
+            bounds_upper.extend([u] * p.numel())
+
+        f, x = orthomads(
+            design_variables=x0,
+            bounds_upper=np.asarray(bounds_upper),
+            bounds_lower=np.asarray(bounds_lower),
+            objective_function=partial(self._objective, params = params, closure = closure),
+            **self._kwargs
+        )
+
+        params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+        return f
+
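And a hedged sketch for the MADS wrapper: bounds are per-parameter-group scalars as in the docstring, and the objective is again invented for illustration.

```python
import torch
from torchzero.optim.wrappers.mads import MADS  # path as added in this diff

w = torch.nn.Parameter(torch.full((3,), 2.0))
opt = MADS([w], lb=-10.0, ub=10.0, nitermax=5000)

def closure(backward=True):
    return ((w - 1.0) ** 2).sum()

f_best = opt.step(closure)  # orthomads polls inside [lb, ub]; the best point is written back into w
```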
torchzero/optim/wrappers/nevergrad.py

@@ -9,12 +9,12 @@ import nevergrad as ng
 from ...utils import Optimizer
 
 
-def _ensure_float(x):
+def _ensure_float(x) -> float:
     if isinstance(x, torch.Tensor): return x.detach().cpu().item()
-    if isinstance(x, np.ndarray): return x.item()
+    if isinstance(x, np.ndarray): return float(x.item())
     return float(x)
 
-class
+class NevergradWrapper(Optimizer):
     """Use nevergrad optimizer as pytorch optimizer.
     Note that it is recommended to specify `budget` to the number of iterations you expect to run,
     as some nevergrad optimizers will error without it.
@@ -29,6 +29,12 @@ class NevergradOptimizer(Optimizer):
            use certain rule for first 50% of the steps, and then switch to another rule.
            This parameter doesn't actually limit the maximum number of steps!
            But it doesn't have to be exact. Defaults to None.
+        lb (float | None, optional):
+            lower bounds, this can also be specified in param_groups. Bounds are optional, however
+            some nevergrad algorithms will raise an exception of bounds are not specified.
+        ub (float, optional):
+            upper bounds, this can also be specified in param_groups. Bounds are optional, however
+            some nevergrad algorithms will raise an exception of bounds are not specified.
         mutable_sigma (bool, optional):
             nevergrad parameter, sets whether the mutation standard deviation must mutate as well
             (for mutation based algorithms). Defaults to False.
@@ -44,11 +50,20 @@ class NevergradOptimizer(Optimizer):
         params,
         opt_cls:"type[ng.optimizers.base.Optimizer] | abc.Callable[..., ng.optimizers.base.Optimizer]",
         budget: int | None = None,
-        mutable_sigma = False,
         lb: float | None = None,
         ub: float | None = None,
+        mutable_sigma = False,
         use_init = True,
     ):
+        """_summary_
+
+        Args:
+            params (_type_): _description_
+            opt_cls (type[ng.optimizers.base.Optimizer] | abc.Callable[..., ng.optimizers.base.Optimizer]): _description_
+            budget (int | None, optional): _description_. Defaults to None.
+            mutable_sigma (bool, optional): _description_. Defaults to False.
+            use_init (bool, optional): _description_. Defaults to True.
+        """
         defaults = dict(lb=lb, ub=ub, use_init=use_init, mutable_sigma=mutable_sigma)
         super().__init__(params, defaults)
         self.opt_cls = opt_cls
@@ -56,7 +71,7 @@ class NevergradOptimizer(Optimizer):
         self.budget = budget
 
     @torch.no_grad
-    def step(self, closure): #
+    def step(self, closure): # pylint:disable=signature-differs # pyright:ignore[reportIncompatibleMethodOverride]
         params = self.get_params()
         if self.opt is None:
             ng_params = []