torchzero 0.3.15__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/test_identical.py +2 -2
- tests/test_module_autograd.py +586 -0
- tests/test_objective.py +188 -0
- tests/test_opts.py +43 -33
- tests/test_tensorlist.py +0 -8
- tests/test_utils_optimizer.py +0 -1
- torchzero/__init__.py +1 -1
- torchzero/core/__init__.py +7 -4
- torchzero/core/chain.py +20 -23
- torchzero/core/functional.py +90 -24
- torchzero/core/modular.py +48 -52
- torchzero/core/module.py +130 -50
- torchzero/core/objective.py +948 -0
- torchzero/core/reformulation.py +55 -24
- torchzero/core/transform.py +261 -367
- torchzero/linalg/__init__.py +10 -0
- torchzero/linalg/eigh.py +34 -0
- torchzero/linalg/linalg_utils.py +14 -0
- torchzero/{utils/linalg → linalg}/linear_operator.py +99 -49
- torchzero/linalg/matrix_power.py +28 -0
- torchzero/linalg/orthogonalize.py +95 -0
- torchzero/{utils/linalg → linalg}/qr.py +4 -2
- torchzero/{utils/linalg → linalg}/solve.py +76 -88
- torchzero/linalg/svd.py +20 -0
- torchzero/linalg/torch_linalg.py +168 -0
- torchzero/modules/adaptive/__init__.py +1 -1
- torchzero/modules/adaptive/adagrad.py +163 -213
- torchzero/modules/adaptive/adahessian.py +74 -103
- torchzero/modules/adaptive/adam.py +53 -76
- torchzero/modules/adaptive/adan.py +49 -30
- torchzero/modules/adaptive/adaptive_heavyball.py +11 -6
- torchzero/modules/adaptive/aegd.py +12 -12
- torchzero/modules/adaptive/esgd.py +98 -119
- torchzero/modules/adaptive/lion.py +5 -10
- torchzero/modules/adaptive/lmadagrad.py +87 -32
- torchzero/modules/adaptive/mars.py +5 -5
- torchzero/modules/adaptive/matrix_momentum.py +47 -51
- torchzero/modules/adaptive/msam.py +70 -52
- torchzero/modules/adaptive/muon.py +59 -124
- torchzero/modules/adaptive/natural_gradient.py +33 -28
- torchzero/modules/adaptive/orthograd.py +11 -15
- torchzero/modules/adaptive/rmsprop.py +83 -75
- torchzero/modules/adaptive/rprop.py +48 -47
- torchzero/modules/adaptive/sam.py +55 -45
- torchzero/modules/adaptive/shampoo.py +123 -129
- torchzero/modules/adaptive/soap.py +207 -143
- torchzero/modules/adaptive/sophia_h.py +106 -130
- torchzero/modules/clipping/clipping.py +15 -18
- torchzero/modules/clipping/ema_clipping.py +31 -25
- torchzero/modules/clipping/growth_clipping.py +14 -17
- torchzero/modules/conjugate_gradient/cg.py +26 -37
- torchzero/modules/experimental/__init__.py +2 -6
- torchzero/modules/experimental/coordinate_momentum.py +36 -0
- torchzero/modules/experimental/curveball.py +25 -41
- torchzero/modules/experimental/gradmin.py +2 -2
- torchzero/modules/experimental/higher_order_newton.py +14 -40
- torchzero/modules/experimental/newton_solver.py +22 -53
- torchzero/modules/experimental/newtonnewton.py +15 -12
- torchzero/modules/experimental/reduce_outward_lr.py +7 -7
- torchzero/modules/experimental/scipy_newton_cg.py +21 -24
- torchzero/modules/experimental/spsa1.py +3 -3
- torchzero/modules/experimental/structural_projections.py +1 -4
- torchzero/modules/functional.py +1 -1
- torchzero/modules/grad_approximation/forward_gradient.py +7 -7
- torchzero/modules/grad_approximation/grad_approximator.py +23 -16
- torchzero/modules/grad_approximation/rfdm.py +20 -17
- torchzero/modules/least_squares/gn.py +90 -42
- torchzero/modules/line_search/backtracking.py +2 -2
- torchzero/modules/line_search/line_search.py +32 -32
- torchzero/modules/line_search/strong_wolfe.py +2 -2
- torchzero/modules/misc/debug.py +12 -12
- torchzero/modules/misc/escape.py +10 -10
- torchzero/modules/misc/gradient_accumulation.py +10 -78
- torchzero/modules/misc/homotopy.py +16 -8
- torchzero/modules/misc/misc.py +120 -122
- torchzero/modules/misc/multistep.py +50 -48
- torchzero/modules/misc/regularization.py +49 -44
- torchzero/modules/misc/split.py +30 -28
- torchzero/modules/misc/switch.py +37 -32
- torchzero/modules/momentum/averaging.py +14 -14
- torchzero/modules/momentum/cautious.py +34 -28
- torchzero/modules/momentum/momentum.py +11 -11
- torchzero/modules/ops/__init__.py +4 -4
- torchzero/modules/ops/accumulate.py +21 -21
- torchzero/modules/ops/binary.py +67 -66
- torchzero/modules/ops/higher_level.py +19 -19
- torchzero/modules/ops/multi.py +44 -41
- torchzero/modules/ops/reduce.py +26 -23
- torchzero/modules/ops/unary.py +53 -53
- torchzero/modules/ops/utility.py +47 -46
- torchzero/modules/projections/galore.py +1 -1
- torchzero/modules/projections/projection.py +43 -43
- torchzero/modules/quasi_newton/damping.py +1 -1
- torchzero/modules/quasi_newton/lbfgs.py +7 -7
- torchzero/modules/quasi_newton/lsr1.py +7 -7
- torchzero/modules/quasi_newton/quasi_newton.py +10 -10
- torchzero/modules/quasi_newton/sg2.py +19 -19
- torchzero/modules/restarts/restars.py +26 -24
- torchzero/modules/second_order/__init__.py +2 -2
- torchzero/modules/second_order/ifn.py +31 -62
- torchzero/modules/second_order/inm.py +49 -53
- torchzero/modules/second_order/multipoint.py +40 -80
- torchzero/modules/second_order/newton.py +57 -90
- torchzero/modules/second_order/newton_cg.py +102 -154
- torchzero/modules/second_order/nystrom.py +157 -177
- torchzero/modules/second_order/rsn.py +106 -96
- torchzero/modules/smoothing/laplacian.py +13 -12
- torchzero/modules/smoothing/sampling.py +11 -10
- torchzero/modules/step_size/adaptive.py +23 -23
- torchzero/modules/step_size/lr.py +15 -15
- torchzero/modules/termination/termination.py +32 -30
- torchzero/modules/trust_region/cubic_regularization.py +2 -2
- torchzero/modules/trust_region/levenberg_marquardt.py +25 -28
- torchzero/modules/trust_region/trust_cg.py +1 -1
- torchzero/modules/trust_region/trust_region.py +27 -22
- torchzero/modules/variance_reduction/svrg.py +21 -18
- torchzero/modules/weight_decay/__init__.py +2 -1
- torchzero/modules/weight_decay/reinit.py +83 -0
- torchzero/modules/weight_decay/weight_decay.py +12 -13
- torchzero/modules/wrappers/optim_wrapper.py +10 -10
- torchzero/modules/zeroth_order/cd.py +9 -6
- torchzero/optim/root.py +3 -3
- torchzero/optim/utility/split.py +2 -1
- torchzero/optim/wrappers/directsearch.py +27 -63
- torchzero/optim/wrappers/fcmaes.py +14 -35
- torchzero/optim/wrappers/mads.py +11 -31
- torchzero/optim/wrappers/moors.py +66 -0
- torchzero/optim/wrappers/nevergrad.py +4 -4
- torchzero/optim/wrappers/nlopt.py +31 -25
- torchzero/optim/wrappers/optuna.py +6 -13
- torchzero/optim/wrappers/pybobyqa.py +124 -0
- torchzero/optim/wrappers/scipy/__init__.py +7 -0
- torchzero/optim/wrappers/scipy/basin_hopping.py +117 -0
- torchzero/optim/wrappers/scipy/brute.py +48 -0
- torchzero/optim/wrappers/scipy/differential_evolution.py +80 -0
- torchzero/optim/wrappers/scipy/direct.py +69 -0
- torchzero/optim/wrappers/scipy/dual_annealing.py +115 -0
- torchzero/optim/wrappers/scipy/experimental.py +141 -0
- torchzero/optim/wrappers/scipy/minimize.py +151 -0
- torchzero/optim/wrappers/scipy/sgho.py +111 -0
- torchzero/optim/wrappers/wrapper.py +121 -0
- torchzero/utils/__init__.py +7 -25
- torchzero/utils/compile.py +2 -2
- torchzero/utils/derivatives.py +93 -69
- torchzero/utils/optimizer.py +4 -77
- torchzero/utils/python_tools.py +31 -0
- torchzero/utils/tensorlist.py +11 -5
- torchzero/utils/thoad_tools.py +68 -0
- {torchzero-0.3.15.dist-info → torchzero-0.4.0.dist-info}/METADATA +1 -1
- torchzero-0.4.0.dist-info/RECORD +191 -0
- tests/test_vars.py +0 -185
- torchzero/core/var.py +0 -376
- torchzero/modules/experimental/momentum.py +0 -160
- torchzero/optim/wrappers/scipy.py +0 -572
- torchzero/utils/linalg/__init__.py +0 -12
- torchzero/utils/linalg/matrix_funcs.py +0 -87
- torchzero/utils/linalg/orthogonalize.py +0 -12
- torchzero/utils/linalg/svd.py +0 -20
- torchzero/utils/ops.py +0 -10
- torchzero-0.3.15.dist-info/RECORD +0 -175
- /torchzero/{utils/linalg → linalg}/benchmark.py +0 -0
- {torchzero-0.3.15.dist-info → torchzero-0.4.0.dist-info}/WHEEL +0 -0
- {torchzero-0.3.15.dist-info → torchzero-0.4.0.dist-info}/top_level.txt +0 -0
torchzero/optim/wrappers/nlopt.py

@@ -1,3 +1,4 @@
+import warnings
 from typing import Literal, Any
 from collections.abc import Mapping, Callable
 from functools import partial
@@ -5,7 +6,8 @@ import numpy as np
 import torch
 
 import nlopt
-from ...utils import
+from ...utils import TensorList
+from .wrapper import WrapperBase
 
 _ALGOS_LITERAL = Literal[
     "GN_DIRECT", # = _nlopt.GN_DIRECT
@@ -69,14 +71,14 @@ def _ensure_tensor(x):
 inf = float('inf')
 Closure = Callable[[bool], Any]
 
-class NLOptWrapper(
+class NLOptWrapper(WrapperBase):
     """Use nlopt as pytorch optimizer, with gradient supplied by pytorch autograd.
     Note that this performs full minimization on each step,
     so usually you would want to perform a single step, although performing multiple steps will refine the
     solution.
 
     Args:
-        params: iterable of parameters to optimize or dicts defining parameter groups.
+        params (Iterable): iterable of parameters to optimize or dicts defining parameter groups.
         algorithm (int | _ALGOS_LITERAL): optimization algorithm from https://nlopt.readthedocs.io/en/latest/NLopt_Algorithms/
         maxeval (int | None):
             maximum allowed function evaluations, set to None to disable. But some stopping criterion
@@ -96,21 +98,30 @@ class NLOptWrapper(Optimizer):
         algorithm: int | _ALGOS_LITERAL,
         lb: float | None = None,
         ub: float | None = None,
-        maxeval: int | None =
+        maxeval: int | None = None, # None can stall on some algos and because they are threaded C you can't even interrupt them
        stopval: float | None = None,
         ftol_rel: float | None = None,
         ftol_abs: float | None = None,
         xtol_rel: float | None = None,
         xtol_abs: float | None = None,
         maxtime: float | None = None,
+        require_criterion: bool = True,
     ):
+        if require_criterion:
+            if all(i is None for i in (maxeval, stopval, ftol_abs, ftol_rel, xtol_abs, xtol_rel)):
+                raise RuntimeError(
+                    "Specify at least one stopping criterion out of "
+                    "(maxeval, stopval, ftol_rel, ftol_abs, xtol_rel, xtol_abs, maxtime). "
+                    "Pass `require_criterion=False` to suppress this error."
+                )
+
         defaults = dict(lb=lb, ub=ub)
         super().__init__(params, defaults)
 
         self.opt: nlopt.opt | None = None
+        self.algorithm_name: str | int = algorithm
         if isinstance(algorithm, str): algorithm = getattr(nlopt, algorithm.upper())
         self.algorithm: int = algorithm # type:ignore
-        self.algorithm_name: str | None = None
 
         self.maxeval = maxeval; self.stopval = stopval
         self.ftol_rel = ftol_rel; self.ftol_abs = ftol_abs
@@ -119,7 +130,7 @@ class NLOptWrapper(Optimizer):
 
         self._last_loss = None
 
-    def
+    def _objective(self, x: np.ndarray, grad: np.ndarray, closure, params: TensorList):
         if self.raised:
             if self.opt is not None: self.opt.force_stop()
             return np.inf
@@ -132,7 +143,7 @@ class NLOptWrapper(Optimizer):
         if grad.size > 0:
             with torch.enable_grad(): loss = closure()
             self._last_loss = _ensure_float(loss)
-            grad[:] = params.
+            grad[:] = params.grad.fill_none_(reference=params).to_vec().reshape(grad.shape).numpy(force=True)
             return self._last_loss
 
         self._last_loss = _ensure_float(closure(False))
@@ -147,25 +158,20 @@
     def step(self, closure: Closure): # pylint: disable = signature-differs # pyright:ignore[reportIncompatibleMethodOverride]
         self.e = None
         self.raised = False
-        params = self.
-
-        # make bounds
-        lb, ub = self.group_vals('lb', 'ub', cls=list)
-        lower = []
-        upper = []
-        for p, l, u in zip(params, lb, ub):
-            if l is None: l = -inf
-            if u is None: u = inf
-            lower.extend([l] * p.numel())
-            upper.extend([u] * p.numel())
+        params = TensorList(self._get_params())
+        x0 = params.to_vec().numpy(force=True)
 
-
+        plb, pub = self._get_per_parameter_lb_ub()
+        if all(i is None for i in plb) and all(i is None for i in pub):
+            lb = ub = None
+        else:
+            lb, ub = self._get_lb_ub(ld = {None: -np.inf}, ud = {None: np.inf})
 
         self.opt = nlopt.opt(self.algorithm, x0.size)
         self.opt.set_exceptions_enabled(False) # required
-        self.opt.set_min_objective(partial(self.
-        self.opt.set_lower_bounds(
-        self.opt.set_upper_bounds(
+        self.opt.set_min_objective(partial(self._objective, closure = closure, params = params))
+        if lb is not None: self.opt.set_lower_bounds(np.asarray(lb, dtype=x0.dtype))
+        if ub is not None: self.opt.set_upper_bounds(np.asarray(ub, dtype=x0.dtype))
 
         if self.maxeval is not None: self.opt.set_maxeval(self.maxeval)
         if self.stopval is not None: self.opt.set_stopval(self.stopval)
@@ -179,12 +185,12 @@
         x = None
         try:
             x = self.opt.optimize(x0)
-        except SystemError:
-
+        # except SystemError as s:
+        #     warnings.warn(f"{self.algorithm_name} raised {s}")
         except Exception as e:
             raise e from None
 
-        if x is not None: params.from_vec_(torch.
+        if x is not None: params.from_vec_(torch.as_tensor(x, device = params[0].device, dtype=params[0].dtype))
         if self.e is not None: raise self.e from None
 
         if self._last_loss is None or x is None: return closure(False)
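For orientation, a minimal usage sketch of the reworked NLOptWrapper (illustrative only, not taken from the package; it assumes nothing beyond what the diff shows: the constructor arguments, the new requirement to pass at least one stopping criterion, and the closure convention where closure(False) evaluates the loss without a backward pass):

import torch
from torchzero.optim.wrappers.nlopt import NLOptWrapper

model = torch.nn.Linear(10, 1)
X, y = torch.randn(64, 10), torch.randn(64, 1)

# a gradient-based nlopt algorithm; maxeval satisfies the new stopping-criterion check
opt = NLOptWrapper(model.parameters(), algorithm="LD_LBFGS", maxeval=1000)

def closure(backward=True):
    loss = torch.nn.functional.mse_loss(model(X), y)
    if backward:
        model.zero_grad()
        loss.backward()
    return loss

final_loss = opt.step(closure)  # a single step runs the full nlopt optimization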
torchzero/optim/wrappers/optuna.py

@@ -1,23 +1,16 @@
-import
-from collections import abc
-
-import numpy as np
+import optuna
 import torch
 
-import
+from ...utils import TensorList, tofloat, totensor
+from .wrapper import WrapperBase
 
-from ...utils import Optimizer, totensor, tofloat
 
 def silence_optuna():
     optuna.logging.set_verbosity(optuna.logging.WARNING)
 
-def _ensure_float(x) -> float:
-    if isinstance(x, torch.Tensor): return x.detach().cpu().item()
-    if isinstance(x, np.ndarray): return float(x.item())
-    return float(x)
 
 
-class OptunaSampler(
+class OptunaSampler(WrapperBase):
     """Optimize your next SOTA model using hyperparameter optimization.
 
     Note - optuna is surprisingly scalable to large number of parameters (up to 10,000), despite literally requiring a for-loop because it only supports scalars. Default TPESampler is good for BBO. Maybe not for NNs...
@@ -38,7 +31,7 @@ class OptunaSampler(Optimizer):
         silence: bool = True,
     ):
         if silence: silence_optuna()
-        super().__init__(params, lb=lb, ub=ub)
+        super().__init__(params, dict(lb=lb, ub=ub))
 
         if isinstance(sampler, type): sampler = sampler()
         self.sampler = sampler
@@ -47,7 +40,7 @@ class OptunaSampler(Optimizer):
     @torch.no_grad
     def step(self, closure):
 
-        params = self.
+        params = TensorList(self._get_params())
         if self.study is None:
             self.study = optuna.create_study(sampler=self.sampler)
 
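A hypothetical way to drive the refactored OptunaSampler (not from the package; it assumes the lb/ub arguments shown in the constructor and a value-only closure, since the sampler consumes losses rather than gradients):

import torch
from torchzero.optim.wrappers.optuna import OptunaSampler

x = torch.nn.Parameter(torch.randn(8))
opt = OptunaSampler([x], lb=-5.0, ub=5.0)

def closure(backward=True):
    # no backward pass needed for a sampler-based optimizer
    return (x ** 2).sum()

for _ in range(200):
    opt.step(closure)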
torchzero/optim/wrappers/pybobyqa.py (new file)

@@ -0,0 +1,124 @@
+from collections.abc import Callable
+from functools import partial
+from typing import Any, Literal
+
+import numpy as np
+import torch
+import pybobyqa
+
+from ...utils import TensorList
+from .wrapper import WrapperBase
+
+Closure = Callable[[bool], Any]
+
+
+class PyBobyqaWrapper(WrapperBase):
+    """Use Py-BOBYQA is PyTorch optimizer.
+
+    Note that this performs full minimization on each step,
+    so usually you would want to perform a single step, although performing multiple steps will refine the
+    solution.
+
+    See https://numericalalgorithmsgroup.github.io/pybobyqa/build/html/userguide.html for detailed descriptions of arguments.
+
+    Args:
+        params (Iterable): iterable of parameters to optimize or dicts defining parameter groups.
+        lb (float | None, optional): optional lower bounds. Defaults to None.
+        ub (float | None, optional): optional upper bounds. Defaults to None.
+        projections (list[Callable] | None, optional):
+            a list of functions defining the Euclidean projections for each general convex constraint C_i.
+            Each element of the list projections is a function that takes an input vector x (numpy array)
+            and returns the closest point to that is in C_i. Defaults to None.
+        npt (int | None, optional): the number of interpolation points to use. Defaults to None.
+        rhobeg (float | None, optional):
+            the initial value of the trust region radius. Defaults to None.
+        rhoend (float | None, optional):
+            minimum allowed value of trust region radius, which determines when a successful
+            termination occurs. Defaults to 1e-8.
+        maxfun (int | None, optional):
+            the maximum number of objective evaluations the algorithm may request,
+            default is min(100(n+1), 1000). Defaults to None.
+        nsamples (Callable | None, optional):
+            a Python function nsamples(delta, rho, iter, nrestarts)
+            which returns the number of times to evaluate objfun at a given point.
+            This is only applicable for objectives with stochastic noise,
+            when averaging multiple evaluations at the same point produces a more accurate value.
+            The input parameters are the trust region radius (delta),
+            the lower bound on the trust region radius (rho),
+            how many iterations the algorithm has been running for (iter),
+            and how many restarts have been performed (nrestarts).
+            Default is no averaging (i.e. nsamples(delta, rho, iter, nrestarts)=1).
+            Defaults to None.
+        user_params (dict | None, optional):
+            dictionary of advanced parameters,
+            see https://numericalalgorithmsgroup.github.io/pybobyqa/build/html/advanced.html).
+            Defaults to None.
+        objfun_has_noise (bool, optional):
+            a flag to indicate whether or not objfun has stochastic noise;
+            i.e. will calling objfun(x) multiple times at the same value of x give different results?
+            This is used to set some sensible default parameters (including using multiple restarts),
+            all of which can be overridden by the values provided in user_params. Defaults to False.
+        seek_global_minimum (bool, optional):
+            a flag to indicate whether to search for a global minimum, rather than a local minimum.
+            This is used to set some sensible default parameters,
+            all of which can be overridden by the values provided in user_params.
+            If True, both upper and lower bounds must be set.
+            Note that Py-BOBYQA only implements a heuristic method,
+            so there are no guarantees it will find a global minimum.
+            However, by using this flag, it is more likely to escape local minima
+            if there are better values nearby. The method used is a multiple restart mechanism,
+            where we repeatedly re-initialize Py-BOBYQA from the best point found so far,
+            but where we use a larger trust reigon radius each time
+            (note: this is different to more common multi-start approach to global optimization).
+            Defaults to False.
+        scaling_within_bounds (bool, optional):
+            a flag to indicate whether the algorithm should internally shift and scale the entries of x
+            so that the bounds become 0 <= x <= 1. This is useful is you are setting bounds and the
+            bounds have different orders of magnitude. If scaling_within_bounds=True,
+            the values of rhobeg and rhoend apply to the shifted variables. Defaults to False.
+        do_logging (bool, optional):
+            a flag to indicate whether logging output should be produced.
+            This is not automatically visible unless you use the Python logging module. Defaults to True.
+        print_progress (bool, optional):
+            a flag to indicate whether to print a per-iteration progress log to terminal. Defaults to False.
+    """
+    def __init__(
+        self,
+        params,
+        lb: float | None = None,
+        ub: float | None = None,
+        projections = None,
+        npt: int | None = None,
+        rhobeg: float | None = None,
+        rhoend: float = 1e-8,
+        maxfun: int | None = None,
+        nsamples: Callable | None | None = None,
+        user_params: dict[str, Any] | None = None,
+        objfun_has_noise: bool = False,
+        seek_global_minimum: bool = False,
+        scaling_within_bounds: bool = False,
+        do_logging: bool = True,
+        print_progress: bool = False,
+    ):
+        super().__init__(params, dict(lb=lb, ub=ub))
+        kwargs = locals().copy()
+        for k in ["self", "__class__", "params", "lb", "ub"]:
+            del kwargs[k]
+        self._kwargs = kwargs
+
+    @torch.no_grad
+    def step(self, closure: Closure):
+        params = TensorList(self._get_params())
+        x0 = params.to_vec().numpy(force=True)
+        bounds = self._get_bounds()
+
+        soln: pybobyqa.solver.OptimResults = pybobyqa.solve(
+            objfun=partial(self._f, closure=closure, params=params),
+            x0=x0,
+            bounds=bounds,
+            **self._kwargs
+        )
+
+        params.from_vec_(torch.as_tensor(soln.x, device = params[0].device, dtype=params[0].dtype,))
+        return soln.f
+
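A minimal usage sketch for the new PyBobyqaWrapper (illustrative; it assumes only the constructor and step(closure) interface shown above, with a derivative-free closure because Py-BOBYQA needs function values only):

import torch
from torchzero.optim.wrappers.pybobyqa import PyBobyqaWrapper

x = torch.nn.Parameter(torch.randn(16))
# seek_global_minimum requires both bounds to be set, per the docstring above
opt = PyBobyqaWrapper([x], lb=-5.0, ub=5.0, maxfun=2000, seek_global_minimum=True)

def closure(backward=True):
    return ((x - 3.0) ** 2).sum()

best = opt.step(closure)  # a single step runs the full Py-BOBYQA solve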
torchzero/optim/wrappers/scipy/__init__.py (new file)

@@ -0,0 +1,7 @@
+from .basin_hopping import ScipyBasinHopping
+from .brute import ScipyBrute
+from .differential_evolution import ScipyDE
+from .direct import ScipyDIRECT
+from .dual_annealing import ScipyDualAnnealing
+from .minimize import ScipyMinimize
+from .sgho import ScipySHGO
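This subpackage replaces the former single-module torchzero/optim/wrappers/scipy.py (removed in this release, see the file list above), so the scipy-backed wrappers are now imported from the package path, for example:

from torchzero.optim.wrappers.scipy import ScipyDE, ScipyMinimize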
torchzero/optim/wrappers/scipy/basin_hopping.py (new file)

@@ -0,0 +1,117 @@
+from collections.abc import Callable
+from functools import partial
+from typing import Any, Literal
+
+import numpy as np
+import scipy.optimize
+import torch
+
+from ....utils import TensorList
+from ..wrapper import WrapperBase
+from .minimize import _use_jac_hess_hessp
+
+Closure = Callable[[bool], Any]
+
+
+class ScipyBasinHopping(WrapperBase):
+    def __init__(
+        self,
+        params,
+        niter: int = 100,
+        T: float = 1,
+        stepsize: float = 0.5,
+        minimizer_kwargs: dict | None = None,
+        take_step: Callable | None = None,
+        accept_test: Callable | None = None,
+        callback: Callable | None = None,
+        interval: int = 50,
+        disp: bool = False,
+        niter_success: int | None = None,
+        rng: int | np.random.Generator | None = None,
+        lb:float | None = None,
+        ub:float | None = None,
+        method: Literal['nelder-mead', 'powell', 'cg', 'bfgs', 'newton-cg',
+                        'l-bfgs-b', 'tnc', 'cobyla', 'cobyqa', 'slsqp',
+                        'trust-constr', 'dogleg', 'trust-ncg', 'trust-exact',
+                        'trust-krylov'] | str | None = None,
+        jac: Literal['2-point', '3-point', 'cs', 'autograd'] = 'autograd',
+        hess: Literal['2-point', '3-point', 'cs', 'autograd'] | scipy.optimize.HessianUpdateStrategy = 'autograd',
+        use_hessp: bool = True,
+
+        *,
+        target_accept_rate: float = 0.5,
+        stepwise_factor: float = 0.9
+    ):
+        super().__init__(params, dict(lb=lb, ub=ub))
+
+        kwargs = locals().copy()
+        del kwargs['self'], kwargs['params'], kwargs['__class__'], kwargs["minimizer_kwargs"]
+        del kwargs['method'], kwargs["jac"], kwargs['hess'], kwargs['use_hessp']
+        del kwargs["lb"], kwargs["ub"]
+        self._kwargs = kwargs
+
+        self._minimizer_kwargs = minimizer_kwargs
+        self.method = method
+        self.hess = hess
+        self.jac, self.use_jac_autograd, self.use_hess_autograd, self.use_hessp = _use_jac_hess_hessp(method, jac, hess, use_hessp)
+
+    def _jac(self, x: np.ndarray, params: list[torch.Tensor], closure):
+        f,g = self._f_g(x, params, closure)
+        return g
+
+    def _objective(self, x: np.ndarray, params: list[torch.Tensor], closure):
+        if self.use_jac_autograd:
+            f, g = self._f_g(x, params, closure)
+            if self.method is not None and self.method.lower() == 'slsqp': g = g.astype(np.float64) # slsqp requires float64
+            return f, g
+
+        return self._f(x, params, closure)
+
+    def _hess(self, x: np.ndarray, params: list[torch.Tensor], closure):
+        f,g,H = self._f_g_H(x, params, closure)
+        return H
+
+    def _hessp(self, x: np.ndarray, p:np.ndarray, params: list[torch.Tensor], closure):
+        f,g,Hvp = self._f_g_Hvp(x, p, params, closure)
+        return Hvp
+
+    @torch.no_grad
+    def step(self, closure: Closure):
+        params = TensorList(self._get_params())
+        x0 = params.to_vec().numpy(force=True)
+        bounds = self._get_bounds()
+
+        # determine hess argument
+        hess = self.hess
+        hessp = None
+        if hess == 'autograd':
+            if self.use_hess_autograd:
+                if self.use_hessp:
+                    hessp = partial(self._hessp, params=params, closure=closure)
+                    hess = None
+                else:
+                    hess = partial(self._hess, params=params, closure=closure)
+            # hess = 'autograd' but method doesn't use hess
+            else:
+                hess = None
+
+
+        if self.method is not None and (self.method.lower() == 'tnc' or self.method.lower() == 'slsqp'):
+            x0 = x0.astype(np.float64) # those methods error without this
+
+        minimizer_kwargs = self._minimizer_kwargs.copy() if self._minimizer_kwargs is not None else {}
+        minimizer_kwargs.setdefault("method", self.method)
+        minimizer_kwargs.setdefault("jac", self.jac)
+        minimizer_kwargs.setdefault("hess", hess)
+        minimizer_kwargs.setdefault("hessp", hessp)
+        minimizer_kwargs.setdefault("bounds", bounds)
+
+        res = scipy.optimize.basinhopping(
+            partial(self._objective, params = params, closure = closure),
+            x0 = params.to_vec().numpy(force=True),
+            minimizer_kwargs=minimizer_kwargs,
+            **self._kwargs
+        )
+
+        params.from_vec_(torch.as_tensor(res.x, device = params[0].device, dtype=params[0].dtype))
+        return res.fun
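A usage sketch for ScipyBasinHopping (illustrative; it keeps the jac='autograd' default shown above, so the closure is expected to populate gradients when asked, as in the other gradient-capable wrappers):

import torch
from torchzero.optim.wrappers.scipy import ScipyBasinHopping

x = torch.nn.Parameter(torch.randn(8))
opt = ScipyBasinHopping([x], niter=50, method='l-bfgs-b')

def closure(backward=True):
    loss = (x ** 2 - torch.sin(5 * x)).sum()
    if backward:
        x.grad = None
        loss.backward()
    return loss

loss = opt.step(closure)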
torchzero/optim/wrappers/scipy/brute.py (new file)

@@ -0,0 +1,48 @@
+from collections.abc import Callable
+from functools import partial
+from typing import Any, Literal
+
+import numpy as np
+import scipy.optimize
+import torch
+
+from ....utils import TensorList
+from ..wrapper import WrapperBase
+
+Closure = Callable[[bool], Any]
+
+
+
+class ScipyBrute(WrapperBase):
+    def __init__(
+        self,
+        params,
+        lb: float,
+        ub: float,
+        Ns: int = 20,
+        finish = scipy.optimize.fmin,
+        disp: bool = False,
+        workers: int = 1
+    ):
+        super().__init__(params, dict(lb=lb, ub=ub))
+
+        kwargs = locals().copy()
+        del kwargs['self'], kwargs['params'], kwargs['lb'], kwargs['ub'], kwargs['__class__']
+        self._kwargs = kwargs
+
+    @torch.no_grad
+    def step(self, closure: Closure):
+        params = TensorList(self._get_params())
+        bounds = self._get_bounds()
+        assert bounds is not None
+
+        res,fval,grid,Jout = scipy.optimize.brute(
+            partial(self._f, params = params, closure = closure),
+            ranges=bounds,
+            full_output=True,
+            **self._kwargs
+        )
+
+        params.from_vec_(torch.as_tensor(res, device = params[0].device, dtype=params[0].dtype))
+
+        return fval
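A sketch for ScipyBrute (illustrative; since scipy.optimize.brute evaluates a grid of Ns points per dimension, it is only practical for a very small number of parameters):

import torch
from torchzero.optim.wrappers.scipy import ScipyBrute

x = torch.nn.Parameter(torch.zeros(2))
opt = ScipyBrute([x], lb=-3.0, ub=3.0, Ns=25)

def closure(backward=True):
    a, b = x
    return (a - 1.0) ** 2 + (b + 2.0) ** 2

fval = opt.step(closure)  # grid search over [-3, 3]^2, refined by the default scipy.optimize.fmin finisher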
torchzero/optim/wrappers/scipy/differential_evolution.py (new file)

@@ -0,0 +1,80 @@
+from collections.abc import Callable
+from functools import partial
+from typing import Any, Literal
+
+import numpy as np
+import scipy.optimize
+import torch
+
+from ....utils import TensorList
+from ..wrapper import WrapperBase
+
+Closure = Callable[[bool], Any]
+
+
+
+
+
+class ScipyDE(WrapperBase):
+    """Use scipy.minimize.differential_evolution as pytorch optimizer. Note that this performs full minimization on each step,
+    so usually you would want to perform a single step. This also requires bounds to be specified.
+
+    Please refer to https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.differential_evolution.html
+    for all other args.
+
+    Args:
+        params: iterable of parameters to optimize or dicts defining parameter groups.
+        bounds (tuple[float,float], optional): tuple with lower and upper bounds.
+            DE requires bounds to be specified. Defaults to None.
+
+    other args:
+        refer to https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.differential_evolution.html
+    """
+    def __init__(
+        self,
+        params,
+        lb: float,
+        ub: float,
+        strategy: Literal['best1bin', 'best1exp', 'rand1bin', 'rand1exp', 'rand2bin', 'rand2exp',
+                          'randtobest1bin', 'randtobest1exp', 'currenttobest1bin', 'currenttobest1exp',
+                          'best2exp', 'best2bin'] = 'best1bin',
+        maxiter: int = 1000,
+        popsize: int = 15,
+        tol: float = 0.01,
+        mutation = (0.5, 1),
+        recombination: float = 0.7,
+        seed = None,
+        callback = None,
+        disp: bool = False,
+        polish: bool = True,
+        init: str = 'latinhypercube',
+        atol: int = 0,
+        updating: str = 'immediate',
+        workers: int = 1,
+        constraints = (),
+        *,
+        integrality = None,
+
+    ):
+        super().__init__(params, dict(lb=lb, ub=ub))
+
+        kwargs = locals().copy()
+        del kwargs['self'], kwargs['params'], kwargs['lb'], kwargs['ub'], kwargs['__class__']
+        self._kwargs = kwargs
+
+    @torch.no_grad
+    def step(self, closure: Closure): # pylint:disable = signature-differs # pyright:ignore[reportIncompatibleMethodOverride]
+        params = TensorList(self._get_params())
+        x0 = params.to_vec().numpy(force=True)
+        bounds = self._get_bounds()
+        assert bounds is not None
+
+        res = scipy.optimize.differential_evolution(
+            partial(self._f, params = params, closure = closure),
+            x0 = x0,
+            bounds=bounds,
+            **self._kwargs
+        )
+
+        params.from_vec_(torch.as_tensor(res.x, device = params[0].device, dtype=params[0].dtype))
+        return res.fun
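A usage sketch for ScipyDE (illustrative; as the docstring notes, bounds are required and one step runs the whole differential evolution search):

import torch
from torchzero.optim.wrappers.scipy import ScipyDE

x = torch.nn.Parameter(torch.randn(10))
opt = ScipyDE([x], lb=-5.0, ub=5.0, maxiter=300, popsize=20)

def closure(backward=True):
    return (x ** 2).sum() - 4 * x.abs().sum()

fun = opt.step(closure)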
torchzero/optim/wrappers/scipy/direct.py (new file)

@@ -0,0 +1,69 @@
+from collections.abc import Callable
+from functools import partial
+from typing import Any, Literal
+
+import numpy as np
+import scipy.optimize
+import torch
+
+from ....utils import TensorList
+from ..wrapper import WrapperBase
+
+Closure = Callable[[bool], Any]
+
+
+
+
+class ScipyDIRECT(WrapperBase):
+    def __init__(
+        self,
+        params,
+        lb: float,
+        ub: float,
+        maxfun: int | None = 1000,
+        maxiter: int = 1000,
+        eps: float = 0.0001,
+        locally_biased: bool = True,
+        f_min: float = -np.inf,
+        f_min_rtol: float = 0.0001,
+        vol_tol: float = 1e-16,
+        len_tol: float = 0.000001,
+        callback = None,
+    ):
+        super().__init__(params, dict(lb=lb, ub=ub))
+
+        kwargs = locals().copy()
+        del kwargs['self'], kwargs['params'], kwargs['lb'], kwargs['ub'], kwargs['__class__']
+        self._kwargs = kwargs
+
+    def _objective(self, x: np.ndarray, params: TensorList, closure) -> float:
+        if self.raised: return np.inf
+        try:
+            return self._f(x, params, closure)
+
+        except Exception as e:
+            # this makes exceptions work in fcmaes and scipy direct
+            self.e = e
+            self.raised = True
+            return np.inf
+
+    @torch.no_grad
+    def step(self, closure: Closure):
+        self.raised = False
+        self.e = None
+
+        params = TensorList(self._get_params())
+        bounds = self._get_bounds()
+        assert bounds is not None
+
+        res = scipy.optimize.direct(
+            partial(self._objective, params=params, closure=closure),
+            bounds=bounds,
+            **self._kwargs
+        )
+
+        params.from_vec_(torch.as_tensor(res.x, device = params[0].device, dtype=params[0].dtype))
+
+        if self.e is not None: raise self.e from None
+        return res.fun
+
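Finally, a sketch for ScipyDIRECT (illustrative; bounds are required, and as _objective above shows, exceptions raised inside the closure are recorded during the scipy.optimize.direct run and re-raised after it returns):

import torch
from torchzero.optim.wrappers.scipy import ScipyDIRECT

x = torch.nn.Parameter(torch.zeros(4))
opt = ScipyDIRECT([x], lb=-10.0, ub=10.0, maxfun=5000)

def closure(backward=True):
    target = torch.tensor([1.0, -2.0, 3.0, 0.5])
    return ((x - target) ** 2).sum()

fun = opt.step(closure)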