torchzero 0.3.15__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187)
  1. tests/test_identical.py +22 -22
  2. tests/test_module_autograd.py +586 -0
  3. tests/test_objective.py +188 -0
  4. tests/test_opts.py +225 -214
  5. tests/test_tensorlist.py +0 -8
  6. tests/test_utils_optimizer.py +0 -1
  7. torchzero/__init__.py +2 -2
  8. torchzero/core/__init__.py +7 -4
  9. torchzero/core/chain.py +20 -23
  10. torchzero/core/functional.py +90 -24
  11. torchzero/core/modular.py +53 -57
  12. torchzero/core/module.py +132 -52
  13. torchzero/core/objective.py +948 -0
  14. torchzero/core/reformulation.py +55 -24
  15. torchzero/core/transform.py +261 -367
  16. torchzero/linalg/__init__.py +11 -0
  17. torchzero/linalg/eigh.py +253 -0
  18. torchzero/linalg/linalg_utils.py +14 -0
  19. torchzero/{utils/linalg → linalg}/linear_operator.py +99 -49
  20. torchzero/linalg/matrix_power.py +28 -0
  21. torchzero/linalg/orthogonalize.py +93 -0
  22. torchzero/{utils/linalg → linalg}/qr.py +16 -2
  23. torchzero/{utils/linalg → linalg}/solve.py +74 -88
  24. torchzero/linalg/svd.py +47 -0
  25. torchzero/linalg/torch_linalg.py +168 -0
  26. torchzero/modules/__init__.py +4 -3
  27. torchzero/modules/adaptive/__init__.py +11 -3
  28. torchzero/modules/adaptive/adagrad.py +167 -217
  29. torchzero/modules/adaptive/adahessian.py +76 -105
  30. torchzero/modules/adaptive/adam.py +53 -76
  31. torchzero/modules/adaptive/adan.py +50 -31
  32. torchzero/modules/adaptive/adaptive_heavyball.py +12 -7
  33. torchzero/modules/adaptive/aegd.py +12 -12
  34. torchzero/modules/adaptive/esgd.py +98 -119
  35. torchzero/modules/adaptive/ggt.py +186 -0
  36. torchzero/modules/adaptive/lion.py +7 -11
  37. torchzero/modules/adaptive/lre_optimizers.py +299 -0
  38. torchzero/modules/adaptive/mars.py +7 -7
  39. torchzero/modules/adaptive/matrix_momentum.py +48 -52
  40. torchzero/modules/adaptive/msam.py +71 -53
  41. torchzero/modules/adaptive/muon.py +67 -129
  42. torchzero/modules/adaptive/natural_gradient.py +63 -41
  43. torchzero/modules/adaptive/orthograd.py +11 -15
  44. torchzero/modules/adaptive/psgd/__init__.py +5 -0
  45. torchzero/modules/adaptive/psgd/_psgd_utils.py +37 -0
  46. torchzero/modules/adaptive/psgd/psgd.py +1390 -0
  47. torchzero/modules/adaptive/psgd/psgd_dense_newton.py +174 -0
  48. torchzero/modules/adaptive/psgd/psgd_kron_newton.py +203 -0
  49. torchzero/modules/adaptive/psgd/psgd_kron_whiten.py +185 -0
  50. torchzero/modules/adaptive/psgd/psgd_lra_newton.py +118 -0
  51. torchzero/modules/adaptive/psgd/psgd_lra_whiten.py +116 -0
  52. torchzero/modules/adaptive/rmsprop.py +83 -75
  53. torchzero/modules/adaptive/rprop.py +48 -47
  54. torchzero/modules/adaptive/sam.py +55 -45
  55. torchzero/modules/adaptive/shampoo.py +149 -130
  56. torchzero/modules/adaptive/soap.py +207 -143
  57. torchzero/modules/adaptive/sophia_h.py +106 -130
  58. torchzero/modules/clipping/clipping.py +22 -25
  59. torchzero/modules/clipping/ema_clipping.py +31 -25
  60. torchzero/modules/clipping/growth_clipping.py +14 -17
  61. torchzero/modules/conjugate_gradient/cg.py +27 -38
  62. torchzero/modules/experimental/__init__.py +7 -6
  63. torchzero/modules/experimental/adanystrom.py +258 -0
  64. torchzero/modules/experimental/common_directions_whiten.py +142 -0
  65. torchzero/modules/experimental/coordinate_momentum.py +36 -0
  66. torchzero/modules/experimental/cubic_adam.py +160 -0
  67. torchzero/modules/experimental/curveball.py +25 -41
  68. torchzero/modules/experimental/eigen_sr1.py +182 -0
  69. torchzero/modules/experimental/eigengrad.py +207 -0
  70. torchzero/modules/experimental/gradmin.py +2 -2
  71. torchzero/modules/experimental/higher_order_newton.py +14 -40
  72. torchzero/modules/experimental/l_infinity.py +1 -1
  73. torchzero/modules/experimental/matrix_nag.py +122 -0
  74. torchzero/modules/experimental/newton_solver.py +23 -54
  75. torchzero/modules/experimental/newtonnewton.py +45 -48
  76. torchzero/modules/experimental/reduce_outward_lr.py +7 -7
  77. torchzero/modules/experimental/scipy_newton_cg.py +21 -24
  78. torchzero/modules/experimental/spsa1.py +3 -3
  79. torchzero/modules/experimental/structural_projections.py +1 -4
  80. torchzero/modules/grad_approximation/fdm.py +2 -2
  81. torchzero/modules/grad_approximation/forward_gradient.py +7 -7
  82. torchzero/modules/grad_approximation/grad_approximator.py +23 -16
  83. torchzero/modules/grad_approximation/rfdm.py +24 -21
  84. torchzero/modules/least_squares/gn.py +121 -50
  85. torchzero/modules/line_search/backtracking.py +4 -4
  86. torchzero/modules/line_search/line_search.py +33 -33
  87. torchzero/modules/line_search/strong_wolfe.py +4 -4
  88. torchzero/modules/misc/debug.py +12 -12
  89. torchzero/modules/misc/escape.py +10 -10
  90. torchzero/modules/misc/gradient_accumulation.py +11 -79
  91. torchzero/modules/misc/homotopy.py +16 -8
  92. torchzero/modules/misc/misc.py +121 -123
  93. torchzero/modules/misc/multistep.py +52 -53
  94. torchzero/modules/misc/regularization.py +49 -44
  95. torchzero/modules/misc/split.py +31 -29
  96. torchzero/modules/misc/switch.py +37 -32
  97. torchzero/modules/momentum/averaging.py +14 -14
  98. torchzero/modules/momentum/cautious.py +37 -31
  99. torchzero/modules/momentum/momentum.py +12 -12
  100. torchzero/modules/ops/__init__.py +4 -4
  101. torchzero/modules/ops/accumulate.py +21 -21
  102. torchzero/modules/ops/binary.py +67 -66
  103. torchzero/modules/ops/higher_level.py +20 -20
  104. torchzero/modules/ops/multi.py +44 -41
  105. torchzero/modules/ops/reduce.py +26 -23
  106. torchzero/modules/ops/unary.py +53 -53
  107. torchzero/modules/ops/utility.py +47 -46
  108. torchzero/modules/{functional.py → opt_utils.py} +1 -1
  109. torchzero/modules/projections/galore.py +1 -1
  110. torchzero/modules/projections/projection.py +46 -43
  111. torchzero/modules/quasi_newton/__init__.py +1 -1
  112. torchzero/modules/quasi_newton/damping.py +2 -2
  113. torchzero/modules/quasi_newton/diagonal_quasi_newton.py +1 -1
  114. torchzero/modules/quasi_newton/lbfgs.py +10 -10
  115. torchzero/modules/quasi_newton/lsr1.py +10 -10
  116. torchzero/modules/quasi_newton/quasi_newton.py +54 -39
  117. torchzero/modules/quasi_newton/sg2.py +69 -205
  118. torchzero/modules/restarts/restars.py +39 -37
  119. torchzero/modules/second_order/__init__.py +2 -2
  120. torchzero/modules/second_order/ifn.py +31 -62
  121. torchzero/modules/second_order/inm.py +57 -53
  122. torchzero/modules/second_order/multipoint.py +40 -80
  123. torchzero/modules/second_order/newton.py +165 -196
  124. torchzero/modules/second_order/newton_cg.py +105 -157
  125. torchzero/modules/second_order/nystrom.py +216 -185
  126. torchzero/modules/second_order/rsn.py +132 -125
  127. torchzero/modules/smoothing/laplacian.py +13 -12
  128. torchzero/modules/smoothing/sampling.py +10 -10
  129. torchzero/modules/step_size/adaptive.py +24 -24
  130. torchzero/modules/step_size/lr.py +17 -17
  131. torchzero/modules/termination/termination.py +32 -30
  132. torchzero/modules/trust_region/cubic_regularization.py +3 -3
  133. torchzero/modules/trust_region/levenberg_marquardt.py +25 -28
  134. torchzero/modules/trust_region/trust_cg.py +2 -2
  135. torchzero/modules/trust_region/trust_region.py +27 -22
  136. torchzero/modules/variance_reduction/svrg.py +23 -21
  137. torchzero/modules/weight_decay/__init__.py +2 -1
  138. torchzero/modules/weight_decay/reinit.py +83 -0
  139. torchzero/modules/weight_decay/weight_decay.py +17 -18
  140. torchzero/modules/wrappers/optim_wrapper.py +14 -14
  141. torchzero/modules/zeroth_order/cd.py +10 -7
  142. torchzero/optim/mbs.py +291 -0
  143. torchzero/optim/root.py +3 -3
  144. torchzero/optim/utility/split.py +2 -1
  145. torchzero/optim/wrappers/directsearch.py +27 -63
  146. torchzero/optim/wrappers/fcmaes.py +14 -35
  147. torchzero/optim/wrappers/mads.py +11 -31
  148. torchzero/optim/wrappers/moors.py +66 -0
  149. torchzero/optim/wrappers/nevergrad.py +4 -13
  150. torchzero/optim/wrappers/nlopt.py +31 -25
  151. torchzero/optim/wrappers/optuna.py +8 -13
  152. torchzero/optim/wrappers/pybobyqa.py +124 -0
  153. torchzero/optim/wrappers/scipy/__init__.py +7 -0
  154. torchzero/optim/wrappers/scipy/basin_hopping.py +117 -0
  155. torchzero/optim/wrappers/scipy/brute.py +48 -0
  156. torchzero/optim/wrappers/scipy/differential_evolution.py +80 -0
  157. torchzero/optim/wrappers/scipy/direct.py +69 -0
  158. torchzero/optim/wrappers/scipy/dual_annealing.py +115 -0
  159. torchzero/optim/wrappers/scipy/experimental.py +141 -0
  160. torchzero/optim/wrappers/scipy/minimize.py +151 -0
  161. torchzero/optim/wrappers/scipy/sgho.py +111 -0
  162. torchzero/optim/wrappers/wrapper.py +121 -0
  163. torchzero/utils/__init__.py +7 -25
  164. torchzero/utils/benchmarks/__init__.py +0 -0
  165. torchzero/utils/benchmarks/logistic.py +122 -0
  166. torchzero/utils/compile.py +2 -2
  167. torchzero/utils/derivatives.py +97 -73
  168. torchzero/utils/optimizer.py +4 -77
  169. torchzero/utils/python_tools.py +31 -0
  170. torchzero/utils/tensorlist.py +11 -5
  171. torchzero/utils/thoad_tools.py +68 -0
  172. {torchzero-0.3.15.dist-info → torchzero-0.4.1.dist-info}/METADATA +1 -1
  173. torchzero-0.4.1.dist-info/RECORD +209 -0
  174. tests/test_vars.py +0 -185
  175. torchzero/core/var.py +0 -376
  176. torchzero/modules/adaptive/lmadagrad.py +0 -186
  177. torchzero/modules/experimental/momentum.py +0 -160
  178. torchzero/optim/wrappers/scipy.py +0 -572
  179. torchzero/utils/linalg/__init__.py +0 -12
  180. torchzero/utils/linalg/matrix_funcs.py +0 -87
  181. torchzero/utils/linalg/orthogonalize.py +0 -12
  182. torchzero/utils/linalg/svd.py +0 -20
  183. torchzero/utils/ops.py +0 -10
  184. torchzero-0.3.15.dist-info/RECORD +0 -175
  185. /torchzero/{utils/linalg → linalg}/benchmark.py +0 -0
  186. {torchzero-0.3.15.dist-info → torchzero-0.4.1.dist-info}/WHEEL +0 -0
  187. {torchzero-0.3.15.dist-info → torchzero-0.4.1.dist-info}/top_level.txt +0 -0
torchzero/modules/weight_decay/reinit.py ADDED
@@ -0,0 +1,83 @@
+from functools import partial
+
+import torch
+
+from ...core import Module
+from ...utils import NumberList, TensorList
+
+
+def _reset_except_self(objective, modules, self: Module):
+    for m in modules:
+        if m is not self:
+            m.reset()
+
+class RandomReinitialize(Module):
+    """On each step, with probability ``p_reinit``, triggers reinitialization,
+    whereby each weight is reset to its initial value with probability ``p_weights``.
+
+    This modifies the parameters directly. Place it as the first module.
+
+    Args:
+        p_reinit (float, optional): probability to trigger reinitialization on each step. Defaults to 0.01.
+        p_weights (float, optional): probability for each weight to be set to initial value when reinitialization is triggered. Defaults to 0.1.
+        store_every (int | None, optional): if set, stores new initial values every this many steps. Defaults to None.
+        beta (float, optional):
+            whenever ``store_every`` is triggered, uses linear interpolation with this beta.
+            If ``store_every=1``, this can be set to some value close to 1 such as 0.999
+            to reinitialize to a slow parameter EMA. Defaults to 0.
+        reset (bool, optional): whether to reset states of other modules on reinitialization. Defaults to False.
+        seed (int | None, optional): random seed.
+    """
+
+    def __init__(
+        self,
+        p_reinit: float = 0.01,
+        p_weights: float = 0.1,
+        store_every: int | None = None,
+        beta: float = 0,
+        reset: bool = False,
+        seed: int | None = None,
+    ):
+        defaults = dict(p_weights=p_weights, p_reinit=p_reinit, store_every=store_every, beta=beta, reset=reset, seed=seed)
+        super().__init__(defaults)
+
+    def update(self, objective):
+        # this stores initial values to per-parameter states
+        p_init = self.get_state(objective.params, "p_init", init="params", cls=TensorList)
+
+        # store new params every store_every steps
+        step = self.global_state.get("step", 0)
+        self.global_state["step"] = step + 1
+
+        store_every = self.defaults["store_every"]
+        if (store_every is not None and step % store_every == 0):
+            beta = self.get_settings(objective.params, "beta", cls=NumberList)
+            p_init.lerp_(objective.params, weight=(1 - beta))
+
+    @torch.no_grad
+    def apply(self, objective):
+        p_reinit = self.defaults["p_reinit"]
+        device = objective.params[0].device
+        generator = self.get_generator(device, self.defaults["seed"])
+
+        # determine whether to trigger reinitialization
+        reinitialize = torch.rand(1, generator=generator, device=device) < p_reinit
+
+        # reinitialize
+        if reinitialize:
+            params = TensorList(objective.params)
+            p_init = self.get_state(params, "p_init", init=params)
+
+
+            # mask with p_weights entries being True
+            p_weights = self.get_settings(params, "p_weights")
+            mask = params.bernoulli_like(p_weights, generator=generator).as_bool()
+
+            # set weights at mask to their initialization
+            params.masked_set_(mask, p_init)
+
+            # reset
+            if self.defaults["reset"]:
+                objective.post_step_hooks.append(partial(_reset_except_self, self=self))
+
+        return objective
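For orientation, the new module is meant to sit at the head of a module chain, since it mutates parameters directly. A minimal usage sketch, assuming ``RandomReinitialize`` is exported under ``tz.m`` like the other modules in this release, and using ``tz.Optimizer``, the renamed entry point seen elsewhere in this diff:

```python
import torch
import torchzero as tz

model = torch.nn.Linear(10, 1)  # placeholder model

# reinitialization runs first, then Adam and a learning rate module
opt = tz.Optimizer(
    model.parameters(),
    tz.m.RandomReinitialize(p_reinit=0.01, p_weights=0.1, reset=True),  # assumed export path
    tz.m.Adam(),
    tz.m.LR(1e-3),
)
```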
torchzero/modules/weight_decay/weight_decay.py CHANGED
@@ -3,7 +3,7 @@ from typing import Literal
 
 import torch
 
-from ...core import Module, Target, Transform
+from ...core import Module, TensorTransform
 from ...utils import NumberList, TensorList, as_tensorlist, unpack_dicts, unpack_states, Metrics
 
 
@@ -21,7 +21,7 @@ def weight_decay_(
     return grad_.add_(params.pow(ord-1).copysign_(params).mul_(weight_decay))
 
 
-class WeightDecay(Transform):
+class WeightDecay(TensorTransform):
     """Weight decay.
 
     Args:
@@ -33,7 +33,7 @@ class WeightDecay(Transform):
 
     Adam with non-decoupled weight decay
     ```python
-    opt = tz.Modular(
+    opt = tz.Optimizer(
        model.parameters(),
        tz.m.WeightDecay(1e-3),
        tz.m.Adam(),
@@ -44,7 +44,7 @@ class WeightDecay(Transform):
    Adam with decoupled weight decay that still scales with learning rate
    ```python
 
-    opt = tz.Modular(
+    opt = tz.Optimizer(
        model.parameters(),
        tz.m.Adam(),
        tz.m.WeightDecay(1e-3),
@@ -54,7 +54,7 @@ class WeightDecay(Transform):
 
    Adam with fully decoupled weight decay that doesn't scale with learning rate
    ```python
-    opt = tz.Modular(
+    opt = tz.Optimizer(
        model.parameters(),
        tz.m.Adam(),
        tz.m.LR(1e-3),
@@ -63,19 +63,19 @@ class WeightDecay(Transform):
    ```
 
    """
-    def __init__(self, weight_decay: float, ord: int = 2, target: Target = 'update'):
+    def __init__(self, weight_decay: float, ord: int = 2):
 
        defaults = dict(weight_decay=weight_decay, ord=ord)
-        super().__init__(defaults, uses_grad=False, target=target)
+        super().__init__(defaults)
 
    @torch.no_grad
-    def apply_tensors(self, tensors, params, grads, loss, states, settings):
+    def multi_tensor_apply(self, tensors, params, grads, loss, states, settings):
        weight_decay = NumberList(s['weight_decay'] for s in settings)
        ord = settings[0]['ord']
 
        return weight_decay_(as_tensorlist(tensors), as_tensorlist(params), weight_decay, ord)
 
-class RelativeWeightDecay(Transform):
+class RelativeWeightDecay(TensorTransform):
    """Weight decay relative to the mean absolute value of update, gradient or parameters depending on value of ``norm_input`` argument.
 
    Args:
@@ -93,7 +93,7 @@ class RelativeWeightDecay(Transform):
 
    Adam with non-decoupled relative weight decay
    ```python
-    opt = tz.Modular(
+    opt = tz.Optimizer(
        model.parameters(),
        tz.m.RelativeWeightDecay(1e-1),
        tz.m.Adam(),
@@ -103,7 +103,7 @@ class RelativeWeightDecay(Transform):
 
    Adam with decoupled relative weight decay
    ```python
-    opt = tz.Modular(
+    opt = tz.Optimizer(
        model.parameters(),
        tz.m.Adam(),
        tz.m.RelativeWeightDecay(1e-1),
@@ -117,13 +117,12 @@ class RelativeWeightDecay(Transform):
        ord: int = 2,
        norm_input: Literal["update", "grad", "params"] = "update",
        metric: Metrics = 'mad',
-        target: Target = "update",
    ):
        defaults = dict(weight_decay=weight_decay, ord=ord, norm_input=norm_input, metric=metric)
-        super().__init__(defaults, uses_grad=norm_input == 'grad', target=target)
+        super().__init__(defaults, uses_grad=norm_input == 'grad')
 
    @torch.no_grad
-    def apply_tensors(self, tensors, params, grads, loss, states, settings):
+    def multi_tensor_apply(self, tensors, params, grads, loss, states, settings):
        weight_decay = NumberList(s['weight_decay'] for s in settings)
 
        ord = settings[0]['ord']
@@ -161,9 +160,9 @@ class DirectWeightDecay(Module):
        super().__init__(defaults)
 
    @torch.no_grad
-    def step(self, var):
-        weight_decay = self.get_settings(var.params, 'weight_decay', cls=NumberList)
+    def apply(self, objective):
+        weight_decay = self.get_settings(objective.params, 'weight_decay', cls=NumberList)
        ord = self.defaults['ord']
 
-        decay_weights_(var.params, weight_decay, ord)
-        return var
+        decay_weights_(objective.params, weight_decay, ord)
+        return objective
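The changes above also illustrate the 0.4 API shift: transforms now derive from ``TensorTransform`` instead of ``Transform``, the ``target`` argument is gone, and the per-tensor hook is ``multi_tensor_apply`` rather than ``apply_tensors``. A sketch of a custom transform under the new base class, based only on the signatures visible in this diff (the scaling module itself is hypothetical):

```python
import torch

from torchzero.core import TensorTransform
from torchzero.utils import NumberList, as_tensorlist


class ScaleBy(TensorTransform):
    """Hypothetical transform that multiplies the update by a per-parameter factor."""
    def __init__(self, factor: float):
        super().__init__(dict(factor=factor))

    @torch.no_grad
    def multi_tensor_apply(self, tensors, params, grads, loss, states, settings):
        # one settings dict per parameter, same convention as WeightDecay above
        factor = NumberList(s['factor'] for s in settings)
        return as_tensorlist(tensors).mul_(factor)
```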
torchzero/modules/wrappers/optim_wrapper.py CHANGED
@@ -3,7 +3,7 @@ from typing import Any
 import torch
 
 from ...core.module import Module
-from ...utils import Params, _copy_param_groups, _make_param_groups
+from ...utils.params import Params, _copy_param_groups, _make_param_groups
 
 
 class Wrap(Module):
@@ -11,7 +11,7 @@ class Wrap(Module):
    Wraps a pytorch optimizer to use it as a module.
 
    Note:
-        Custom param groups are supported only by ``set_param_groups``, settings passed to Modular will be applied to all parameters.
+        Custom param groups are supported only by ``set_param_groups``, settings passed to Optimizer will be applied to all parameters.
 
    Args:
        opt_fn (Callable[..., torch.optim.Optimizer] | torch.optim.Optimizer):
@@ -21,7 +21,7 @@ class Wrap(Module):
        **kwargs:
            Extra args to be passed to opt_fn. The function is called as ``opt_fn(parameters, *args, **kwargs)``.
        use_param_groups:
-            Whether to pass settings passed to Modular to the wrapped optimizer.
+            Whether to pass settings passed to Optimizer to the wrapped optimizer.
 
            Note that settings to the first parameter are used for all parameters,
            so if you specified per-parameter settings, they will be ignored.
@@ -32,7 +32,7 @@ class Wrap(Module):
    ```python
 
    from pytorch_optimizer import StableAdamW
-    opt = tz.Modular(
+    opt = tz.Optimizer(
        model.parameters(),
        tz.m.Wrap(StableAdamW, lr=1),
        tz.m.Cautious(),
@@ -66,8 +66,8 @@ class Wrap(Module):
        return super().set_param_groups(param_groups)
 
    @torch.no_grad
-    def step(self, var):
-        params = var.params
+    def apply(self, objective):
+        params = objective.params
 
        # initialize opt on 1st step
        if self.optimizer is None:
@@ -76,14 +76,14 @@ class Wrap(Module):
            self.optimizer = self._opt_fn(param_groups, *self._opt_args, **self._opt_kwargs)
 
        # set optimizer per-parameter settings
-        if self.defaults["use_param_groups"] and var.modular is not None:
+        if self.defaults["use_param_groups"] and objective.modular is not None:
            for group in self.optimizer.param_groups:
                first_param = group['params'][0]
                setting = self.settings[first_param]
 
                # settings passed in `set_param_groups` are the highest priority
                # schedulers will override defaults but not settings passed in `set_param_groups`
-                # this is consistent with how Modular does it.
+                # this is consistent with how Optimizer does it.
                if self._custom_param_groups is not None:
                    setting = {k:v for k,v in setting if k not in self._custom_param_groups[0]}
 
@@ -91,19 +91,19 @@
 
        # set grad to update
        orig_grad = [p.grad for p in params]
-        for p, u in zip(params, var.get_update()):
+        for p, u in zip(params, objective.get_updates()):
            p.grad = u
 
        # if this is last module, simply use optimizer to update parameters
-        if var.modular is not None and self is var.modular.modules[-1]:
+        if objective.modular is not None and self is objective.modular.modules[-1]:
            self.optimizer.step()
 
            # restore grad
            for p, g in zip(params, orig_grad):
                p.grad = g
 
-            var.stop = True; var.skip_update = True
-            return var
+            objective.stop = True; objective.skip_update = True
+            return objective
 
        # this is not the last module, meaning update is difference in parameters
        # and passed to next module
@@ -111,11 +111,11 @@
        self.optimizer.step() # step and update params
        for p, g in zip(params, orig_grad):
            p.grad = g
-        var.update = list(torch._foreach_sub(params_before_step, params)) # set update to difference between params
+        objective.updates = list(torch._foreach_sub(params_before_step, params)) # set update to difference between params
        for p, o in zip(params, params_before_step):
            p.set_(o) # pyright: ignore[reportArgumentType]
 
-        return var
+        return objective
 
    def reset(self):
        super().reset()
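For reference, ``Wrap`` is driven the same way after the ``step`` → ``apply`` rename; only the internals changed. A usage sketch with a built-in torch optimizer, mirroring the ``StableAdamW`` example in the docstring above (the model and the trailing ``LR`` module are illustrative):

```python
import torch
import torchzero as tz

model = torch.nn.Linear(10, 1)  # placeholder model

# wrap torch.optim.SGD as a module and post-process its update
opt = tz.Optimizer(
    model.parameters(),
    tz.m.Wrap(torch.optim.SGD, lr=1, momentum=0.9),
    tz.m.Cautious(),
    tz.m.LR(1e-2),
)
```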
torchzero/modules/zeroth_order/cd.py CHANGED
@@ -29,17 +29,20 @@ class CD(Module):
            whether to use three points (three function evaluations) to determine descent direction.
            if False, uses two points, but then ``adaptive`` can't be used. Defaults to True.
    """
-    def __init__(self, h:float=1e-3, grad:bool=True, adaptive:bool=True, index:Literal['cyclic', 'cyclic2', 'random']="cyclic2", threepoint:bool=True,):
+    def __init__(self, h:float=1e-3, grad:bool=False, adaptive:bool=True, index:Literal['cyclic', 'cyclic2', 'random']="cyclic2", threepoint:bool=True,):
        defaults = dict(h=h, grad=grad, adaptive=adaptive, index=index, threepoint=threepoint)
        super().__init__(defaults)
 
+    def update(self, objective): raise RuntimeError
+    def apply(self, objective): raise RuntimeError
+
    @torch.no_grad
-    def step(self, var):
-        closure = var.closure
+    def step(self, objective):
+        closure = objective.closure
        if closure is None:
            raise RuntimeError("CD requires closure")
 
-        params = TensorList(var.params)
+        params = TensorList(objective.params)
        ndim = params.global_numel()
 
        grad_step_size = self.defaults['grad']
@@ -79,7 +82,7 @@
        else:
            warnings.warn("CD adaptive=True only works with threepoint=True")
 
-        f_0 = var.get_loss(False)
+        f_0 = objective.get_loss(False)
        params.flat_set_lambda_(idx, lambda x: x + h)
        f_p = closure(False)
 
@@ -117,6 +120,6 @@ class CD(Module):
        # ----------------------------- create the update ---------------------------- #
        update = params.zeros_like()
        update.flat_set_(idx, alpha)
-        var.update = update
-        return var
+        objective.updates = update
+        return objective
 
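``CD`` is a closure-based module: as the diff shows, it raises if ``objective.closure`` is missing, so it has to be driven with a closure. A sketch, assuming ``CD`` is exported as ``tz.m.CD`` and that ``tz.Optimizer.step`` accepts the usual torchzero-style closure taking a ``backward`` flag (the diff itself only shows the module calling ``closure(False)``):

```python
import torch
import torchzero as tz

model = torch.nn.Linear(2, 1)
X, y = torch.randn(32, 2), torch.randn(32, 1)

opt = tz.Optimizer(model.parameters(), tz.m.CD(h=1e-3), tz.m.LR(1e-1))

def closure(backward=True):
    loss = torch.nn.functional.mse_loss(model(X), y)
    if backward:  # CD is zeroth-order, so it only ever calls closure(False)
        opt.zero_grad()
        loss.backward()
    return loss

for _ in range(100):
    opt.step(closure)
```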
torchzero/optim/mbs.py ADDED
@@ -0,0 +1,291 @@
+from typing import NamedTuple
+import math
+from collections.abc import Iterable
+from decimal import ROUND_HALF_UP, Decimal
+
+import numpy as np
+
+
+def format_number(number, n):
+    """Rounds to n significant digits after the decimal point."""
+    if number == 0: return 0
+    if math.isnan(number) or math.isinf(number) or (not math.isfinite(number)): return number
+    if n <= 0: raise ValueError("n must be positive")
+
+    dec = Decimal(str(number))
+    if dec.is_zero(): return 0
+    if number > 10**n or dec % 1 == 0: return int(dec)
+
+    if abs(dec) >= 1:
+        places = n
+    else:
+        frac_str = format(abs(dec), 'f').split('.')[1]
+        leading_zeros = len(frac_str) - len(frac_str.lstrip('0'))
+        places = leading_zeros + n
+
+    quantizer = Decimal('1e-' + str(places))
+    rounded_dec = dec.quantize(quantizer, rounding=ROUND_HALF_UP)
+
+    if rounded_dec % 1 == 0: return int(rounded_dec)
+    return float(rounded_dec)
+
+def _nonfinite_to_inf(x):
+    if not math.isfinite(x): return math.inf
+    return x
+
+def _tofloatlist(x) -> list[float]:
+    if isinstance(x, (int,float)): return [x]
+    if isinstance(x, np.ndarray) and x.size == 1: return [float(x.item())]
+    return [float(i) for i in x]
+
+class Trial(NamedTuple):
+    x: float
+    f: tuple[float, ...]
+
+class Solution(NamedTuple):
+    x: float
+    f: tuple[float, ...]
+    trials: list[Trial]
+
+class MBS:
+    """Univariate minimization via grid search followed by refining, supports multi-objective functions.
+
+    This tends to outperform bayesian optimization for learning rate tuning, it is also good for plotting.
+
+    First it evaluates all points defined in ``grid``. The grid doesn't have to be dense and the solution doesn't
+    have to be between the endpoints.
+
+    Then it picks ``num_candidates`` best points per each objective. If any of those points are endpoints,
+    it expands the search space by ``step`` in that direction and evaluates the new endpoint.
+
+    Otherwise it keeps picking points between best points and evaluating them, until ``num_binary`` evaluations
+    have been performed.
+
+    Args:
+        grid (Iterable[float], optional): values for initial grid search. If ``log_scale=True``, should be in log10 scale.
+        step (float, optional): expansion step size. Defaults to 1.
+        num_candidates (int, optional): number of best points to sample new points around on each iteration. Defaults to 3.
+        num_binary (int, optional): maximum number of new points sampled via binary search. Defaults to 20.
+        num_expansions (int, optional): maximum number of expansions (not counted towards binary search points). Defaults to 20.
+        rounding (int | None, optional): rounds points to this many significant digits, which avoids evaluating points that are too close. Defaults to 2.
+        lb (float | None, optional): lower bound. If ``log_scale=True``, should be in log10 scale.
+        ub (float | None, optional): upper bound. If ``log_scale=True``, should be in log10 scale.
+        log_scale (bool, optional):
+            whether to minimize in log10 scale. If true, it is assumed that
+            ``grid``, ``lb`` and ``ub`` are given in log10 scale.
+
+    Example:
+
+    ```python
+    def objective(x: float):
+        x = x * 4
+        return -(np.sin(x) * (x / 3) + np.cos(x*2.5) * 2 - 0.05 * (x-5)**2)
+
+    mbs = MBS(grid=[-1, 0, 1, 2, 3, 4], step=1, num_binary=10, num_expansions=10)
+
+    x, f, trials = mbs.run(objective)
+    # x - solution
+    # f - value at solution x
+    # trials - list of trials, each trial is a named tuple: Trial(x, f)
+    """
+
+    def __init__(
+        self,
+        grid: Iterable[float],
+        step: float,
+        num_candidates: int = 3,
+        num_binary: int = 20,
+        num_expansions: int = 20,
+        rounding: int| None = 2,
+        lb = None,
+        ub = None,
+        log_scale: bool = False,
+    ):
+        self.objectives: dict[int, dict[float,float]] = {}
+        """dictionary of objectives, each maps point (x) to value (v)"""
+
+        self.evaluated: set[float] = set()
+        """set of evaluated points (x)"""
+
+        grid = tuple(grid)
+        if len(grid) == 0: raise ValueError("At least one grid search point must be specified")
+        self.grid = sorted(grid)
+
+        self.step = step
+        self.num_candidates = num_candidates
+        self.num_binary = num_binary
+        self.num_expansions = num_expansions
+        self.rounding = rounding
+        self.log_scale = log_scale
+        self.lb = lb
+        self.ub = ub
+
+    def _get_best_x(self, n: int, objective: int):
+        """n best points"""
+        obj = self.objectives[objective]
+        v_to_x = [(v,x) for x,v in obj.items()]
+        v_to_x.sort(key = lambda vx: vx[0])
+        xs = [x for v,x in v_to_x]
+        return xs[:n]
+
+    def _suggest_points_around(self, x: float, objective: int):
+        """suggests points around x"""
+        points = list(self.objectives[objective].keys())
+        points.sort()
+        if x not in points: raise RuntimeError(f"{x} not in {points}")
+
+        expansions = []
+        if x == points[0]:
+            expansions.append((x-self.step, 'expansion'))
+
+        if x == points[-1]:
+            expansions.append((x+self.step, 'expansion'))
+
+        if len(expansions) != 0: return expansions
+
+        idx = points.index(x)
+        xm = points[idx-1]
+        xp = points[idx+1]
+
+        x1 = (x - (x - xm)/2)
+        x2 = (x + (xp - x)/2)
+
+        return [(x1, 'binary'), (x2, 'binary')]
+
+    def _out_of_bounds(self, x):
+        if self.lb is not None and x < self.lb: return True
+        if self.ub is not None and x > self.ub: return True
+        return False
+
+    def _evaluate(self, fn, x):
+        """Evaluate a point, returns False if point is already in history"""
+        if self.rounding is not None: x = format_number(x, self.rounding)
+        if x in self.evaluated: return False
+        if self._out_of_bounds(x): return False
+
+        self.evaluated.add(x)
+
+        if self.log_scale: vals = _tofloatlist(fn(10 ** x))
+        else: vals = _tofloatlist(fn(x))
+        vals = [_nonfinite_to_inf(v) for v in vals]
+
+        for idx, v in enumerate(vals):
+            if idx not in self.objectives: self.objectives[idx] = {}
+            self.objectives[idx][x] = v
+
+        return True
+
+    def run(self, fn) -> Solution:
+        # step 1 - grid search
+        for x in self.grid:
+            self._evaluate(fn, x)
+
+        # step 2 - binary search
+        while True:
+            if (self.num_candidates <= 0) or (self.num_expansions <= 0 and self.num_binary <= 0): break
+
+            # suggest candidates
+            candidates: list[tuple[float, str]] = []
+
+            # sample around best points
+            for objective in self.objectives:
+                best_points = self._get_best_x(self.num_candidates, objective)
+                for p in best_points:
+                    candidates.extend(self._suggest_points_around(p, objective=objective))
+
+            # filter
+            if self.num_expansions <= 0:
+                candidates = [(x,t) for x,t in candidates if t != 'expansion']
+
+            if self.num_candidates <= 0:
+                candidates = [(x,t) for x,t in candidates if t != 'binary']
+
+            # if expansion was suggested, discard anything else
+            types = [t for x, t in candidates]
+            if any(t == 'expansion' for t in types):
+                candidates = [(x,t) for x,t in candidates if t == 'expansion']
+
+            # evaluate candidates
+            terminate = False
+            at_least_one_evaluated = False
+            for x, t in candidates:
+                evaluated = self._evaluate(fn, x)
+                if not evaluated: continue
+                at_least_one_evaluated = True
+
+                if t == 'expansion': self.num_expansions -= 1
+                elif t == 'binary': self.num_binary -= 1
+
+                if self.num_binary < 0:
+                    terminate = True
+                    break
+
+            if terminate: break
+            if not at_least_one_evaluated:
+                if self.rounding is None: break
+                self.rounding += 1
+                if self.rounding == 100: break
+
+        # create dict[float, tuple[float,...]]
+        ret = {}
+        for i, objective in enumerate(self.objectives.values()):
+            for x, v in objective.items():
+                if self.log_scale: x = 10 ** x
+                if x not in ret: ret[x] = [None for _ in self.objectives]
+                ret[x][i] = v
+
+        for v in ret.values():
+            assert len(v) == len(self.objectives), v
+            assert all(i is not None for i in v), v
+
+        # ret maps x to list of per-objective values, e.g. {1: [0.1, 0.3], ...}
+        # now make a list of trials as they are easier to work with
+        trials: list[Trial] = []
+        for x, values in ret.items():
+            trials.append(Trial(x=x, f=values))
+
+        # sort trials by sum of values
+        trials.sort(key = lambda trial: sum(trial.f))
+        return Solution(x=trials[0].x, f=trials[0].f, trials=trials)
+
+def mbs_minimize(
+    fn,
+    grid: Iterable[float],
+    step: float,
+    num_candidates: int = 3,
+    num_binary: int = 20,
+    num_expansions: int = 20,
+    rounding=2,
+    lb:float | None = None,
+    ub:float | None = None,
+    log_scale=False,
+) -> Solution:
+    """minimize univariate function via MBS.
+
+    Args:
+        fn (function): objective function that accepts a float and returns a float or a sequence of floats to minimize.
+        step (float, optional): expansion step size. Defaults to 1.
+        num_candidates (int, optional): number of best points to sample new points around on each iteration. Defaults to 3.
+        num_binary (int, optional): maximum number of new points sampled via binary search. Defaults to 20.
+        num_expansions (int, optional): maximum number of expansions (not counted towards binary search points). Defaults to 20.
+        rounding (int, optional): rounds points to this many significant digits, which avoids evaluating points that are too close. Defaults to 2.
+        lb (float | None, optional): lower bound. If ``log_scale=True``, should be in log10 scale.
+        ub (float | None, optional): upper bound. If ``log_scale=True``, should be in log10 scale.
+        log_scale (bool, optional):
+            whether to minimize in log10 scale. If true, it is assumed that
+            ``grid``, ``lb`` and ``ub`` are given in log10 scale.
+
+    Example:
+
+    ```python
+    def objective(x: float):
+        x = x * 4
+        return -(np.sin(x) * (x / 3) + np.cos(x*2.5) * 2 - 0.05 * (x-5)**2)
+
+    x, f, trials = mbs_minimize(objective, grid=[-1, 0, 1, 2, 3, 4], step=1, num_binary=10, num_expansions=10)
+    # x - solution
+    # f - value at solution x
+    # trials - list of trials, each trial is a named tuple: Trial(x, f)
+    """
+    mbs = MBS(grid, step=step, num_candidates=num_candidates, num_binary=num_binary, num_expansions=num_expansions, rounding=rounding, lb=lb, ub=ub, log_scale=log_scale)
+    return mbs.run(fn)
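As a usage note for the new module: with ``log_scale=True``, ``mbs_minimize`` passes ``10**x`` to the objective and reports the solution back in linear scale, so it maps directly onto learning-rate tuning. A sketch (the import path follows the file location above, and ``train_and_eval`` is a placeholder for a real training run):

```python
import numpy as np
from torchzero.optim.mbs import mbs_minimize

def train_and_eval(lr: float) -> float:
    # placeholder objective, minimized near lr = 10**-2.5
    return (np.log10(lr) + 2.5) ** 2

# grid, step, lb and ub are in log10 scale because log_scale=True
sol = mbs_minimize(train_and_eval, grid=[-4, -3, -2, -1, 0], step=1, log_scale=True)
print(sol.x, sol.f)  # best learning rate (linear scale) and its objective value(s)
```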
torchzero/optim/root.py CHANGED
@@ -3,7 +3,7 @@ from collections.abc import Callable
 
 from abc import abstractmethod
 import torch
-from ..modules.higher_order.multipoint import sixth_order_im1, sixth_order_p6, _solve
+from ..modules.second_order.multipoint import sixth_order_3p, sixth_order_5p, two_point_newton, sixth_order_3pm2, _solve
 
 def make_evaluate(f: Callable[[torch.Tensor], torch.Tensor]):
     def evaluate(x, order) -> tuple[torch.Tensor, ...]:
@@ -53,7 +53,7 @@ class Newton(RootBase):
    def one_iteration(self, x, evaluate): return newton(x, evaluate, self.lstsq)
 
 
-class SixthOrderP6(RootBase):
+class SixthOrder3P(RootBase):
    """sixth-order iterative method
 
    Abro, Hameer Akhtar, and Muhammad Mujtaba Shaikh. "A new time-efficient and convergent nonlinear solver." Applied Mathematics and Computation 355 (2019): 516-536.
@@ -62,4 +62,4 @@ class SixthOrderP6(RootBase):
    def one_iteration(self, x, evaluate):
        def f(x): return evaluate(x, 0)[0]
        def f_j(x): return evaluate(x, 1)
-        return sixth_order_p6(x, f, f_j, self.lstsq)
+        return sixth_order_3p(x, f, f_j, self.lstsq)
torchzero/optim/utility/split.py CHANGED
@@ -3,7 +3,8 @@ from collections.abc import Callable, Iterable
 
 import torch
 
-from ...utils import flatten, get_params
+from ...utils import flatten
+from ...utils.optimizer import get_params
 
 class Split(torch.optim.Optimizer):
    """Steps with all `optimizers`, also has a check that they have no duplicate parameters.