torchzero 0.3.15__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff shows the changes between publicly released versions of this package as they appear in the supported public registries. It is provided for informational purposes only.
- tests/test_identical.py +22 -22
- tests/test_module_autograd.py +586 -0
- tests/test_objective.py +188 -0
- tests/test_opts.py +225 -214
- tests/test_tensorlist.py +0 -8
- tests/test_utils_optimizer.py +0 -1
- torchzero/__init__.py +2 -2
- torchzero/core/__init__.py +7 -4
- torchzero/core/chain.py +20 -23
- torchzero/core/functional.py +90 -24
- torchzero/core/modular.py +53 -57
- torchzero/core/module.py +132 -52
- torchzero/core/objective.py +948 -0
- torchzero/core/reformulation.py +55 -24
- torchzero/core/transform.py +261 -367
- torchzero/linalg/__init__.py +11 -0
- torchzero/linalg/eigh.py +253 -0
- torchzero/linalg/linalg_utils.py +14 -0
- torchzero/{utils/linalg → linalg}/linear_operator.py +99 -49
- torchzero/linalg/matrix_power.py +28 -0
- torchzero/linalg/orthogonalize.py +93 -0
- torchzero/{utils/linalg → linalg}/qr.py +16 -2
- torchzero/{utils/linalg → linalg}/solve.py +74 -88
- torchzero/linalg/svd.py +47 -0
- torchzero/linalg/torch_linalg.py +168 -0
- torchzero/modules/__init__.py +4 -3
- torchzero/modules/adaptive/__init__.py +11 -3
- torchzero/modules/adaptive/adagrad.py +167 -217
- torchzero/modules/adaptive/adahessian.py +76 -105
- torchzero/modules/adaptive/adam.py +53 -76
- torchzero/modules/adaptive/adan.py +50 -31
- torchzero/modules/adaptive/adaptive_heavyball.py +12 -7
- torchzero/modules/adaptive/aegd.py +12 -12
- torchzero/modules/adaptive/esgd.py +98 -119
- torchzero/modules/adaptive/ggt.py +186 -0
- torchzero/modules/adaptive/lion.py +7 -11
- torchzero/modules/adaptive/lre_optimizers.py +299 -0
- torchzero/modules/adaptive/mars.py +7 -7
- torchzero/modules/adaptive/matrix_momentum.py +48 -52
- torchzero/modules/adaptive/msam.py +71 -53
- torchzero/modules/adaptive/muon.py +67 -129
- torchzero/modules/adaptive/natural_gradient.py +63 -41
- torchzero/modules/adaptive/orthograd.py +11 -15
- torchzero/modules/adaptive/psgd/__init__.py +5 -0
- torchzero/modules/adaptive/psgd/_psgd_utils.py +37 -0
- torchzero/modules/adaptive/psgd/psgd.py +1390 -0
- torchzero/modules/adaptive/psgd/psgd_dense_newton.py +174 -0
- torchzero/modules/adaptive/psgd/psgd_kron_newton.py +203 -0
- torchzero/modules/adaptive/psgd/psgd_kron_whiten.py +185 -0
- torchzero/modules/adaptive/psgd/psgd_lra_newton.py +118 -0
- torchzero/modules/adaptive/psgd/psgd_lra_whiten.py +116 -0
- torchzero/modules/adaptive/rmsprop.py +83 -75
- torchzero/modules/adaptive/rprop.py +48 -47
- torchzero/modules/adaptive/sam.py +55 -45
- torchzero/modules/adaptive/shampoo.py +149 -130
- torchzero/modules/adaptive/soap.py +207 -143
- torchzero/modules/adaptive/sophia_h.py +106 -130
- torchzero/modules/clipping/clipping.py +22 -25
- torchzero/modules/clipping/ema_clipping.py +31 -25
- torchzero/modules/clipping/growth_clipping.py +14 -17
- torchzero/modules/conjugate_gradient/cg.py +27 -38
- torchzero/modules/experimental/__init__.py +7 -6
- torchzero/modules/experimental/adanystrom.py +258 -0
- torchzero/modules/experimental/common_directions_whiten.py +142 -0
- torchzero/modules/experimental/coordinate_momentum.py +36 -0
- torchzero/modules/experimental/cubic_adam.py +160 -0
- torchzero/modules/experimental/curveball.py +25 -41
- torchzero/modules/experimental/eigen_sr1.py +182 -0
- torchzero/modules/experimental/eigengrad.py +207 -0
- torchzero/modules/experimental/gradmin.py +2 -2
- torchzero/modules/experimental/higher_order_newton.py +14 -40
- torchzero/modules/experimental/l_infinity.py +1 -1
- torchzero/modules/experimental/matrix_nag.py +122 -0
- torchzero/modules/experimental/newton_solver.py +23 -54
- torchzero/modules/experimental/newtonnewton.py +45 -48
- torchzero/modules/experimental/reduce_outward_lr.py +7 -7
- torchzero/modules/experimental/scipy_newton_cg.py +21 -24
- torchzero/modules/experimental/spsa1.py +3 -3
- torchzero/modules/experimental/structural_projections.py +1 -4
- torchzero/modules/grad_approximation/fdm.py +2 -2
- torchzero/modules/grad_approximation/forward_gradient.py +7 -7
- torchzero/modules/grad_approximation/grad_approximator.py +23 -16
- torchzero/modules/grad_approximation/rfdm.py +24 -21
- torchzero/modules/least_squares/gn.py +121 -50
- torchzero/modules/line_search/backtracking.py +4 -4
- torchzero/modules/line_search/line_search.py +33 -33
- torchzero/modules/line_search/strong_wolfe.py +4 -4
- torchzero/modules/misc/debug.py +12 -12
- torchzero/modules/misc/escape.py +10 -10
- torchzero/modules/misc/gradient_accumulation.py +11 -79
- torchzero/modules/misc/homotopy.py +16 -8
- torchzero/modules/misc/misc.py +121 -123
- torchzero/modules/misc/multistep.py +52 -53
- torchzero/modules/misc/regularization.py +49 -44
- torchzero/modules/misc/split.py +31 -29
- torchzero/modules/misc/switch.py +37 -32
- torchzero/modules/momentum/averaging.py +14 -14
- torchzero/modules/momentum/cautious.py +37 -31
- torchzero/modules/momentum/momentum.py +12 -12
- torchzero/modules/ops/__init__.py +4 -4
- torchzero/modules/ops/accumulate.py +21 -21
- torchzero/modules/ops/binary.py +67 -66
- torchzero/modules/ops/higher_level.py +20 -20
- torchzero/modules/ops/multi.py +44 -41
- torchzero/modules/ops/reduce.py +26 -23
- torchzero/modules/ops/unary.py +53 -53
- torchzero/modules/ops/utility.py +47 -46
- torchzero/modules/{functional.py → opt_utils.py} +1 -1
- torchzero/modules/projections/galore.py +1 -1
- torchzero/modules/projections/projection.py +46 -43
- torchzero/modules/quasi_newton/__init__.py +1 -1
- torchzero/modules/quasi_newton/damping.py +2 -2
- torchzero/modules/quasi_newton/diagonal_quasi_newton.py +1 -1
- torchzero/modules/quasi_newton/lbfgs.py +10 -10
- torchzero/modules/quasi_newton/lsr1.py +10 -10
- torchzero/modules/quasi_newton/quasi_newton.py +54 -39
- torchzero/modules/quasi_newton/sg2.py +69 -205
- torchzero/modules/restarts/restars.py +39 -37
- torchzero/modules/second_order/__init__.py +2 -2
- torchzero/modules/second_order/ifn.py +31 -62
- torchzero/modules/second_order/inm.py +57 -53
- torchzero/modules/second_order/multipoint.py +40 -80
- torchzero/modules/second_order/newton.py +165 -196
- torchzero/modules/second_order/newton_cg.py +105 -157
- torchzero/modules/second_order/nystrom.py +216 -185
- torchzero/modules/second_order/rsn.py +132 -125
- torchzero/modules/smoothing/laplacian.py +13 -12
- torchzero/modules/smoothing/sampling.py +10 -10
- torchzero/modules/step_size/adaptive.py +24 -24
- torchzero/modules/step_size/lr.py +17 -17
- torchzero/modules/termination/termination.py +32 -30
- torchzero/modules/trust_region/cubic_regularization.py +3 -3
- torchzero/modules/trust_region/levenberg_marquardt.py +25 -28
- torchzero/modules/trust_region/trust_cg.py +2 -2
- torchzero/modules/trust_region/trust_region.py +27 -22
- torchzero/modules/variance_reduction/svrg.py +23 -21
- torchzero/modules/weight_decay/__init__.py +2 -1
- torchzero/modules/weight_decay/reinit.py +83 -0
- torchzero/modules/weight_decay/weight_decay.py +17 -18
- torchzero/modules/wrappers/optim_wrapper.py +14 -14
- torchzero/modules/zeroth_order/cd.py +10 -7
- torchzero/optim/mbs.py +291 -0
- torchzero/optim/root.py +3 -3
- torchzero/optim/utility/split.py +2 -1
- torchzero/optim/wrappers/directsearch.py +27 -63
- torchzero/optim/wrappers/fcmaes.py +14 -35
- torchzero/optim/wrappers/mads.py +11 -31
- torchzero/optim/wrappers/moors.py +66 -0
- torchzero/optim/wrappers/nevergrad.py +4 -13
- torchzero/optim/wrappers/nlopt.py +31 -25
- torchzero/optim/wrappers/optuna.py +8 -13
- torchzero/optim/wrappers/pybobyqa.py +124 -0
- torchzero/optim/wrappers/scipy/__init__.py +7 -0
- torchzero/optim/wrappers/scipy/basin_hopping.py +117 -0
- torchzero/optim/wrappers/scipy/brute.py +48 -0
- torchzero/optim/wrappers/scipy/differential_evolution.py +80 -0
- torchzero/optim/wrappers/scipy/direct.py +69 -0
- torchzero/optim/wrappers/scipy/dual_annealing.py +115 -0
- torchzero/optim/wrappers/scipy/experimental.py +141 -0
- torchzero/optim/wrappers/scipy/minimize.py +151 -0
- torchzero/optim/wrappers/scipy/sgho.py +111 -0
- torchzero/optim/wrappers/wrapper.py +121 -0
- torchzero/utils/__init__.py +7 -25
- torchzero/utils/benchmarks/__init__.py +0 -0
- torchzero/utils/benchmarks/logistic.py +122 -0
- torchzero/utils/compile.py +2 -2
- torchzero/utils/derivatives.py +97 -73
- torchzero/utils/optimizer.py +4 -77
- torchzero/utils/python_tools.py +31 -0
- torchzero/utils/tensorlist.py +11 -5
- torchzero/utils/thoad_tools.py +68 -0
- {torchzero-0.3.15.dist-info → torchzero-0.4.1.dist-info}/METADATA +1 -1
- torchzero-0.4.1.dist-info/RECORD +209 -0
- tests/test_vars.py +0 -185
- torchzero/core/var.py +0 -376
- torchzero/modules/adaptive/lmadagrad.py +0 -186
- torchzero/modules/experimental/momentum.py +0 -160
- torchzero/optim/wrappers/scipy.py +0 -572
- torchzero/utils/linalg/__init__.py +0 -12
- torchzero/utils/linalg/matrix_funcs.py +0 -87
- torchzero/utils/linalg/orthogonalize.py +0 -12
- torchzero/utils/linalg/svd.py +0 -20
- torchzero/utils/ops.py +0 -10
- torchzero-0.3.15.dist-info/RECORD +0 -175
- /torchzero/{utils/linalg → linalg}/benchmark.py +0 -0
- {torchzero-0.3.15.dist-info → torchzero-0.4.1.dist-info}/WHEEL +0 -0
- {torchzero-0.3.15.dist-info → torchzero-0.4.1.dist-info}/top_level.txt +0 -0
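A note for users upgrading: the `torchzero/{utils/linalg → linalg}` moves and the `modules/{functional.py → opt_utils.py}` rename above imply changed import paths in 0.4.1. The sketch below is inferred from the file listing only and has not been checked against the 0.4.1 public API, so treat the exact paths and names as assumptions.

# Inferred from the renames in the listing above; verify against the installed 0.4.1 package.
# 0.3.15 (old paths):
#   from torchzero.utils.linalg import qr, solve, linear_operator
#   from torchzero.modules.functional import debias
# 0.4.1 (new paths):
from torchzero.linalg import qr, solve, linear_operator   # utils/linalg is now the top-level torchzero.linalg subpackage
from torchzero.modules.opt_utils import debias             # modules/functional.py was renamed to modules/opt_utils.py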
torchzero/modules/ops/binary.py
CHANGED
@@ -6,8 +6,8 @@ from typing import Any
 
 import torch
 
-from ...core import Chainable, Module,
-from ...utils import TensorList
+from ...core import Chainable, Module, Objective
+from ...utils import TensorList
 
 
 class BinaryOperationBase(Module, ABC):
@@ -25,263 +25,264 @@ class BinaryOperationBase(Module, ABC):
                 self.operands[k] = v
 
     @abstractmethod
-    def transform(self,
+    def transform(self, objective: Objective, update: list[torch.Tensor], **operands: Any | list[torch.Tensor]) -> Iterable[torch.Tensor]:
         """applies the operation to operands"""
         raise NotImplementedError
 
+    def update(self, objective): raise RuntimeError
+    def apply(self, objective): raise RuntimeError
+
     @torch.no_grad
-    def step(self,
+    def step(self, objective: Objective) -> Objective:
         # pass cloned update to all module operands
         processed_operands: dict[str, Any | list[torch.Tensor]] = self.operands.copy()
 
         for k,v in self.operands.items():
             if k in self.children:
                 v: Module
-
-                processed_operands[k] =
-
+                updated_obj = v.step(objective.clone(clone_updates=True))
+                processed_operands[k] = updated_obj.get_updates()
+                objective.update_attrs_from_clone_(updated_obj) # update loss, grad, etc if this module calculated them
 
-        transformed = self.transform(
-
-        return
+        transformed = self.transform(objective, update=objective.get_updates(), **processed_operands)
+        objective.updates = list(transformed)
+        return objective
 
 
 class Add(BinaryOperationBase):
-    """Add
+    """Add ``other`` to tensors. ``other`` can be a number or a module.
 
-    If
+    If ``other`` is a module, this calculates ``tensors + other(tensors)``
     """
     def __init__(self, other: Chainable | float, alpha: float = 1):
         defaults = dict(alpha=alpha)
         super().__init__(defaults, other=other)
 
     @torch.no_grad
-    def transform(self,
+    def transform(self, objective, update: list[torch.Tensor], other: float | list[torch.Tensor]):
         if isinstance(other, (int,float)): torch._foreach_add_(update, other * self.defaults['alpha'])
         else: torch._foreach_add_(update, other, alpha=self.defaults['alpha'])
         return update
 
 class Sub(BinaryOperationBase):
-    """Subtract
+    """Subtract ``other`` from tensors. ``other`` can be a number or a module.
 
-    If
+    If ``other`` is a module, this calculates :code:`tensors - other(tensors)`
     """
     def __init__(self, other: Chainable | float, alpha: float = 1):
         defaults = dict(alpha=alpha)
         super().__init__(defaults, other=other)
 
     @torch.no_grad
-    def transform(self,
+    def transform(self, objective, update: list[torch.Tensor], other: float | list[torch.Tensor]):
         if isinstance(other, (int,float)): torch._foreach_sub_(update, other * self.defaults['alpha'])
         else: torch._foreach_sub_(update, other, alpha=self.defaults['alpha'])
         return update
 
 class RSub(BinaryOperationBase):
-    """Subtract tensors from
+    """Subtract tensors from ``other``. ``other`` can be a number or a module.
 
-    If
+    If ``other`` is a module, this calculates ``other(tensors) - tensors``
     """
     def __init__(self, other: Chainable | float):
         super().__init__({}, other=other)
 
     @torch.no_grad
-    def transform(self,
+    def transform(self, objective, update: list[torch.Tensor], other: float | list[torch.Tensor]):
         return other - TensorList(update)
 
 class Mul(BinaryOperationBase):
-    """Multiply tensors by
+    """Multiply tensors by ``other``. ``other`` can be a number or a module.
 
-    If
+    If ``other`` is a module, this calculates ``tensors * other(tensors)``
     """
     def __init__(self, other: Chainable | float):
         super().__init__({}, other=other)
 
     @torch.no_grad
-    def transform(self,
+    def transform(self, objective, update: list[torch.Tensor], other: float | list[torch.Tensor]):
         torch._foreach_mul_(update, other)
         return update
 
 class Div(BinaryOperationBase):
-    """Divide tensors by
+    """Divide tensors by ``other``. ``other`` can be a number or a module.
 
-    If
+    If ``other`` is a module, this calculates ``tensors / other(tensors)``
     """
     def __init__(self, other: Chainable | float):
         super().__init__({}, other=other)
 
     @torch.no_grad
-    def transform(self,
+    def transform(self, objective, update: list[torch.Tensor], other: float | list[torch.Tensor]):
         torch._foreach_div_(update, other)
         return update
 
 class RDiv(BinaryOperationBase):
-    """Divide
+    """Divide ``other`` by tensors. ``other`` can be a number or a module.
 
-    If
+    If ``other`` is a module, this calculates ``other(tensors) / tensors``
    """
     def __init__(self, other: Chainable | float):
         super().__init__({}, other=other)
 
     @torch.no_grad
-    def transform(self,
+    def transform(self, objective, update: list[torch.Tensor], other: float | list[torch.Tensor]):
         return other / TensorList(update)
 
 class Pow(BinaryOperationBase):
-    """Take tensors to the power of
+    """Take tensors to the power of ``exponent``. ``exponent`` can be a number or a module.
 
-    If
+    If ``exponent`` is a module, this calculates ``tensors ^ exponent(tensors)``
     """
     def __init__(self, exponent: Chainable | float):
         super().__init__({}, exponent=exponent)
 
     @torch.no_grad
-    def transform(self,
+    def transform(self, objective, update: list[torch.Tensor], exponent: float | list[torch.Tensor]):
         torch._foreach_pow_(update, exponent)
         return update
 
 class RPow(BinaryOperationBase):
-    """Take
+    """Take ``other`` to the power of tensors. ``other`` can be a number or a module.
 
-    If
+    If ``other`` is a module, this calculates ``other(tensors) ^ tensors``
     """
     def __init__(self, other: Chainable | float):
         super().__init__({}, other=other)
 
     @torch.no_grad
-    def transform(self,
+    def transform(self, objective, update: list[torch.Tensor], other: float | list[torch.Tensor]):
         if isinstance(other, (int, float)): return torch._foreach_pow(other, update) # no in-place
         torch._foreach_pow_(other, update)
         return other
 
 class Lerp(BinaryOperationBase):
-    """Does a linear interpolation of tensors and
+    """Does a linear interpolation of tensors and ``end`` module based on a scalar ``weight``.
 
-    The output is given by
+    The output is given by ``output = tensors + weight * (end(tensors) - tensors)``
     """
     def __init__(self, end: Chainable, weight: float):
         defaults = dict(weight=weight)
         super().__init__(defaults, end=end)
 
     @torch.no_grad
-    def transform(self,
-        torch._foreach_lerp_(update, end, weight=self.get_settings(
+    def transform(self, objective, update: list[torch.Tensor], end: list[torch.Tensor]):
+        torch._foreach_lerp_(update, end, weight=self.get_settings(objective.params, 'weight'))
         return update
 
 class CopySign(BinaryOperationBase):
-    """Returns tensors with sign copied from
+    """Returns tensors with sign copied from ``other(tensors)``."""
     def __init__(self, other: Chainable):
         super().__init__({}, other=other)
 
     @torch.no_grad
-    def transform(self,
+    def transform(self, objective, update: list[torch.Tensor], other: list[torch.Tensor]):
         return [u.copysign_(o) for u, o in zip(update, other)]
 
 class RCopySign(BinaryOperationBase):
-    """Returns
+    """Returns ``other(tensors)`` with sign copied from tensors."""
     def __init__(self, other: Chainable):
         super().__init__({}, other=other)
 
     @torch.no_grad
-    def transform(self,
+    def transform(self, objective, update: list[torch.Tensor], other: list[torch.Tensor]):
         return [o.copysign_(u) for u, o in zip(update, other)]
 CopyMagnitude = RCopySign
 
 class Clip(BinaryOperationBase):
-    """clip tensors to be in
+    """clip tensors to be in ``(min, max)`` range. ``min`` and ``max`: can be None, numbers or modules.
 
-    If
+    If ``min`` and ``max`` are modules, this calculates ``tensors.clip(min(tensors), max(tensors))``.
     """
     def __init__(self, min: float | Chainable | None = None, max: float | Chainable | None = None):
         super().__init__({}, min=min, max=max)
 
     @torch.no_grad
-    def transform(self,
+    def transform(self, objective, update: list[torch.Tensor], min: float | list[torch.Tensor] | None, max: float | list[torch.Tensor] | None):
         return TensorList(update).clamp_(min=min, max=max)
 
 class MirroredClip(BinaryOperationBase):
-    """clip tensors to be in
+    """clip tensors to be in ``(-value, value)`` range. ``value`` can be a number or a module.
 
-    If
+    If ``value`` is a module, this calculates ``tensors.clip(-value(tensors), value(tensors))``
     """
     def __init__(self, value: float | Chainable):
         super().__init__({}, value=value)
 
     @torch.no_grad
-    def transform(self,
+    def transform(self, objective, update: list[torch.Tensor], value: float | list[torch.Tensor]):
         min = -value if isinstance(value, (int,float)) else [-v for v in value]
         return TensorList(update).clamp_(min=min, max=value)
 
-class
-    """Outputs tensors rescaled to have the same norm as
+class GraftInputToOutput(BinaryOperationBase):
+    """Outputs ``tensors`` rescaled to have the same norm as ``magnitude(tensors)``."""
     def __init__(self, magnitude: Chainable, tensorwise:bool=True, ord:float=2, eps:float = 1e-6):
         defaults = dict(tensorwise=tensorwise, ord=ord, eps=eps)
         super().__init__(defaults, magnitude=magnitude)
 
     @torch.no_grad
-    def transform(self,
+    def transform(self, objective, update: list[torch.Tensor], magnitude: list[torch.Tensor]):
         tensorwise, ord, eps = itemgetter('tensorwise','ord','eps')(self.defaults)
         return TensorList(update).graft_(magnitude, tensorwise=tensorwise, ord=ord, eps=eps)
 
-class
-    """Outputs
+class GraftOutputToInput(BinaryOperationBase):
+    """Outputs ``magnitude(tensors)`` rescaled to have the same norm as ``tensors``"""
 
     def __init__(self, direction: Chainable, tensorwise:bool=True, ord:float=2, eps:float = 1e-6):
         defaults = dict(tensorwise=tensorwise, ord=ord, eps=eps)
         super().__init__(defaults, direction=direction)
 
     @torch.no_grad
-    def transform(self,
+    def transform(self, objective, update: list[torch.Tensor], direction: list[torch.Tensor]):
         tensorwise, ord, eps = itemgetter('tensorwise','ord','eps')(self.defaults)
         return TensorList(direction).graft_(update, tensorwise=tensorwise, ord=ord, eps=eps)
 
-GraftToUpdate = RGraft
-
 class Maximum(BinaryOperationBase):
-    """Outputs
+    """Outputs ``maximum(tensors, other(tensors))``"""
     def __init__(self, other: Chainable):
         super().__init__({}, other=other)
 
     @torch.no_grad
-    def transform(self,
+    def transform(self, objective, update: list[torch.Tensor], other: list[torch.Tensor]):
         torch._foreach_maximum_(update, other)
         return update
 
 class Minimum(BinaryOperationBase):
-    """Outputs
+    """Outputs ``minimum(tensors, other(tensors))``"""
     def __init__(self, other: Chainable):
         super().__init__({}, other=other)
 
     @torch.no_grad
-    def transform(self,
+    def transform(self, objective, update: list[torch.Tensor], other: list[torch.Tensor]):
         torch._foreach_minimum_(update, other)
         return update
 
 
 class GramSchimdt(BinaryOperationBase):
-    """outputs tensors made orthogonal to
+    """outputs tensors made orthogonal to ``other(tensors)`` via Gram-Schmidt."""
     def __init__(self, other: Chainable):
         super().__init__({}, other=other)
 
     @torch.no_grad
-    def transform(self,
+    def transform(self, objective, update: list[torch.Tensor], other: list[torch.Tensor]):
         update = TensorList(update); other = TensorList(other)
         min = torch.finfo(update[0].dtype).tiny * 2
         return update - (other*update) / (other*other).clip(min=min)
 
 
 class Threshold(BinaryOperationBase):
-    """Outputs tensors thresholded such that values above
+    """Outputs tensors thresholded such that values above ``threshold`` are set to ``value``."""
     def __init__(self, threshold: Chainable | float, value: Chainable | float, update_above: bool):
         defaults = dict(update_above=update_above)
         super().__init__(defaults, threshold=threshold, value=value)
 
     @torch.no_grad
-    def transform(self,
+    def transform(self, objective, update: list[torch.Tensor], threshold: list[torch.Tensor] | float, value: list[torch.Tensor] | float):
         update_above = self.defaults['update_above']
         update = TensorList(update)
         if update_above:
-            if isinstance(value, list): return update.
+            if isinstance(value, list): return update.where(update>threshold, value)
             return update.masked_fill_(update<=threshold, value)
 
-        if isinstance(value, list): return update.
+        if isinstance(value, list): return update.where(update<threshold, value)
         return update.masked_fill_(update>=threshold, value)
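The diff above shows the 0.4.1 contract for binary operation modules: ``transform`` now receives an ``Objective`` plus the current update, and the base class ``step`` resolves module operands before calling it. The sketch below is a hypothetical subclass written against exactly those signatures; the class name ``WeightedAdd`` and its behaviour are illustrative and not part of the package.

import torch
from torchzero.core import Chainable
from torchzero.modules.ops.binary import BinaryOperationBase

class WeightedAdd(BinaryOperationBase):
    """Hypothetical example: adds ``weight * other(tensors)`` to the update."""
    def __init__(self, other: Chainable | float, weight: float = 0.5):
        defaults = dict(weight=weight)
        super().__init__(defaults, other=other)  # ``other`` may be a number or a module operand

    @torch.no_grad
    def transform(self, objective, update: list[torch.Tensor], other: float | list[torch.Tensor]):
        # ``update`` is the incoming update; ``other`` is either the raw number or the
        # operand module's output, already computed by ``BinaryOperationBase.step``.
        w = self.defaults['weight']
        if isinstance(other, (int, float)):
            torch._foreach_add_(update, other * w)
        else:
            torch._foreach_add_(update, other, alpha=w)
        return update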
torchzero/modules/ops/higher_level.py
CHANGED
@@ -4,9 +4,9 @@ from typing import Literal
 
 import torch
 
-from ...core import
+from ...core import TensorTransform
 from ...utils import NumberList, TensorList, unpack_dicts, unpack_states
-from ..
+from ..opt_utils import (
     centered_ema_sq_,
     debias,
     debias_second_momentum,
@@ -17,7 +17,7 @@ from ..functional import (
 )
 
 
-class EMASquared(
+class EMASquared(TensorTransform):
     """Maintains an exponential moving average of squared updates.
 
     Args:
@@ -29,10 +29,10 @@ class EMASquared(Transform):
 
     def __init__(self, beta:float=0.999, amsgrad=False, pow:float=2):
         defaults = dict(beta=beta,pow=pow,amsgrad=amsgrad)
-        super().__init__(defaults
+        super().__init__(defaults)
 
     @torch.no_grad
-    def
+    def multi_tensor_apply(self, tensors, params, grads, loss, states, settings):
         amsgrad, pow = itemgetter('amsgrad', 'pow')(self.settings[params[0]])
         beta = NumberList(s['beta'] for s in settings)
 
@@ -44,7 +44,7 @@ class EMASquared(Transform):
 
         return self.EMA_SQ_FN(TensorList(tensors), exp_avg_sq_=exp_avg_sq, beta=beta, max_exp_avg_sq_=max_exp_avg_sq, pow=pow).clone()
 
-class SqrtEMASquared(
+class SqrtEMASquared(TensorTransform):
     """Maintains an exponential moving average of squared updates, outputs optionally debiased square root.
 
     Args:
@@ -56,11 +56,11 @@ class SqrtEMASquared(Transform):
     SQRT_EMA_SQ_FN: staticmethod = staticmethod(sqrt_ema_sq_)
     def __init__(self, beta:float=0.999, amsgrad=False, debiased: bool = False, pow:float=2,):
         defaults = dict(beta=beta,pow=pow,amsgrad=amsgrad,debiased=debiased)
-        super().__init__(defaults
+        super().__init__(defaults)
 
 
     @torch.no_grad
-    def
+    def multi_tensor_apply(self, tensors, params, grads, loss, states, settings):
         step = self.global_state['step'] = self.global_state.get('step', 0) + 1
 
         amsgrad, pow, debiased = itemgetter('amsgrad', 'pow', 'debiased')(settings[0])
@@ -83,7 +83,7 @@ class SqrtEMASquared(Transform):
         )
 
 
-class Debias(
+class Debias(TensorTransform):
     """Multiplies the update by an Adam debiasing term based first and/or second momentum.
 
     Args:
@@ -95,12 +95,12 @@ class Debias(Transform):
         pow (float, optional): power, assumes absolute value is used. Defaults to 2.
         target (Target, optional): target. Defaults to 'update'.
     """
-    def __init__(self, beta1: float | None = None, beta2: float | None = None, alpha: float = 1, pow:float=2
+    def __init__(self, beta1: float | None = None, beta2: float | None = None, alpha: float = 1, pow:float=2):
         defaults = dict(beta1=beta1, beta2=beta2, alpha=alpha, pow=pow)
-        super().__init__(defaults
+        super().__init__(defaults)
 
     @torch.no_grad
-    def
+    def multi_tensor_apply(self, tensors, params, grads, loss, states, settings):
         step = self.global_state['step'] = self.global_state.get('step', 0) + 1
 
         pow = settings[0]['pow']
@@ -108,7 +108,7 @@ class Debias(Transform):
 
         return debias(TensorList(tensors), step=step, beta1=beta1, beta2=beta2, alpha=alpha, pow=pow, inplace=True)
 
-class Debias2(
+class Debias2(TensorTransform):
     """Multiplies the update by an Adam debiasing term based on the second momentum.
 
     Args:
@@ -117,19 +117,19 @@ class Debias2(Transform):
         pow (float, optional): power, assumes absolute value is used. Defaults to 2.
         target (Target, optional): target. Defaults to 'update'.
     """
-    def __init__(self, beta: float = 0.999, pow: float = 2,
+    def __init__(self, beta: float = 0.999, pow: float = 2,):
         defaults = dict(beta=beta, pow=pow)
-        super().__init__(defaults, uses_grad=False
+        super().__init__(defaults, uses_grad=False)
 
     @torch.no_grad
-    def
+    def multi_tensor_apply(self, tensors, params, grads, loss, states, settings):
         step = self.global_state['step'] = self.global_state.get('step', 0) + 1
 
         pow = settings[0]['pow']
         beta = NumberList(s['beta'] for s in settings)
         return debias_second_momentum(TensorList(tensors), step=step, beta=beta, pow=pow, inplace=True)
 
-class CenteredEMASquared(
+class CenteredEMASquared(TensorTransform):
     """Maintains a centered exponential moving average of squared updates. This also maintains an additional
     exponential moving average of un-squared updates, square of which is subtracted from the EMA.
 
@@ -143,7 +143,7 @@ class CenteredEMASquared(Transform):
         super().__init__(defaults, uses_grad=False)
 
     @torch.no_grad
-    def
+    def multi_tensor_apply(self, tensors, params, grads, loss, states, settings):
         amsgrad, pow = itemgetter('amsgrad', 'pow')(settings[0])
         beta = NumberList(s['beta'] for s in settings)
 
@@ -162,7 +162,7 @@ class CenteredEMASquared(Transform):
             pow=pow,
         ).clone()
 
-class CenteredSqrtEMASquared(
+class CenteredSqrtEMASquared(TensorTransform):
     """Maintains a centered exponential moving average of squared updates, outputs optionally debiased square root.
     This also maintains an additional exponential moving average of un-squared updates, square of which is subtracted from the EMA.
 
@@ -177,7 +177,7 @@ class CenteredSqrtEMASquared(Transform):
         super().__init__(defaults, uses_grad=False)
 
     @torch.no_grad
-    def
+    def multi_tensor_apply(self, tensors, params, grads, loss, states, settings):
         step = self.global_state['step'] = self.global_state.get('step', 0) + 1
 
         amsgrad, pow, debiased = itemgetter('amsgrad', 'pow', 'debiased')(settings[0])