torchzero 0.3.10__py3-none-any.whl → 0.3.11__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their public registry; it is provided for informational purposes only.
Files changed (140)
  1. docs/source/conf.py +6 -4
  2. docs/source/docstring template.py +46 -0
  3. tests/test_identical.py +2 -3
  4. tests/test_opts.py +64 -50
  5. tests/test_vars.py +1 -0
  6. torchzero/core/module.py +138 -6
  7. torchzero/core/transform.py +158 -51
  8. torchzero/modules/__init__.py +3 -2
  9. torchzero/modules/clipping/clipping.py +114 -17
  10. torchzero/modules/clipping/ema_clipping.py +27 -13
  11. torchzero/modules/clipping/growth_clipping.py +8 -7
  12. torchzero/modules/experimental/__init__.py +22 -5
  13. torchzero/modules/experimental/absoap.py +5 -2
  14. torchzero/modules/experimental/adadam.py +8 -2
  15. torchzero/modules/experimental/adamY.py +8 -2
  16. torchzero/modules/experimental/adam_lambertw.py +149 -0
  17. torchzero/modules/{line_search/trust_region.py → experimental/adaptive_step_size.py} +21 -4
  18. torchzero/modules/experimental/adasoap.py +7 -2
  19. torchzero/modules/experimental/cosine.py +214 -0
  20. torchzero/modules/experimental/cubic_adam.py +97 -0
  21. torchzero/modules/{projections → experimental}/dct.py +11 -11
  22. torchzero/modules/experimental/eigendescent.py +4 -1
  23. torchzero/modules/experimental/etf.py +32 -9
  24. torchzero/modules/experimental/exp_adam.py +113 -0
  25. torchzero/modules/experimental/expanded_lbfgs.py +141 -0
  26. torchzero/modules/{projections → experimental}/fft.py +10 -10
  27. torchzero/modules/experimental/hnewton.py +85 -0
  28. torchzero/modules/{quasi_newton/experimental → experimental}/modular_lbfgs.py +27 -28
  29. torchzero/modules/experimental/newtonnewton.py +7 -3
  30. torchzero/modules/experimental/parabolic_search.py +220 -0
  31. torchzero/modules/experimental/reduce_outward_lr.py +4 -4
  32. torchzero/modules/{projections/structural.py → experimental/structural_projections.py} +12 -54
  33. torchzero/modules/experimental/subspace_preconditioners.py +11 -4
  34. torchzero/modules/experimental/{tada.py → tensor_adagrad.py} +10 -6
  35. torchzero/modules/functional.py +12 -2
  36. torchzero/modules/grad_approximation/fdm.py +30 -3
  37. torchzero/modules/grad_approximation/forward_gradient.py +13 -3
  38. torchzero/modules/grad_approximation/grad_approximator.py +51 -6
  39. torchzero/modules/grad_approximation/rfdm.py +285 -38
  40. torchzero/modules/higher_order/higher_order_newton.py +152 -89
  41. torchzero/modules/line_search/__init__.py +4 -4
  42. torchzero/modules/line_search/adaptive.py +99 -0
  43. torchzero/modules/line_search/backtracking.py +34 -9
  44. torchzero/modules/line_search/line_search.py +70 -12
  45. torchzero/modules/line_search/polynomial.py +233 -0
  46. torchzero/modules/line_search/scipy.py +2 -2
  47. torchzero/modules/line_search/strong_wolfe.py +34 -7
  48. torchzero/modules/misc/__init__.py +27 -0
  49. torchzero/modules/{ops → misc}/debug.py +24 -1
  50. torchzero/modules/misc/escape.py +60 -0
  51. torchzero/modules/misc/gradient_accumulation.py +70 -0
  52. torchzero/modules/misc/misc.py +316 -0
  53. torchzero/modules/misc/multistep.py +158 -0
  54. torchzero/modules/misc/regularization.py +171 -0
  55. torchzero/modules/{ops → misc}/split.py +29 -1
  56. torchzero/modules/{ops → misc}/switch.py +44 -3
  57. torchzero/modules/momentum/__init__.py +1 -1
  58. torchzero/modules/momentum/averaging.py +6 -6
  59. torchzero/modules/momentum/cautious.py +45 -8
  60. torchzero/modules/momentum/ema.py +7 -7
  61. torchzero/modules/momentum/experimental.py +2 -2
  62. torchzero/modules/momentum/matrix_momentum.py +90 -63
  63. torchzero/modules/momentum/momentum.py +2 -1
  64. torchzero/modules/ops/__init__.py +3 -31
  65. torchzero/modules/ops/accumulate.py +6 -10
  66. torchzero/modules/ops/binary.py +72 -26
  67. torchzero/modules/ops/multi.py +77 -16
  68. torchzero/modules/ops/reduce.py +15 -7
  69. torchzero/modules/ops/unary.py +29 -13
  70. torchzero/modules/ops/utility.py +20 -12
  71. torchzero/modules/optimizers/__init__.py +12 -3
  72. torchzero/modules/optimizers/adagrad.py +23 -13
  73. torchzero/modules/optimizers/adahessian.py +223 -0
  74. torchzero/modules/optimizers/adam.py +7 -6
  75. torchzero/modules/optimizers/adan.py +110 -0
  76. torchzero/modules/optimizers/adaptive_heavyball.py +57 -0
  77. torchzero/modules/optimizers/esgd.py +171 -0
  78. torchzero/modules/{experimental/spectral.py → optimizers/ladagrad.py} +91 -71
  79. torchzero/modules/optimizers/lion.py +1 -1
  80. torchzero/modules/optimizers/mars.py +91 -0
  81. torchzero/modules/optimizers/msam.py +186 -0
  82. torchzero/modules/optimizers/muon.py +30 -5
  83. torchzero/modules/optimizers/orthograd.py +1 -1
  84. torchzero/modules/optimizers/rmsprop.py +7 -4
  85. torchzero/modules/optimizers/rprop.py +42 -8
  86. torchzero/modules/optimizers/sam.py +163 -0
  87. torchzero/modules/optimizers/shampoo.py +39 -5
  88. torchzero/modules/optimizers/soap.py +29 -19
  89. torchzero/modules/optimizers/sophia_h.py +71 -14
  90. torchzero/modules/projections/__init__.py +2 -4
  91. torchzero/modules/projections/cast.py +51 -0
  92. torchzero/modules/projections/galore.py +3 -1
  93. torchzero/modules/projections/projection.py +188 -94
  94. torchzero/modules/quasi_newton/__init__.py +12 -2
  95. torchzero/modules/quasi_newton/cg.py +160 -59
  96. torchzero/modules/quasi_newton/diagonal_quasi_newton.py +163 -0
  97. torchzero/modules/quasi_newton/lbfgs.py +154 -97
  98. torchzero/modules/quasi_newton/lsr1.py +101 -57
  99. torchzero/modules/quasi_newton/quasi_newton.py +863 -215
  100. torchzero/modules/quasi_newton/trust_region.py +397 -0
  101. torchzero/modules/second_order/__init__.py +2 -2
  102. torchzero/modules/second_order/newton.py +220 -41
  103. torchzero/modules/second_order/newton_cg.py +300 -11
  104. torchzero/modules/second_order/nystrom.py +104 -1
  105. torchzero/modules/smoothing/gaussian.py +34 -0
  106. torchzero/modules/smoothing/laplacian.py +14 -4
  107. torchzero/modules/step_size/__init__.py +2 -0
  108. torchzero/modules/step_size/adaptive.py +122 -0
  109. torchzero/modules/step_size/lr.py +154 -0
  110. torchzero/modules/weight_decay/__init__.py +1 -1
  111. torchzero/modules/weight_decay/weight_decay.py +89 -7
  112. torchzero/modules/wrappers/optim_wrapper.py +29 -1
  113. torchzero/optim/wrappers/directsearch.py +39 -2
  114. torchzero/optim/wrappers/fcmaes.py +21 -13
  115. torchzero/optim/wrappers/mads.py +5 -6
  116. torchzero/optim/wrappers/nevergrad.py +16 -1
  117. torchzero/optim/wrappers/optuna.py +1 -1
  118. torchzero/optim/wrappers/scipy.py +5 -3
  119. torchzero/utils/__init__.py +2 -2
  120. torchzero/utils/derivatives.py +3 -3
  121. torchzero/utils/linalg/__init__.py +1 -1
  122. torchzero/utils/linalg/solve.py +251 -12
  123. torchzero/utils/numberlist.py +2 -0
  124. torchzero/utils/python_tools.py +10 -0
  125. torchzero/utils/tensorlist.py +40 -28
  126. {torchzero-0.3.10.dist-info → torchzero-0.3.11.dist-info}/METADATA +65 -40
  127. torchzero-0.3.11.dist-info/RECORD +159 -0
  128. torchzero/modules/experimental/diagonal_higher_order_newton.py +0 -225
  129. torchzero/modules/experimental/soapy.py +0 -163
  130. torchzero/modules/experimental/structured_newton.py +0 -111
  131. torchzero/modules/lr/__init__.py +0 -2
  132. torchzero/modules/lr/adaptive.py +0 -93
  133. torchzero/modules/lr/lr.py +0 -63
  134. torchzero/modules/ops/misc.py +0 -418
  135. torchzero/modules/quasi_newton/experimental/__init__.py +0 -1
  136. torchzero/modules/quasi_newton/olbfgs.py +0 -196
  137. torchzero-0.3.10.dist-info/RECORD +0 -139
  138. {torchzero-0.3.10.dist-info → torchzero-0.3.11.dist-info}/WHEEL +0 -0
  139. {torchzero-0.3.10.dist-info → torchzero-0.3.11.dist-info}/licenses/LICENSE +0 -0
  140. {torchzero-0.3.10.dist-info → torchzero-0.3.11.dist-info}/top_level.txt +0 -0
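
The hunks shown below cover the four torchzero/modules/ops files (binary.py, multi.py, reduce.py, unary.py): the abstract bases are renamed to BinaryOperationBase, MultiOperationBase and ReduceOperationBase, and most concrete operation modules gain docstrings. For orientation, here is a minimal sketch of how such modules are composed, adapted from the GraftModules docstring added in this release; the tz.Modular / tz.m entry points are taken from that docstring, and the model is a placeholder:

    import torch
    import torchzero as tz

    model = torch.nn.Linear(10, 1)  # placeholder model for illustration

    # Shampoo's direction grafted onto Adam's magnitude, followed by a fixed learning rate,
    # mirroring the example in the GraftModules docstring from this diff.
    opt = tz.Modular(
        model.parameters(),
        tz.m.GraftModules(
            direction = tz.m.Shampoo(),
            magnitude = tz.m.Adam(),
        ),
        tz.m.LR(1e-3),
    )
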
torchzero/modules/ops/binary.py
@@ -1,5 +1,4 @@
  #pyright: reportIncompatibleMethodOverride=false
- """"""
  from abc import ABC, abstractmethod
  from collections.abc import Iterable, Sequence
  from operator import itemgetter
@@ -11,7 +10,7 @@ from ...core import Chainable, Module, Target, Var, maybe_chain
  from ...utils import TensorList, tensorlist


- class BinaryOperation(Module, ABC):
+ class BinaryOperationBase(Module, ABC):
  """Base class for operations that use update as the first operand. This is an abstract class, subclass it and override `transform` method to use it."""
  def __init__(self, defaults: dict[str, Any] | None, **operands: Chainable | Any):
  super().__init__(defaults=defaults)
@@ -47,7 +46,11 @@ class BinaryOperation(Module, ABC):
  return var


- class Add(BinaryOperation):
+ class Add(BinaryOperationBase):
+ """Add :code:`other` to tensors. :code:`other` can be a number or a module.
+
+ If :code:`other` is a module, this calculates :code:`tensors + other(tensors)`
+ """
  def __init__(self, other: Chainable | float, alpha: float = 1):
  defaults = dict(alpha=alpha)
  super().__init__(defaults, other=other)
@@ -58,7 +61,11 @@ class Add(BinaryOperation):
  else: torch._foreach_add_(update, other, alpha=self.settings[var.params[0]]['alpha'])
  return update

- class Sub(BinaryOperation):
+ class Sub(BinaryOperationBase):
+ """Subtract :code:`other` from tensors. :code:`other` can be a number or a module.
+
+ If :code:`other` is a module, this calculates :code:`tensors - other(tensors)`
+ """
  def __init__(self, other: Chainable | float, alpha: float = 1):
  defaults = dict(alpha=alpha)
  super().__init__(defaults, other=other)
@@ -69,7 +76,11 @@ class Sub(BinaryOperation):
  else: torch._foreach_sub_(update, other, alpha=self.settings[var.params[0]]['alpha'])
  return update

- class RSub(BinaryOperation):
+ class RSub(BinaryOperationBase):
+ """Subtract tensors from :code:`other`. :code:`other` can be a number or a module.
+
+ If :code:`other` is a module, this calculates :code:`other(tensors) - tensors`
+ """
  def __init__(self, other: Chainable | float):
  super().__init__({}, other=other)

@@ -77,7 +88,11 @@ class RSub(BinaryOperation):
  def transform(self, var, update: list[torch.Tensor], other: float | list[torch.Tensor]):
  return other - TensorList(update)

- class Mul(BinaryOperation):
+ class Mul(BinaryOperationBase):
+ """Multiply tensors by :code:`other`. :code:`other` can be a number or a module.
+
+ If :code:`other` is a module, this calculates :code:`tensors * other(tensors)`
+ """
  def __init__(self, other: Chainable | float):
  super().__init__({}, other=other)

@@ -86,7 +101,11 @@ class Mul(BinaryOperation):
  torch._foreach_mul_(update, other)
  return update

- class Div(BinaryOperation):
+ class Div(BinaryOperationBase):
+ """Divide tensors by :code:`other`. :code:`other` can be a number or a module.
+
+ If :code:`other` is a module, this calculates :code:`tensors / other(tensors)`
+ """
  def __init__(self, other: Chainable | float):
  super().__init__({}, other=other)

@@ -95,7 +114,11 @@ class Div(BinaryOperation):
  torch._foreach_div_(update, other)
  return update

- class RDiv(BinaryOperation):
+ class RDiv(BinaryOperationBase):
+ """Divide :code:`other` by tensors. :code:`other` can be a number or a module.
+
+ If :code:`other` is a module, this calculates :code:`other(tensors) / tensors`
+ """
  def __init__(self, other: Chainable | float):
  super().__init__({}, other=other)

@@ -103,7 +126,11 @@ class RDiv(BinaryOperation):
  def transform(self, var, update: list[torch.Tensor], other: float | list[torch.Tensor]):
  return other / TensorList(update)

- class Pow(BinaryOperation):
+ class Pow(BinaryOperationBase):
+ """Take tensors to the power of :code:`exponent`. :code:`exponent` can be a number or a module.
+
+ If :code:`exponent` is a module, this calculates :code:`tensors ^ exponent(tensors)`
+ """
  def __init__(self, exponent: Chainable | float):
  super().__init__({}, exponent=exponent)

@@ -112,7 +139,11 @@ class Pow(BinaryOperation):
  torch._foreach_pow_(update, exponent)
  return update

- class RPow(BinaryOperation):
+ class RPow(BinaryOperationBase):
+ """Take :code:`other` to the power of tensors. :code:`other` can be a number or a module.
+
+ If :code:`other` is a module, this calculates :code:`other(tensors) ^ tensors`
+ """
  def __init__(self, other: Chainable | float):
  super().__init__({}, other=other)

@@ -122,7 +153,11 @@ class RPow(BinaryOperation):
  torch._foreach_pow_(other, update)
  return other

- class Lerp(BinaryOperation):
+ class Lerp(BinaryOperationBase):
+ """Does a linear interpolation of tensors and :code:`end` module based on a scalar :code:`weight`.
+
+ The output is given by :code:`output = tensors + weight * (end(tensors) - tensors)`
+ """
  def __init__(self, end: Chainable, weight: float):
  defaults = dict(weight=weight)
  super().__init__(defaults, end=end)
@@ -132,7 +167,8 @@ class Lerp(BinaryOperation):
  torch._foreach_lerp_(update, end, weight=self.get_settings(var.params, 'weight'))
  return update

- class CopySign(BinaryOperation):
+ class CopySign(BinaryOperationBase):
+ """Returns tensors with sign copied from :code:`other(tensors)`."""
  def __init__(self, other: Chainable):
  super().__init__({}, other=other)

@@ -140,7 +176,8 @@ class CopySign(BinaryOperation):
  def transform(self, var, update: list[torch.Tensor], other: list[torch.Tensor]):
  return [u.copysign_(o) for u, o in zip(update, other)]

- class RCopySign(BinaryOperation):
+ class RCopySign(BinaryOperationBase):
+ """Returns :code:`other(tensors)` with sign copied from tensors."""
  def __init__(self, other: Chainable):
  super().__init__({}, other=other)

@@ -149,7 +186,11 @@ class RCopySign(BinaryOperation):
  return [o.copysign_(u) for u, o in zip(update, other)]
  CopyMagnitude = RCopySign

- class Clip(BinaryOperation):
+ class Clip(BinaryOperationBase):
+ """clip tensors to be in :code:`(min, max)` range. :code:`min` and :code:`max: can be None, numbers or modules.
+
+ If code:`min` and :code:`max`: are modules, this calculates :code:`tensors.clip(min(tensors), max(tensors))`.
+ """
  def __init__(self, min: float | Chainable | None = None, max: float | Chainable | None = None):
  super().__init__({}, min=min, max=max)

@@ -157,8 +198,11 @@ class Clip(BinaryOperation):
  def transform(self, var, update: list[torch.Tensor], min: float | list[torch.Tensor] | None, max: float | list[torch.Tensor] | None):
  return TensorList(update).clamp_(min=min, max=max)

- class MirroredClip(BinaryOperation):
- """clip by -value, value"""
+ class MirroredClip(BinaryOperationBase):
+ """clip tensors to be in :code:`(-value, value)` range. :code:`value` can be a number or a module.
+
+ If :code:`value` is a module, this calculates :code:`tensors.clip(-value(tensors), value(tensors))`
+ """
  def __init__(self, value: float | Chainable):
  super().__init__({}, value=value)

@@ -167,8 +211,8 @@ class MirroredClip(BinaryOperation):
  min = -value if isinstance(value, (int,float)) else [-v for v in value]
  return TensorList(update).clamp_(min=min, max=value)

- class Graft(BinaryOperation):
- """use direction from update and magnitude from `magnitude` module"""
+ class Graft(BinaryOperationBase):
+ """Outputs tensors rescaled to have the same norm as :code:`magnitude(tensors)`."""
  def __init__(self, magnitude: Chainable, tensorwise:bool=True, ord:float=2, eps:float = 1e-6):
  defaults = dict(tensorwise=tensorwise, ord=ord, eps=eps)
  super().__init__(defaults, magnitude=magnitude)
@@ -178,8 +222,8 @@ class Graft(BinaryOperation):
  tensorwise, ord, eps = itemgetter('tensorwise','ord','eps')(self.settings[var.params[0]])
  return TensorList(update).graft_(magnitude, tensorwise=tensorwise, ord=ord, eps=eps)

- class RGraft(BinaryOperation):
- """use direction from `direction` module and magnitude from update"""
+ class RGraft(BinaryOperationBase):
+ """Outputs :code:`magnitude(tensors)` rescaled to have the same norm as tensors"""

  def __init__(self, direction: Chainable, tensorwise:bool=True, ord:float=2, eps:float = 1e-6):
  defaults = dict(tensorwise=tensorwise, ord=ord, eps=eps)
@@ -192,7 +236,8 @@ class RGraft(BinaryOperation):

  GraftToUpdate = RGraft

- class Maximum(BinaryOperation):
+ class Maximum(BinaryOperationBase):
+ """Outputs :code:`maximum(tensors, other(tensors))`"""
  def __init__(self, other: Chainable):
  super().__init__({}, other=other)

@@ -201,7 +246,8 @@ class Maximum(BinaryOperation):
  torch._foreach_maximum_(update, other)
  return update

- class Minimum(BinaryOperation):
+ class Minimum(BinaryOperationBase):
+ """Outputs :code:`minimum(tensors, other(tensors))`"""
  def __init__(self, other: Chainable):
  super().__init__({}, other=other)

@@ -211,8 +257,8 @@
  return update


- class GramSchimdt(BinaryOperation):
- """makes update orthonormal to `other`"""
+ class GramSchimdt(BinaryOperationBase):
+ """outputs tensors made orthogonal to `other(tensors)` via Gram-Schmidt."""
  def __init__(self, other: Chainable):
  super().__init__({}, other=other)

@@ -222,8 +268,8 @@ class GramSchimdt(BinaryOperation):
  return update - (other*update) / ((other*other) + 1e-8)


- class Threshold(BinaryOperation):
- """update above/below threshold, value at and below"""
+ class Threshold(BinaryOperationBase):
+ """Outputs tensors thresholded such that values above :code:`threshold` are set to :code:`value`."""
  def __init__(self, threshold: Chainable | float, value: Chainable | float, update_above: bool):
  defaults = dict(update_above=update_above)
  super().__init__(defaults, threshold=threshold, value=value)
torchzero/modules/ops/multi.py
@@ -3,7 +3,7 @@
  from abc import ABC, abstractmethod
  from collections.abc import Iterable, Sequence
  from operator import itemgetter
- from typing import Any
+ from typing import Any, Literal

  import torch

@@ -11,7 +11,7 @@ from ...core import Chainable, Module, Target, Var, maybe_chain
  from ...utils import TensorList, tensorlist


- class MultiOperation(Module, ABC):
+ class MultiOperationBase(Module, ABC):
  """Base class for operations that use operands. This is an abstract class, subclass it and override `transform` method to use it."""
  def __init__(self, defaults: dict[str, Any] | None, **operands: Chainable | Any):
  super().__init__(defaults=defaults)
@@ -51,7 +51,8 @@ class MultiOperation(Module, ABC):



- class SubModules(MultiOperation):
+ class SubModules(MultiOperationBase):
+ """Calculates :code:`input - other`. :code:`input` and :code:`other` can be numbers or modules."""
  def __init__(self, input: Chainable | float, other: Chainable | float, alpha: float = 1):
  defaults = dict(alpha=alpha)
  super().__init__(defaults, input=input, other=other)
@@ -68,10 +69,12 @@ class SubModules(MultiOperation):
  else: torch._foreach_sub_(input, other, alpha=alpha)
  return input

- class DivModules(MultiOperation):
- def __init__(self, input: Chainable | float, other: Chainable | float):
+ class DivModules(MultiOperationBase):
+ """Calculates :code:`input / other`. :code:`input` and :code:`other` can be numbers or modules."""
+ def __init__(self, input: Chainable | float, other: Chainable | float, other_first:bool=False):
  defaults = {}
- super().__init__(defaults, input=input, other=other)
+ if other_first: super().__init__(defaults, other=other, input=input)
+ else: super().__init__(defaults, input=input, other=other)

  @torch.no_grad
  def transform(self, var: Var, input: float | list[torch.Tensor], other: float | list[torch.Tensor]) -> list[torch.Tensor]:
@@ -82,7 +85,9 @@ class DivModules(MultiOperation):
  torch._foreach_div_(input, other)
  return input

- class PowModules(MultiOperation):
+
+ class PowModules(MultiOperationBase):
+ """Calculates :code:`input ** exponent`. :code:`input` and :code:`other` can be numbers or modules."""
  def __init__(self, input: Chainable | float, exponent: Chainable | float):
  defaults = {}
  super().__init__(defaults, input=input, exponent=exponent)
@@ -96,7 +101,11 @@ class PowModules(MultiOperation):
  torch._foreach_div_(input, exponent)
  return input

- class LerpModules(MultiOperation):
+ class LerpModules(MultiOperationBase):
+ """Does a linear interpolation of :code:`input(tensors)` and :code:`end(tensors)` based on a scalar :code:`weight`.
+
+ The output is given by :code:`output = input(tensors) + weight * (end(tensors) - input(tensors))`
+ """
  def __init__(self, input: Chainable, end: Chainable, weight: float):
  defaults = dict(weight=weight)
  super().__init__(defaults, input=input, end=end)
@@ -106,7 +115,8 @@ class LerpModules(MultiOperation):
  torch._foreach_lerp_(input, end, weight=self.settings[var.params[0]]['weight'])
  return input

- class ClipModules(MultiOperation):
+ class ClipModules(MultiOperationBase):
+ """Calculates :code:`input(tensors).clip(min, max)`. :code:`min` and :code:`max` can be numbers or modules."""
  def __init__(self, input: Chainable, min: float | Chainable | None = None, max: float | Chainable | None = None):
  defaults = {}
  super().__init__(defaults, input=input, min=min, max=max)
@@ -116,7 +126,34 @@ class ClipModules(MultiOperation):
  return TensorList(input).clamp_(min=min, max=max)


- class GraftModules(MultiOperation):
+ class GraftModules(MultiOperationBase):
+ """Outputs :code:`direction` output rescaled to have the same norm as :code:`magnitude` output.
+
+ Args:
+ direction (Chainable): module to use the direction from
+ magnitude (Chainable): module to use the magnitude from
+ tensorwise (bool, optional): whether to calculate norm per-tensor or globally. Defaults to True.
+ ord (float, optional): norm order. Defaults to 2.
+ eps (float, optional): clips denominator to be no less than this value. Defaults to 1e-6.
+ strength (float, optional): strength of grafting. Defaults to 1.
+
+ Example:
+ Shampoo grafted to Adam
+
+ .. code-block:: python
+
+ opt = tz.Modular(
+ model.parameters(),
+ tz.m.GraftModules(
+ direction = tz.m.Shampoo(),
+ magnitude = tz.m.Adam(),
+ ),
+ tz.m.LR(1e-3)
+ )
+
+ Reference:
+ Agarwal, N., Anil, R., Hazan, E., Koren, T., & Zhang, C. (2020). Disentangling adaptive gradient methods from learning rates. arXiv preprint arXiv:2002.11803. https://arxiv.org/pdf/2002.11803
+ """
  def __init__(self, direction: Chainable, magnitude: Chainable, tensorwise:bool=True, ord:float=2, eps:float = 1e-6, strength:float=1):
  defaults = dict(tensorwise=tensorwise, ord=ord, eps=eps, strength=strength)
  super().__init__(defaults, direction=direction, magnitude=magnitude)
@@ -126,12 +163,36 @@ class GraftModules(MultiOperation):
  tensorwise, ord, eps, strength = itemgetter('tensorwise','ord','eps', 'strength')(self.settings[var.params[0]])
  return TensorList(direction).graft_(magnitude, tensorwise=tensorwise, ord=ord, eps=eps, strength=strength)

-
- class Where(MultiOperation):
- def __init__(self, condition: Chainable, input: Chainable | float, other: Chainable | float):
- super().__init__({}, condition=condition, input=input, other=other)
+ class MultiplyByModuleNorm(MultiOperationBase):
+ """Outputs :code:`input` multiplied by norm of the :code:`norm` output."""
+ def __init__(self, input: Chainable, norm: Chainable, tensorwise:bool=True, ord:float|Literal['mean_abs']=2):
+ defaults = dict(tensorwise=tensorwise, ord=ord)
+ super().__init__(defaults, input=input, norm=norm)

  @torch.no_grad
- def transform(self, var, condition: list[torch.Tensor], input: list[torch.Tensor] | float, other: list[torch.Tensor] | float):
- return tensorlist.where(TensorList(condition).as_bool(), input, other)
+ def transform(self, var, input: list[torch.Tensor], norm:list[torch.Tensor]):
+ tensorwise, ord = itemgetter('tensorwise','ord')(self.settings[var.params[0]])
+ if tensorwise:
+ if ord == 'mean_abs': n = [t.mean() for t in torch._foreach_abs(norm)]
+ else: n = torch._foreach_norm(norm, ord)
+ else: n = TensorList(norm).global_vector_norm(ord)
+
+ torch._foreach_mul_(input, n)
+ return input
+
+ class DivideByModuleNorm(MultiOperationBase):
+ """Outputs :code:`input` divided by norm of the :code:`norm` output."""
+ def __init__(self, input: Chainable, norm: Chainable, tensorwise:bool=True, ord:float|Literal['mean_abs']=2):
+ defaults = dict(tensorwise=tensorwise, ord=ord)
+ super().__init__(defaults, input=input, norm=norm)

+ @torch.no_grad
+ def transform(self, var, input: list[torch.Tensor], norm:list[torch.Tensor]):
+ tensorwise, ord = itemgetter('tensorwise','ord')(self.settings[var.params[0]])
+ if tensorwise:
+ if ord == 'mean_abs': n = [t.mean().clip(min=1e-8) for t in torch._foreach_abs(norm)]
+ else: n = torch._foreach_clamp_min(torch._foreach_norm(norm, ord), 1e-8)
+ else: n = TensorList(norm).global_vector_norm(ord).clip(min=1e-8)
+
+ torch._foreach_div_(input, n)
+ return input
torchzero/modules/ops/reduce.py
@@ -8,7 +8,7 @@ import torch
  from ...core import Chainable, Module, Target, Var, maybe_chain


- class ReduceOperation(Module, ABC):
+ class ReduceOperationBase(Module, ABC):
  """Base class for reduction operations like Sum, Prod, Maximum. This is an abstract class, subclass it and override `transform` method to use it."""
  def __init__(self, defaults: dict[str, Any] | None, *operands: Chainable | Any):
  super().__init__(defaults=defaults)
@@ -46,7 +46,8 @@ class ReduceOperation(Module, ABC):
  var.update = transformed
  return var

- class Sum(ReduceOperation):
+ class Sum(ReduceOperationBase):
+ """Outputs sum of :code:`inputs` that can be modules or numbers."""
  USE_MEAN = False
  def __init__(self, *inputs: Chainable | float):
  super().__init__({}, *inputs)
@@ -63,12 +64,14 @@ class Sum(ReduceOperation):
  return sum

  class Mean(Sum):
+ """Outputs a mean of :code:`inputs` that can be modules or numbers."""
  USE_MEAN = True


- class WeightedSum(ReduceOperation):
+ class WeightedSum(ReduceOperationBase):
  USE_MEAN = False
  def __init__(self, *inputs: Chainable | float, weights: Iterable[float]):
+ """Outputs a weighted sum of :code:`inputs` that can be modules or numbers."""
  weights = list(weights)
  if len(inputs) != len(weights):
  raise ValueError(f'Number of inputs {len(inputs)} must match number of weights {len(weights)}')
@@ -91,9 +94,11 @@ class WeightedSum(ReduceOperation):


  class WeightedMean(WeightedSum):
+ """Outputs weighted mean of :code:`inputs` that can be modules or numbers."""
  USE_MEAN = True

- class Median(ReduceOperation):
+ class Median(ReduceOperationBase):
+ """Outputs median of :code:`inputs` that can be modules or numbers."""
  def __init__(self, *inputs: Chainable | float):
  super().__init__({}, *inputs)

@@ -106,7 +111,8 @@ class Median(ReduceOperation):
  res.append(torch.median(torch.stack(tensors + tuple(torch.full_like(tensors[0], f) for f in floats)), dim=0))
  return res

- class Prod(ReduceOperation):
+ class Prod(ReduceOperationBase):
+ """Outputs product of :code:`inputs` that can be modules or numbers."""
  def __init__(self, *inputs: Chainable | float):
  super().__init__({}, *inputs)

@@ -120,7 +126,8 @@ class Prod(ReduceOperation):

  return prod

- class MaximumModules(ReduceOperation):
+ class MaximumModules(ReduceOperationBase):
+ """Outputs elementwise maximum of :code:`inputs` that can be modules or numbers."""
  def __init__(self, *inputs: Chainable | float):
  super().__init__({}, *inputs)

@@ -134,7 +141,8 @@ class MaximumModules(ReduceOperation):

  return maximum

- class MinimumModules(ReduceOperation):
+ class MinimumModules(ReduceOperationBase):
+ """Outputs elementwise minimum of :code:`inputs` that can be modules or numbers."""
  def __init__(self, *inputs: Chainable | float):
  super().__init__({}, *inputs)

torchzero/modules/ops/unary.py
@@ -6,76 +6,92 @@ from ...core import TensorwiseTransform, Target, Transform
  from ...utils import TensorList, unpack_dicts,unpack_states

  class UnaryLambda(Transform):
+ """Applies :code:`fn` to input tensors.
+
+ :code:`fn` must accept and return a list of tensors.
+ """
  def __init__(self, fn, target: "Target" = 'update'):
  defaults = dict(fn=fn)
  super().__init__(defaults=defaults, uses_grad=False, target=target)

  @torch.no_grad
- def apply(self, tensors, params, grads, loss, states, settings):
+ def apply_tensors(self, tensors, params, grads, loss, states, settings):
  return settings[0]['fn'](tensors)

  class UnaryParameterwiseLambda(TensorwiseTransform):
+ """Applies :code:`fn` to each input tensor.
+
+ :code:`fn` must accept and return a tensor.
+ """
  def __init__(self, fn, target: "Target" = 'update'):
  defaults = dict(fn=fn)
  super().__init__(uses_grad=False, defaults=defaults, target=target)

  @torch.no_grad
- def apply_tensor(self, tensor, param, grad, loss, state, settings):
- return settings['fn'](tensor)
+ def apply_tensor(self, tensor, param, grad, loss, state, setting):
+ return setting['fn'](tensor)

  class CustomUnaryOperation(Transform):
+ """Applies :code:`getattr(tensor, name)` to each tensor
+ """
  def __init__(self, name: str, target: "Target" = 'update'):
  defaults = dict(name=name)
  super().__init__(defaults=defaults, uses_grad=False, target=target)

  @torch.no_grad
- def apply(self, tensors, params, grads, loss, states, settings):
+ def apply_tensors(self, tensors, params, grads, loss, states, settings):
  return getattr(tensors, settings[0]['name'])()


  class Abs(Transform):
+ """Returns :code:`abs(input)`"""
  def __init__(self, target: "Target" = 'update'): super().__init__({}, uses_grad=False, target=target)
  @torch.no_grad
- def apply(self, tensors, params, grads, loss, states, settings):
+ def apply_tensors(self, tensors, params, grads, loss, states, settings):
  torch._foreach_abs_(tensors)
  return tensors

  class Sign(Transform):
+ """Returns :code:`sign(input)`"""
  def __init__(self, target: "Target" = 'update'): super().__init__({}, uses_grad=False, target=target)
  @torch.no_grad
- def apply(self, tensors, params, grads, loss, states, settings):
+ def apply_tensors(self, tensors, params, grads, loss, states, settings):
  torch._foreach_sign_(tensors)
  return tensors

  class Exp(Transform):
+ """Returns :code:`exp(input)`"""
  def __init__(self, target: "Target" = 'update'): super().__init__({}, uses_grad=False, target=target)
  @torch.no_grad
- def apply(self, tensors, params, grads, loss, states, settings):
+ def apply_tensors(self, tensors, params, grads, loss, states, settings):
  torch._foreach_exp_(tensors)
  return tensors

  class Sqrt(Transform):
+ """Returns :code:`sqrt(input)`"""
  def __init__(self, target: "Target" = 'update'): super().__init__({}, uses_grad=False, target=target)
  @torch.no_grad
- def apply(self, tensors, params, grads, loss, states, settings):
+ def apply_tensors(self, tensors, params, grads, loss, states, settings):
  torch._foreach_sqrt_(tensors)
  return tensors

  class Reciprocal(Transform):
+ """Returns :code:`1 / input`"""
  def __init__(self, eps = 0, target: "Target" = 'update'):
  defaults = dict(eps = eps)
  super().__init__(defaults, uses_grad=False, target=target)
  @torch.no_grad
- def apply(self, tensors, params, grads, loss, states, settings):
+ def apply_tensors(self, tensors, params, grads, loss, states, settings):
  eps = [s['eps'] for s in settings]
  if any(e != 0 for e in eps): torch._foreach_add_(tensors, eps)
  torch._foreach_reciprocal_(tensors)
  return tensors

  class Negate(Transform):
+ """Returns :code:`- input`"""
  def __init__(self, target: "Target" = 'update'): super().__init__({}, uses_grad=False, target=target)
  @torch.no_grad
- def apply(self, tensors, params, grads, loss, states, settings):
+ def apply_tensors(self, tensors, params, grads, loss, states, settings):
  torch._foreach_neg_(tensors)
  return tensors

@@ -97,18 +113,18 @@ class NanToNum(Transform):
  super().__init__(defaults, uses_grad=False, target=target)

  @torch.no_grad
- def apply(self, tensors, params, grads, loss, states, settings):
+ def apply_tensors(self, tensors, params, grads, loss, states, settings):
  nan, posinf, neginf = unpack_dicts(settings, 'nan', 'posinf', 'neginf')
  return [t.nan_to_num_(nan_i, posinf_i, neginf_i) for t, nan_i, posinf_i, neginf_i in zip(tensors, nan, posinf, neginf)]

  class Rescale(Transform):
- """rescale update to (min, max) range"""
+ """Rescales input to :code`(min, max)` range"""
  def __init__(self, min: float, max: float, tensorwise: bool = False, eps:float=1e-8, target: "Target" = 'update'):
  defaults = dict(min=min, max=max, eps=eps, tensorwise=tensorwise)
  super().__init__(defaults, uses_grad=False, target=target)

  @torch.no_grad
- def apply(self, tensors, params, grads, loss, states, settings):
+ def apply_tensors(self, tensors, params, grads, loss, states, settings):
  min, max = unpack_dicts(settings, 'min','max')
  tensorwise = settings[0]['tensorwise']
  dim = None if tensorwise else 'global'
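
The apply → apply_tensors rename above applies uniformly to these elementwise transforms. As a rough usage sketch (an assumption, not taken from the diff: it presumes UnaryLambda is exposed under tz.m like the modules in the GraftModules example), a custom elementwise transform could be slotted into a chain the same way:

    import torch
    import torchzero as tz

    model = torch.nn.Linear(4, 2)  # placeholder model for illustration

    # Hypothetical chain: clamp every update tensor elementwise, then apply a fixed LR.
    # UnaryLambda's fn takes and returns a list of tensors, per the docstring added above.
    opt = tz.Modular(
        model.parameters(),
        tz.m.UnaryLambda(lambda tensors: [t.clamp(-1.0, 1.0) for t in tensors]),
        tz.m.LR(1e-2),
    )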