torchzero 0.3.14__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169)
  1. tests/test_identical.py +2 -2
  2. tests/test_module_autograd.py +586 -0
  3. tests/test_objective.py +188 -0
  4. tests/test_opts.py +47 -36
  5. tests/test_tensorlist.py +0 -8
  6. tests/test_utils_optimizer.py +0 -1
  7. torchzero/__init__.py +1 -1
  8. torchzero/core/__init__.py +8 -2
  9. torchzero/core/chain.py +47 -0
  10. torchzero/core/functional.py +103 -0
  11. torchzero/core/modular.py +233 -0
  12. torchzero/core/module.py +132 -643
  13. torchzero/core/objective.py +948 -0
  14. torchzero/core/reformulation.py +56 -23
  15. torchzero/core/transform.py +261 -365
  16. torchzero/linalg/__init__.py +10 -0
  17. torchzero/linalg/eigh.py +34 -0
  18. torchzero/linalg/linalg_utils.py +14 -0
  19. torchzero/{utils/linalg → linalg}/linear_operator.py +132 -34
  20. torchzero/linalg/matrix_power.py +28 -0
  21. torchzero/linalg/orthogonalize.py +95 -0
  22. torchzero/{utils/linalg → linalg}/qr.py +4 -2
  23. torchzero/{utils/linalg → linalg}/solve.py +76 -88
  24. torchzero/linalg/svd.py +20 -0
  25. torchzero/linalg/torch_linalg.py +168 -0
  26. torchzero/modules/__init__.py +0 -1
  27. torchzero/modules/adaptive/__init__.py +1 -1
  28. torchzero/modules/adaptive/adagrad.py +163 -213
  29. torchzero/modules/adaptive/adahessian.py +74 -103
  30. torchzero/modules/adaptive/adam.py +53 -76
  31. torchzero/modules/adaptive/adan.py +49 -30
  32. torchzero/modules/adaptive/adaptive_heavyball.py +11 -6
  33. torchzero/modules/adaptive/aegd.py +12 -12
  34. torchzero/modules/adaptive/esgd.py +98 -119
  35. torchzero/modules/adaptive/lion.py +5 -10
  36. torchzero/modules/adaptive/lmadagrad.py +87 -32
  37. torchzero/modules/adaptive/mars.py +5 -5
  38. torchzero/modules/adaptive/matrix_momentum.py +47 -51
  39. torchzero/modules/adaptive/msam.py +70 -52
  40. torchzero/modules/adaptive/muon.py +59 -124
  41. torchzero/modules/adaptive/natural_gradient.py +33 -28
  42. torchzero/modules/adaptive/orthograd.py +11 -15
  43. torchzero/modules/adaptive/rmsprop.py +83 -75
  44. torchzero/modules/adaptive/rprop.py +48 -47
  45. torchzero/modules/adaptive/sam.py +55 -45
  46. torchzero/modules/adaptive/shampoo.py +123 -129
  47. torchzero/modules/adaptive/soap.py +207 -143
  48. torchzero/modules/adaptive/sophia_h.py +106 -130
  49. torchzero/modules/clipping/clipping.py +15 -18
  50. torchzero/modules/clipping/ema_clipping.py +31 -25
  51. torchzero/modules/clipping/growth_clipping.py +14 -17
  52. torchzero/modules/conjugate_gradient/cg.py +26 -37
  53. torchzero/modules/experimental/__init__.py +3 -6
  54. torchzero/modules/experimental/coordinate_momentum.py +36 -0
  55. torchzero/modules/experimental/curveball.py +25 -41
  56. torchzero/modules/experimental/gradmin.py +2 -2
  57. torchzero/modules/{higher_order → experimental}/higher_order_newton.py +14 -40
  58. torchzero/modules/experimental/newton_solver.py +22 -53
  59. torchzero/modules/experimental/newtonnewton.py +20 -17
  60. torchzero/modules/experimental/reduce_outward_lr.py +7 -7
  61. torchzero/modules/experimental/scipy_newton_cg.py +21 -24
  62. torchzero/modules/experimental/spsa1.py +5 -5
  63. torchzero/modules/experimental/structural_projections.py +1 -4
  64. torchzero/modules/functional.py +8 -1
  65. torchzero/modules/grad_approximation/forward_gradient.py +7 -7
  66. torchzero/modules/grad_approximation/grad_approximator.py +23 -16
  67. torchzero/modules/grad_approximation/rfdm.py +20 -17
  68. torchzero/modules/least_squares/gn.py +90 -42
  69. torchzero/modules/line_search/__init__.py +1 -1
  70. torchzero/modules/line_search/_polyinterp.py +3 -1
  71. torchzero/modules/line_search/adaptive.py +3 -3
  72. torchzero/modules/line_search/backtracking.py +3 -3
  73. torchzero/modules/line_search/interpolation.py +160 -0
  74. torchzero/modules/line_search/line_search.py +42 -51
  75. torchzero/modules/line_search/strong_wolfe.py +5 -5
  76. torchzero/modules/misc/debug.py +12 -12
  77. torchzero/modules/misc/escape.py +10 -10
  78. torchzero/modules/misc/gradient_accumulation.py +10 -78
  79. torchzero/modules/misc/homotopy.py +16 -8
  80. torchzero/modules/misc/misc.py +120 -122
  81. torchzero/modules/misc/multistep.py +63 -61
  82. torchzero/modules/misc/regularization.py +49 -44
  83. torchzero/modules/misc/split.py +30 -28
  84. torchzero/modules/misc/switch.py +37 -32
  85. torchzero/modules/momentum/averaging.py +14 -14
  86. torchzero/modules/momentum/cautious.py +34 -28
  87. torchzero/modules/momentum/momentum.py +11 -11
  88. torchzero/modules/ops/__init__.py +4 -4
  89. torchzero/modules/ops/accumulate.py +21 -21
  90. torchzero/modules/ops/binary.py +67 -66
  91. torchzero/modules/ops/higher_level.py +19 -19
  92. torchzero/modules/ops/multi.py +44 -41
  93. torchzero/modules/ops/reduce.py +26 -23
  94. torchzero/modules/ops/unary.py +53 -53
  95. torchzero/modules/ops/utility.py +47 -46
  96. torchzero/modules/projections/galore.py +1 -1
  97. torchzero/modules/projections/projection.py +43 -43
  98. torchzero/modules/quasi_newton/__init__.py +2 -0
  99. torchzero/modules/quasi_newton/damping.py +1 -1
  100. torchzero/modules/quasi_newton/lbfgs.py +7 -7
  101. torchzero/modules/quasi_newton/lsr1.py +7 -7
  102. torchzero/modules/quasi_newton/quasi_newton.py +25 -16
  103. torchzero/modules/quasi_newton/sg2.py +292 -0
  104. torchzero/modules/restarts/restars.py +26 -24
  105. torchzero/modules/second_order/__init__.py +6 -3
  106. torchzero/modules/second_order/ifn.py +58 -0
  107. torchzero/modules/second_order/inm.py +101 -0
  108. torchzero/modules/second_order/multipoint.py +40 -80
  109. torchzero/modules/second_order/newton.py +105 -228
  110. torchzero/modules/second_order/newton_cg.py +102 -154
  111. torchzero/modules/second_order/nystrom.py +158 -178
  112. torchzero/modules/second_order/rsn.py +237 -0
  113. torchzero/modules/smoothing/laplacian.py +13 -12
  114. torchzero/modules/smoothing/sampling.py +11 -10
  115. torchzero/modules/step_size/adaptive.py +23 -23
  116. torchzero/modules/step_size/lr.py +15 -15
  117. torchzero/modules/termination/termination.py +32 -30
  118. torchzero/modules/trust_region/cubic_regularization.py +2 -2
  119. torchzero/modules/trust_region/levenberg_marquardt.py +25 -28
  120. torchzero/modules/trust_region/trust_cg.py +1 -1
  121. torchzero/modules/trust_region/trust_region.py +27 -22
  122. torchzero/modules/variance_reduction/svrg.py +21 -18
  123. torchzero/modules/weight_decay/__init__.py +2 -1
  124. torchzero/modules/weight_decay/reinit.py +83 -0
  125. torchzero/modules/weight_decay/weight_decay.py +12 -13
  126. torchzero/modules/wrappers/optim_wrapper.py +57 -50
  127. torchzero/modules/zeroth_order/cd.py +9 -6
  128. torchzero/optim/root.py +3 -3
  129. torchzero/optim/utility/split.py +2 -1
  130. torchzero/optim/wrappers/directsearch.py +27 -63
  131. torchzero/optim/wrappers/fcmaes.py +14 -35
  132. torchzero/optim/wrappers/mads.py +11 -31
  133. torchzero/optim/wrappers/moors.py +66 -0
  134. torchzero/optim/wrappers/nevergrad.py +4 -4
  135. torchzero/optim/wrappers/nlopt.py +31 -25
  136. torchzero/optim/wrappers/optuna.py +6 -13
  137. torchzero/optim/wrappers/pybobyqa.py +124 -0
  138. torchzero/optim/wrappers/scipy/__init__.py +7 -0
  139. torchzero/optim/wrappers/scipy/basin_hopping.py +117 -0
  140. torchzero/optim/wrappers/scipy/brute.py +48 -0
  141. torchzero/optim/wrappers/scipy/differential_evolution.py +80 -0
  142. torchzero/optim/wrappers/scipy/direct.py +69 -0
  143. torchzero/optim/wrappers/scipy/dual_annealing.py +115 -0
  144. torchzero/optim/wrappers/scipy/experimental.py +141 -0
  145. torchzero/optim/wrappers/scipy/minimize.py +151 -0
  146. torchzero/optim/wrappers/scipy/sgho.py +111 -0
  147. torchzero/optim/wrappers/wrapper.py +121 -0
  148. torchzero/utils/__init__.py +7 -25
  149. torchzero/utils/compile.py +2 -2
  150. torchzero/utils/derivatives.py +112 -88
  151. torchzero/utils/optimizer.py +4 -77
  152. torchzero/utils/python_tools.py +31 -0
  153. torchzero/utils/tensorlist.py +11 -5
  154. torchzero/utils/thoad_tools.py +68 -0
  155. {torchzero-0.3.14.dist-info → torchzero-0.4.0.dist-info}/METADATA +1 -1
  156. torchzero-0.4.0.dist-info/RECORD +191 -0
  157. tests/test_vars.py +0 -185
  158. torchzero/modules/experimental/momentum.py +0 -160
  159. torchzero/modules/higher_order/__init__.py +0 -1
  160. torchzero/optim/wrappers/scipy.py +0 -572
  161. torchzero/utils/linalg/__init__.py +0 -12
  162. torchzero/utils/linalg/matrix_funcs.py +0 -87
  163. torchzero/utils/linalg/orthogonalize.py +0 -12
  164. torchzero/utils/linalg/svd.py +0 -20
  165. torchzero/utils/ops.py +0 -10
  166. torchzero-0.3.14.dist-info/RECORD +0 -167
  167. /torchzero/{utils/linalg → linalg}/benchmark.py +0 -0
  168. {torchzero-0.3.14.dist-info → torchzero-0.4.0.dist-info}/WHEEL +0 -0
  169. {torchzero-0.3.14.dist-info → torchzero-0.4.0.dist-info}/top_level.txt +0 -0
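The hunks below (from torchzero/modules/ops/multi.py, reduce.py and unary.py, judging by the class names against the file list) illustrate the two renames that run through most of this release: the per-step state object `Var` becomes `Objective`, and the `Transform`/`TensorwiseTransform` bases are replaced by `TensorTransform`. The following is a minimal sketch of how import paths shift for downstream code; the exact set of top-level re-exports is an assumption based only on the imports and file moves visible in this diff.

```python
# Hedged sketch of the 0.3.14 -> 0.4.0 import changes suggested by this diff.
# Only names that literally appear in the hunks or file list are grounded;
# whether they are all importable exactly like this is assumed, not verified.

# 0.3.14 (removed names, per the "-" lines below)
# from torchzero.core import Var, Transform, TensorwiseTransform, Target

# 0.4.0 (new names, per the "+" lines below)
from torchzero.core import Objective, Module, TensorTransform, Chainable

# utils/linalg was promoted to a top-level subpackage (see the moved files above)
from torchzero import linalg
```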
@@ -7,8 +7,8 @@ from typing import Any, Literal
 
 import torch
 
-from ...core import Chainable, Module, Target, Var, maybe_chain
-from ...utils import TensorList, tensorlist, Metrics
+from ...core import Chainable, Module, Objective
+from ...utils import TensorList, Metrics
 
 
 class MultiOperationBase(Module, ABC):
@@ -29,36 +29,39 @@ class MultiOperationBase(Module, ABC):
             raise ValueError('At least one operand must be a module')
 
     @abstractmethod
-    def transform(self, var: Var, **operands: Any | list[torch.Tensor]) -> list[torch.Tensor]:
+    def transform(self, objective: Objective, **operands: Any | list[torch.Tensor]) -> list[torch.Tensor]:
         """applies the operation to operands"""
         raise NotImplementedError
 
+    def update(self, objective): raise RuntimeError
+    def apply(self, objective): raise RuntimeError
+
     @torch.no_grad
-    def step(self, var: Var) -> Var:
+    def step(self, objective: Objective) -> Objective:
         # pass cloned update to all module operands
         processed_operands: dict[str, Any | list[torch.Tensor]] = self.operands.copy()
 
         for k,v in self.operands.items():
             if k in self.children:
                 v: Module
-                updated_var = v.step(var.clone(clone_update=True))
-                processed_operands[k] = updated_var.get_update()
-                var.update_attrs_from_clone_(updated_var) # update loss, grad, etc if this module calculated them
+                updated_obj = v.step(objective.clone(clone_updates=True))
+                processed_operands[k] = updated_obj.get_updates()
+                objective.update_attrs_from_clone_(updated_obj) # update loss, grad, etc if this module calculated them
 
-        transformed = self.transform(var, **processed_operands)
-        var.update = transformed
-        return var
+        transformed = self.transform(objective, **processed_operands)
+        objective.updates = transformed
+        return objective
 
 
 
 class SubModules(MultiOperationBase):
-    """Calculates :code:`input - other`. :code:`input` and :code:`other` can be numbers or modules."""
+    """Calculates ``input - other``. ``input`` and ``other`` can be numbers or modules."""
     def __init__(self, input: Chainable | float, other: Chainable | float, alpha: float = 1):
         defaults = dict(alpha=alpha)
         super().__init__(defaults, input=input, other=other)
 
     @torch.no_grad
-    def transform(self, var: Var, input: float | list[torch.Tensor], other: float | list[torch.Tensor]) -> list[torch.Tensor]:
+    def transform(self, objective: Objective, input: float | list[torch.Tensor], other: float | list[torch.Tensor]) -> list[torch.Tensor]:
         alpha = self.defaults['alpha']
 
         if isinstance(input, (int,float)):
@@ -70,14 +73,14 @@ class SubModules(MultiOperationBase):
         return input
 
 class DivModules(MultiOperationBase):
-    """Calculates :code:`input / other`. :code:`input` and :code:`other` can be numbers or modules."""
+    """Calculates ``input / other``. ``input`` and ``other`` can be numbers or modules."""
     def __init__(self, input: Chainable | float, other: Chainable | float, other_first:bool=False):
         defaults = {}
         if other_first: super().__init__(defaults, other=other, input=input)
         else: super().__init__(defaults, input=input, other=other)
 
     @torch.no_grad
-    def transform(self, var: Var, input: float | list[torch.Tensor], other: float | list[torch.Tensor]) -> list[torch.Tensor]:
+    def transform(self, objective: Objective, input: float | list[torch.Tensor], other: float | list[torch.Tensor]) -> list[torch.Tensor]:
         if isinstance(input, (int,float)):
             assert isinstance(other, list)
             return input / TensorList(other)
@@ -87,13 +90,13 @@ class DivModules(MultiOperationBase):
 
 
 class PowModules(MultiOperationBase):
-    """Calculates :code:`input ** exponent`. :code:`input` and :code:`other` can be numbers or modules."""
+    """Calculates ``input ** exponent``. ``input`` and ``other`` can be numbers or modules."""
     def __init__(self, input: Chainable | float, exponent: Chainable | float):
         defaults = {}
         super().__init__(defaults, input=input, exponent=exponent)
 
     @torch.no_grad
-    def transform(self, var: Var, input: float | list[torch.Tensor], exponent: float | list[torch.Tensor]) -> list[torch.Tensor]:
+    def transform(self, objective: Objective, input: float | list[torch.Tensor], exponent: float | list[torch.Tensor]) -> list[torch.Tensor]:
         if isinstance(input, (int,float)):
             assert isinstance(exponent, list)
             return input ** TensorList(exponent)
@@ -102,32 +105,32 @@ class PowModules(MultiOperationBase):
         return input
 
 class LerpModules(MultiOperationBase):
-    """Does a linear interpolation of :code:`input(tensors)` and :code:`end(tensors)` based on a scalar :code:`weight`.
+    """Does a linear interpolation of ``input(tensors)`` and ``end(tensors)`` based on a scalar ``weight``.
 
-    The output is given by :code:`output = input(tensors) + weight * (end(tensors) - input(tensors))`
+    The output is given by ``output = input(tensors) + weight * (end(tensors) - input(tensors))``
     """
     def __init__(self, input: Chainable, end: Chainable, weight: float):
         defaults = dict(weight=weight)
         super().__init__(defaults, input=input, end=end)
 
     @torch.no_grad
-    def transform(self, var: Var, input: list[torch.Tensor], end: list[torch.Tensor]) -> list[torch.Tensor]:
+    def transform(self, objective: Objective, input: list[torch.Tensor], end: list[torch.Tensor]) -> list[torch.Tensor]:
         torch._foreach_lerp_(input, end, weight=self.defaults['weight'])
         return input
 
 class ClipModules(MultiOperationBase):
-    """Calculates :code:`input(tensors).clip(min, max)`. :code:`min` and :code:`max` can be numbers or modules."""
+    """Calculates ``input(tensors).clip(min, max)``. ``min`` and ``max`` can be numbers or modules."""
     def __init__(self, input: Chainable, min: float | Chainable | None = None, max: float | Chainable | None = None):
         defaults = {}
         super().__init__(defaults, input=input, min=min, max=max)
 
     @torch.no_grad
-    def transform(self, var: Var, input: list[torch.Tensor], min: float | list[torch.Tensor], max: float | list[torch.Tensor]) -> list[torch.Tensor]:
+    def transform(self, objective: Objective, input: list[torch.Tensor], min: float | list[torch.Tensor], max: float | list[torch.Tensor]) -> list[torch.Tensor]:
         return TensorList(input).clamp_(min=min, max=max)
 
 
-class GraftModules(MultiOperationBase):
-    """Outputs :code:`direction` output rescaled to have the same norm as :code:`magnitude` output.
+class Graft(MultiOperationBase):
+    """Outputs ``direction`` output rescaled to have the same norm as ``magnitude`` output.
 
     Args:
         direction (Chainable): module to use the direction from
@@ -137,40 +140,40 @@ class GraftModules(MultiOperationBase):
         eps (float, optional): clips denominator to be no less than this value. Defaults to 1e-6.
         strength (float, optional): strength of grafting. Defaults to 1.
 
-    Example:
-        Shampoo grafted to Adam
-
-        .. code-block:: python
+    ### Example:
 
-            opt = tz.Modular(
-                model.parameters(),
-                tz.m.GraftModules(
-                    direction = tz.m.Shampoo(),
-                    magnitude = tz.m.Adam(),
-                ),
-                tz.m.LR(1e-3)
-            )
+    Shampoo grafted to Adam
+    ```python
+    opt = tz.Modular(
+        model.parameters(),
+        tz.m.GraftModules(
+            direction = tz.m.Shampoo(),
+            magnitude = tz.m.Adam(),
+        ),
+        tz.m.LR(1e-3)
+    )
+    ```
 
     Reference:
-        Agarwal, N., Anil, R., Hazan, E., Koren, T., & Zhang, C. (2020). Disentangling adaptive gradient methods from learning rates. arXiv preprint arXiv:2002.11803. https://arxiv.org/pdf/2002.11803
+        [Agarwal, N., Anil, R., Hazan, E., Koren, T., & Zhang, C. (2020). Disentangling adaptive gradient methods from learning rates. arXiv preprint arXiv:2002.11803.](https://arxiv.org/pdf/2002.11803)
     """
     def __init__(self, direction: Chainable, magnitude: Chainable, tensorwise:bool=True, ord:Metrics=2, eps:float = 1e-6, strength:float=1):
         defaults = dict(tensorwise=tensorwise, ord=ord, eps=eps, strength=strength)
         super().__init__(defaults, direction=direction, magnitude=magnitude)
 
     @torch.no_grad
-    def transform(self, var, magnitude: list[torch.Tensor], direction:list[torch.Tensor]):
+    def transform(self, objective, magnitude: list[torch.Tensor], direction:list[torch.Tensor]):
         tensorwise, ord, eps, strength = itemgetter('tensorwise','ord','eps', 'strength')(self.defaults)
         return TensorList(direction).graft_(magnitude, tensorwise=tensorwise, ord=ord, eps=eps, strength=strength)
 
 class MultiplyByModuleNorm(MultiOperationBase):
-    """Outputs :code:`input` multiplied by norm of the :code:`norm` output."""
+    """Outputs ``input`` multiplied by norm of the ``norm`` output."""
     def __init__(self, input: Chainable, norm: Chainable, tensorwise:bool=True, ord:Metrics=2):
         defaults = dict(tensorwise=tensorwise, ord=ord)
         super().__init__(defaults, input=input, norm=norm)
 
     @torch.no_grad
-    def transform(self, var, input: list[torch.Tensor], norm:list[torch.Tensor]):
+    def transform(self, objective, input: list[torch.Tensor], norm:list[torch.Tensor]):
         tensorwise, ord = itemgetter('tensorwise','ord')(self.defaults)
         if tensorwise:
             n = TensorList(norm).metric(ord)
@@ -181,13 +184,13 @@ class MultiplyByModuleNorm(MultiOperationBase):
         return input
 
 class DivideByModuleNorm(MultiOperationBase):
-    """Outputs :code:`input` divided by norm of the :code:`norm` output."""
+    """Outputs ``input`` divided by norm of the ``norm`` output."""
     def __init__(self, input: Chainable, norm: Chainable, tensorwise:bool=True, ord:Metrics=2):
         defaults = dict(tensorwise=tensorwise, ord=ord)
         super().__init__(defaults, input=input, norm=norm)
 
     @torch.no_grad
-    def transform(self, var, input: list[torch.Tensor], norm:list[torch.Tensor]):
+    def transform(self, objective, input: list[torch.Tensor], norm:list[torch.Tensor]):
         tensorwise, ord = itemgetter('tensorwise','ord')(self.defaults)
         if tensorwise:
             n = TensorList(norm).metric(ord)
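For code that defines its own modules, the hunks above spell out the new `Objective`-based `step()` contract that replaces the old `Var`-based one: clone the objective for child modules, read updates with `get_updates()`, and write them back to `objective.updates`. Below is a minimal sketch of a custom module under that contract; `NegateUpdate` is a hypothetical example and the `Module` constructor signature is assumed, not shown in this diff.

```python
import torch
from torchzero.core import Module, Objective

class NegateUpdate(Module):
    """Hypothetical module following the 0.4.0 step() contract shown above."""

    def __init__(self):
        # a defaults dict is assumed to be accepted here, mirroring the subclasses above
        super().__init__(defaults={})

    @torch.no_grad
    def step(self, objective: Objective) -> Objective:
        # read the current update tensors, modify them, and write them back,
        # as MultiOperationBase.step does with objective.updates
        updates = objective.get_updates()
        torch._foreach_neg_(updates)
        objective.updates = updates
        return objective
```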
@@ -5,7 +5,7 @@ from typing import Any, cast
 
 import torch
 
-from ...core import Chainable, Module, Target, Var, maybe_chain
+from ...core import Chainable, Module, Objective, maybe_chain
 
 
 class ReduceOperationBase(Module, ABC):
@@ -26,34 +26,37 @@ class ReduceOperationBase(Module, ABC):
             raise ValueError('At least one operand must be a module')
 
     @abstractmethod
-    def transform(self, var: Var, *operands: Any | list[torch.Tensor]) -> list[torch.Tensor]:
+    def transform(self, objective: Objective, *operands: Any | list[torch.Tensor]) -> list[torch.Tensor]:
         """applies the operation to operands"""
         raise NotImplementedError
 
+    def update(self, objective): raise RuntimeError
+    def apply(self, objective): raise RuntimeError
+
     @torch.no_grad
-    def step(self, var: Var) -> Var:
+    def step(self, objective: Objective) -> Objective:
         # pass cloned update to all module operands
         processed_operands: list[Any | list[torch.Tensor]] = self.operands.copy()
 
         for i, v in enumerate(self.operands):
             if f'operand_{i}' in self.children:
                 v: Module
-                updated_var = v.step(var.clone(clone_update=True))
-                processed_operands[i] = updated_var.get_update()
-                var.update_attrs_from_clone_(updated_var) # update loss, grad, etc if this module calculated them
+                updated_obj = v.step(objective.clone(clone_updates=True))
+                processed_operands[i] = updated_obj.get_updates()
+                objective.update_attrs_from_clone_(updated_obj) # update loss, grad, etc if this module calculated them
 
-        transformed = self.transform(var, *processed_operands)
-        var.update = transformed
-        return var
+        transformed = self.transform(objective, *processed_operands)
+        objective.updates = transformed
+        return objective
 
 class Sum(ReduceOperationBase):
-    """Outputs sum of :code:`inputs` that can be modules or numbers."""
+    """Outputs sum of ``inputs`` that can be modules or numbers."""
     USE_MEAN = False
     def __init__(self, *inputs: Chainable | float):
         super().__init__({}, *inputs)
 
     @torch.no_grad
-    def transform(self, var: Var, *inputs: float | list[torch.Tensor]) -> list[torch.Tensor]:
+    def transform(self, objective: Objective, *inputs: float | list[torch.Tensor]) -> list[torch.Tensor]:
         sorted_inputs = sorted(inputs, key=lambda x: isinstance(x, float))
         sum = cast(list, sorted_inputs[0])
         if len(sorted_inputs) > 1:
@@ -64,14 +67,14 @@ class Sum(ReduceOperationBase):
         return sum
 
 class Mean(Sum):
-    """Outputs a mean of :code:`inputs` that can be modules or numbers."""
+    """Outputs a mean of ``inputs`` that can be modules or numbers."""
     USE_MEAN = True
 
 
 class WeightedSum(ReduceOperationBase):
+    """Outputs a weighted sum of ``inputs`` that can be modules or numbers."""
     USE_MEAN = False
     def __init__(self, *inputs: Chainable | float, weights: Iterable[float]):
-        """Outputs a weighted sum of :code:`inputs` that can be modules or numbers."""
         weights = list(weights)
         if len(inputs) != len(weights):
             raise ValueError(f'Number of inputs {len(inputs)} must match number of weights {len(weights)}')
@@ -79,7 +82,7 @@ class WeightedSum(ReduceOperationBase):
         super().__init__(defaults=defaults, *inputs)
 
     @torch.no_grad
-    def transform(self, var: Var, *inputs: float | list[torch.Tensor]) -> list[torch.Tensor]:
+    def transform(self, objective: Objective, *inputs: float | list[torch.Tensor]) -> list[torch.Tensor]:
         sorted_inputs = sorted(inputs, key=lambda x: isinstance(x, float))
         weights = self.defaults['weights']
         sum = cast(list, sorted_inputs[0])
@@ -94,16 +97,16 @@
 
 
 class WeightedMean(WeightedSum):
-    """Outputs weighted mean of :code:`inputs` that can be modules or numbers."""
+    """Outputs weighted mean of ``inputs`` that can be modules or numbers."""
     USE_MEAN = True
 
 class Median(ReduceOperationBase):
-    """Outputs median of :code:`inputs` that can be modules or numbers."""
+    """Outputs median of ``inputs`` that can be modules or numbers."""
     def __init__(self, *inputs: Chainable | float):
         super().__init__({}, *inputs)
 
     @torch.no_grad
-    def transform(self, var: Var, *inputs: float | list[torch.Tensor]) -> list[torch.Tensor]:
+    def transform(self, objective: Objective, *inputs: float | list[torch.Tensor]) -> list[torch.Tensor]:
         res = []
         lists = [i for i in inputs if isinstance(i, list)]
         floats = [i for i in inputs if isinstance(i, (int,float))]
@@ -112,12 +115,12 @@ class Median(ReduceOperationBase):
         return res
 
 class Prod(ReduceOperationBase):
-    """Outputs product of :code:`inputs` that can be modules or numbers."""
+    """Outputs product of ``inputs`` that can be modules or numbers."""
     def __init__(self, *inputs: Chainable | float):
         super().__init__({}, *inputs)
 
     @torch.no_grad
-    def transform(self, var: Var, *inputs: float | list[torch.Tensor]) -> list[torch.Tensor]:
+    def transform(self, objective: Objective, *inputs: float | list[torch.Tensor]) -> list[torch.Tensor]:
         sorted_inputs = sorted(inputs, key=lambda x: isinstance(x, float))
         prod = cast(list, sorted_inputs[0])
         if len(sorted_inputs) > 1:
@@ -127,12 +130,12 @@ class Prod(ReduceOperationBase):
         return prod
 
 class MaximumModules(ReduceOperationBase):
-    """Outputs elementwise maximum of :code:`inputs` that can be modules or numbers."""
+    """Outputs elementwise maximum of ``inputs`` that can be modules or numbers."""
     def __init__(self, *inputs: Chainable | float):
         super().__init__({}, *inputs)
 
     @torch.no_grad
-    def transform(self, var: Var, *inputs: float | list[torch.Tensor]) -> list[torch.Tensor]:
+    def transform(self, objective: Objective, *inputs: float | list[torch.Tensor]) -> list[torch.Tensor]:
         sorted_inputs = sorted(inputs, key=lambda x: isinstance(x, float))
         maximum = cast(list, sorted_inputs[0])
         if len(sorted_inputs) > 1:
@@ -142,12 +145,12 @@ class MaximumModules(ReduceOperationBase):
         return maximum
 
 class MinimumModules(ReduceOperationBase):
-    """Outputs elementwise minimum of :code:`inputs` that can be modules or numbers."""
+    """Outputs elementwise minimum of ``inputs`` that can be modules or numbers."""
     def __init__(self, *inputs: Chainable | float):
         super().__init__({}, *inputs)
 
     @torch.no_grad
-    def transform(self, var: Var, *inputs: float | list[torch.Tensor]) -> list[torch.Tensor]:
+    def transform(self, objective: Objective, *inputs: float | list[torch.Tensor]) -> list[torch.Tensor]:
         sorted_inputs = sorted(inputs, key=lambda x: isinstance(x, float))
         minimum = cast(list, sorted_inputs[0])
         if len(sorted_inputs) > 1:
@@ -2,102 +2,102 @@ from collections import deque
 
 import torch
 
-from ...core import TensorwiseTransform, Target, Transform
+from ...core import TensorTransform
 from ...utils import TensorList, unpack_dicts,unpack_states
 
-class UnaryLambda(Transform):
-    """Applies :code:`fn` to input tensors.
+class UnaryLambda(TensorTransform):
+    """Applies ``fn`` to input tensors.
 
-    :code:`fn` must accept and return a list of tensors.
+    ``fn`` must accept and return a list of tensors.
     """
-    def __init__(self, fn, target: "Target" = 'update'):
+    def __init__(self, fn):
         defaults = dict(fn=fn)
-        super().__init__(defaults=defaults, uses_grad=False, target=target)
+        super().__init__(defaults=defaults)
 
     @torch.no_grad
-    def apply_tensors(self, tensors, params, grads, loss, states, settings):
+    def multi_tensor_apply(self, tensors, params, grads, loss, states, settings):
         return settings[0]['fn'](tensors)
 
-class UnaryParameterwiseLambda(TensorwiseTransform):
-    """Applies :code:`fn` to each input tensor.
+class UnaryParameterwiseLambda(TensorTransform):
+    """Applies ``fn`` to each input tensor.
 
-    :code:`fn` must accept and return a tensor.
+    ``fn`` must accept and return a tensor.
     """
-    def __init__(self, fn, target: "Target" = 'update'):
+    def __init__(self, fn):
         defaults = dict(fn=fn)
-        super().__init__(uses_grad=False, defaults=defaults, target=target)
+        super().__init__(defaults=defaults)
 
     @torch.no_grad
-    def apply_tensor(self, tensor, param, grad, loss, state, setting):
+    def single_tensor_apply(self, tensor, param, grad, loss, state, setting):
         return setting['fn'](tensor)
 
-class CustomUnaryOperation(Transform):
-    """Applies :code:`getattr(tensor, name)` to each tensor
+class CustomUnaryOperation(TensorTransform):
+    """Applies ``getattr(tensor, name)`` to each tensor
     """
-    def __init__(self, name: str, target: "Target" = 'update'):
+    def __init__(self, name: str):
        defaults = dict(name=name)
-        super().__init__(defaults=defaults, uses_grad=False, target=target)
+        super().__init__(defaults=defaults)
 
     @torch.no_grad
-    def apply_tensors(self, tensors, params, grads, loss, states, settings):
+    def multi_tensor_apply(self, tensors, params, grads, loss, states, settings):
         return getattr(tensors, settings[0]['name'])()
 
 
-class Abs(Transform):
-    """Returns :code:`abs(input)`"""
-    def __init__(self, target: "Target" = 'update'): super().__init__({}, uses_grad=False, target=target)
+class Abs(TensorTransform):
+    """Returns ``abs(input)``"""
+    def __init__(self): super().__init__()
     @torch.no_grad
-    def apply_tensors(self, tensors, params, grads, loss, states, settings):
+    def multi_tensor_apply(self, tensors, params, grads, loss, states, settings):
         torch._foreach_abs_(tensors)
         return tensors
 
-class Sign(Transform):
-    """Returns :code:`sign(input)`"""
-    def __init__(self, target: "Target" = 'update'): super().__init__({}, uses_grad=False, target=target)
+class Sign(TensorTransform):
+    """Returns ``sign(input)``"""
+    def __init__(self): super().__init__()
     @torch.no_grad
-    def apply_tensors(self, tensors, params, grads, loss, states, settings):
+    def multi_tensor_apply(self, tensors, params, grads, loss, states, settings):
         torch._foreach_sign_(tensors)
         return tensors
 
-class Exp(Transform):
-    """Returns :code:`exp(input)`"""
-    def __init__(self, target: "Target" = 'update'): super().__init__({}, uses_grad=False, target=target)
+class Exp(TensorTransform):
+    """Returns ``exp(input)``"""
+    def __init__(self): super().__init__()
     @torch.no_grad
-    def apply_tensors(self, tensors, params, grads, loss, states, settings):
+    def multi_tensor_apply(self, tensors, params, grads, loss, states, settings):
         torch._foreach_exp_(tensors)
         return tensors
 
-class Sqrt(Transform):
-    """Returns :code:`sqrt(input)`"""
-    def __init__(self, target: "Target" = 'update'): super().__init__({}, uses_grad=False, target=target)
+class Sqrt(TensorTransform):
+    """Returns ``sqrt(input)``"""
+    def __init__(self): super().__init__()
     @torch.no_grad
-    def apply_tensors(self, tensors, params, grads, loss, states, settings):
+    def multi_tensor_apply(self, tensors, params, grads, loss, states, settings):
         torch._foreach_sqrt_(tensors)
         return tensors
 
-class Reciprocal(Transform):
-    """Returns :code:`1 / input`"""
-    def __init__(self, eps = 0, target: "Target" = 'update'):
+class Reciprocal(TensorTransform):
+    """Returns ``1 / input``"""
+    def __init__(self, eps = 0):
         defaults = dict(eps = eps)
-        super().__init__(defaults, uses_grad=False, target=target)
+        super().__init__(defaults)
     @torch.no_grad
-    def apply_tensors(self, tensors, params, grads, loss, states, settings):
+    def multi_tensor_apply(self, tensors, params, grads, loss, states, settings):
         eps = [s['eps'] for s in settings]
         if any(e != 0 for e in eps): torch._foreach_add_(tensors, eps)
         torch._foreach_reciprocal_(tensors)
         return tensors
 
-class Negate(Transform):
-    """Returns :code:`- input`"""
-    def __init__(self, target: "Target" = 'update'): super().__init__({}, uses_grad=False, target=target)
+class Negate(TensorTransform):
+    """Returns ``- input``"""
+    def __init__(self): super().__init__()
     @torch.no_grad
-    def apply_tensors(self, tensors, params, grads, loss, states, settings):
+    def multi_tensor_apply(self, tensors, params, grads, loss, states, settings):
         torch._foreach_neg_(tensors)
         return tensors
 
 
-class NanToNum(Transform):
-    """Convert `nan`, `inf` and `-inf` to numbers.
+class NanToNum(TensorTransform):
+    """Convert ``nan``, ``inf`` and ``-inf`` to numbers.
 
     Args:
         nan (optional): the value to replace NaNs with. Default is zero.
@@ -108,23 +108,23 @@ class NanToNum(Transform):
         If None, negative infinity values are replaced with the lowest finite value
         representable by input's dtype. Default is None.
     """
-    def __init__(self, nan=None, posinf=None, neginf=None, target: "Target" = 'update'):
+    def __init__(self, nan=None, posinf=None, neginf=None):
         defaults = dict(nan=nan, posinf=posinf, neginf=neginf)
-        super().__init__(defaults, uses_grad=False, target=target)
+        super().__init__(defaults)
 
     @torch.no_grad
-    def apply_tensors(self, tensors, params, grads, loss, states, settings):
+    def multi_tensor_apply(self, tensors, params, grads, loss, states, settings):
         nan, posinf, neginf = unpack_dicts(settings, 'nan', 'posinf', 'neginf')
         return [t.nan_to_num_(nan_i, posinf_i, neginf_i) for t, nan_i, posinf_i, neginf_i in zip(tensors, nan, posinf, neginf)]
 
-class Rescale(Transform):
-    """Rescales input to :code`(min, max)` range"""
-    def __init__(self, min: float, max: float, tensorwise: bool = False, eps:float=1e-8, target: "Target" = 'update'):
+class Rescale(TensorTransform):
+    """Rescales input to ``(min, max)`` range"""
+    def __init__(self, min: float, max: float, tensorwise: bool = False, eps:float=1e-8):
         defaults = dict(min=min, max=max, eps=eps, tensorwise=tensorwise)
-        super().__init__(defaults, uses_grad=False, target=target)
+        super().__init__(defaults)
 
     @torch.no_grad
-    def apply_tensors(self, tensors, params, grads, loss, states, settings):
+    def multi_tensor_apply(self, tensors, params, grads, loss, states, settings):
         min, max = unpack_dicts(settings, 'min','max')
         tensorwise = settings[0]['tensorwise']
         dim = None if tensorwise else 'global'
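The unary ops above also show the pattern for user-defined transforms in 0.4.0: subclass `TensorTransform` and override `multi_tensor_apply` (or `single_tensor_apply` for per-tensor logic) instead of the old `apply_tensors`/`apply_tensor` hooks. A minimal sketch follows; `ClipValue` is a hypothetical example, not part of torchzero.

```python
import torch
from torchzero.core import TensorTransform

class ClipValue(TensorTransform):
    """Hypothetical transform written against the renamed hooks above."""

    def __init__(self, value: float = 1.0):
        defaults = dict(value=value)
        super().__init__(defaults)  # mirrors Reciprocal/NanToNum above

    @torch.no_grad
    def multi_tensor_apply(self, tensors, params, grads, loss, states, settings):
        # settings holds one dict of hyperparameters per tensor, as in the classes above
        for t, s in zip(tensors, settings):
            t.clamp_(-s['value'], s['value'])
        return tensors
```

Presumably such a transform chains into `tz.Modular` like the built-in ops, e.g. `tz.Modular(model.parameters(), ClipValue(0.5), tz.m.LR(1e-2))`, following the grafting example in the hunks above.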