torchzero 0.3.15__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff reflects the content of publicly released versions of the package as published to its public registry, and is provided for informational purposes only.
Files changed (187)
  1. tests/test_identical.py +22 -22
  2. tests/test_module_autograd.py +586 -0
  3. tests/test_objective.py +188 -0
  4. tests/test_opts.py +225 -214
  5. tests/test_tensorlist.py +0 -8
  6. tests/test_utils_optimizer.py +0 -1
  7. torchzero/__init__.py +2 -2
  8. torchzero/core/__init__.py +7 -4
  9. torchzero/core/chain.py +20 -23
  10. torchzero/core/functional.py +90 -24
  11. torchzero/core/modular.py +53 -57
  12. torchzero/core/module.py +132 -52
  13. torchzero/core/objective.py +948 -0
  14. torchzero/core/reformulation.py +55 -24
  15. torchzero/core/transform.py +261 -367
  16. torchzero/linalg/__init__.py +11 -0
  17. torchzero/linalg/eigh.py +253 -0
  18. torchzero/linalg/linalg_utils.py +14 -0
  19. torchzero/{utils/linalg → linalg}/linear_operator.py +99 -49
  20. torchzero/linalg/matrix_power.py +28 -0
  21. torchzero/linalg/orthogonalize.py +93 -0
  22. torchzero/{utils/linalg → linalg}/qr.py +16 -2
  23. torchzero/{utils/linalg → linalg}/solve.py +74 -88
  24. torchzero/linalg/svd.py +47 -0
  25. torchzero/linalg/torch_linalg.py +168 -0
  26. torchzero/modules/__init__.py +4 -3
  27. torchzero/modules/adaptive/__init__.py +11 -3
  28. torchzero/modules/adaptive/adagrad.py +167 -217
  29. torchzero/modules/adaptive/adahessian.py +76 -105
  30. torchzero/modules/adaptive/adam.py +53 -76
  31. torchzero/modules/adaptive/adan.py +50 -31
  32. torchzero/modules/adaptive/adaptive_heavyball.py +12 -7
  33. torchzero/modules/adaptive/aegd.py +12 -12
  34. torchzero/modules/adaptive/esgd.py +98 -119
  35. torchzero/modules/adaptive/ggt.py +186 -0
  36. torchzero/modules/adaptive/lion.py +7 -11
  37. torchzero/modules/adaptive/lre_optimizers.py +299 -0
  38. torchzero/modules/adaptive/mars.py +7 -7
  39. torchzero/modules/adaptive/matrix_momentum.py +48 -52
  40. torchzero/modules/adaptive/msam.py +71 -53
  41. torchzero/modules/adaptive/muon.py +67 -129
  42. torchzero/modules/adaptive/natural_gradient.py +63 -41
  43. torchzero/modules/adaptive/orthograd.py +11 -15
  44. torchzero/modules/adaptive/psgd/__init__.py +5 -0
  45. torchzero/modules/adaptive/psgd/_psgd_utils.py +37 -0
  46. torchzero/modules/adaptive/psgd/psgd.py +1390 -0
  47. torchzero/modules/adaptive/psgd/psgd_dense_newton.py +174 -0
  48. torchzero/modules/adaptive/psgd/psgd_kron_newton.py +203 -0
  49. torchzero/modules/adaptive/psgd/psgd_kron_whiten.py +185 -0
  50. torchzero/modules/adaptive/psgd/psgd_lra_newton.py +118 -0
  51. torchzero/modules/adaptive/psgd/psgd_lra_whiten.py +116 -0
  52. torchzero/modules/adaptive/rmsprop.py +83 -75
  53. torchzero/modules/adaptive/rprop.py +48 -47
  54. torchzero/modules/adaptive/sam.py +55 -45
  55. torchzero/modules/adaptive/shampoo.py +149 -130
  56. torchzero/modules/adaptive/soap.py +207 -143
  57. torchzero/modules/adaptive/sophia_h.py +106 -130
  58. torchzero/modules/clipping/clipping.py +22 -25
  59. torchzero/modules/clipping/ema_clipping.py +31 -25
  60. torchzero/modules/clipping/growth_clipping.py +14 -17
  61. torchzero/modules/conjugate_gradient/cg.py +27 -38
  62. torchzero/modules/experimental/__init__.py +7 -6
  63. torchzero/modules/experimental/adanystrom.py +258 -0
  64. torchzero/modules/experimental/common_directions_whiten.py +142 -0
  65. torchzero/modules/experimental/coordinate_momentum.py +36 -0
  66. torchzero/modules/experimental/cubic_adam.py +160 -0
  67. torchzero/modules/experimental/curveball.py +25 -41
  68. torchzero/modules/experimental/eigen_sr1.py +182 -0
  69. torchzero/modules/experimental/eigengrad.py +207 -0
  70. torchzero/modules/experimental/gradmin.py +2 -2
  71. torchzero/modules/experimental/higher_order_newton.py +14 -40
  72. torchzero/modules/experimental/l_infinity.py +1 -1
  73. torchzero/modules/experimental/matrix_nag.py +122 -0
  74. torchzero/modules/experimental/newton_solver.py +23 -54
  75. torchzero/modules/experimental/newtonnewton.py +45 -48
  76. torchzero/modules/experimental/reduce_outward_lr.py +7 -7
  77. torchzero/modules/experimental/scipy_newton_cg.py +21 -24
  78. torchzero/modules/experimental/spsa1.py +3 -3
  79. torchzero/modules/experimental/structural_projections.py +1 -4
  80. torchzero/modules/grad_approximation/fdm.py +2 -2
  81. torchzero/modules/grad_approximation/forward_gradient.py +7 -7
  82. torchzero/modules/grad_approximation/grad_approximator.py +23 -16
  83. torchzero/modules/grad_approximation/rfdm.py +24 -21
  84. torchzero/modules/least_squares/gn.py +121 -50
  85. torchzero/modules/line_search/backtracking.py +4 -4
  86. torchzero/modules/line_search/line_search.py +33 -33
  87. torchzero/modules/line_search/strong_wolfe.py +4 -4
  88. torchzero/modules/misc/debug.py +12 -12
  89. torchzero/modules/misc/escape.py +10 -10
  90. torchzero/modules/misc/gradient_accumulation.py +11 -79
  91. torchzero/modules/misc/homotopy.py +16 -8
  92. torchzero/modules/misc/misc.py +121 -123
  93. torchzero/modules/misc/multistep.py +52 -53
  94. torchzero/modules/misc/regularization.py +49 -44
  95. torchzero/modules/misc/split.py +31 -29
  96. torchzero/modules/misc/switch.py +37 -32
  97. torchzero/modules/momentum/averaging.py +14 -14
  98. torchzero/modules/momentum/cautious.py +37 -31
  99. torchzero/modules/momentum/momentum.py +12 -12
  100. torchzero/modules/ops/__init__.py +4 -4
  101. torchzero/modules/ops/accumulate.py +21 -21
  102. torchzero/modules/ops/binary.py +67 -66
  103. torchzero/modules/ops/higher_level.py +20 -20
  104. torchzero/modules/ops/multi.py +44 -41
  105. torchzero/modules/ops/reduce.py +26 -23
  106. torchzero/modules/ops/unary.py +53 -53
  107. torchzero/modules/ops/utility.py +47 -46
  108. torchzero/modules/{functional.py → opt_utils.py} +1 -1
  109. torchzero/modules/projections/galore.py +1 -1
  110. torchzero/modules/projections/projection.py +46 -43
  111. torchzero/modules/quasi_newton/__init__.py +1 -1
  112. torchzero/modules/quasi_newton/damping.py +2 -2
  113. torchzero/modules/quasi_newton/diagonal_quasi_newton.py +1 -1
  114. torchzero/modules/quasi_newton/lbfgs.py +10 -10
  115. torchzero/modules/quasi_newton/lsr1.py +10 -10
  116. torchzero/modules/quasi_newton/quasi_newton.py +54 -39
  117. torchzero/modules/quasi_newton/sg2.py +69 -205
  118. torchzero/modules/restarts/restars.py +39 -37
  119. torchzero/modules/second_order/__init__.py +2 -2
  120. torchzero/modules/second_order/ifn.py +31 -62
  121. torchzero/modules/second_order/inm.py +57 -53
  122. torchzero/modules/second_order/multipoint.py +40 -80
  123. torchzero/modules/second_order/newton.py +165 -196
  124. torchzero/modules/second_order/newton_cg.py +105 -157
  125. torchzero/modules/second_order/nystrom.py +216 -185
  126. torchzero/modules/second_order/rsn.py +132 -125
  127. torchzero/modules/smoothing/laplacian.py +13 -12
  128. torchzero/modules/smoothing/sampling.py +10 -10
  129. torchzero/modules/step_size/adaptive.py +24 -24
  130. torchzero/modules/step_size/lr.py +17 -17
  131. torchzero/modules/termination/termination.py +32 -30
  132. torchzero/modules/trust_region/cubic_regularization.py +3 -3
  133. torchzero/modules/trust_region/levenberg_marquardt.py +25 -28
  134. torchzero/modules/trust_region/trust_cg.py +2 -2
  135. torchzero/modules/trust_region/trust_region.py +27 -22
  136. torchzero/modules/variance_reduction/svrg.py +23 -21
  137. torchzero/modules/weight_decay/__init__.py +2 -1
  138. torchzero/modules/weight_decay/reinit.py +83 -0
  139. torchzero/modules/weight_decay/weight_decay.py +17 -18
  140. torchzero/modules/wrappers/optim_wrapper.py +14 -14
  141. torchzero/modules/zeroth_order/cd.py +10 -7
  142. torchzero/optim/mbs.py +291 -0
  143. torchzero/optim/root.py +3 -3
  144. torchzero/optim/utility/split.py +2 -1
  145. torchzero/optim/wrappers/directsearch.py +27 -63
  146. torchzero/optim/wrappers/fcmaes.py +14 -35
  147. torchzero/optim/wrappers/mads.py +11 -31
  148. torchzero/optim/wrappers/moors.py +66 -0
  149. torchzero/optim/wrappers/nevergrad.py +4 -13
  150. torchzero/optim/wrappers/nlopt.py +31 -25
  151. torchzero/optim/wrappers/optuna.py +8 -13
  152. torchzero/optim/wrappers/pybobyqa.py +124 -0
  153. torchzero/optim/wrappers/scipy/__init__.py +7 -0
  154. torchzero/optim/wrappers/scipy/basin_hopping.py +117 -0
  155. torchzero/optim/wrappers/scipy/brute.py +48 -0
  156. torchzero/optim/wrappers/scipy/differential_evolution.py +80 -0
  157. torchzero/optim/wrappers/scipy/direct.py +69 -0
  158. torchzero/optim/wrappers/scipy/dual_annealing.py +115 -0
  159. torchzero/optim/wrappers/scipy/experimental.py +141 -0
  160. torchzero/optim/wrappers/scipy/minimize.py +151 -0
  161. torchzero/optim/wrappers/scipy/sgho.py +111 -0
  162. torchzero/optim/wrappers/wrapper.py +121 -0
  163. torchzero/utils/__init__.py +7 -25
  164. torchzero/utils/benchmarks/__init__.py +0 -0
  165. torchzero/utils/benchmarks/logistic.py +122 -0
  166. torchzero/utils/compile.py +2 -2
  167. torchzero/utils/derivatives.py +97 -73
  168. torchzero/utils/optimizer.py +4 -77
  169. torchzero/utils/python_tools.py +31 -0
  170. torchzero/utils/tensorlist.py +11 -5
  171. torchzero/utils/thoad_tools.py +68 -0
  172. {torchzero-0.3.15.dist-info → torchzero-0.4.1.dist-info}/METADATA +1 -1
  173. torchzero-0.4.1.dist-info/RECORD +209 -0
  174. tests/test_vars.py +0 -185
  175. torchzero/core/var.py +0 -376
  176. torchzero/modules/adaptive/lmadagrad.py +0 -186
  177. torchzero/modules/experimental/momentum.py +0 -160
  178. torchzero/optim/wrappers/scipy.py +0 -572
  179. torchzero/utils/linalg/__init__.py +0 -12
  180. torchzero/utils/linalg/matrix_funcs.py +0 -87
  181. torchzero/utils/linalg/orthogonalize.py +0 -12
  182. torchzero/utils/linalg/svd.py +0 -20
  183. torchzero/utils/ops.py +0 -10
  184. torchzero-0.3.15.dist-info/RECORD +0 -175
  185. /torchzero/{utils/linalg → linalg}/benchmark.py +0 -0
  186. {torchzero-0.3.15.dist-info → torchzero-0.4.1.dist-info}/WHEEL +0 -0
  187. {torchzero-0.3.15.dist-info → torchzero-0.4.1.dist-info}/top_level.txt +0 -0
@@ -1,27 +1,27 @@
  """Modules that perform averaging over a history of past updates."""
  from collections import deque
  from collections.abc import Sequence
- from typing import Any, Literal, cast
+ from typing import Any

  import torch

- from ...core import TensorwiseTransform, Target
+ from ...core import TensorTransform
  from ...utils import tolist


- class Averaging(TensorwiseTransform):
+ class Averaging(TensorTransform):
  """Average of past ``history_size`` updates.

  Args:
  history_size (int): Number of past updates to average
  target (Target, optional): target. Defaults to 'update'.
  """
- def __init__(self, history_size: int, target: Target = 'update'):
+ def __init__(self, history_size: int):
  defaults = dict(history_size=history_size)
- super().__init__(uses_grad=False, defaults=defaults, target=target)
+ super().__init__(defaults=defaults)

  @torch.no_grad
- def apply_tensor(self, tensor, param, grad, loss, state, setting):
+ def single_tensor_apply(self, tensor, param, grad, loss, state, setting):
  history_size = setting['history_size']
  if 'history' not in state:
  state['history'] = deque(maxlen=history_size)
@@ -34,19 +34,19 @@ class Averaging(TensorwiseTransform):

  return average / len(history)

- class WeightedAveraging(TensorwiseTransform):
+ class WeightedAveraging(TensorTransform):
  """Weighted average of past ``len(weights)`` updates.

  Args:
  weights (Sequence[float]): a sequence of weights from oldest to newest.
  target (Target, optional): target. Defaults to 'update'.
  """
- def __init__(self, weights: Sequence[float] | torch.Tensor | Any, target: Target = 'update'):
+ def __init__(self, weights: Sequence[float] | torch.Tensor | Any):
  defaults = dict(weights = tolist(weights))
- super().__init__(uses_grad=False, defaults=defaults, target=target)
+ super().__init__(defaults=defaults)

  @torch.no_grad
- def apply_tensor(self, tensor, param, grad, loss, state, setting):
+ def single_tensor_apply(self, tensor, param, grad, loss, state, setting):
  weights = setting['weights']

  if 'history' not in state:
@@ -68,19 +68,19 @@ class WeightedAveraging(TensorwiseTransform):
  return average


- class MedianAveraging(TensorwiseTransform):
+ class MedianAveraging(TensorTransform):
  """Median of past ``history_size`` updates.

  Args:
  history_size (int): Number of past updates to average
  target (Target, optional): target. Defaults to 'update'.
  """
- def __init__(self, history_size: int, target: Target = 'update'):
+ def __init__(self, history_size: int,):
  defaults = dict(history_size = history_size)
- super().__init__(uses_grad=False, defaults=defaults, target=target)
+ super().__init__(defaults=defaults)

  @torch.no_grad
- def apply_tensor(self, tensor, param, grad, loss, state, setting):
+ def single_tensor_apply(self, tensor, param, grad, loss, state, setting):
  history_size = setting['history_size']

  if 'history' not in state:
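The three hunks above (apparently from `torchzero/modules/momentum/averaging.py`, item 97 in the file list) show the recurring 0.4.x pattern: `TensorwiseTransform` becomes `TensorTransform`, the `target=`/`uses_grad=` constructor arguments are dropped, and the per-tensor hook `apply_tensor` is renamed to `single_tensor_apply`. Below is a minimal sketch of a custom transform written against the renamed hooks; the `ClampUpdate` class, its `limit` setting, and the public import path `torchzero.core` are illustrative assumptions rather than anything shipped in the package.

```python
import torch
from torchzero.core import TensorTransform  # assumed public re-export of the class renamed above

class ClampUpdate(TensorTransform):
    """Hypothetical transform: clamps each update tensor to [-limit, limit]."""
    def __init__(self, limit: float = 1.0):
        defaults = dict(limit=limit)
        super().__init__(defaults=defaults)

    @torch.no_grad
    def single_tensor_apply(self, tensor, param, grad, loss, state, setting):
        # `setting` carries this parameter's resolved defaults, mirroring the hunks above
        return tensor.clamp_(-setting['limit'], setting['limit'])
```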
@@ -5,7 +5,7 @@ from typing import Literal

  import torch

- from ...core import Target, Transform, Module, Chainable
+ from ...core import TensorTransform, Module, Chainable
  from ...utils import NumberList, TensorList, unpack_dicts


@@ -36,7 +36,7 @@ def cautious_(
  tensors_ -= tensors_.mul(2).mul_(mask.logical_not_())
  return tensors_

- class Cautious(Transform):
+ class Cautious(TensorTransform):
  """Negates update for parameters where update and gradient sign is inconsistent.
  Optionally normalizes the update by the number of parameters that are not masked.
  This is meant to be used after any momentum-based modules.
@@ -57,7 +57,7 @@ class Cautious(Transform):
  Cautious Adam

  ```python
- opt = tz.Modular(
+ opt = tz.Optimizer(
  bench.parameters(),
  tz.m.Adam(),
  tz.m.Cautious(),
@@ -79,12 +79,12 @@ class Cautious(Transform):
  super().__init__(defaults, uses_grad=True)

  @torch.no_grad
- def apply_tensors(self, tensors, params, grads, loss, states, settings):
+ def multi_tensor_apply(self, tensors, params, grads, loss, states, settings):
  assert grads is not None
  mode, normalize, eps = itemgetter('mode', 'normalize', 'eps')(settings[0])
  return cautious_(TensorList(tensors), TensorList(grads), normalize=normalize, eps=eps, mode=mode)

- class UpdateGradientSignConsistency(Transform):
+ class UpdateGradientSignConsistency(TensorTransform):
  """Compares update and gradient signs. Output will have 1s where signs match, and 0s where they don't.

  Args:
@@ -98,7 +98,7 @@ class UpdateGradientSignConsistency(Transform):
  super().__init__(defaults, uses_grad=True)

  @torch.no_grad
- def apply_tensors(self, tensors, params, grads, loss, states, settings):
+ def multi_tensor_apply(self, tensors, params, grads, loss, states, settings):
  assert grads is not None
  normalize, eps = itemgetter('normalize', 'eps')(settings[0])

@@ -108,7 +108,7 @@ class UpdateGradientSignConsistency(Transform):
  return mask

  class IntermoduleCautious(Module):
- """Negaties update on :code:`main` module where it's sign doesn't match with output of :code:`compare` module.
+ """Negaties update on :code:`main` module where it's sign doesn't match with output of ``compare`` module.

  Args:
  main (Chainable): main module or sequence of modules whose update will be cautioned.
@@ -137,29 +137,32 @@ class IntermoduleCautious(Module):
  self.set_child('main', main)
  self.set_child('compare', compare)

+ def update(self, objective): raise RuntimeError
+ def apply(self, objective): raise RuntimeError
+
  @torch.no_grad
- def step(self, var):
+ def step(self, objective):
  main = self.children['main']
  compare = self.children['compare']

- main_var = main.step(var.clone(clone_update=True))
- var.update_attrs_from_clone_(main_var)
+ main_var = main.step(objective.clone(clone_updates=True))
+ objective.update_attrs_from_clone_(main_var)

- compare_var = compare.step(var.clone(clone_update=True))
- var.update_attrs_from_clone_(compare_var)
+ compare_var = compare.step(objective.clone(clone_updates=True))
+ objective.update_attrs_from_clone_(compare_var)

  mode, normalize, eps = itemgetter('mode', 'normalize', 'eps')(self.defaults)
- var.update = cautious_(
- TensorList(main_var.get_update()),
- TensorList(compare_var.get_update()),
+ objective.updates = cautious_(
+ TensorList(main_var.get_updates()),
+ TensorList(compare_var.get_updates()),
  normalize=normalize,
  mode=mode,
  eps=eps,
  )

- return var
+ return objective

- class ScaleByGradCosineSimilarity(Transform):
+ class ScaleByGradCosineSimilarity(TensorTransform):
  """Multiplies the update by cosine similarity with gradient.
  If cosine similarity is negative, naturally the update will be negated as well.

@@ -170,7 +173,7 @@ class ScaleByGradCosineSimilarity(Transform):

  Scaled Adam
  ```python
- opt = tz.Modular(
+ opt = tz.Optimizer(
  bench.parameters(),
  tz.m.Adam(),
  tz.m.ScaleByGradCosineSimilarity(),
@@ -186,7 +189,7 @@ class ScaleByGradCosineSimilarity(Transform):
  super().__init__(defaults, uses_grad=True)

  @torch.no_grad
- def apply_tensors(self, tensors, params, grads, loss, states, settings):
+ def multi_tensor_apply(self, tensors, params, grads, loss, states, settings):
  assert grads is not None
  eps = settings[0]['eps']
  tensors = TensorList(tensors)
@@ -196,8 +199,8 @@ class ScaleByGradCosineSimilarity(Transform):
  return tensors.mul_(cos_sim)

  class ScaleModulesByCosineSimilarity(Module):
- """Scales the output of :code:`main` module by it's cosine similarity to the output
- of :code:`compare` module.
+ """Scales the output of ``main`` module by it's cosine similarity to the output
+ of ``compare`` module.

  Args:
  main (Chainable): main module or sequence of modules whose update will be scaled.
@@ -208,7 +211,7 @@ class ScaleModulesByCosineSimilarity(Module):

  Adam scaled by similarity to RMSprop
  ```python
- opt = tz.Modular(
+ opt = tz.Optimizer(
  bench.parameters(),
  tz.m.ScaleModulesByCosineSimilarity(
  main = tz.m.Adam(),
@@ -230,22 +233,25 @@ class ScaleModulesByCosineSimilarity(Module):
  self.set_child('main', main)
  self.set_child('compare', compare)

+ def update(self, objective): raise RuntimeError
+ def apply(self, objective): raise RuntimeError
+
  @torch.no_grad
- def step(self, var):
+ def step(self, objective):
  main = self.children['main']
  compare = self.children['compare']

- main_var = main.step(var.clone(clone_update=True))
- var.update_attrs_from_clone_(main_var)
+ main_var = main.step(objective.clone(clone_updates=True))
+ objective.update_attrs_from_clone_(main_var)

- compare_var = compare.step(var.clone(clone_update=True))
- var.update_attrs_from_clone_(compare_var)
+ compare_var = compare.step(objective.clone(clone_updates=True))
+ objective.update_attrs_from_clone_(compare_var)

- m = TensorList(main_var.get_update())
- c = TensorList(compare_var.get_update())
+ m = TensorList(main_var.get_updates())
+ c = TensorList(compare_var.get_updates())
  eps = self.defaults['eps']

  cos_sim = m.dot(c) / (m.global_vector_norm() * c.global_vector_norm()).clip(min=eps)

- var.update = m.mul_(cos_sim)
- return var
+ objective.updates = m.mul_(cos_sim)
+ return objective
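The docstring examples in these hunks (apparently from `torchzero/modules/momentum/cautious.py`, item 98 in the file list) switch the entry point from `tz.Modular` to `tz.Optimizer`. A hedged usage sketch mirroring the "Cautious Adam" example above; the model, the `LR` step-size module, and the learning-rate value are placeholders, not prescribed by the diff.

```python
import torch
import torchzero as tz

model = torch.nn.Linear(10, 1)
opt = tz.Optimizer(
    model.parameters(),
    tz.m.Adam(),
    tz.m.Cautious(),
    tz.m.LR(1e-2),  # assumed step-size module, cf. torchzero/modules/step_size/lr.py in the file list
)

# one illustrative step, using the usual torch.optim-style interface
loss = model(torch.randn(8, 10)).pow(2).mean()
loss.backward()
opt.step()
```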
@@ -4,12 +4,12 @@ from typing import Literal

  import torch

- from ...core import Target, Transform
+ from ...core import TensorTransform
  from ...utils import NumberList, TensorList, unpack_dicts, unpack_states
- from ..functional import debias, ema_
+ from ..opt_utils import debias, ema_


- class EMA(Transform):
+ class EMA(TensorTransform):
  """Maintains an exponential moving average of update.

  Args:
@@ -20,12 +20,12 @@ class EMA(Transform):
  ema_init (str, optional): initial values for the EMA, "zeros" or "update".
  target (Target, optional): target to apply EMA to. Defaults to 'update'.
  """
- def __init__(self, momentum:float=0.9, dampening:float=0, debiased: bool = False, lerp=True, ema_init: Literal['zeros', 'update'] = 'zeros', target: Target = 'update'):
+ def __init__(self, momentum:float=0.9, dampening:float=0, debiased: bool = False, lerp=True, ema_init: Literal['zeros', 'update'] = 'zeros'):
  defaults = dict(momentum=momentum,dampening=dampening,debiased=debiased,lerp=lerp,ema_init=ema_init)
- super().__init__(defaults, uses_grad=False, target=target)
+ super().__init__(defaults, uses_grad=False)

  @torch.no_grad
- def apply_tensors(self, tensors, params, grads, loss, states, settings):
+ def multi_tensor_apply(self, tensors, params, grads, loss, states, settings):
  step = self.global_state['step'] = self.global_state.get('step', 0) + 1

  debiased, lerp, ema_init = itemgetter('debiased','lerp','ema_init')(settings[0])
@@ -53,8 +53,8 @@ class HeavyBall(EMA):
  ema_init (str, optional): initial values for the EMA, "zeros" or "update".
  target (Target, optional): target to apply EMA to. Defaults to 'update'.
  """
- def __init__(self, momentum:float=0.9, dampening:float=0, debiased: bool = False, lerp=False, ema_init: Literal['zeros', 'update'] = 'update', target: Target = 'update'):
- super().__init__(momentum=momentum, dampening=dampening, debiased=debiased, lerp=lerp, ema_init=ema_init, target=target)
+ def __init__(self, momentum:float=0.9, dampening:float=0, debiased: bool = False, lerp=False, ema_init: Literal['zeros', 'update'] = 'update'):
+ super().__init__(momentum=momentum, dampening=dampening, debiased=debiased, lerp=lerp, ema_init=ema_init)

  def nag_(
  tensors_: TensorList,
@@ -74,7 +74,7 @@ def nag_(
  return tensors_


- class NAG(Transform):
+ class NAG(TensorTransform):
  """Nesterov accelerated gradient method (nesterov momentum).

  Args:
@@ -84,12 +84,12 @@ class NAG(Transform):
  whether to use linear interpolation, if True, this becomes similar to exponential moving average. Defaults to False.
  target (Target, optional): target to apply EMA to. Defaults to 'update'.
  """
- def __init__(self, momentum:float=0.9, dampening:float=0, lerp=False, target: Target = 'update'):
+ def __init__(self, momentum:float=0.9, dampening:float=0, lerp=False):
  defaults = dict(momentum=momentum,dampening=dampening, lerp=lerp)
- super().__init__(defaults, uses_grad=False, target=target)
+ super().__init__(defaults, uses_grad=False)

  @torch.no_grad
- def apply_tensors(self, tensors, params, grads, loss, states, settings):
+ def multi_tensor_apply(self, tensors, params, grads, loss, states, settings):
  velocity = unpack_states(states, tensors, 'velocity', cls=TensorList)
  lerp = self.settings[params[0]]['lerp']

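These hunks (apparently from `torchzero/modules/momentum/momentum.py`, item 99 in the file list) drop the `target=` keyword from the `EMA`, `HeavyBall`, and `NAG` constructors and rename the multi-tensor hook to `multi_tensor_apply`. A sketch of constructing the momentum transforms under the new signatures, assuming they are exported under `tz.m` like the other modules in the docstrings above:

```python
import torchzero as tz

# `target=` is gone from these constructors in 0.4.x; remaining keywords follow the hunks above
ema = tz.m.EMA(momentum=0.9, dampening=0.0, debiased=True, lerp=True, ema_init='zeros')
hb = tz.m.HeavyBall(momentum=0.9)
nag = tz.m.NAG(momentum=0.9, dampening=0.0, lerp=False)
```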
@@ -12,8 +12,8 @@ from .binary import (
  CopyMagnitude,
  CopySign,
  Div,
- Graft,
- GraftToUpdate,
+ GraftInputToOutput,
+ GraftInputToOutput,
  GramSchimdt,
  Maximum,
  Minimum,
@@ -21,7 +21,7 @@ from .binary import (
  Pow,
  RCopySign,
  RDiv,
- RGraft,
+ GraftOutputToInput,
  RPow,
  RSub,
  Sub,
@@ -38,7 +38,7 @@ from .higher_level import (
  from .multi import (
  ClipModules,
  DivModules,
- GraftModules,
+ Graft,
  LerpModules,
  MultiOperationBase,
  PowModules,
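The import-list hunks above (apparently from `torchzero/modules/ops/__init__.py`, item 100 in the file list) rename the grafting operators: the binary `Graft`/`GraftToUpdate` become `GraftInputToOutput`, `RGraft` becomes `GraftOutputToInput`, and the multi-module `GraftModules` takes over the bare `Graft` name. In import form, assuming these names remain re-exported from `torchzero.modules.ops`:

```python
# 0.3.15 names (no longer importable in 0.4.1):
#   from torchzero.modules.ops import Graft, GraftToUpdate, RGraft, GraftModules

# 0.4.1 equivalents, per the renamed exports in the hunks above:
from torchzero.modules.ops import Graft, GraftInputToOutput, GraftOutputToInput
```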
@@ -1,90 +1,90 @@
  import torch

- from ...core import Target, Transform
+ from ...core import TensorTransform
  from ...utils import TensorList, unpack_states

- class AccumulateSum(Transform):
+ class AccumulateSum(TensorTransform):
  """Accumulates sum of all past updates.

  Args:
  decay (float, optional): decays the accumulator. Defaults to 0.
  target (Target, optional): target. Defaults to 'update'.
  """
- def __init__(self, decay: float = 0, target: Target = 'update',):
+ def __init__(self, decay: float = 0):
  defaults = dict(decay=decay)
- super().__init__(defaults, uses_grad=False, target=target)
+ super().__init__(defaults)

  @torch.no_grad
- def apply_tensors(self, tensors, params, grads, loss, states, settings):
+ def multi_tensor_apply(self, tensors, params, grads, loss, states, settings):
  sum = unpack_states(states, tensors, 'sum', cls=TensorList)
  decay = [1-s['decay'] for s in settings]
  return sum.add_(tensors).lazy_mul(decay, clone=True)

- class AccumulateMean(Transform):
+ class AccumulateMean(TensorTransform):
  """Accumulates mean of all past updates.

  Args:
  decay (float, optional): decays the accumulator. Defaults to 0.
  target (Target, optional): target. Defaults to 'update'.
  """
- def __init__(self, decay: float = 0, target: Target = 'update',):
+ def __init__(self, decay: float = 0):
  defaults = dict(decay=decay)
- super().__init__(defaults, uses_grad=False, target=target)
+ super().__init__(defaults)

  @torch.no_grad
- def apply_tensors(self, tensors, params, grads, loss, states, settings):
+ def multi_tensor_apply(self, tensors, params, grads, loss, states, settings):
  step = self.global_state['step'] = self.global_state.get('step', 0) + 1
  mean = unpack_states(states, tensors, 'mean', cls=TensorList)
  decay = [1-s['decay'] for s in settings]
  return mean.add_(tensors).lazy_mul(decay, clone=True).div_(step)

- class AccumulateProduct(Transform):
+ class AccumulateProduct(TensorTransform):
  """Accumulates product of all past updates.

  Args:
  decay (float, optional): decays the accumulator. Defaults to 0.
  target (Target, optional): target. Defaults to 'update'.
  """
- def __init__(self, decay: float = 0, target: Target = 'update',):
+ def __init__(self, decay: float = 0, target = 'update',):
  defaults = dict(decay=decay)
- super().__init__(defaults, uses_grad=False, target=target)
+ super().__init__(defaults)

  @torch.no_grad
- def apply_tensors(self, tensors, params, grads, loss, states, settings):
+ def multi_tensor_apply(self, tensors, params, grads, loss, states, settings):
  prod = unpack_states(states, tensors, 'prod', cls=TensorList)
  decay = [1-s['decay'] for s in settings]
  return prod.mul_(tensors).lazy_mul(decay, clone=True)

- class AccumulateMaximum(Transform):
+ class AccumulateMaximum(TensorTransform):
  """Accumulates maximum of all past updates.

  Args:
  decay (float, optional): decays the accumulator. Defaults to 0.
  target (Target, optional): target. Defaults to 'update'.
  """
- def __init__(self, decay: float = 0, target: Target = 'update',):
+ def __init__(self, decay: float = 0):
  defaults = dict(decay=decay)
- super().__init__(defaults, uses_grad=False, target=target)
+ super().__init__(defaults)

  @torch.no_grad
- def apply_tensors(self, tensors, params, grads, loss, states, settings):
+ def multi_tensor_apply(self, tensors, params, grads, loss, states, settings):
  maximum = unpack_states(states, tensors, 'maximum', cls=TensorList)
  decay = [1-s['decay'] for s in settings]
  return maximum.maximum_(tensors).lazy_mul(decay, clone=True)

- class AccumulateMinimum(Transform):
+ class AccumulateMinimum(TensorTransform):
  """Accumulates minimum of all past updates.

  Args:
  decay (float, optional): decays the accumulator. Defaults to 0.
  target (Target, optional): target. Defaults to 'update'.
  """
- def __init__(self, decay: float = 0, target: Target = 'update',):
+ def __init__(self, decay: float = 0):
  defaults = dict(decay=decay)
- super().__init__(defaults, uses_grad=False, target=target)
+ super().__init__(defaults)

  @torch.no_grad
- def apply_tensors(self, tensors, params, grads, loss, states, settings):
+ def multi_tensor_apply(self, tensors, params, grads, loss, states, settings):
  minimum = unpack_states(states, tensors, 'minimum', cls=TensorList)
  decay = [1-s['decay'] for s in settings]
  return minimum.minimum_(tensors).lazy_mul(decay, clone=True)
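The accumulator hunk above (apparently from `torchzero/modules/ops/accumulate.py`, item 101 in the file list) shows the multi-tensor side of the rename: `apply_tensors` becomes `multi_tensor_apply` and receives per-parameter `states` and `settings` lists. A minimal sketch of a custom transform against that hook, under the same assumptions as the earlier sketch; the class and its `'sq_sum'` state key are hypothetical, and the explicit per-tensor loop sidesteps the `TensorList`/`unpack_states` helpers the package itself uses.

```python
import torch
from torchzero.core import TensorTransform  # assumed public import path

class AccumulateSquaredSum(TensorTransform):
    """Hypothetical transform: running, optionally decayed, sum of squared updates."""
    def __init__(self, decay: float = 0):
        defaults = dict(decay=decay)
        super().__init__(defaults)

    @torch.no_grad
    def multi_tensor_apply(self, tensors, params, grads, loss, states, settings):
        out = []
        for t, state, setting in zip(tensors, states, settings):
            if 'sq_sum' not in state:
                state['sq_sum'] = torch.zeros_like(t)
            acc = state['sq_sum'].add_(t * t)      # accumulate squared update in per-parameter state
            out.append(acc * (1 - setting['decay']))
        return out
```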