torchzero 0.3.15__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/test_identical.py +2 -2
- tests/test_module_autograd.py +586 -0
- tests/test_objective.py +188 -0
- tests/test_opts.py +43 -33
- tests/test_tensorlist.py +0 -8
- tests/test_utils_optimizer.py +0 -1
- torchzero/__init__.py +1 -1
- torchzero/core/__init__.py +7 -4
- torchzero/core/chain.py +20 -23
- torchzero/core/functional.py +90 -24
- torchzero/core/modular.py +48 -52
- torchzero/core/module.py +130 -50
- torchzero/core/objective.py +948 -0
- torchzero/core/reformulation.py +55 -24
- torchzero/core/transform.py +261 -367
- torchzero/linalg/__init__.py +10 -0
- torchzero/linalg/eigh.py +34 -0
- torchzero/linalg/linalg_utils.py +14 -0
- torchzero/{utils/linalg → linalg}/linear_operator.py +99 -49
- torchzero/linalg/matrix_power.py +28 -0
- torchzero/linalg/orthogonalize.py +95 -0
- torchzero/{utils/linalg → linalg}/qr.py +4 -2
- torchzero/{utils/linalg → linalg}/solve.py +76 -88
- torchzero/linalg/svd.py +20 -0
- torchzero/linalg/torch_linalg.py +168 -0
- torchzero/modules/adaptive/__init__.py +1 -1
- torchzero/modules/adaptive/adagrad.py +163 -213
- torchzero/modules/adaptive/adahessian.py +74 -103
- torchzero/modules/adaptive/adam.py +53 -76
- torchzero/modules/adaptive/adan.py +49 -30
- torchzero/modules/adaptive/adaptive_heavyball.py +11 -6
- torchzero/modules/adaptive/aegd.py +12 -12
- torchzero/modules/adaptive/esgd.py +98 -119
- torchzero/modules/adaptive/lion.py +5 -10
- torchzero/modules/adaptive/lmadagrad.py +87 -32
- torchzero/modules/adaptive/mars.py +5 -5
- torchzero/modules/adaptive/matrix_momentum.py +47 -51
- torchzero/modules/adaptive/msam.py +70 -52
- torchzero/modules/adaptive/muon.py +59 -124
- torchzero/modules/adaptive/natural_gradient.py +33 -28
- torchzero/modules/adaptive/orthograd.py +11 -15
- torchzero/modules/adaptive/rmsprop.py +83 -75
- torchzero/modules/adaptive/rprop.py +48 -47
- torchzero/modules/adaptive/sam.py +55 -45
- torchzero/modules/adaptive/shampoo.py +123 -129
- torchzero/modules/adaptive/soap.py +207 -143
- torchzero/modules/adaptive/sophia_h.py +106 -130
- torchzero/modules/clipping/clipping.py +15 -18
- torchzero/modules/clipping/ema_clipping.py +31 -25
- torchzero/modules/clipping/growth_clipping.py +14 -17
- torchzero/modules/conjugate_gradient/cg.py +26 -37
- torchzero/modules/experimental/__init__.py +2 -6
- torchzero/modules/experimental/coordinate_momentum.py +36 -0
- torchzero/modules/experimental/curveball.py +25 -41
- torchzero/modules/experimental/gradmin.py +2 -2
- torchzero/modules/experimental/higher_order_newton.py +14 -40
- torchzero/modules/experimental/newton_solver.py +22 -53
- torchzero/modules/experimental/newtonnewton.py +15 -12
- torchzero/modules/experimental/reduce_outward_lr.py +7 -7
- torchzero/modules/experimental/scipy_newton_cg.py +21 -24
- torchzero/modules/experimental/spsa1.py +3 -3
- torchzero/modules/experimental/structural_projections.py +1 -4
- torchzero/modules/functional.py +1 -1
- torchzero/modules/grad_approximation/forward_gradient.py +7 -7
- torchzero/modules/grad_approximation/grad_approximator.py +23 -16
- torchzero/modules/grad_approximation/rfdm.py +20 -17
- torchzero/modules/least_squares/gn.py +90 -42
- torchzero/modules/line_search/backtracking.py +2 -2
- torchzero/modules/line_search/line_search.py +32 -32
- torchzero/modules/line_search/strong_wolfe.py +2 -2
- torchzero/modules/misc/debug.py +12 -12
- torchzero/modules/misc/escape.py +10 -10
- torchzero/modules/misc/gradient_accumulation.py +10 -78
- torchzero/modules/misc/homotopy.py +16 -8
- torchzero/modules/misc/misc.py +120 -122
- torchzero/modules/misc/multistep.py +50 -48
- torchzero/modules/misc/regularization.py +49 -44
- torchzero/modules/misc/split.py +30 -28
- torchzero/modules/misc/switch.py +37 -32
- torchzero/modules/momentum/averaging.py +14 -14
- torchzero/modules/momentum/cautious.py +34 -28
- torchzero/modules/momentum/momentum.py +11 -11
- torchzero/modules/ops/__init__.py +4 -4
- torchzero/modules/ops/accumulate.py +21 -21
- torchzero/modules/ops/binary.py +67 -66
- torchzero/modules/ops/higher_level.py +19 -19
- torchzero/modules/ops/multi.py +44 -41
- torchzero/modules/ops/reduce.py +26 -23
- torchzero/modules/ops/unary.py +53 -53
- torchzero/modules/ops/utility.py +47 -46
- torchzero/modules/projections/galore.py +1 -1
- torchzero/modules/projections/projection.py +43 -43
- torchzero/modules/quasi_newton/damping.py +1 -1
- torchzero/modules/quasi_newton/lbfgs.py +7 -7
- torchzero/modules/quasi_newton/lsr1.py +7 -7
- torchzero/modules/quasi_newton/quasi_newton.py +10 -10
- torchzero/modules/quasi_newton/sg2.py +19 -19
- torchzero/modules/restarts/restars.py +26 -24
- torchzero/modules/second_order/__init__.py +2 -2
- torchzero/modules/second_order/ifn.py +31 -62
- torchzero/modules/second_order/inm.py +49 -53
- torchzero/modules/second_order/multipoint.py +40 -80
- torchzero/modules/second_order/newton.py +57 -90
- torchzero/modules/second_order/newton_cg.py +102 -154
- torchzero/modules/second_order/nystrom.py +157 -177
- torchzero/modules/second_order/rsn.py +106 -96
- torchzero/modules/smoothing/laplacian.py +13 -12
- torchzero/modules/smoothing/sampling.py +11 -10
- torchzero/modules/step_size/adaptive.py +23 -23
- torchzero/modules/step_size/lr.py +15 -15
- torchzero/modules/termination/termination.py +32 -30
- torchzero/modules/trust_region/cubic_regularization.py +2 -2
- torchzero/modules/trust_region/levenberg_marquardt.py +25 -28
- torchzero/modules/trust_region/trust_cg.py +1 -1
- torchzero/modules/trust_region/trust_region.py +27 -22
- torchzero/modules/variance_reduction/svrg.py +21 -18
- torchzero/modules/weight_decay/__init__.py +2 -1
- torchzero/modules/weight_decay/reinit.py +83 -0
- torchzero/modules/weight_decay/weight_decay.py +12 -13
- torchzero/modules/wrappers/optim_wrapper.py +10 -10
- torchzero/modules/zeroth_order/cd.py +9 -6
- torchzero/optim/root.py +3 -3
- torchzero/optim/utility/split.py +2 -1
- torchzero/optim/wrappers/directsearch.py +27 -63
- torchzero/optim/wrappers/fcmaes.py +14 -35
- torchzero/optim/wrappers/mads.py +11 -31
- torchzero/optim/wrappers/moors.py +66 -0
- torchzero/optim/wrappers/nevergrad.py +4 -4
- torchzero/optim/wrappers/nlopt.py +31 -25
- torchzero/optim/wrappers/optuna.py +6 -13
- torchzero/optim/wrappers/pybobyqa.py +124 -0
- torchzero/optim/wrappers/scipy/__init__.py +7 -0
- torchzero/optim/wrappers/scipy/basin_hopping.py +117 -0
- torchzero/optim/wrappers/scipy/brute.py +48 -0
- torchzero/optim/wrappers/scipy/differential_evolution.py +80 -0
- torchzero/optim/wrappers/scipy/direct.py +69 -0
- torchzero/optim/wrappers/scipy/dual_annealing.py +115 -0
- torchzero/optim/wrappers/scipy/experimental.py +141 -0
- torchzero/optim/wrappers/scipy/minimize.py +151 -0
- torchzero/optim/wrappers/scipy/sgho.py +111 -0
- torchzero/optim/wrappers/wrapper.py +121 -0
- torchzero/utils/__init__.py +7 -25
- torchzero/utils/compile.py +2 -2
- torchzero/utils/derivatives.py +93 -69
- torchzero/utils/optimizer.py +4 -77
- torchzero/utils/python_tools.py +31 -0
- torchzero/utils/tensorlist.py +11 -5
- torchzero/utils/thoad_tools.py +68 -0
- {torchzero-0.3.15.dist-info → torchzero-0.4.0.dist-info}/METADATA +1 -1
- torchzero-0.4.0.dist-info/RECORD +191 -0
- tests/test_vars.py +0 -185
- torchzero/core/var.py +0 -376
- torchzero/modules/experimental/momentum.py +0 -160
- torchzero/optim/wrappers/scipy.py +0 -572
- torchzero/utils/linalg/__init__.py +0 -12
- torchzero/utils/linalg/matrix_funcs.py +0 -87
- torchzero/utils/linalg/orthogonalize.py +0 -12
- torchzero/utils/linalg/svd.py +0 -20
- torchzero/utils/ops.py +0 -10
- torchzero-0.3.15.dist-info/RECORD +0 -175
- /torchzero/{utils/linalg → linalg}/benchmark.py +0 -0
- {torchzero-0.3.15.dist-info → torchzero-0.4.0.dist-info}/WHEEL +0 -0
- {torchzero-0.3.15.dist-info → torchzero-0.4.0.dist-info}/top_level.txt +0 -0
torchzero/modules/line_search/line_search.py CHANGED

@@ -8,7 +8,7 @@ from typing import Any, Literal
 import numpy as np
 import torch
 
-from ...core import Module, Var
+from ...core import Module, Objective
 from ...utils import tofloat, set_storage_
 from ..functional import clip_by_finfo
 
@@ -139,7 +139,7 @@ class LineSearchBase(Module, ABC):
         for c, n in zip(params, new_params):
             set_storage_(c, n)
 
-    def _loss(self, step_size: float, var: Var, closure, params: list[torch.Tensor],
+    def _loss(self, step_size: float, var: Objective, closure, params: list[torch.Tensor],
               update: list[torch.Tensor], backward:bool=False) -> float:
 
         # if step_size is 0, we might already know the loss
@@ -165,16 +165,16 @@ class LineSearchBase(Module, ABC):
         # if evaluated loss at step size 0, set it to var.loss
         if step_size == 0:
             var.loss = loss
-            if backward: var.grad = [p.grad if p.grad is not None else torch.zeros_like(p) for p in params]
+            if backward: var.grads = [p.grad if p.grad is not None else torch.zeros_like(p) for p in params]
 
         return tofloat(loss)
 
-    def _loss_derivative_gradient(self, step_size: float, var: Var, closure,
+    def _loss_derivative_gradient(self, step_size: float, var: Objective, closure,
                                   params: list[torch.Tensor], update: list[torch.Tensor]):
         # if step_size is 0, we might already know the derivative
-        if (var.grad is not None) and (step_size == 0):
+        if (var.grads is not None) and (step_size == 0):
             loss = self._loss(step_size=step_size,var=var,closure=closure,params=params,update=update,backward=False)
-            derivative = - sum(t.sum() for t in torch._foreach_mul(var.grad, update))
+            derivative = - sum(t.sum() for t in torch._foreach_mul(var.grads, update))
 
         else:
             # loss with a backward pass sets params.grad
@@ -184,79 +184,79 @@ class LineSearchBase(Module, ABC):
             derivative = - sum(t.sum() for t in torch._foreach_mul([p.grad if p.grad is not None
                                                                     else torch.zeros_like(p) for p in params], update))
 
-            assert var.grad is not None
-        return loss, tofloat(derivative), var.grad
+            assert var.grads is not None
+        return loss, tofloat(derivative), var.grads
 
-    def _loss_derivative(self, step_size: float, var: Var, closure,
+    def _loss_derivative(self, step_size: float, var: Objective, closure,
                          params: list[torch.Tensor], update: list[torch.Tensor]):
         return self._loss_derivative_gradient(step_size=step_size, var=var,closure=closure,params=params,update=update)[:2]
 
-    def evaluate_f(self, step_size: float, var: Var, backward:bool=False):
+    def evaluate_f(self, step_size: float, var: Objective, backward:bool=False):
        """evaluate function value at alpha `step_size`."""
        closure = var.closure
        if closure is None: raise RuntimeError('line search requires closure')
-        return self._loss(step_size=step_size, var=var, closure=closure, params=var.params,update=var.get_update(),backward=backward)
+        return self._loss(step_size=step_size, var=var, closure=closure, params=var.params,update=var.get_updates(),backward=backward)
 
-    def evaluate_f_d(self, step_size: float, var: Var):
+    def evaluate_f_d(self, step_size: float, var: Objective):
        """evaluate function value and directional derivative in the direction of the update at step size `step_size`."""
        closure = var.closure
        if closure is None: raise RuntimeError('line search requires closure')
-        return self._loss_derivative(step_size=step_size, var=var, closure=closure, params=var.params,update=var.get_update())
+        return self._loss_derivative(step_size=step_size, var=var, closure=closure, params=var.params,update=var.get_updates())
 
-    def evaluate_f_d_g(self, step_size: float, var: Var):
+    def evaluate_f_d_g(self, step_size: float, var: Objective):
        """evaluate function value, directional derivative, and gradient list at step size `step_size`."""
        closure = var.closure
        if closure is None: raise RuntimeError('line search requires closure')
-        return self._loss_derivative_gradient(step_size=step_size, var=var, closure=closure, params=var.params,update=var.get_update())
+        return self._loss_derivative_gradient(step_size=step_size, var=var, closure=closure, params=var.params,update=var.get_updates())
 
-    def make_objective(self, var: Var, backward:bool=False):
+    def make_objective(self, var: Objective, backward:bool=False):
        closure = var.closure
        if closure is None: raise RuntimeError('line search requires closure')
-        return partial(self._loss, var=var, closure=closure, params=var.params, update=var.get_update(), backward=backward)
+        return partial(self._loss, var=var, closure=closure, params=var.params, update=var.get_updates(), backward=backward)
 
-    def make_objective_with_derivative(self, var: Var):
+    def make_objective_with_derivative(self, var: Objective):
        closure = var.closure
        if closure is None: raise RuntimeError('line search requires closure')
-        return partial(self._loss_derivative, var=var, closure=closure, params=var.params, update=var.get_update())
+        return partial(self._loss_derivative, var=var, closure=closure, params=var.params, update=var.get_updates())
 
-    def make_objective_with_derivative_and_gradient(self, var: Var):
+    def make_objective_with_derivative_and_gradient(self, var: Objective):
        closure = var.closure
        if closure is None: raise RuntimeError('line search requires closure')
-        return partial(self._loss_derivative_gradient, var=var, closure=closure, params=var.params, update=var.get_update())
+        return partial(self._loss_derivative_gradient, var=var, closure=closure, params=var.params, update=var.get_updates())
 
     @abstractmethod
-    def search(self, update: list[torch.Tensor], var: Var) -> float:
+    def search(self, update: list[torch.Tensor], var: Objective) -> float:
        """Finds the step size to use"""
 
     @torch.no_grad
-    def step(self, var: Var) -> Var:
+    def apply(self, objective: Objective) -> Objective:
        self._reset()
 
-        params = var.params
+        params = objective.params
        self._initial_params = [p.clone() for p in params]
-        update = var.get_update()
+        update = objective.get_updates()
 
        try:
-            step_size = self.search(update=update, var=var)
+            step_size = self.search(update=update, var=objective)
        except MaxLineSearchItersReached:
            step_size = self._best_step_size
 
        step_size = clip_by_finfo(step_size, torch.finfo(update[0].dtype))
 
        # set loss_approx
-        if var.loss_approx is None: var.loss_approx = self._lowest_loss
+        if objective.loss_approx is None: objective.loss_approx = self._lowest_loss
 
        # if this is last module, directly update parameters to avoid redundant operations
-        if
+        if objective.modular is not None and self is objective.modular.modules[-1]:
            self.set_step_size_(step_size, params=params, update=update)
 
-            var.stop = True; var.skip_update = True
-            return var
+            objective.stop = True; objective.skip_update = True
+            return objective
 
        # revert parameters and multiply update by step size
        self.set_step_size_(0, params=params, update=update)
-        torch._foreach_mul_(var.update, step_size)
-        return var
+        torch._foreach_mul_(objective.updates, step_size)
+        return objective
 
 
 

torchzero/modules/line_search/strong_wolfe.py CHANGED

@@ -284,8 +284,8 @@ class StrongWolfe(LineSearchBase):
            'init_value', 'init', 'c1', 'c2', 'a_max', 'maxiter', 'maxzoom',
            'maxeval', 'interpolation', 'adaptive', 'plus_minus', 'fallback', 'tol_change')(self.defaults)
 
-        dir = as_tensorlist(var.get_update())
-        grad_list = var.get_grad()
+        dir = as_tensorlist(var.get_updates())
+        grad_list = var.get_grads()
 
        g_0 = -sum(t.sum() for t in torch._foreach_mul(grad_list, dir))
        f_0 = var.get_loss(False)
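The hunks above show the `LineSearchBase` API a subclass works against in 0.4.0: `search(update, var)` returns a step size, and helpers such as `evaluate_f(step_size, var)` evaluate the closure at a trial step size. As a rough, hypothetical illustration (not code from the package; the constructor is omitted because its signature is not shown in this diff):

```python
# Hypothetical sketch built on the LineSearchBase methods visible in the hunks above:
# try a few fixed step sizes and return the best one.
from torchzero.modules.line_search.line_search import LineSearchBase

class FixedGridLineSearch(LineSearchBase):
    """Illustrative only: keeps whichever of a few fixed step sizes gives the lowest loss."""

    step_sizes = (1.0, 0.5, 0.25, 0.1)

    def search(self, update, var) -> float:
        best_size = 0.0
        best_loss = self.evaluate_f(0.0, var)  # loss at the unmodified parameters
        for size in self.step_sizes:
            loss = self.evaluate_f(size, var)  # loss after stepping by `size` along the update
            if loss < best_loss:
                best_size, best_loss = size, loss
        return best_size
```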
torchzero/modules/misc/debug.py CHANGED

@@ -11,9 +11,9 @@ class PrintUpdate(Module):
        defaults = dict(text=text, print_fn=print_fn)
        super().__init__(defaults)
 
-    def step(self, var):
-        self.defaults["print_fn"](f'{self.defaults["text"]}{var.update}')
-        return var
+    def apply(self, objective):
+        self.defaults["print_fn"](f'{self.defaults["text"]}{objective.updates}')
+        return objective
 
 class PrintShape(Module):
    """Prints shapes of the update."""
@@ -21,10 +21,10 @@ class PrintShape(Module):
        defaults = dict(text=text, print_fn=print_fn)
        super().__init__(defaults)
 
-    def step(self, var):
-        shapes = [u.shape for u in var.update] if var.update is not None else None
+    def apply(self, objective):
+        shapes = [u.shape for u in objective.updates] if objective.updates is not None else None
        self.defaults["print_fn"](f'{self.defaults["text"]}{shapes}')
-        return var
+        return objective
 
 class PrintParams(Module):
    """Prints current update."""
@@ -32,9 +32,9 @@ class PrintParams(Module):
        defaults = dict(text=text, print_fn=print_fn)
        super().__init__(defaults)
 
-    def step(self, var):
-        self.defaults["print_fn"](f'{self.defaults["text"]}{var.params}')
-        return var
+    def apply(self, objective):
+        self.defaults["print_fn"](f'{self.defaults["text"]}{objective.params}')
+        return objective
 
 
 class PrintLoss(Module):
@@ -43,6 +43,6 @@ class PrintLoss(Module):
        defaults = dict(text=text, print_fn=print_fn)
        super().__init__(defaults)
 
-    def step(self, var):
-        self.defaults["print_fn"](f'{self.defaults["text"]}{var.get_loss(False)}')
-        return var
+    def apply(self, objective):
+        self.defaults["print_fn"](f'{self.defaults["text"]}{objective.get_loss(False)}')
+        return objective
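These small modules show the 0.4.0 protocol this release migrates to most plainly: `Module.step(self, var)` becomes `Module.apply(self, objective)`, the update tensors live on `objective.updates` (which may be `None`), and the module returns the objective. A minimal sketch of a custom module against that protocol; the class itself is hypothetical, inferred from the hunks, not part of the package:

```python
# Hypothetical module following the 0.4.0 pattern visible in the hunks:
# defaults passed to Module.__init__ as a dict, apply() receives and returns the objective.
import torch
from torchzero.core import Module

class ClampUpdate(Module):
    """Illustrative only: clamps every update tensor elementwise to [-value, value]."""
    def __init__(self, value: float = 1.0):
        super().__init__(dict(value=value))

    @torch.no_grad
    def apply(self, objective):
        value = self.defaults["value"]
        if objective.updates is not None:
            for u in objective.updates:
                u.clamp_(-value, value)
        return objective
```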
torchzero/modules/misc/escape.py CHANGED

@@ -3,7 +3,7 @@ import math
 from typing import Literal
 import torch
 
-from ...core import Modular, Module, Var, Chainable
+from ...core import Modular, Module, Objective, Chainable
 from ...utils import NumberList, TensorList
 
 
@@ -15,11 +15,11 @@ class EscapeAnnealing(Module):
 
 
     @torch.no_grad
-    def step(self, var):
-        closure = var.closure
+    def apply(self, objective):
+        closure = objective.closure
        if closure is None: raise RuntimeError("Escape requries closure")
 
-        params = TensorList(var.params)
+        params = TensorList(objective.params)
        settings = self.settings[params[0]]
        max_region = self.get_settings(params, 'max_region', cls=NumberList)
        max_iter = settings['max_iter']
@@ -41,7 +41,7 @@ class EscapeAnnealing(Module):
        self.global_state['n_bad'] = n_bad
 
        # no progress
-        f_0 = var.get_loss(False)
+        f_0 = objective.get_loss(False)
        if n_bad >= n_tol:
            for i in range(1, max_iter+1):
                alpha = max_region * (i / max_iter)
@@ -51,12 +51,12 @@ class EscapeAnnealing(Module):
                f_star = closure(False)
 
                if math.isfinite(f_star) and f_star < f_0-1e-12:
-                    var.update = None
-                    var.stop = True
-                    var.skip_update = True
-                    return var
+                    objective.updates = None
+                    objective.stop = True
+                    objective.skip_update = True
+                    return objective
 
                params.sub_(pert)
 
        self.global_state['n_bad'] = 0
-        return var
+        return objective
torchzero/modules/misc/gradient_accumulation.py CHANGED

@@ -3,74 +3,6 @@ import torch
 from ...core import Chainable, Module
 
 
-# class GradientAccumulation(Module):
-#     """Uses :code:`n` steps to accumulate gradients, after :code:`n` gradients have been accumulated, they are passed to :code:`modules` and parameters are updates.
-
-#     Accumulating gradients for :code:`n` steps is equivalent to increasing batch size by :code:`n`. Increasing the batch size
-#     is more computationally efficient, but sometimes it is not feasible due to memory constraints.
-
-#     .. note::
-#         Technically this can accumulate any inputs, including updates generated by previous modules. As long as this module is first, it will accumulate the gradients.
-
-#     Args:
-#         modules (Chainable): modules that perform a step every :code:`n` steps using the accumulated gradients.
-#         n (int): number of gradients to accumulate.
-#         mean (bool, optional): if True, uses mean of accumulated gradients, otherwise uses sum. Defaults to True.
-#         stop (bool, optional):
-#             this module prevents next modules from stepping unless :code:`n` gradients have been accumulate. Setting this argument to False disables that. Defaults to True.
-
-#     Examples:
-#         Adam with gradients accumulated for 16 batches.
-
-#         .. code-block:: python
-
-#             opt = tz.Modular(
-#                 model.parameters(),
-#                 tz.m.GradientAccumulation(
-#                     [tz.m.Adam(), tz.m.LR(1e-2)],
-#                     n=16
-#                 )
-#             )
-
-#     """
-#     def __init__(self, modules: Chainable, n: int, mean=True, stop=True):
-#         defaults = dict(n=n, mean=mean, stop=stop)
-#         super().__init__(defaults)
-#         self.set_child('modules', modules)
-
-
-#     @torch.no_grad
-#     def step(self, var):
-#         accumulator = self.get_state(var.params, 'accumulator')
-#         settings = self.defaults
-#         n = settings['n']; mean = settings['mean']; stop = settings['stop']
-#         step = self.global_state['step'] = self.global_state.get('step', 0) + 1
-
-#         # add update to accumulator
-#         torch._foreach_add_(accumulator, var.get_update())
-
-#         # step with accumulated updates
-#         if step % n == 0:
-#             if mean:
-#                 torch._foreach_div_(accumulator, n)
-
-#             var.update = [a.clone() for a in accumulator]
-#             var = self.children['modules'].step(var)
-
-#             # zero accumulator
-#             torch._foreach_zero_(accumulator)
-
-#         else:
-#             # prevent update
-#             if stop:
-#                 var.update = None
-#                 var.stop=True
-#                 var.skip_update=True
-
-#         return var
-
-
-
 
 
 class GradientAccumulation(Module):
    """Uses ``n`` steps to accumulate gradients, after ``n`` gradients have been accumulated, they are passed to :code:`modules` and parameters are updates.
@@ -106,21 +38,21 @@ class GradientAccumulation(Module):
 
 
     @torch.no_grad
-    def step(self, var):
-        accumulator = self.get_state(var.params, 'accumulator')
+    def apply(self, objective):
+        accumulator = self.get_state(objective.params, 'accumulator')
        settings = self.defaults
        n = settings['n']; mean = settings['mean']; stop = settings['stop']
-        step = self.global_state['step'] = self.global_state.get('step', 0) + 1
+        step = self.increment_counter("step", 0)
 
        # add update to accumulator
-        torch._foreach_add_(accumulator, var.get_update())
+        torch._foreach_add_(accumulator, objective.get_updates())
 
        # step with accumulated updates
-        if step % n == 0:
+        if (step + 1) % n == 0:
            if mean:
                torch._foreach_div_(accumulator, n)
 
-            var.update = [a.clone() for a in accumulator]
+            objective.updates = accumulator
 
            # zero accumulator
            self.clear_state_keys('accumulator')
@@ -128,9 +60,9 @@ class GradientAccumulation(Module):
        else:
            # prevent update
            if stop:
-                var.update = None
-                var.stop=True
-                var.skip_update=True
+                objective.updates = None
+                objective.stop=True
+                objective.skip_update=True
 
-        return var
+        return objective
 
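The retained docstring describes accumulation over ``n`` steps as equivalent to an ``n``-times larger batch. A usage sketch based on the docstring's own example; the training loop and closure convention (`closure(backward=True)`) are illustrative assumptions following patterns seen elsewhere in this diff, not documented behaviour:

```python
# Illustrative only: accumulate gradients over 4 micro-batches before Adam steps.
# tz.Modular, tz.m.GradientAccumulation, tz.m.Adam and tz.m.LR come from the
# docstring example retained in the file; the loop and closure are assumptions.
import torch
import torchzero as tz

model = torch.nn.Linear(10, 1)
opt = tz.Modular(
    model.parameters(),
    tz.m.GradientAccumulation([tz.m.Adam(), tz.m.LR(1e-2)], n=4),
)

for _ in range(8):
    x, y = torch.randn(16, 10), torch.randn(16, 1)

    def closure(backward=True):
        loss = torch.nn.functional.mse_loss(model(x), y)
        if backward:
            model.zero_grad()
            loss.backward()
        return loss

    opt.step(closure)  # with stop=True, parameters should only change on every 4th call
```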
torchzero/modules/misc/homotopy.py CHANGED

@@ -13,27 +13,27 @@ class HomotopyBase(Module):
    """transform the loss"""
 
    @torch.no_grad
-    def step(self, var):
-        if var.loss is not None:
-            var.loss = self.loss_transform(var.loss)
+    def apply(self, objective):
+        if objective.loss is not None:
+            objective.loss = self.loss_transform(objective.loss)
 
-        closure = var.closure
+        closure = objective.closure
        if closure is None: raise RuntimeError("SquareHomotopy requires closure")
 
        def homotopy_closure(backward=True):
            if backward:
                with torch.enable_grad():
                    loss = self.loss_transform(closure(False))
-                    grad = torch.autograd.grad(loss, var.params, allow_unused=True)
-                    for p,g in zip(var.params, grad):
+                    grad = torch.autograd.grad(loss, objective.params, allow_unused=True)
+                    for p,g in zip(objective.params, grad):
                        p.grad = g
            else:
                loss = self.loss_transform(closure(False))
 
            return loss
 
-        var.closure = homotopy_closure
-        return var
+        objective.closure = homotopy_closure
+        return objective
 
 class SquareHomotopy(HomotopyBase):
    def __init__(self): super().__init__()
@@ -57,3 +57,11 @@ class LambdaHomotopy(HomotopyBase):
        super().__init__(defaults)
 
    def loss_transform(self, loss): return self.defaults['fn'](loss)
+
+class FixedLossHomotopy(HomotopyBase):
+    def __init__(self, value: float = 1):
+        defaults = dict(value=value)
+        super().__init__(defaults)
+
+    def loss_transform(self, loss): return loss / loss.detach().clip(min=torch.finfo(loss.dtype).tiny * 2)
+
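The hunks above also make the extension point visible: a homotopy module only overrides `loss_transform`, and `HomotopyBase.apply` wraps the closure so the transformed loss is what gets differentiated. A hypothetical subclass for illustration (not part of the package), following the pattern of `SquareHomotopy` and `FixedLossHomotopy`:

```python
# Hypothetical subclass: only loss_transform is overridden, the closure wrapping
# and the apply() plumbing are inherited from HomotopyBase as shown in the hunk.
import torch
from torchzero.modules.misc.homotopy import HomotopyBase

class Log1pHomotopy(HomotopyBase):
    def __init__(self):
        super().__init__()

    def loss_transform(self, loss):
        # compress large losses; the clamp keeps the argument of log1p non-negative
        return torch.log1p(loss.clamp(min=0))
```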