torchzero 0.3.14__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/test_identical.py +2 -2
- tests/test_module_autograd.py +586 -0
- tests/test_objective.py +188 -0
- tests/test_opts.py +47 -36
- tests/test_tensorlist.py +0 -8
- tests/test_utils_optimizer.py +0 -1
- torchzero/__init__.py +1 -1
- torchzero/core/__init__.py +8 -2
- torchzero/core/chain.py +47 -0
- torchzero/core/functional.py +103 -0
- torchzero/core/modular.py +233 -0
- torchzero/core/module.py +132 -643
- torchzero/core/objective.py +948 -0
- torchzero/core/reformulation.py +56 -23
- torchzero/core/transform.py +261 -365
- torchzero/linalg/__init__.py +10 -0
- torchzero/linalg/eigh.py +34 -0
- torchzero/linalg/linalg_utils.py +14 -0
- torchzero/{utils/linalg → linalg}/linear_operator.py +132 -34
- torchzero/linalg/matrix_power.py +28 -0
- torchzero/linalg/orthogonalize.py +95 -0
- torchzero/{utils/linalg → linalg}/qr.py +4 -2
- torchzero/{utils/linalg → linalg}/solve.py +76 -88
- torchzero/linalg/svd.py +20 -0
- torchzero/linalg/torch_linalg.py +168 -0
- torchzero/modules/__init__.py +0 -1
- torchzero/modules/adaptive/__init__.py +1 -1
- torchzero/modules/adaptive/adagrad.py +163 -213
- torchzero/modules/adaptive/adahessian.py +74 -103
- torchzero/modules/adaptive/adam.py +53 -76
- torchzero/modules/adaptive/adan.py +49 -30
- torchzero/modules/adaptive/adaptive_heavyball.py +11 -6
- torchzero/modules/adaptive/aegd.py +12 -12
- torchzero/modules/adaptive/esgd.py +98 -119
- torchzero/modules/adaptive/lion.py +5 -10
- torchzero/modules/adaptive/lmadagrad.py +87 -32
- torchzero/modules/adaptive/mars.py +5 -5
- torchzero/modules/adaptive/matrix_momentum.py +47 -51
- torchzero/modules/adaptive/msam.py +70 -52
- torchzero/modules/adaptive/muon.py +59 -124
- torchzero/modules/adaptive/natural_gradient.py +33 -28
- torchzero/modules/adaptive/orthograd.py +11 -15
- torchzero/modules/adaptive/rmsprop.py +83 -75
- torchzero/modules/adaptive/rprop.py +48 -47
- torchzero/modules/adaptive/sam.py +55 -45
- torchzero/modules/adaptive/shampoo.py +123 -129
- torchzero/modules/adaptive/soap.py +207 -143
- torchzero/modules/adaptive/sophia_h.py +106 -130
- torchzero/modules/clipping/clipping.py +15 -18
- torchzero/modules/clipping/ema_clipping.py +31 -25
- torchzero/modules/clipping/growth_clipping.py +14 -17
- torchzero/modules/conjugate_gradient/cg.py +26 -37
- torchzero/modules/experimental/__init__.py +3 -6
- torchzero/modules/experimental/coordinate_momentum.py +36 -0
- torchzero/modules/experimental/curveball.py +25 -41
- torchzero/modules/experimental/gradmin.py +2 -2
- torchzero/modules/{higher_order → experimental}/higher_order_newton.py +14 -40
- torchzero/modules/experimental/newton_solver.py +22 -53
- torchzero/modules/experimental/newtonnewton.py +20 -17
- torchzero/modules/experimental/reduce_outward_lr.py +7 -7
- torchzero/modules/experimental/scipy_newton_cg.py +21 -24
- torchzero/modules/experimental/spsa1.py +5 -5
- torchzero/modules/experimental/structural_projections.py +1 -4
- torchzero/modules/functional.py +8 -1
- torchzero/modules/grad_approximation/forward_gradient.py +7 -7
- torchzero/modules/grad_approximation/grad_approximator.py +23 -16
- torchzero/modules/grad_approximation/rfdm.py +20 -17
- torchzero/modules/least_squares/gn.py +90 -42
- torchzero/modules/line_search/__init__.py +1 -1
- torchzero/modules/line_search/_polyinterp.py +3 -1
- torchzero/modules/line_search/adaptive.py +3 -3
- torchzero/modules/line_search/backtracking.py +3 -3
- torchzero/modules/line_search/interpolation.py +160 -0
- torchzero/modules/line_search/line_search.py +42 -51
- torchzero/modules/line_search/strong_wolfe.py +5 -5
- torchzero/modules/misc/debug.py +12 -12
- torchzero/modules/misc/escape.py +10 -10
- torchzero/modules/misc/gradient_accumulation.py +10 -78
- torchzero/modules/misc/homotopy.py +16 -8
- torchzero/modules/misc/misc.py +120 -122
- torchzero/modules/misc/multistep.py +63 -61
- torchzero/modules/misc/regularization.py +49 -44
- torchzero/modules/misc/split.py +30 -28
- torchzero/modules/misc/switch.py +37 -32
- torchzero/modules/momentum/averaging.py +14 -14
- torchzero/modules/momentum/cautious.py +34 -28
- torchzero/modules/momentum/momentum.py +11 -11
- torchzero/modules/ops/__init__.py +4 -4
- torchzero/modules/ops/accumulate.py +21 -21
- torchzero/modules/ops/binary.py +67 -66
- torchzero/modules/ops/higher_level.py +19 -19
- torchzero/modules/ops/multi.py +44 -41
- torchzero/modules/ops/reduce.py +26 -23
- torchzero/modules/ops/unary.py +53 -53
- torchzero/modules/ops/utility.py +47 -46
- torchzero/modules/projections/galore.py +1 -1
- torchzero/modules/projections/projection.py +43 -43
- torchzero/modules/quasi_newton/__init__.py +2 -0
- torchzero/modules/quasi_newton/damping.py +1 -1
- torchzero/modules/quasi_newton/lbfgs.py +7 -7
- torchzero/modules/quasi_newton/lsr1.py +7 -7
- torchzero/modules/quasi_newton/quasi_newton.py +25 -16
- torchzero/modules/quasi_newton/sg2.py +292 -0
- torchzero/modules/restarts/restars.py +26 -24
- torchzero/modules/second_order/__init__.py +6 -3
- torchzero/modules/second_order/ifn.py +58 -0
- torchzero/modules/second_order/inm.py +101 -0
- torchzero/modules/second_order/multipoint.py +40 -80
- torchzero/modules/second_order/newton.py +105 -228
- torchzero/modules/second_order/newton_cg.py +102 -154
- torchzero/modules/second_order/nystrom.py +158 -178
- torchzero/modules/second_order/rsn.py +237 -0
- torchzero/modules/smoothing/laplacian.py +13 -12
- torchzero/modules/smoothing/sampling.py +11 -10
- torchzero/modules/step_size/adaptive.py +23 -23
- torchzero/modules/step_size/lr.py +15 -15
- torchzero/modules/termination/termination.py +32 -30
- torchzero/modules/trust_region/cubic_regularization.py +2 -2
- torchzero/modules/trust_region/levenberg_marquardt.py +25 -28
- torchzero/modules/trust_region/trust_cg.py +1 -1
- torchzero/modules/trust_region/trust_region.py +27 -22
- torchzero/modules/variance_reduction/svrg.py +21 -18
- torchzero/modules/weight_decay/__init__.py +2 -1
- torchzero/modules/weight_decay/reinit.py +83 -0
- torchzero/modules/weight_decay/weight_decay.py +12 -13
- torchzero/modules/wrappers/optim_wrapper.py +57 -50
- torchzero/modules/zeroth_order/cd.py +9 -6
- torchzero/optim/root.py +3 -3
- torchzero/optim/utility/split.py +2 -1
- torchzero/optim/wrappers/directsearch.py +27 -63
- torchzero/optim/wrappers/fcmaes.py +14 -35
- torchzero/optim/wrappers/mads.py +11 -31
- torchzero/optim/wrappers/moors.py +66 -0
- torchzero/optim/wrappers/nevergrad.py +4 -4
- torchzero/optim/wrappers/nlopt.py +31 -25
- torchzero/optim/wrappers/optuna.py +6 -13
- torchzero/optim/wrappers/pybobyqa.py +124 -0
- torchzero/optim/wrappers/scipy/__init__.py +7 -0
- torchzero/optim/wrappers/scipy/basin_hopping.py +117 -0
- torchzero/optim/wrappers/scipy/brute.py +48 -0
- torchzero/optim/wrappers/scipy/differential_evolution.py +80 -0
- torchzero/optim/wrappers/scipy/direct.py +69 -0
- torchzero/optim/wrappers/scipy/dual_annealing.py +115 -0
- torchzero/optim/wrappers/scipy/experimental.py +141 -0
- torchzero/optim/wrappers/scipy/minimize.py +151 -0
- torchzero/optim/wrappers/scipy/sgho.py +111 -0
- torchzero/optim/wrappers/wrapper.py +121 -0
- torchzero/utils/__init__.py +7 -25
- torchzero/utils/compile.py +2 -2
- torchzero/utils/derivatives.py +112 -88
- torchzero/utils/optimizer.py +4 -77
- torchzero/utils/python_tools.py +31 -0
- torchzero/utils/tensorlist.py +11 -5
- torchzero/utils/thoad_tools.py +68 -0
- {torchzero-0.3.14.dist-info → torchzero-0.4.0.dist-info}/METADATA +1 -1
- torchzero-0.4.0.dist-info/RECORD +191 -0
- tests/test_vars.py +0 -185
- torchzero/modules/experimental/momentum.py +0 -160
- torchzero/modules/higher_order/__init__.py +0 -1
- torchzero/optim/wrappers/scipy.py +0 -572
- torchzero/utils/linalg/__init__.py +0 -12
- torchzero/utils/linalg/matrix_funcs.py +0 -87
- torchzero/utils/linalg/orthogonalize.py +0 -12
- torchzero/utils/linalg/svd.py +0 -20
- torchzero/utils/ops.py +0 -10
- torchzero-0.3.14.dist-info/RECORD +0 -167
- /torchzero/{utils/linalg → linalg}/benchmark.py +0 -0
- {torchzero-0.3.14.dist-info → torchzero-0.4.0.dist-info}/WHEEL +0 -0
- {torchzero-0.3.14.dist-info → torchzero-0.4.0.dist-info}/top_level.txt +0 -0
torchzero/modules/quasi_newton/sg2.py (new file)

@@ -0,0 +1,292 @@
+import torch
+
+from ...core import Module, Chainable, step
+from ...utils import TensorList, vec_to_tensors
+from ..second_order.newton import _newton_step, _get_H
+
+def sg2_(
+    delta_g: torch.Tensor,
+    cd: torch.Tensor,
+) -> torch.Tensor:
+    """cd is c * perturbation, and must be multiplied by two if hessian estimate is two-sided
+    (or divide delta_g by two)."""
+
+    M = torch.outer(1.0 / cd, delta_g)
+    H_hat = 0.5 * (M + M.T)
+
+    return H_hat
+
+
+
+class SG2(Module):
+    """second-order stochastic gradient
+
+    SG2 with line search
+    ```python
+    opt = tz.Modular(
+        model.parameters(),
+        tz.m.SG2(),
+        tz.m.Backtracking()
+    )
+    ```
+
+    SG2 with trust region
+    ```python
+    opt = tz.Modular(
+        model.parameters(),
+        tz.m.LevenbergMarquardt(tz.m.SG2()),
+    )
+    ```
+
+    """
+
+    def __init__(
+        self,
+        n_samples: int = 1,
+        h: float = 1e-2,
+        beta: float | None = None,
+        damping: float = 0,
+        eigval_fn=None,
+        one_sided: bool = False, # one-sided hessian
+        use_lstsq: bool = True,
+        seed=None,
+        inner: Chainable | None = None,
+    ):
+        defaults = dict(n_samples=n_samples, h=h, beta=beta, damping=damping, eigval_fn=eigval_fn, one_sided=one_sided, seed=seed, use_lstsq=use_lstsq)
+        super().__init__(defaults)
+
+        if inner is not None: self.set_child('inner', inner)
+
+    @torch.no_grad
+    def update(self, objective):
+        k = self.global_state.get('step', 0) + 1
+        self.global_state["step"] = k
+
+        params = TensorList(objective.params)
+        closure = objective.closure
+        if closure is None:
+            raise RuntimeError("closure is required for SG2")
+        generator = self.get_generator(params[0].device, self.defaults["seed"])
+
+        h = self.get_settings(params, "h")
+        x_0 = params.clone()
+        n_samples = self.defaults["n_samples"]
+        H_hat = None
+
+        for i in range(n_samples):
+            # generate perturbation
+            cd = params.rademacher_like(generator=generator).mul_(h)
+
+            # one sided
+            if self.defaults["one_sided"]:
+                g_0 = TensorList(objective.get_grads())
+                params.add_(cd)
+                closure()
+
+                g_p = params.grad.fill_none_(params)
+                delta_g = (g_p - g_0) * 2
+
+            # two sided
+            else:
+                params.add_(cd)
+                closure()
+                g_p = params.grad.fill_none_(params)
+
+                params.copy_(x_0)
+                params.sub_(cd)
+                closure()
+                g_n = params.grad.fill_none_(params)
+
+                delta_g = g_p - g_n
+
+            # restore params
+            params.set_(x_0)
+
+            # compute H hat
+            H_i = sg2_(
+                delta_g = delta_g.to_vec(),
+                cd = cd.to_vec(),
+            )
+
+            if H_hat is None: H_hat = H_i
+            else: H_hat += H_i
+
+        assert H_hat is not None
+        if n_samples > 1: H_hat /= n_samples
+
+        # update H
+        H = self.global_state.get("H", None)
+        if H is None: H = H_hat
+        else:
+            beta = self.defaults["beta"]
+            if beta is None: beta = k / (k+1)
+            H.lerp_(H_hat, 1-beta)
+
+        self.global_state["H"] = H
+
+
+    @torch.no_grad
+    def apply(self, objective):
+        dir = _newton_step(
+            objective=objective,
+            H = self.global_state["H"],
+            damping = self.defaults["damping"],
+            inner = self.children.get("inner", None),
+            H_tfm=None,
+            eigval_fn=self.defaults["eigval_fn"],
+            use_lstsq=self.defaults["use_lstsq"],
+            g_proj=None,
+        )
+
+        objective.updates = vec_to_tensors(dir, objective.params)
+        return objective
+
+    def get_H(self,objective=...):
+        return _get_H(self.global_state["H"], self.defaults["eigval_fn"])
+
+
+
+
+# two sided
+# we have g via x + d, x - d
+# H via g(x + d), g(x - d)
+# 1 is x, x+2d
+# 2 is x, x-2d
+# 5 evals in total
+
+# one sided
+# g via x, x + d
+# 1 is x, x + d
+# 2 is x, x - d
+# 3 evals and can use two sided for g_0
+
+class SPSA2(Module):
+    """second-order SPSA
+
+    SPSA2 with line search
+    ```python
+    opt = tz.Modular(
+        model.parameters(),
+        tz.m.SPSA2(),
+        tz.m.Backtracking()
+    )
+    ```
+
+    SPSA2 with trust region
+    ```python
+    opt = tz.Modular(
+        model.parameters(),
+        tz.m.LevenbergMarquardt(tz.m.SPSA2()),
+    )
+    ```
+    """
+
+    def __init__(
+        self,
+        n_samples: int = 1,
+        h: float = 1e-2,
+        beta: float | None = None,
+        damping: float = 0,
+        eigval_fn=None,
+        use_lstsq: bool = True,
+        seed=None,
+        inner: Chainable | None = None,
+    ):
+        defaults = dict(n_samples=n_samples, h=h, beta=beta, damping=damping, eigval_fn=eigval_fn, seed=seed, use_lstsq=use_lstsq)
+        super().__init__(defaults)
+
+        if inner is not None: self.set_child('inner', inner)
+
+    @torch.no_grad
+    def update(self, objective):
+        k = self.global_state.get('step', 0) + 1
+        self.global_state["step"] = k
+
+        params = TensorList(objective.params)
+        closure = objective.closure
+        if closure is None:
+            raise RuntimeError("closure is required for SPSA2")
+
+        generator = self.get_generator(params[0].device, self.defaults["seed"])
+
+        h = self.get_settings(params, "h")
+        x_0 = params.clone()
+        n_samples = self.defaults["n_samples"]
+        H_hat = None
+        g_0 = None
+
+        for i in range(n_samples):
+            # perturbations for g and H
+            cd_g = params.rademacher_like(generator=generator).mul_(h)
+            cd_H = params.rademacher_like(generator=generator).mul_(h)
+
+            # evaluate 4 points
+            x_p = x_0 + cd_g
+            x_n = x_0 - cd_g
+
+            params.set_(x_p)
+            f_p = closure(False)
+            params.add_(cd_H)
+            f_pp = closure(False)
+
+            params.set_(x_n)
+            f_n = closure(False)
+            params.add_(cd_H)
+            f_np = closure(False)
+
+            g_p_vec = (f_pp - f_p) / cd_H
+            g_n_vec = (f_np - f_n) / cd_H
+            delta_g = g_p_vec - g_n_vec
+
+            # restore params
+            params.set_(x_0)
+
+            # compute grad
+            g_i = (f_p - f_n) / (2 * cd_g)
+            if g_0 is None: g_0 = g_i
+            else: g_0 += g_i
+
+            # compute H hat
+            H_i = sg2_(
+                delta_g = delta_g.to_vec().div_(2.0),
+                cd = cd_g.to_vec(), # The interval is measured by the original 'cd'
+            )
+            if H_hat is None: H_hat = H_i
+            else: H_hat += H_i
+
+        assert g_0 is not None and H_hat is not None
+        if n_samples > 1:
+            g_0 /= n_samples
+            H_hat /= n_samples
+
+        # set grad to approximated grad
+        objective.grads = g_0
+
+        # update H
+        H = self.global_state.get("H", None)
+        if H is None: H = H_hat
+        else:
+            beta = self.defaults["beta"]
+            if beta is None: beta = k / (k+1)
+            H.lerp_(H_hat, 1-beta)
+
+        self.global_state["H"] = H
+
+    @torch.no_grad
+    def apply(self, objective):
+        dir = _newton_step(
+            objective=objective,
+            H = self.global_state["H"],
+            damping = self.defaults["damping"],
+            inner = self.children.get("inner", None),
+            H_tfm=None,
+            eigval_fn=self.defaults["eigval_fn"],
+            use_lstsq=self.defaults["use_lstsq"],
+            g_proj=None,
+        )
+
+        objective.updates = vec_to_tensors(dir, objective.params)
+        return objective
+
+    def get_H(self,objective=...):
+        return _get_H(self.global_state["H"], self.defaults["eigval_fn"])
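For reference, writing cΔ for the scaled Rademacher perturbation (`cd`) and δg for the gradient difference (`delta_g`), the estimate formed by `sg2_` above is

$$M_{ij} = \frac{\delta g_j}{c\,\Delta_i}, \qquad \hat H = \tfrac{1}{2}\bigl(M + M^\top\bigr),$$

and with `beta` left at its default of $k/(k+1)$, the call `H.lerp_(H_hat, 1 - beta)` keeps a running average of the per-step estimates, $H_k = \tfrac{k}{k+1} H_{k-1} + \tfrac{1}{k+1}\hat H_k$.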
torchzero/modules/restarts/restars.py

@@ -4,12 +4,14 @@ from typing import final, Literal, cast
 
 import torch
 
-from ...core import Chainable, Module,
+from ...core import Chainable, Module, Objective
 from ...utils import TensorList
 from ..termination import TerminationCriteriaBase
 
 def _reset_except_self(optimizer, var, self: Module):
-    for m in optimizer.unrolled_modules:
+    for m in optimizer.unrolled_modules:
+        if m is not self:
+            m.reset()
 
 class RestartStrategyBase(Module, ABC):
     """Base class for restart strategies.
@@ -24,7 +26,7 @@ class RestartStrategyBase(Module, ABC):
         self.set_child('modules', modules)
 
     @abstractmethod
-    def should_reset(self, var:
+    def should_reset(self, var: Objective) -> bool:
         """returns whether reset should occur"""
 
     def _reset_on_condition(self, var):
@@ -39,23 +41,23 @@ class RestartStrategyBase(Module, ABC):
         return modules
 
     @final
-    def update(self,
-        modules = self._reset_on_condition(
+    def update(self, objective):
+        modules = self._reset_on_condition(objective)
         if modules is not None:
-            modules.update(
+            modules.update(objective)
 
     @final
-    def apply(self,
+    def apply(self, objective):
         # don't check here because it was check in `update`
         modules = self.children.get('modules', None)
-        if modules is None: return
-        return modules.apply(
+        if modules is None: return objective
+        return modules.apply(objective.clone(clone_updates=False))
 
     @final
-    def step(self,
-        modules = self._reset_on_condition(
-        if modules is None: return
-        return modules.step(
+    def step(self, objective):
+        modules = self._reset_on_condition(objective)
+        if modules is None: return objective
+        return modules.step(objective.clone(clone_updates=False))
 
 
 
@@ -170,7 +172,7 @@ class PowellRestart(RestartStrategyBase):
         super().__init__(defaults, modules)
 
     def should_reset(self, var):
-        g = TensorList(var.
+        g = TensorList(var.get_grads())
         cond1 = self.defaults['cond1']; cond2 = self.defaults['cond2']
 
         # -------------------------------- initialize -------------------------------- #
@@ -192,7 +194,7 @@ class PowellRestart(RestartStrategyBase):
 
         # ------------------------------- 2nd condition ------------------------------ #
         if (cond2 is not None) and (not reset):
-            d_g = TensorList(var.
+            d_g = TensorList(var.get_updates()).dot(g)
            if (-1-cond2) * g_g < d_g < (-1 + cond2) * g_g:
                reset = True
 
@@ -229,17 +231,17 @@ class BirginMartinezRestart(Module):
 
         self.set_child("module", module)
 
-    def update(self,
+    def update(self, objective):
         module = self.children['module']
-        module.update(
+        module.update(objective)
 
-    def apply(self,
+    def apply(self, objective):
         module = self.children['module']
-
+        objective = module.apply(objective.clone(clone_updates=False))
 
         cond = self.defaults['cond']
-        g = TensorList(
-        d = TensorList(
+        g = TensorList(objective.get_grads())
+        d = TensorList(objective.get_updates())
         d_g = d.dot(g)
         d_norm = d.global_vector_norm()
         g_norm = g.global_vector_norm()
@@ -247,7 +249,7 @@ class BirginMartinezRestart(Module):
         # d in our case is same direction as g so it has a minus sign
         if -d_g > -cond * d_norm * g_norm:
             module.reset()
-
-            return
+            objective.updates = g.clone()
+            return objective
 
-        return
+        return objective
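In `BirginMartinezRestart.apply` above, `d` points in the same direction as `g` (hence the minus signs in the code), so the test `-d_g > -cond * d_norm * g_norm` is equivalent to

$$d^\top g < \text{cond} \cdot \lVert d\rVert \, \lVert g\rVert,$$

i.e. the wrapped module is reset, and the update falls back to `g.clone()`, once the cosine of the angle between `d` and `g` drops below `cond`.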
torchzero/modules/second_order/__init__.py

@@ -1,4 +1,7 @@
-from .
+from .ifn import InverseFreeNewton
+from .inm import ImprovedNewton
+from .multipoint import SixthOrder3P, SixthOrder3PM2, SixthOrder5P, TwoPointNewton
+from .newton import Newton
 from .newton_cg import NewtonCG, NewtonCGSteihaug
-from .nystrom import
-from .
+from .nystrom import NystromPCG, NystromSketchAndSolve
+from .rsn import SubspaceNewton
torchzero/modules/second_order/ifn.py (new file)

@@ -0,0 +1,58 @@
+import torch
+
+from ...core import Chainable, Transform, HessianMethod
+from ...utils import TensorList, vec_to_tensors
+from ...linalg.linear_operator import DenseWithInverse
+
+
+class InverseFreeNewton(Transform):
+    """Inverse-free newton's method
+
+    Reference
+    [Massalski, Marcin, and Magdalena Nockowska-Rosiak. "INVERSE-FREE NEWTON'S METHOD." Journal of Applied Analysis & Computation 15.4 (2025): 2238-2257.](https://www.jaac-online.com/article/doi/10.11948/20240428)
+    """
+    def __init__(
+        self,
+        update_freq: int = 1,
+        hessian_method: HessianMethod = "batched_autograd",
+        h: float = 1e-3,
+        inner: Chainable | None = None,
+    ):
+        defaults = dict(hessian_method=hessian_method, h=h)
+        super().__init__(defaults, update_freq=update_freq, inner=inner)
+
+    @torch.no_grad
+    def update_states(self, objective, states, settings):
+        fs = settings[0]
+
+        _, _, H = objective.hessian(
+            hessian_method=fs['hessian_method'],
+            h=fs['h'],
+            at_x0=True
+        )
+
+        self.global_state["H"] = H
+
+        # inverse free part
+        if 'Y' not in self.global_state:
+            num = H.T
+            denom = (torch.linalg.norm(H, 1) * torch.linalg.norm(H, float('inf'))) # pylint:disable=not-callable
+
+            finfo = torch.finfo(H.dtype)
+            self.global_state['Y'] = num.div_(denom.clip(min=finfo.tiny * 2, max=finfo.max / 2))
+
+        else:
+            Y = self.global_state['Y']
+            I2 = torch.eye(Y.size(0), device=Y.device, dtype=Y.dtype).mul_(2)
+            I2 -= H @ Y
+            self.global_state['Y'] = Y @ I2
+
+
+    def apply_states(self, objective, states, settings):
+        Y = self.global_state["Y"]
+        g = torch.cat([t.ravel() for t in objective.get_updates()])
+        objective.updates = vec_to_tensors(Y@g, objective.params)
+        return objective
+
+    def get_H(self,objective=...):
+        return DenseWithInverse(A = self.global_state["H"], A_inv=self.global_state["Y"])
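`InverseFreeNewton.update_states` above never inverts `H`: `Y` is seeded with $H^\top / (\lVert H\rVert_1 \lVert H\rVert_\infty)$ and then refined once per step with $Y \leftarrow Y(2I - HY)$, a Newton–Schulz-style iteration that converges to $H^{-1}$ for nonsingular $H$. A minimal standalone sketch of that iteration (illustrative only, not the torchzero API):

```python
import torch

# Sketch of the iteration used in InverseFreeNewton.update_states:
# Y_{k+1} = Y_k (2I - H Y_k), seeded with Y_0 = H^T / (||H||_1 * ||H||_inf).
torch.manual_seed(0)
A = torch.randn(5, 5)
H = A @ A.T + 5 * torch.eye(5)   # well-conditioned SPD test matrix

Y = H.T / (torch.linalg.norm(H, 1) * torch.linalg.norm(H, float("inf")))
I = torch.eye(5)
for _ in range(20):              # converges quadratically once ||I - HY|| < 1
    Y = Y @ (2 * I - H @ Y)

print(torch.allclose(Y @ H, I, atol=1e-4))  # True
```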
torchzero/modules/second_order/inm.py (new file)

@@ -0,0 +1,101 @@
+from collections.abc import Callable
+
+import torch
+
+from ...core import Chainable, Transform, HessianMethod
+from ...utils import TensorList, vec_to_tensors, unpack_states
+from ..functional import safe_clip
+from .newton import _get_H, _newton_step
+
+@torch.no_grad
+def inm(f:torch.Tensor, J:torch.Tensor, s:torch.Tensor, y:torch.Tensor):
+
+    yy = safe_clip(y.dot(y))
+    ss = safe_clip(s.dot(s))
+
+    term1 = y.dot(y - J@s) / yy
+    FbT = f.outer(s).mul_(term1 / ss)
+
+    P = FbT.add_(J)
+    return P
+
+def _eigval_fn(J: torch.Tensor, fn) -> torch.Tensor:
+    if fn is None: return J
+    L, Q = torch.linalg.eigh(J) # pylint:disable=not-callable
+    return (Q * L.unsqueeze(-2)) @ Q.mH
+
+class ImprovedNewton(Transform):
+    """Improved Newton's Method (INM).
+
+    Reference:
+    [Saheya, B., et al. "A new Newton-like method for solving nonlinear equations." SpringerPlus 5.1 (2016): 1269.](https://d-nb.info/1112813721/34)
+    """
+
+    def __init__(
+        self,
+        damping: float = 0,
+        use_lstsq: bool = False,
+        update_freq: int = 1,
+        H_tfm: Callable[[torch.Tensor, torch.Tensor], tuple[torch.Tensor, bool]] | Callable[[torch.Tensor, torch.Tensor], torch.Tensor] | None = None,
+        eigval_fn: Callable[[torch.Tensor], torch.Tensor] | None = None,
+        hessian_method: HessianMethod = "batched_autograd",
+        h: float = 1e-3,
+        inner: Chainable | None = None,
+    ):
+        defaults = locals().copy()
+        del defaults['self'], defaults['inner'], defaults["update_freq"]
+        super().__init__(defaults, update_freq=update_freq, inner=inner, )
+
+    @torch.no_grad
+    def update_states(self, objective, states, settings):
+        fs = settings[0]
+
+        _, f_list, J = objective.hessian(
+            hessian_method=fs['hessian_method'],
+            h=fs['h'],
+            at_x0=True
+        )
+        if f_list is None: f_list = objective.get_grads()
+
+        f = torch.cat([t.ravel() for t in f_list])
+        J = _eigval_fn(J, fs["eigval_fn"])
+
+        x_list = TensorList(objective.params)
+        f_list = TensorList(objective.get_grads())
+        x_prev, f_prev = unpack_states(states, objective.params, "x_prev", "f_prev", cls=TensorList)
+
+        # initialize on 1st step, do Newton step
+        if "P" not in self.global_state:
+            x_prev.copy_(x_list)
+            f_prev.copy_(f_list)
+            self.global_state["P"] = J
+            return
+
+        # INM update
+        s_list = x_list - x_prev
+        y_list = f_list - f_prev
+        x_prev.copy_(x_list)
+        f_prev.copy_(f_list)
+
+        self.global_state["P"] = inm(f, J, s=s_list.to_vec(), y=y_list.to_vec())
+
+
+    @torch.no_grad
+    def apply_states(self, objective, states, settings):
+        fs = settings[0]
+
+        update = _newton_step(
+            objective = objective,
+            H = self.global_state["P"],
+            damping = fs["damping"],
+            H_tfm = fs["H_tfm"],
+            eigval_fn = None, # it is applied in `update`
+            use_lstsq = fs["use_lstsq"],
+        )
+
+        objective.updates = vec_to_tensors(update, objective.params)
+
+        return objective
+
+    def get_H(self,objective=...):
+        return _get_H(self.global_state["P"], eigval_fn=None)
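For reference, with $s = x_k - x_{k-1}$, $y = f_k - f_{k-1}$ and $f$ the current gradient (residual) vector, the matrix built by `inm` above is the rank-one Jacobian correction

$$P_k = J_k + \frac{y^\top (y - J_k s)}{y^\top y} \, \frac{f\, s^\top}{s^\top s},$$

which `apply_states` then hands to `_newton_step` in place of the plain Hessian/Jacobian.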
|