torchzero 0.3.15__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/test_identical.py +22 -22
- tests/test_module_autograd.py +586 -0
- tests/test_objective.py +188 -0
- tests/test_opts.py +225 -214
- tests/test_tensorlist.py +0 -8
- tests/test_utils_optimizer.py +0 -1
- torchzero/__init__.py +2 -2
- torchzero/core/__init__.py +7 -4
- torchzero/core/chain.py +20 -23
- torchzero/core/functional.py +90 -24
- torchzero/core/modular.py +53 -57
- torchzero/core/module.py +132 -52
- torchzero/core/objective.py +948 -0
- torchzero/core/reformulation.py +55 -24
- torchzero/core/transform.py +261 -367
- torchzero/linalg/__init__.py +11 -0
- torchzero/linalg/eigh.py +253 -0
- torchzero/linalg/linalg_utils.py +14 -0
- torchzero/{utils/linalg → linalg}/linear_operator.py +99 -49
- torchzero/linalg/matrix_power.py +28 -0
- torchzero/linalg/orthogonalize.py +93 -0
- torchzero/{utils/linalg → linalg}/qr.py +16 -2
- torchzero/{utils/linalg → linalg}/solve.py +74 -88
- torchzero/linalg/svd.py +47 -0
- torchzero/linalg/torch_linalg.py +168 -0
- torchzero/modules/__init__.py +4 -3
- torchzero/modules/adaptive/__init__.py +11 -3
- torchzero/modules/adaptive/adagrad.py +167 -217
- torchzero/modules/adaptive/adahessian.py +76 -105
- torchzero/modules/adaptive/adam.py +53 -76
- torchzero/modules/adaptive/adan.py +50 -31
- torchzero/modules/adaptive/adaptive_heavyball.py +12 -7
- torchzero/modules/adaptive/aegd.py +12 -12
- torchzero/modules/adaptive/esgd.py +98 -119
- torchzero/modules/adaptive/ggt.py +186 -0
- torchzero/modules/adaptive/lion.py +7 -11
- torchzero/modules/adaptive/lre_optimizers.py +299 -0
- torchzero/modules/adaptive/mars.py +7 -7
- torchzero/modules/adaptive/matrix_momentum.py +48 -52
- torchzero/modules/adaptive/msam.py +71 -53
- torchzero/modules/adaptive/muon.py +67 -129
- torchzero/modules/adaptive/natural_gradient.py +63 -41
- torchzero/modules/adaptive/orthograd.py +11 -15
- torchzero/modules/adaptive/psgd/__init__.py +5 -0
- torchzero/modules/adaptive/psgd/_psgd_utils.py +37 -0
- torchzero/modules/adaptive/psgd/psgd.py +1390 -0
- torchzero/modules/adaptive/psgd/psgd_dense_newton.py +174 -0
- torchzero/modules/adaptive/psgd/psgd_kron_newton.py +203 -0
- torchzero/modules/adaptive/psgd/psgd_kron_whiten.py +185 -0
- torchzero/modules/adaptive/psgd/psgd_lra_newton.py +118 -0
- torchzero/modules/adaptive/psgd/psgd_lra_whiten.py +116 -0
- torchzero/modules/adaptive/rmsprop.py +83 -75
- torchzero/modules/adaptive/rprop.py +48 -47
- torchzero/modules/adaptive/sam.py +55 -45
- torchzero/modules/adaptive/shampoo.py +149 -130
- torchzero/modules/adaptive/soap.py +207 -143
- torchzero/modules/adaptive/sophia_h.py +106 -130
- torchzero/modules/clipping/clipping.py +22 -25
- torchzero/modules/clipping/ema_clipping.py +31 -25
- torchzero/modules/clipping/growth_clipping.py +14 -17
- torchzero/modules/conjugate_gradient/cg.py +27 -38
- torchzero/modules/experimental/__init__.py +7 -6
- torchzero/modules/experimental/adanystrom.py +258 -0
- torchzero/modules/experimental/common_directions_whiten.py +142 -0
- torchzero/modules/experimental/coordinate_momentum.py +36 -0
- torchzero/modules/experimental/cubic_adam.py +160 -0
- torchzero/modules/experimental/curveball.py +25 -41
- torchzero/modules/experimental/eigen_sr1.py +182 -0
- torchzero/modules/experimental/eigengrad.py +207 -0
- torchzero/modules/experimental/gradmin.py +2 -2
- torchzero/modules/experimental/higher_order_newton.py +14 -40
- torchzero/modules/experimental/l_infinity.py +1 -1
- torchzero/modules/experimental/matrix_nag.py +122 -0
- torchzero/modules/experimental/newton_solver.py +23 -54
- torchzero/modules/experimental/newtonnewton.py +45 -48
- torchzero/modules/experimental/reduce_outward_lr.py +7 -7
- torchzero/modules/experimental/scipy_newton_cg.py +21 -24
- torchzero/modules/experimental/spsa1.py +3 -3
- torchzero/modules/experimental/structural_projections.py +1 -4
- torchzero/modules/grad_approximation/fdm.py +2 -2
- torchzero/modules/grad_approximation/forward_gradient.py +7 -7
- torchzero/modules/grad_approximation/grad_approximator.py +23 -16
- torchzero/modules/grad_approximation/rfdm.py +24 -21
- torchzero/modules/least_squares/gn.py +121 -50
- torchzero/modules/line_search/backtracking.py +4 -4
- torchzero/modules/line_search/line_search.py +33 -33
- torchzero/modules/line_search/strong_wolfe.py +4 -4
- torchzero/modules/misc/debug.py +12 -12
- torchzero/modules/misc/escape.py +10 -10
- torchzero/modules/misc/gradient_accumulation.py +11 -79
- torchzero/modules/misc/homotopy.py +16 -8
- torchzero/modules/misc/misc.py +121 -123
- torchzero/modules/misc/multistep.py +52 -53
- torchzero/modules/misc/regularization.py +49 -44
- torchzero/modules/misc/split.py +31 -29
- torchzero/modules/misc/switch.py +37 -32
- torchzero/modules/momentum/averaging.py +14 -14
- torchzero/modules/momentum/cautious.py +37 -31
- torchzero/modules/momentum/momentum.py +12 -12
- torchzero/modules/ops/__init__.py +4 -4
- torchzero/modules/ops/accumulate.py +21 -21
- torchzero/modules/ops/binary.py +67 -66
- torchzero/modules/ops/higher_level.py +20 -20
- torchzero/modules/ops/multi.py +44 -41
- torchzero/modules/ops/reduce.py +26 -23
- torchzero/modules/ops/unary.py +53 -53
- torchzero/modules/ops/utility.py +47 -46
- torchzero/modules/{functional.py → opt_utils.py} +1 -1
- torchzero/modules/projections/galore.py +1 -1
- torchzero/modules/projections/projection.py +46 -43
- torchzero/modules/quasi_newton/__init__.py +1 -1
- torchzero/modules/quasi_newton/damping.py +2 -2
- torchzero/modules/quasi_newton/diagonal_quasi_newton.py +1 -1
- torchzero/modules/quasi_newton/lbfgs.py +10 -10
- torchzero/modules/quasi_newton/lsr1.py +10 -10
- torchzero/modules/quasi_newton/quasi_newton.py +54 -39
- torchzero/modules/quasi_newton/sg2.py +69 -205
- torchzero/modules/restarts/restars.py +39 -37
- torchzero/modules/second_order/__init__.py +2 -2
- torchzero/modules/second_order/ifn.py +31 -62
- torchzero/modules/second_order/inm.py +57 -53
- torchzero/modules/second_order/multipoint.py +40 -80
- torchzero/modules/second_order/newton.py +165 -196
- torchzero/modules/second_order/newton_cg.py +105 -157
- torchzero/modules/second_order/nystrom.py +216 -185
- torchzero/modules/second_order/rsn.py +132 -125
- torchzero/modules/smoothing/laplacian.py +13 -12
- torchzero/modules/smoothing/sampling.py +10 -10
- torchzero/modules/step_size/adaptive.py +24 -24
- torchzero/modules/step_size/lr.py +17 -17
- torchzero/modules/termination/termination.py +32 -30
- torchzero/modules/trust_region/cubic_regularization.py +3 -3
- torchzero/modules/trust_region/levenberg_marquardt.py +25 -28
- torchzero/modules/trust_region/trust_cg.py +2 -2
- torchzero/modules/trust_region/trust_region.py +27 -22
- torchzero/modules/variance_reduction/svrg.py +23 -21
- torchzero/modules/weight_decay/__init__.py +2 -1
- torchzero/modules/weight_decay/reinit.py +83 -0
- torchzero/modules/weight_decay/weight_decay.py +17 -18
- torchzero/modules/wrappers/optim_wrapper.py +14 -14
- torchzero/modules/zeroth_order/cd.py +10 -7
- torchzero/optim/mbs.py +291 -0
- torchzero/optim/root.py +3 -3
- torchzero/optim/utility/split.py +2 -1
- torchzero/optim/wrappers/directsearch.py +27 -63
- torchzero/optim/wrappers/fcmaes.py +14 -35
- torchzero/optim/wrappers/mads.py +11 -31
- torchzero/optim/wrappers/moors.py +66 -0
- torchzero/optim/wrappers/nevergrad.py +4 -13
- torchzero/optim/wrappers/nlopt.py +31 -25
- torchzero/optim/wrappers/optuna.py +8 -13
- torchzero/optim/wrappers/pybobyqa.py +124 -0
- torchzero/optim/wrappers/scipy/__init__.py +7 -0
- torchzero/optim/wrappers/scipy/basin_hopping.py +117 -0
- torchzero/optim/wrappers/scipy/brute.py +48 -0
- torchzero/optim/wrappers/scipy/differential_evolution.py +80 -0
- torchzero/optim/wrappers/scipy/direct.py +69 -0
- torchzero/optim/wrappers/scipy/dual_annealing.py +115 -0
- torchzero/optim/wrappers/scipy/experimental.py +141 -0
- torchzero/optim/wrappers/scipy/minimize.py +151 -0
- torchzero/optim/wrappers/scipy/sgho.py +111 -0
- torchzero/optim/wrappers/wrapper.py +121 -0
- torchzero/utils/__init__.py +7 -25
- torchzero/utils/benchmarks/__init__.py +0 -0
- torchzero/utils/benchmarks/logistic.py +122 -0
- torchzero/utils/compile.py +2 -2
- torchzero/utils/derivatives.py +97 -73
- torchzero/utils/optimizer.py +4 -77
- torchzero/utils/python_tools.py +31 -0
- torchzero/utils/tensorlist.py +11 -5
- torchzero/utils/thoad_tools.py +68 -0
- {torchzero-0.3.15.dist-info → torchzero-0.4.1.dist-info}/METADATA +1 -1
- torchzero-0.4.1.dist-info/RECORD +209 -0
- tests/test_vars.py +0 -185
- torchzero/core/var.py +0 -376
- torchzero/modules/adaptive/lmadagrad.py +0 -186
- torchzero/modules/experimental/momentum.py +0 -160
- torchzero/optim/wrappers/scipy.py +0 -572
- torchzero/utils/linalg/__init__.py +0 -12
- torchzero/utils/linalg/matrix_funcs.py +0 -87
- torchzero/utils/linalg/orthogonalize.py +0 -12
- torchzero/utils/linalg/svd.py +0 -20
- torchzero/utils/ops.py +0 -10
- torchzero-0.3.15.dist-info/RECORD +0 -175
- /torchzero/{utils/linalg → linalg}/benchmark.py +0 -0
- {torchzero-0.3.15.dist-info → torchzero-0.4.1.dist-info}/WHEEL +0 -0
- {torchzero-0.3.15.dist-info → torchzero-0.4.1.dist-info}/top_level.txt +0 -0
torchzero/modules/second_order/newton_cg.py

````diff
@@ -1,16 +1,16 @@
-
-import math
-from typing import Literal, cast
+
 from operator import itemgetter
+from typing import Literal, cast
+
 import torch
 
-from ...core import Chainable,
-from ...utils import TensorList,
-from ...
-from ...utils.linalg.solve import cg, minres, find_within_trust_radius
+from ...core import Chainable, Transform, HVPMethod
+from ...utils import TensorList, tofloat, unpack_dicts, unpack_states
+from ...linalg.solve import cg, find_within_trust_radius, minres
 from ..trust_region.trust_region import default_radius
 
-class NewtonCG(Module):
+
+class NewtonCG(Transform):
     """Newton's method with a matrix-free conjugate gradient or minimial-residual solver.
 
     Notes:
@@ -37,17 +37,14 @@ class NewtonCG(Module):
         hvp_method (str, optional):
             Determines how Hessian-vector products are evaluated.
 
-            - ``"autograd"
-
-            - ``"
-
-
-              more accurate HVP approximation. This requires two extra
-              gradient evaluations.
-            Defaults to "autograd".
+            - ``"autograd"`` - uses autograd hessian-vector products. If multiple hessian-vector products are evaluated, uses a for-loop.
+            - ``"fd_forward"`` - uses gradient finite difference approximation with a less accurate forward formula which requires one extra gradient evaluation per hessian-vector product.
+            - ``"fd_central"`` - uses gradient finite difference approximation with a more accurate central formula which requires two gradient evaluations per hessian-vector product.
+
+            For NewtonCG ``"batched_autograd"`` is equivalent to ``"autograd"``. Defaults to ``"autograd"``.
         h (float, optional):
-            The step size for finite
-            ``"
+            The step size for finite difference if ``hvp_method`` is
+            ``"fd_forward"`` or ``"fd_central"``. Defaults to 1e-3.
         warm_start (bool, optional):
             If ``True``, the conjugate gradient solver is initialized with the
             solution from the previous optimization step. This can accelerate
````
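The ``"fd_forward"``/``"fd_central"`` options documented above are the standard gradient finite differences. Here is a minimal self-contained sketch of both formulas on a flat tensor; `hvp_fd` and `grad_fn` are illustrative names, not torchzero's actual helpers:

```python
import torch

def hvp_fd(grad_fn, x, v, h=1e-3, central=False):
    # grad_fn(x) -> gradient of the loss at x (one backward pass per call).
    # Normalizing v keeps the effective step `h` independent of the scale of v.
    vnorm = v.norm().clamp(min=torch.finfo(v.dtype).tiny)
    u = v / vnorm
    if central:
        # "fd_central": two gradient evaluations per product, O(h^2) error
        hu = (grad_fn(x + h * u) - grad_fn(x - h * u)) / (2 * h)
    else:
        # "fd_forward": one *extra* gradient evaluation, O(h) error
        # (grad_fn(x) is typically the gradient already computed at x0)
        hu = (grad_fn(x + h * u) - grad_fn(x)) / h
    return hu * vnorm  # H is linear: Hv = ||v|| * H(v / ||v||)

# sanity check on a quadratic f(x) = 0.5 x^T A x, where Hv = Av exactly
A = torch.tensor([[2.0, 0.3], [0.3, 1.0]])
grad_fn = lambda x: A @ x
v = torch.tensor([1.0, -2.0])
print(hvp_fd(grad_fn, torch.zeros(2), v, central=True))  # ~ A @ v
```

The forward formula reuses the gradient already available at the current point, which is why the docstring counts one extra gradient evaluation for it versus two for the central formula.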
````diff
@@ -60,7 +57,7 @@ class NewtonCG(Module):
     Newton-CG with a backtracking line search:
 
     ```python
-    opt = tz.
+    opt = tz.Optimizer(
         model.parameters(),
         tz.m.NewtonCG(),
         tz.m.Backtracking()
@@ -69,7 +66,7 @@ class NewtonCG(Module):
 
     Truncated Newton method (useful for large-scale problems):
     ```
-    opt = tz.
+    opt = tz.Optimizer(
         model.parameters(),
         tz.m.NewtonCG(maxiter=10),
         tz.m.Backtracking()
@@ -82,100 +79,72 @@ class NewtonCG(Module):
         maxiter: int | None = None,
         tol: float = 1e-8,
         reg: float = 1e-8,
-        hvp_method:
-        solver: Literal['cg', 'minres'
+        hvp_method: HVPMethod = "autograd",
+        solver: Literal['cg', 'minres'] = 'cg',
+        npc_terminate: bool = False,
         h: float = 1e-3, # tuned 1e-4 or 1e-3
         miniter:int = 1,
         warm_start=False,
+        warm_beta:float=0,
         inner: Chainable | None = None,
     ):
         defaults = locals().copy()
         del defaults['self'], defaults['inner']
-        super().__init__(defaults,)
-
-        if inner is not None:
-            self.set_child('inner', inner)
+        super().__init__(defaults, inner=inner)
 
         self._num_hvps = 0
         self._num_hvps_last_step = 0
 
     @torch.no_grad
-    def
-
-
-
-
-        settings = self.settings[params[0]]
-        tol = settings['tol']
-        reg = settings['reg']
-        maxiter = settings['maxiter']
-        hvp_method = settings['hvp_method']
-        solver = settings['solver'].lower().strip()
-        h = settings['h']
-        warm_start = settings['warm_start']
+    def update_states(self, objective, states, settings):
+        fs = settings[0]
+        hvp_method = fs['hvp_method']
+        h = fs['h']
 
-        self._num_hvps_last_step = 0
         # ---------------------- Hessian vector product function --------------------- #
-
-
-
-            def H_mm(x):
-                self._num_hvps_last_step += 1
-                with torch.enable_grad():
-                    return TensorList(hvp(params, grad, x, retain_graph=True))
-
-        else:
-
-            with torch.enable_grad():
-                grad = var.get_grad()
-
-            if hvp_method == 'forward':
-                def H_mm(x):
-                    self._num_hvps_last_step += 1
-                    return TensorList(hvp_fd_forward(closure, params, x, h=h, g_0=grad, normalize=True)[1])
-
-            elif hvp_method == 'central':
-                def H_mm(x):
-                    self._num_hvps_last_step += 1
-                    return TensorList(hvp_fd_central(closure, params, x, h=h, normalize=True)[1])
-
-            else:
-                raise ValueError(hvp_method)
+        _, H_mv = objective.list_Hvp_function(hvp_method=hvp_method, h=h, at_x0=True)
+        objective.temp = H_mv
 
+    @torch.no_grad
+    def apply_states(self, objective, states, settings):
+        self._num_hvps_last_step = 0
+        H_mv = objective.poptemp()
 
-
-
-
-
-
+        fs = settings[0]
+        tol = fs['tol']
+        reg = fs['reg']
+        maxiter = fs['maxiter']
+        solver = fs['solver'].lower().strip()
+        warm_start = fs['warm_start']
+        npc_terminate = fs["npc_terminate"]
 
         # ---------------------------------- run cg ---------------------------------- #
         x0 = None
-        if warm_start:
+        if warm_start:
+            x0 = unpack_states(states, objective.params, 'prev_x', cls=TensorList)
+
+        b = TensorList(objective.get_updates())
 
         if solver == 'cg':
-            d, _ = cg(
+            d, _ = cg(A_mv=H_mv, b=b, x0=x0, tol=tol, maxiter=maxiter,
+                      miniter=fs["miniter"], reg=reg, npc_terminate=npc_terminate)
 
         elif solver == 'minres':
-            d = minres(
-
-        elif solver == 'minres_npc':
-            d = minres(A_mm=H_mm, b=b, x0=x0, tol=tol, maxiter=maxiter, reg=reg, npc_terminate=True)
+            d = minres(A_mv=H_mv, b=b, x0=x0, tol=tol, maxiter=maxiter, reg=reg, npc_terminate=npc_terminate)
 
         else:
             raise ValueError(f"Unknown solver {solver}")
 
         if warm_start:
             assert x0 is not None
-            x0.
-
-        var.update = d
+            x0.lerp_(d, weight = 1-fs["warm_beta"])
 
+        objective.updates = d
         self._num_hvps += self._num_hvps_last_step
-        return
+        return objective
 
 
-class NewtonCGSteihaug(
+class NewtonCGSteihaug(Transform):
     """Newton's method with trust region and a matrix-free Steihaug-Toint conjugate gradient solver.
 
     Notes:
````
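Both classes now reach the Hessian only through the `H_mv` callable returned by `objective.list_Hvp_function`, so the Newton system `H d = g` is solved matrix-free. For reference, a textbook conjugate-gradient solver with the same `A_mv` calling convention; this is a simplified flat-tensor sketch with an illustrative name, while torchzero's `cg` operates on TensorLists and adds `miniter` and `npc_terminate` handling:

```python
import torch

def cg_matrix_free(A_mv, b, x0=None, tol=1e-8, maxiter=None, reg=0.0):
    # Solve (A + reg*I) x = b where A is available only through the
    # matvec callable A_mv, as in the cg(A_mv=H_mv, ...) call above.
    x = torch.zeros_like(b) if x0 is None else x0.clone()
    r = b - (A_mv(x) + reg * x)   # initial residual
    p = r.clone()                 # initial search direction
    rs = r.dot(r)
    for _ in range(maxiter if maxiter is not None else b.numel()):
        Ap = A_mv(p) + reg * p
        alpha = rs / Ap.dot(p)    # exact line search along p
        x = x + alpha * p
        r = r - alpha * Ap
        rs_new = r.dot(r)
        if rs_new.sqrt() <= tol:  # residual small enough: converged
            break
        p = r + (rs_new / rs) * p # conjugate direction update
        rs = rs_new
    return x
```

With `warm_start=True` the previous solution is passed as `x0`, and the new `warm_beta` option blends successive solutions via `x0.lerp_(d, weight=1-warm_beta)` rather than overwriting the stored guess outright.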
````diff
@@ -219,7 +188,7 @@ class NewtonCGSteihaug(Module):
             whether to terminate CG/MINRES whenever negative curvature is detected. Defaults to False.
 
         hvp_method (str, optional):
-            either "
+            either ``"fd_forward"`` to use forward formula which requires one backward pass per hessian-vector product, or ``"fd_central"`` to use a more accurate central formula which requires two backward passes. ``"fd_forward"`` is usually accurate enough. Defaults to ``"fd_forward"``.
         h (float, optional): finite difference step size. Defaults to 1e-3.
 
         inner (Chainable | None, optional):
@@ -229,7 +198,7 @@ class NewtonCGSteihaug(Module):
     Trust-region Newton-CG:
 
     ```python
-    opt = tz.
+    opt = tz.Optimizer(
         model.parameters(),
         tz.m.NewtonCGSteihaug(),
     )
@@ -261,7 +230,7 @@ class NewtonCGSteihaug(Module):
         npc_terminate: bool = False,
 
         # hvp settings
-        hvp_method: Literal["
+        hvp_method: Literal["fd_forward", "fd_central"] = "fd_central",
         h: float = 1e-3, # tuned 1e-4 or 1e-3
 
         # inner
@@ -269,72 +238,51 @@ class NewtonCGSteihaug(Module):
     ):
         defaults = locals().copy()
         del defaults['self'], defaults['inner']
-        super().__init__(defaults,)
-
-        if inner is not None:
-            self.set_child('inner', inner)
+        super().__init__(defaults, inner=inner)
 
         self._num_hvps = 0
         self._num_hvps_last_step = 0
 
-    @torch.no_grad
-    def step(self, var):
-        params = TensorList(var.params)
-        closure = var.closure
-        if closure is None: raise RuntimeError('NewtonCG requires closure')
-
-        tol = self.defaults['tol'] * self.global_state.get('tol_mul', 1)
-        solver = self.defaults['solver'].lower().strip()
-
-        (reg, maxiter, hvp_method, h, max_attempts, boundary_tol,
-         eta, nplus, nminus, rho_good, rho_bad, init, npc_terminate,
-         miniter, max_history, adapt_tol) = itemgetter(
-            "reg", "maxiter", "hvp_method", "h", "max_attempts", "boundary_tol",
-            "eta", "nplus", "nminus", "rho_good", "rho_bad", "init", "npc_terminate",
-            "miniter", "max_history", "adapt_tol",
-        )(self.defaults)
 
-
+    @torch.no_grad
+    def update_states(self, objective, states, settings):
+        fs = settings[0]
+        hvp_method = fs['hvp_method']
+        h = fs['h']
 
         # ---------------------- Hessian vector product function --------------------- #
-
-
-
-        def H_mm(x):
-            self._num_hvps_last_step += 1
-            with torch.enable_grad():
-                return TensorList(hvp(params, grad, x, retain_graph=True))
-
-        else:
-
-        with torch.enable_grad():
-            grad = var.get_grad()
+        _, H_mv = objective.list_Hvp_function(hvp_method=hvp_method, h=h, at_x0=True)
+        objective.temp = H_mv
 
-
-
-
-        return TensorList(hvp_fd_forward(closure, params, x, h=h, g_0=grad, normalize=True)[1])
+    @torch.no_grad
+    def apply_states(self, objective, states, settings):
+        self._num_hvps_last_step = 0
 
-
-
-
-        return TensorList(hvp_fd_central(closure, params, x, h=h, normalize=True)[1])
+        H_mv = objective.poptemp()
+        params = TensorList(objective.params)
+        fs = settings[0]
 
-
-
+        tol = fs['tol'] * self.global_state.get('tol_mul', 1)
+        solver = fs['solver'].lower().strip()
 
+        reg=fs["reg"]
+        maxiter=fs["maxiter"]
+        max_attempts=fs["max_attempts"]
+        init=fs["init"]
+        npc_terminate=fs["npc_terminate"]
+        miniter=fs["miniter"]
+        max_history=fs["max_history"]
+        adapt_tol=fs["adapt_tol"]
 
-        # -------------------------------- inner step -------------------------------- #
-        b = var.get_update()
-        if 'inner' in self.children:
-            b = apply_transform(self.children['inner'], b, params=params, grads=grad, var=var)
-        b = as_tensorlist(b)
 
         # ------------------------------- trust region ------------------------------- #
         success = False
         d = None
-
+        orig_params = [p.clone() for p in params]
+        b = TensorList(objective.get_updates())
         solution = None
+        closure = objective.closure
+        assert closure is not None
 
         while not success:
            max_attempts -= 1
@@ -343,7 +291,7 @@ class NewtonCGSteihaug(Module):
            trust_radius = self.global_state.get('trust_radius', init)
 
            # -------------- make sure trust radius isn't too small or large ------------- #
-            finfo = torch.finfo(
+            finfo = torch.finfo(orig_params[0].dtype)
            if trust_radius < finfo.tiny * 2:
                trust_radius = self.global_state['trust_radius'] = init
                if adapt_tol:
@@ -360,7 +308,7 @@ class NewtonCGSteihaug(Module):
            if d is None:
                if solver == 'cg':
                    d, solution = cg(
-
+                        A_mv=H_mv,
                        b=b,
                        tol=tol,
                        maxiter=maxiter,
@@ -372,40 +320,40 @@ class NewtonCGSteihaug(Module):
                    )
 
            elif solver == 'minres':
-                d = minres(
+                d = minres(A_mv=H_mv, b=b, trust_radius=trust_radius, tol=tol, maxiter=maxiter, reg=reg, npc_terminate=npc_terminate)
 
            else:
                raise ValueError(f"unknown solver {solver}")
 
            # ---------------------------- update trust radius --------------------------- #
            self.global_state["trust_radius"], success = default_radius(
-                params=params,
-                closure=closure,
-                f=tofloat(
-                g=b,
-                H=
-                d=d,
-                trust_radius=trust_radius,
-                eta=eta,
-                nplus=nplus,
-                nminus=nminus,
-                rho_good=rho_good,
-                rho_bad=rho_bad,
-                boundary_tol=boundary_tol,
-
-                init=
-                state=
-                settings=
-                check_overflow=False, # this is checked manually to adapt tolerance
+                params = params,
+                closure = closure,
+                f = tofloat(objective.get_loss(False)),
+                g = b,
+                H = H_mv,
+                d = d,
+                trust_radius = trust_radius,
+                eta = fs["eta"],
+                nplus = fs["nplus"],
+                nminus = fs["nminus"],
+                rho_good = fs["rho_good"],
+                rho_bad = fs["rho_bad"],
+                boundary_tol = fs["boundary_tol"],
+
+                init = cast(int, None), # init isn't used because check_overflow=False
+                state = cast(dict, None), # not used
+                settings = cast(dict, None), # not used
+                check_overflow = False, # this is checked manually to adapt tolerance
            )
 
            # --------------------------- assign new direction --------------------------- #
            assert d is not None
            if success:
-
+                objective.updates = d
 
            else:
-
+                objective.updates = params.zeros_like()
 
        self._num_hvps += self._num_hvps_last_step
-        return
+        return objective
````
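The `eta`/`nplus`/`nminus`/`rho_good`/`rho_bad`/`boundary_tol` settings forwarded to `default_radius` are the knobs of a standard trust-region ratio test. A sketch of the typical update rule under that reading; the function name and constants are illustrative, and torchzero's actual `default_radius` takes the `params`/`closure`/`H` arguments shown in the diff and may differ in detail:

```python
def trust_radius_update(actual_reduction, predicted_reduction, step_norm,
                        trust_radius, eta=0.0, nplus=2.0, nminus=0.25,
                        rho_good=0.75, rho_bad=0.25, boundary_tol=0.1):
    # rho measures how well the quadratic model predicted the true loss change
    rho = actual_reduction / max(predicted_reduction, 1e-32)

    if rho < rho_bad:
        trust_radius *= nminus  # model fit was poor: shrink the region
    elif rho > rho_good and step_norm >= (1 - boundary_tol) * trust_radius:
        trust_radius *= nplus   # good fit and the step hit the boundary: grow

    accepted = rho > eta        # accept only if the step made real progress
    return trust_radius, accepted
```

When a step is rejected, the diff above sets `objective.updates = params.zeros_like()`, i.e. no parameter movement for that attempt, and the `while not success:` loop retries with the shrunken radius until it succeeds or `max_attempts` runs out.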