PyPI - torchzero - Versions diffs - 0.3.11__py3-none-any.whl → 0.3.13__py3-none-any.whl - Mend

torchzero 0.3.11__py3-none-any.whl → 0.3.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (161) hide show

tests/test_opts.py +95 -69
tests/test_tensorlist.py +8 -7
torchzero/__init__.py +1 -1
torchzero/core/__init__.py +2 -2
torchzero/core/module.py +225 -72
torchzero/core/reformulation.py +65 -0
torchzero/core/transform.py +44 -24
torchzero/modules/__init__.py +13 -5
torchzero/modules/{optimizers → adaptive}/__init__.py +5 -2
torchzero/modules/adaptive/adagrad.py +356 -0
torchzero/modules/{optimizers → adaptive}/adahessian.py +53 -52
torchzero/modules/{optimizers → adaptive}/adam.py +0 -3
torchzero/modules/{optimizers → adaptive}/adan.py +26 -40
torchzero/modules/{optimizers → adaptive}/adaptive_heavyball.py +3 -6
torchzero/modules/adaptive/aegd.py +54 -0
torchzero/modules/{optimizers → adaptive}/esgd.py +1 -1
torchzero/modules/{optimizers/ladagrad.py → adaptive/lmadagrad.py} +42 -39
torchzero/modules/{optimizers → adaptive}/mars.py +24 -36
torchzero/modules/adaptive/matrix_momentum.py +146 -0
torchzero/modules/{optimizers → adaptive}/msam.py +14 -12
torchzero/modules/{optimizers → adaptive}/muon.py +19 -20
torchzero/modules/adaptive/natural_gradient.py +175 -0
torchzero/modules/{optimizers → adaptive}/rprop.py +0 -2
torchzero/modules/{optimizers → adaptive}/sam.py +1 -1
torchzero/modules/{optimizers → adaptive}/shampoo.py +8 -4
torchzero/modules/{optimizers → adaptive}/soap.py +27 -50
torchzero/modules/{optimizers → adaptive}/sophia_h.py +2 -3
torchzero/modules/clipping/clipping.py +85 -92
torchzero/modules/clipping/ema_clipping.py +5 -5
torchzero/modules/conjugate_gradient/__init__.py +11 -0
torchzero/modules/{quasi_newton → conjugate_gradient}/cg.py +355 -369
torchzero/modules/experimental/__init__.py +9 -32
torchzero/modules/experimental/dct.py +2 -2
torchzero/modules/experimental/fft.py +2 -2
torchzero/modules/experimental/gradmin.py +4 -3
torchzero/modules/experimental/l_infinity.py +111 -0
torchzero/modules/{momentum/experimental.py → experimental/momentum.py} +3 -40
torchzero/modules/experimental/newton_solver.py +79 -17
torchzero/modules/experimental/newtonnewton.py +27 -14
torchzero/modules/experimental/scipy_newton_cg.py +105 -0
torchzero/modules/experimental/structural_projections.py +1 -1
torchzero/modules/functional.py +50 -14
torchzero/modules/grad_approximation/fdm.py +19 -20
torchzero/modules/grad_approximation/forward_gradient.py +4 -2
torchzero/modules/grad_approximation/grad_approximator.py +43 -47
torchzero/modules/grad_approximation/rfdm.py +144 -122
torchzero/modules/higher_order/__init__.py +1 -1
torchzero/modules/higher_order/higher_order_newton.py +31 -23
torchzero/modules/least_squares/__init__.py +1 -0
torchzero/modules/least_squares/gn.py +161 -0
torchzero/modules/line_search/__init__.py +2 -2
torchzero/modules/line_search/_polyinterp.py +289 -0
torchzero/modules/line_search/adaptive.py +69 -44
torchzero/modules/line_search/backtracking.py +83 -70
torchzero/modules/line_search/line_search.py +159 -68
torchzero/modules/line_search/scipy.py +1 -1
torchzero/modules/line_search/strong_wolfe.py +319 -218
torchzero/modules/misc/__init__.py +8 -0
torchzero/modules/misc/debug.py +4 -4
torchzero/modules/misc/escape.py +9 -7
torchzero/modules/misc/gradient_accumulation.py +88 -22
torchzero/modules/misc/homotopy.py +59 -0
torchzero/modules/misc/misc.py +82 -15
torchzero/modules/misc/multistep.py +47 -11
torchzero/modules/misc/regularization.py +5 -9
torchzero/modules/misc/split.py +55 -35
torchzero/modules/misc/switch.py +1 -1
torchzero/modules/momentum/__init__.py +1 -5
torchzero/modules/momentum/averaging.py +3 -3
torchzero/modules/momentum/cautious.py +42 -47
torchzero/modules/momentum/momentum.py +35 -1
torchzero/modules/ops/__init__.py +9 -1
torchzero/modules/ops/binary.py +9 -8
torchzero/modules/{momentum/ema.py → ops/higher_level.py} +10 -33
torchzero/modules/ops/multi.py +15 -15
torchzero/modules/ops/reduce.py +1 -1
torchzero/modules/ops/utility.py +12 -8
torchzero/modules/projections/projection.py +4 -4
torchzero/modules/quasi_newton/__init__.py +1 -16
torchzero/modules/quasi_newton/damping.py +105 -0
torchzero/modules/quasi_newton/diagonal_quasi_newton.py +167 -163
torchzero/modules/quasi_newton/lbfgs.py +256 -200
torchzero/modules/quasi_newton/lsr1.py +167 -132
torchzero/modules/quasi_newton/quasi_newton.py +346 -446
torchzero/modules/restarts/__init__.py +7 -0
torchzero/modules/restarts/restars.py +252 -0
torchzero/modules/second_order/__init__.py +2 -1
torchzero/modules/second_order/multipoint.py +238 -0
torchzero/modules/second_order/newton.py +133 -88
torchzero/modules/second_order/newton_cg.py +141 -80
torchzero/modules/smoothing/__init__.py +1 -1
torchzero/modules/smoothing/sampling.py +300 -0
torchzero/modules/step_size/__init__.py +1 -1
torchzero/modules/step_size/adaptive.py +312 -47
torchzero/modules/termination/__init__.py +14 -0
torchzero/modules/termination/termination.py +207 -0
torchzero/modules/trust_region/__init__.py +5 -0
torchzero/modules/trust_region/cubic_regularization.py +170 -0
torchzero/modules/trust_region/dogleg.py +92 -0
torchzero/modules/trust_region/levenberg_marquardt.py +128 -0
torchzero/modules/trust_region/trust_cg.py +97 -0
torchzero/modules/trust_region/trust_region.py +350 -0
torchzero/modules/variance_reduction/__init__.py +1 -0
torchzero/modules/variance_reduction/svrg.py +208 -0
torchzero/modules/weight_decay/weight_decay.py +65 -64
torchzero/modules/zeroth_order/__init__.py +1 -0
torchzero/modules/zeroth_order/cd.py +359 -0
torchzero/optim/root.py +65 -0
torchzero/optim/utility/split.py +8 -8
torchzero/optim/wrappers/directsearch.py +0 -1
torchzero/optim/wrappers/fcmaes.py +3 -2
torchzero/optim/wrappers/nlopt.py +0 -2
torchzero/optim/wrappers/optuna.py +2 -2
torchzero/optim/wrappers/scipy.py +81 -22
torchzero/utils/__init__.py +40 -4
torchzero/utils/compile.py +1 -1
torchzero/utils/derivatives.py +123 -111
torchzero/utils/linalg/__init__.py +9 -2
torchzero/utils/linalg/linear_operator.py +329 -0
torchzero/utils/linalg/matrix_funcs.py +2 -2
torchzero/utils/linalg/orthogonalize.py +2 -1
torchzero/utils/linalg/qr.py +2 -2
torchzero/utils/linalg/solve.py +226 -154
torchzero/utils/metrics.py +83 -0
torchzero/utils/python_tools.py +6 -0
torchzero/utils/tensorlist.py +105 -34
torchzero/utils/torch_tools.py +9 -4
torchzero-0.3.13.dist-info/METADATA +14 -0
torchzero-0.3.13.dist-info/RECORD +166 -0
{torchzero-0.3.11.dist-info → torchzero-0.3.13.dist-info}/top_level.txt +0 -1
docs/source/conf.py +0 -59
docs/source/docstring template.py +0 -46
torchzero/modules/experimental/absoap.py +0 -253
torchzero/modules/experimental/adadam.py +0 -118
torchzero/modules/experimental/adamY.py +0 -131
torchzero/modules/experimental/adam_lambertw.py +0 -149
torchzero/modules/experimental/adaptive_step_size.py +0 -90
torchzero/modules/experimental/adasoap.py +0 -177
torchzero/modules/experimental/cosine.py +0 -214
torchzero/modules/experimental/cubic_adam.py +0 -97
torchzero/modules/experimental/eigendescent.py +0 -120
torchzero/modules/experimental/etf.py +0 -195
torchzero/modules/experimental/exp_adam.py +0 -113
torchzero/modules/experimental/expanded_lbfgs.py +0 -141
torchzero/modules/experimental/hnewton.py +0 -85
torchzero/modules/experimental/modular_lbfgs.py +0 -265
torchzero/modules/experimental/parabolic_search.py +0 -220
torchzero/modules/experimental/subspace_preconditioners.py +0 -145
torchzero/modules/experimental/tensor_adagrad.py +0 -42
torchzero/modules/line_search/polynomial.py +0 -233
torchzero/modules/momentum/matrix_momentum.py +0 -193
torchzero/modules/optimizers/adagrad.py +0 -165
torchzero/modules/quasi_newton/trust_region.py +0 -397
torchzero/modules/smoothing/gaussian.py +0 -198
torchzero-0.3.11.dist-info/METADATA +0 -404
torchzero-0.3.11.dist-info/RECORD +0 -159
torchzero-0.3.11.dist-info/licenses/LICENSE +0 -21
/torchzero/modules/{optimizers → adaptive}/lion.py +0 -0
/torchzero/modules/{optimizers → adaptive}/orthograd.py +0 -0
/torchzero/modules/{optimizers → adaptive}/rmsprop.py +0 -0
{torchzero-0.3.11.dist-info → torchzero-0.3.13.dist-info}/WHEEL +0 -0

torchzero/utils/linalg/solve.py CHANGED Viewed

@@ -1,99 +1,32 @@
 # pyright: reportArgumentType=false
+import math
+from collections import deque
 from collections.abc import Callable
-from typing import Any, overload
+from typing import Any, NamedTuple, overload
 import torch
 from .. import (
     TensorList,
     generic_eq,
-    generic_finfo_eps,
+    generic_finfo_tiny,
     generic_numel,
-    generic_randn_like,
     generic_vector_norm,
     generic_zeros_like,
 )
-def _make_A_mm_reg(A_mm: Callable | torch.Tensor, reg):
-    if callable(A_mm):
-        def A_mm_reg(x): # A_mm with regularization
-            Ax = A_mm(x)
-            if not generic_eq(reg, 0): Ax += x*reg
-            return Ax
-        return A_mm_reg
-    if not isinstance(A_mm, torch.Tensor): raise TypeError(type(A_mm))
-    def Ax_reg(x): # A_mm with regularization
-        if A_mm.ndim == 1: Ax = A_mm * x
-        else: Ax = A_mm @ x
-        if reg != 0: Ax += x*reg
+def _make_A_mm_reg(A_mm: Callable, reg):
+    def A_mm_reg(x): # A_mm with regularization
+        Ax = A_mm(x)
+        if not generic_eq(reg, 0): Ax += x*reg
         return Ax
-    return Ax_reg
+    return A_mm_reg
+def _identity(x): return x
-@overload
-def cg(
-    A_mm: Callable[[torch.Tensor], torch.Tensor] | torch.Tensor,
-    b: torch.Tensor,
-    x0_: torch.Tensor | None = None,
-    tol: float | None = 1e-4,
-    maxiter: int | None = None,
-    reg: float = 0,
-) -> torch.Tensor: ...
-@overload
-def cg(
-    A_mm: Callable[[TensorList], TensorList],
-    b: TensorList,
-    x0_: TensorList | None = None,
-    tol: float | None = 1e-4,
-    maxiter: int | None = None,
-    reg: float | list[float] | tuple[float] = 0,
-) -> TensorList: ...
-def cg(
-    A_mm: Callable | torch.Tensor,
-    b: torch.Tensor | TensorList,
-    x0_: torch.Tensor | TensorList | None = None,
-    tol: float | None = 1e-4,
-    maxiter: int | None = None,
-    reg: float | list[float] | tuple[float] = 0,
-):
-    A_mm_reg = _make_A_mm_reg(A_mm, reg)
-    eps = generic_finfo_eps(b)
-    if tol is None: tol = eps
-    if maxiter is None: maxiter = generic_numel(b)
-    if x0_ is None: x0_ = generic_zeros_like(b)
-    x = x0_
-    residual = b - A_mm_reg(x)
-    p = residual.clone() # search direction
-    r_norm = generic_vector_norm(residual)
-    init_norm = r_norm
-    if r_norm < tol: return x
-    k = 0
-    while True:
-        Ap = A_mm_reg(p)
-        step_size = (r_norm**2) / p.dot(Ap)
-        x += step_size * p # Update solution
-        residual -= step_size * Ap # Update residual
-        new_r_norm = generic_vector_norm(residual)
-        k += 1
-        if new_r_norm <= tol * init_norm: return x
-        if k >= maxiter: return x
-        beta = (new_r_norm**2) / (r_norm**2)
-        p = residual + beta*p
-        r_norm = new_r_norm
-# https://arxiv.org/pdf/2110.02820 algorithm 2.1 apparently supposed to be diabolical
+# https://arxiv.org/pdf/2110.02820
 def nystrom_approximation(
     A_mm: Callable[[torch.Tensor], torch.Tensor],
     ndim: int,
@@ -115,7 +48,6 @@ def nystrom_approximation(
     lambd = (S.pow(2) - v).clip(min=0) #Remove shift, compute eigs
     return U, lambd
-# this one works worse
 def nystrom_sketch_and_solve(
     A_mm: Callable[[torch.Tensor], torch.Tensor],
     b: torch.Tensor,
@@ -141,7 +73,6 @@ def nystrom_sketch_and_solve(
     term2 = (1.0 / reg) * (b - U @ Uᵀb)
     return (term1 + term2).squeeze(-1)
-# this one is insane
 def nystrom_pcg(
     A_mm: Callable[[torch.Tensor], torch.Tensor],
     b: torch.Tensor,
@@ -161,7 +92,7 @@ def nystrom_pcg(
         generator=generator,
     )
     lambd += reg
-    eps = torch.finfo(b.dtype).eps ** 2
+    eps = torch.finfo(b.dtype).tiny * 2
     if tol is None: tol = eps
     def A_mm_reg(x): # A_mm with regularization
@@ -201,98 +132,239 @@ def nystrom_pcg(
 def _safe_clip(x: torch.Tensor):
-    """makes sure scalar tensor x is not smaller than epsilon"""
+    """makes sure scalar tensor x is not smaller than tiny"""
     assert x.numel() == 1, x.shape
-    eps = torch.finfo(x.dtype).eps
+    eps = torch.finfo(x.dtype).tiny * 2
     if x.abs() < eps: return x.new_full(x.size(), eps).copysign(x)
     return x
-def _trust_tau(x,d,trust_region):
+def _trust_tau(x,d,trust_radius):
     xx = x.dot(x)
     xd = x.dot(d)
     dd = _safe_clip(d.dot(d))
-    rad = (xd**2 - dd * (xx - trust_region**2)).clip(min=0).sqrt()
+    rad = (xd**2 - dd * (xx - trust_radius**2)).clip(min=0).sqrt()
     tau = (-xd + rad) / dd
     return x + tau * d
+class CG:
+    """Conjugate gradient method.
+    Args:
+        A_mm (Callable[[torch.Tensor], torch.Tensor] | torch.Tensor): Callable that returns matvec ``Ax``.
+        b (torch.Tensor): right hand side
+        x0 (torch.Tensor | None, optional): initial guess, defaults to zeros. Defaults to None.
+        tol (float | None, optional): tolerance for convergence. Defaults to 1e-4.
+        maxiter (int | None, optional):
+            maximum number of iterations, if None sets to number of dimensions. Defaults to None.
+        reg (float, optional): regularization. Defaults to 0.
+        trust_radius (float | None, optional):
+            CG is terminated whenever solution exceeds trust region, returning a solution modified to be within it. Defaults to None.
+        npc_terminate (bool, optional):
+            whether to terminate CG whenever negative curvature is detected. Defaults to False.
+        miniter (int, optional):
+            minimal number of iterations even if tolerance is satisfied, this ensures some progress
+            is always made.
+        history_size (int, optional):
+            number of past iterations to store, to re-use them when trust radius is decreased.
+        P_mm (Callable | torch.Tensor | None, optional):
+            Callable that returns inverse preconditioner times vector. Defaults to None.
+    """
+    def __init__(
+        self,
+        A_mm: Callable,
+        b: torch.Tensor | TensorList,
+        x0: torch.Tensor | TensorList | None = None,
+        tol: float | None = 1e-4,
+        maxiter: int | None = None,
+        reg: float = 0,
+        trust_radius: float | None = None,
+        npc_terminate: bool=False,
+        miniter: int = 0,
+        history_size: int = 0,
+        P_mm: Callable | None = None,
+):
+        # --------------------------------- set attrs -------------------------------- #
+        self.A_mm = _make_A_mm_reg(A_mm, reg)
+        self.b = b
+        if tol is None: tol = generic_finfo_tiny(b) * 2
+        self.tol = tol
+        self.eps = generic_finfo_tiny(b) * 2
+        if maxiter is None: maxiter = generic_numel(b)
+        self.maxiter = maxiter
+        self.miniter = miniter
+        self.trust_radius = trust_radius
+        self.npc_terminate = npc_terminate
+        self.P_mm = P_mm if P_mm is not None else _identity
+        if history_size > 0:
+            self.history = deque(maxlen = history_size)
+            """history of (x, x_norm, d)"""
+        else:
+            self.history = None
+        # -------------------------------- initialize -------------------------------- #
+        self.iter = 0
+        if x0 is None:
+            self.x = generic_zeros_like(b)
+            self.r = b
+        else:
+            self.x = x0
+            self.r = b - A_mm(self.x)
+        self.z = self.P_mm(self.r)
+        self.d = self.z
+        if self.history is not None:
+            self.history.append((self.x, generic_vector_norm(self.x), self.d))
+    def step(self) -> tuple[Any, bool]:
+        """returns ``(solution, should_terminate)``"""
+        x, b, d, r, z = self.x, self.b, self.d, self.r, self.z
+        if self.iter >= self.maxiter:
+            return x, True
+        Ad = self.A_mm(d)
+        dAd = d.dot(Ad)
+        # check negative curvature
+        if dAd <= self.eps:
+            if self.trust_radius is not None: return _trust_tau(x, d, self.trust_radius), True
+            if self.iter == 0: return b * (b.dot(b) / dAd).abs(), True
+            if self.npc_terminate: return x, True
+        rz = r.dot(z)
+        alpha = rz / dAd
+        x_next = x + alpha * d
+        # check if the step exceeds the trust-region boundary
+        x_next_norm = None
+        if self.trust_radius is not None:
+            x_next_norm = generic_vector_norm(x_next)
+            if x_next_norm >= self.trust_radius:
+                return _trust_tau(x, d, self.trust_radius), True
+        # update step, residual and direction
+        r_next = r - alpha * Ad
+        # check if r is sufficiently small
+        if self.iter >= self.miniter and generic_vector_norm(r_next) < self.tol:
+            return x_next, True
+        # update d, r, z
+        z_next = self.P_mm(r_next)
+        beta = r_next.dot(z_next) / rz
+        self.d = z_next + beta * d
+        self.x = x_next
+        self.r = r_next
+        self.z = z_next
+        # update history
+        if self.history is not None:
+            if x_next_norm is None: x_next_norm = generic_vector_norm(x_next)
+            self.history.append((self.x, x_next_norm, self.d))
+        self.iter += 1
+        return x, False
+    def solve(self):
+        # return initial guess if it is good enough
+        if self.miniter < 1 and generic_vector_norm(self.r) < self.tol:
+            return self.x
+        should_terminate = False
+        sol = None
+        while not should_terminate:
+            sol, should_terminate = self.step()
+        assert sol is not None
+        return sol
+def find_within_trust_radius(history, trust_radius: float):
+    """find the most recent ``x`` in history whose norm is within trust radius and move it along its direction ``d`` to the trust region boundary, if no such ``x`` exists, returns ``None``"""
+    for x, x_norm, d in reversed(tuple(history)):
+        if x_norm <= trust_radius:
+            return _trust_tau(x, d, trust_radius)
+    return None
+class _TensorSolution(NamedTuple):
+    x: torch.Tensor
+    solver: CG
+class _TensorListSolution(NamedTuple):
+    x: TensorList
+    solver: CG
 @overload
-def steihaug_toint_cg(
-    A_mm: Callable[[torch.Tensor], torch.Tensor] | torch.Tensor,
+def cg(
+    A_mm: Callable[[torch.Tensor], torch.Tensor],
     b: torch.Tensor,
-    trust_region: float,
     x0: torch.Tensor | None = None,
-    tol: float | None = 1e-4,
+    tol: float | None = 1e-8,
     maxiter: int | None = None,
     reg: float = 0,
-) -> torch.Tensor: ...
+    trust_radius: float | None = None,
+    npc_terminate: bool = False,
+    miniter: int = 0,
+    history_size: int = 0,
+    P_mm: Callable[[torch.Tensor], torch.Tensor] | None = None
+) -> _TensorSolution: ...
 @overload
-def steihaug_toint_cg(
+def cg(
     A_mm: Callable[[TensorList], TensorList],
     b: TensorList,
-    trust_region: float,
     x0: TensorList | None = None,
-    tol: float | None = 1e-4,
+    tol: float | None = 1e-8,
     maxiter: int | None = None,
     reg: float | list[float] | tuple[float] = 0,
-) -> TensorList: ...
-def steihaug_toint_cg(
-    A_mm: Callable | torch.Tensor,
+    trust_radius: float | None = None,
+    npc_terminate: bool=False,
+    miniter: int = 0,
+    history_size: int = 0,
+    P_mm: Callable[[TensorList], TensorList] | None = None
+) -> _TensorListSolution: ...
+def cg(
+    A_mm: Callable,
     b: torch.Tensor | TensorList,
-    trust_region: float,
     x0: torch.Tensor | TensorList | None = None,
-    tol: float | None = 1e-4,
+    tol: float | None = 1e-8,
     maxiter: int | None = None,
     reg: float | list[float] | tuple[float] = 0,
+    trust_radius: float | None = None,
+    npc_terminate: bool = False,
+    miniter: int = 0,
+    history_size:int = 0,
+    P_mm: Callable | None = None
 ):
-    """
-    Solution is bounded to have L2 norm no larger than :code:`trust_region`. If solution exceeds :code:`trust_region`, CG is terminated early, so it is also faster.
-    """
-    A_mm_reg = _make_A_mm_reg(A_mm, reg)
-    x = x0
-    if x is None: x = generic_zeros_like(b)
-    r = b
-    d = r.clone()
-    eps = generic_finfo_eps(b)**2
-    if tol is None: tol = eps
-    if generic_vector_norm(r) < tol:
-        return x
-    if maxiter is None:
-        maxiter = generic_numel(b)
-    for _ in range(maxiter):
-        Ad = A_mm_reg(d)
-        d_Ad = d.dot(Ad)
-        if d_Ad <= eps:
-            return _trust_tau(x, d, trust_region)
-        alpha = r.dot(r) / d_Ad
-        p_next = x + alpha * d
-        # check if the step exceeds the trust-region boundary
-        if generic_vector_norm(p_next) >= trust_region:
-            return _trust_tau(x, d, trust_region)
-        # update step, residual and direction
-        x = p_next
-        r_next = r - alpha * Ad
-        if generic_vector_norm(r_next) < tol:
-            return x
+    solver = CG(
+        A_mm=A_mm,
+        b=b,
+        x0=x0,
+        tol=tol,
+        maxiter=maxiter,
+        reg=reg,
+        trust_radius=trust_radius,
+        npc_terminate=npc_terminate,
+        miniter=miniter,
+        history_size=history_size,
+        P_mm=P_mm,
+    )
-        beta = r_next.dot(r_next) / r.dot(r)
-        d = r_next + beta * d
-        r = r_next
+    x = solver.solve()
-    return x
+    if isinstance(b, torch.Tensor):
+        return _TensorSolution(x, solver)
+    return _TensorListSolution(x, solver)
 # Liu, Yang, and Fred Roosta. "MINRES: From negative curvature detection to monotonicity properties." SIAM Journal on Optimization 32.4 (2022): 2636-2661.
@@ -305,7 +377,7 @@ def minres(
     maxiter: int | None = None,
     reg: float = 0,
     npc_terminate: bool=True,
-    trust_region: float | None = None,
+    trust_radius: float | None = None,
 ) -> torch.Tensor: ...
 @overload
 def minres(
@@ -316,7 +388,7 @@ def minres(
     maxiter: int | None = None,
     reg: float | list[float] | tuple[float] = 0,
     npc_terminate: bool=True,
-    trust_region: float | None = None,
+    trust_radius: float | None = None,
 ) -> TensorList: ...
 def minres(
     A_mm,
@@ -326,11 +398,11 @@ def minres(
     maxiter: int | None = None,
     reg: float | list[float] | tuple[float] = 0,
     npc_terminate: bool=True,
-    trust_region: float | None = None,
+    trust_radius: float | None = None, #trust region is experimental
 ):
     A_mm_reg = _make_A_mm_reg(A_mm, reg)
-    eps = generic_finfo_eps(b)
-    if tol is None: tol = eps**2
+    eps = math.sqrt(generic_finfo_tiny(b) * 2)
+    if tol is None: tol = eps
     if maxiter is None: maxiter = generic_numel(b)
     if x0 is None:
@@ -369,9 +441,9 @@ def minres(
         delta1 = -c*beta
         cgamma1 = c*gamma1
-        if trust_region is not None and cgamma1 >= 0:
-            if npc_terminate: return _trust_tau(X, R, trust_region)
-            return _trust_tau(X, D, trust_region)
+        if trust_radius is not None and cgamma1 >= 0:
+            if npc_terminate: return _trust_tau(X, R, trust_radius)
+            return _trust_tau(X, D, trust_radius)
         if npc_terminate and cgamma1 >= 0:
             return R
@@ -380,8 +452,8 @@ def minres(
         if abs(gamma2) <= eps: # singular system
             # c=0; s=1; tau=0
-            if trust_region is None: return X
-            return _trust_tau(X, D, trust_region)
+            if trust_radius is None: return X
+            return _trust_tau(X, D, trust_radius)
         c = gamma1 / gamma2
         s = beta/gamma2
@@ -393,9 +465,9 @@ def minres(
         e = e_next
         X = X + tau*D
-        if trust_region is not None:
-            if generic_vector_norm(X) > trust_region:
-                return _trust_tau(X, D, trust_region)
+        if trust_radius is not None:
+            if generic_vector_norm(X) > trust_radius:
+                return _trust_tau(X, D, trust_radius)
         if (abs(beta) < eps) or (phi / b_norm <= tol):
             # R = zeros(R)

torchzero/utils/metrics.py ADDED Viewed

@@ -0,0 +1,83 @@
+"""convenience submodule which allows calculating a metric based on its string name,
+used in many places"""
+from abc import ABC, abstractmethod
+from collections.abc import Sequence
+from typing import TYPE_CHECKING, Any, Literal, overload
+import torch
+if TYPE_CHECKING:
+    from .tensorlist import TensorList
+class Metric(ABC):
+    @abstractmethod
+    def evaluate_global(self, x: "TensorList") -> torch.Tensor:
+        """returns a global metric for a tensorlist"""
+    @abstractmethod
+    def evaluate_tensor(self, x: torch.Tensor, dim=None, keepdim=False) -> torch.Tensor:
+        """returns metric for a tensor"""
+    def evaluate_list(self, x: "TensorList") -> "TensorList":
+        """returns list of metrics for a tensorlist (possibly vectorized)"""
+        return x.map(self.evaluate_tensor)
+class _MAD(Metric):
+    def evaluate_global(self, x): return x.abs().global_mean()
+    def evaluate_tensor(self, x, dim=None, keepdim=False): return x.abs().mean(dim=dim, keepdim=keepdim)
+    def evaluate_list(self, x): return x.abs().mean()
+class _Std(Metric):
+    def evaluate_global(self, x): return x.global_std()
+    def evaluate_tensor(self, x, dim=None, keepdim=False): return x.std(dim=dim, keepdim=keepdim)
+    def evaluate_list(self, x): return x.std()
+class _Var(Metric):
+    def evaluate_global(self, x): return x.global_var()
+    def evaluate_tensor(self, x, dim=None, keepdim=False): return x.var(dim=dim, keepdim=keepdim)
+    def evaluate_list(self, x): return x.var()
+class _Sum(Metric):
+    def evaluate_global(self, x): return x.global_sum()
+    def evaluate_tensor(self, x, dim=None, keepdim=False): return x.sum(dim=dim, keepdim=keepdim)
+    def evaluate_list(self, x): return x.sum()
+class _Norm(Metric):
+    def __init__(self, ord): self.ord = ord
+    def evaluate_global(self, x): return x.global_vector_norm(self.ord)
+    def evaluate_tensor(self, x, dim=None, keepdim=False):
+        return torch.linalg.vector_norm(x, ord=self.ord, dim=dim, keepdim=keepdim) # pylint:disable=not-callable
+    def evaluate_list(self, x): return x.norm(self.ord)
+_METRIC_KEYS = Literal['mad', 'std', 'var', 'sum', 'l0', 'l1', 'l2', 'l3', 'l4', 'linf']
+_METRICS: dict[_METRIC_KEYS, Metric] = {
+    "mad": _MAD(),
+    "std": _Std(),
+    "var": _Var(),
+    "sum": _Sum(),
+    "l0": _Norm(0),
+    "l1": _Norm(1),
+    "l2": _Norm(2),
+    "l3": _Norm(3),
+    "l4": _Norm(4),
+    "linf": _Norm(torch.inf),
+}
+Metrics = _METRIC_KEYS | float | torch.Tensor
+def evaluate_metric(x: "torch.Tensor | TensorList", metric: Metrics) -> torch.Tensor:
+    if isinstance(metric, (int, float, torch.Tensor)):
+        if isinstance(x, torch.Tensor): return torch.linalg.vector_norm(x, ord=metric) # pylint:disable=not-callable
+        return x.global_vector_norm(ord=float(metric))
+    if isinstance(x, torch.Tensor): return _METRICS[metric].evaluate_tensor(x)
+    return _METRICS[metric].evaluate_global(x)
+def calculate_metric_list(x: "TensorList", metric: Metrics) -> "TensorList":
+    if isinstance(metric, (int, float, torch.Tensor)):
+        return x.norm(ord=float(metric))
+    return _METRICS[metric].evaluate_list(x)

torchzero/utils/python_tools.py CHANGED Viewed

@@ -61,3 +61,9 @@ def unpack_dicts(dicts: Iterable[Mapping[str, Any]], key:str, key2: str | None =
     values = [cls(s[k] for s in dicts) for k in keys] # pyright:ignore[reportCallIssue]
     if len(values) == 1: return values[0]
     return values
+def safe_dict_update_(d1_:dict, d2:dict):
+    inter = set(d1_.keys()).intersection(d2.keys())
+    if len(inter) > 0: raise RuntimeError(f"Duplicate keys {inter}")
+    d1_.update(d2)

torchzero 0.3.11__py3-none-any.whl → 0.3.13__py3-none-any.whl

torchzero 0.3.11__py3-none-any.whl → 0.3.13__py3-none-any.whl