PyPI - torchzero - Versions diffs - 0.3.10__py3-none-any.whl → 0.3.13__py3-none-any.whl - Mend

torchzero 0.3.10__py3-none-any.whl → 0.3.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (182) hide show

tests/test_identical.py +2 -3
tests/test_opts.py +140 -100
tests/test_tensorlist.py +8 -7
tests/test_vars.py +1 -0
torchzero/__init__.py +1 -1
torchzero/core/__init__.py +2 -2
torchzero/core/module.py +335 -50
torchzero/core/reformulation.py +65 -0
torchzero/core/transform.py +197 -70
torchzero/modules/__init__.py +13 -4
torchzero/modules/adaptive/__init__.py +30 -0
torchzero/modules/adaptive/adagrad.py +356 -0
torchzero/modules/adaptive/adahessian.py +224 -0
torchzero/modules/{optimizers → adaptive}/adam.py +6 -8
torchzero/modules/adaptive/adan.py +96 -0
torchzero/modules/adaptive/adaptive_heavyball.py +54 -0
torchzero/modules/adaptive/aegd.py +54 -0
torchzero/modules/adaptive/esgd.py +171 -0
torchzero/modules/{optimizers → adaptive}/lion.py +1 -1
torchzero/modules/{experimental/spectral.py → adaptive/lmadagrad.py} +94 -71
torchzero/modules/adaptive/mars.py +79 -0
torchzero/modules/adaptive/matrix_momentum.py +146 -0
torchzero/modules/adaptive/msam.py +188 -0
torchzero/modules/{optimizers → adaptive}/muon.py +29 -5
torchzero/modules/adaptive/natural_gradient.py +175 -0
torchzero/modules/{optimizers → adaptive}/orthograd.py +1 -1
torchzero/modules/{optimizers → adaptive}/rmsprop.py +7 -4
torchzero/modules/{optimizers → adaptive}/rprop.py +42 -10
torchzero/modules/adaptive/sam.py +163 -0
torchzero/modules/{optimizers → adaptive}/shampoo.py +47 -9
torchzero/modules/{optimizers → adaptive}/soap.py +52 -65
torchzero/modules/adaptive/sophia_h.py +185 -0
torchzero/modules/clipping/clipping.py +115 -25
torchzero/modules/clipping/ema_clipping.py +31 -17
torchzero/modules/clipping/growth_clipping.py +8 -7
torchzero/modules/conjugate_gradient/__init__.py +11 -0
torchzero/modules/conjugate_gradient/cg.py +355 -0
torchzero/modules/experimental/__init__.py +13 -19
torchzero/modules/{projections → experimental}/dct.py +11 -11
torchzero/modules/{projections → experimental}/fft.py +10 -10
torchzero/modules/experimental/gradmin.py +4 -3
torchzero/modules/experimental/l_infinity.py +111 -0
torchzero/modules/{momentum/experimental.py → experimental/momentum.py} +5 -42
torchzero/modules/experimental/newton_solver.py +79 -17
torchzero/modules/experimental/newtonnewton.py +32 -15
torchzero/modules/experimental/reduce_outward_lr.py +4 -4
torchzero/modules/experimental/scipy_newton_cg.py +105 -0
torchzero/modules/{projections/structural.py → experimental/structural_projections.py} +13 -55
torchzero/modules/functional.py +52 -6
torchzero/modules/grad_approximation/fdm.py +30 -4
torchzero/modules/grad_approximation/forward_gradient.py +16 -4
torchzero/modules/grad_approximation/grad_approximator.py +51 -10
torchzero/modules/grad_approximation/rfdm.py +321 -52
torchzero/modules/higher_order/__init__.py +1 -1
torchzero/modules/higher_order/higher_order_newton.py +164 -93
torchzero/modules/least_squares/__init__.py +1 -0
torchzero/modules/least_squares/gn.py +161 -0
torchzero/modules/line_search/__init__.py +4 -4
torchzero/modules/line_search/_polyinterp.py +289 -0
torchzero/modules/line_search/adaptive.py +124 -0
torchzero/modules/line_search/backtracking.py +95 -57
torchzero/modules/line_search/line_search.py +171 -22
torchzero/modules/line_search/scipy.py +3 -3
torchzero/modules/line_search/strong_wolfe.py +327 -199
torchzero/modules/misc/__init__.py +35 -0
torchzero/modules/misc/debug.py +48 -0
torchzero/modules/misc/escape.py +62 -0
torchzero/modules/misc/gradient_accumulation.py +136 -0
torchzero/modules/misc/homotopy.py +59 -0
torchzero/modules/misc/misc.py +383 -0
torchzero/modules/misc/multistep.py +194 -0
torchzero/modules/misc/regularization.py +167 -0
torchzero/modules/misc/split.py +123 -0
torchzero/modules/{ops → misc}/switch.py +45 -4
torchzero/modules/momentum/__init__.py +1 -5
torchzero/modules/momentum/averaging.py +9 -9
torchzero/modules/momentum/cautious.py +51 -19
torchzero/modules/momentum/momentum.py +37 -2
torchzero/modules/ops/__init__.py +11 -31
torchzero/modules/ops/accumulate.py +6 -10
torchzero/modules/ops/binary.py +81 -34
torchzero/modules/{momentum/ema.py → ops/higher_level.py} +16 -39
torchzero/modules/ops/multi.py +82 -21
torchzero/modules/ops/reduce.py +16 -8
torchzero/modules/ops/unary.py +29 -13
torchzero/modules/ops/utility.py +30 -18
torchzero/modules/projections/__init__.py +2 -4
torchzero/modules/projections/cast.py +51 -0
torchzero/modules/projections/galore.py +3 -1
torchzero/modules/projections/projection.py +190 -96
torchzero/modules/quasi_newton/__init__.py +9 -14
torchzero/modules/quasi_newton/damping.py +105 -0
torchzero/modules/quasi_newton/diagonal_quasi_newton.py +167 -0
torchzero/modules/quasi_newton/lbfgs.py +286 -173
torchzero/modules/quasi_newton/lsr1.py +185 -106
torchzero/modules/quasi_newton/quasi_newton.py +816 -268
torchzero/modules/restarts/__init__.py +7 -0
torchzero/modules/restarts/restars.py +252 -0
torchzero/modules/second_order/__init__.py +3 -2
torchzero/modules/second_order/multipoint.py +238 -0
torchzero/modules/second_order/newton.py +292 -68
torchzero/modules/second_order/newton_cg.py +365 -15
torchzero/modules/second_order/nystrom.py +104 -1
torchzero/modules/smoothing/__init__.py +1 -1
torchzero/modules/smoothing/laplacian.py +14 -4
torchzero/modules/smoothing/sampling.py +300 -0
torchzero/modules/step_size/__init__.py +2 -0
torchzero/modules/step_size/adaptive.py +387 -0
torchzero/modules/step_size/lr.py +154 -0
torchzero/modules/termination/__init__.py +14 -0
torchzero/modules/termination/termination.py +207 -0
torchzero/modules/trust_region/__init__.py +5 -0
torchzero/modules/trust_region/cubic_regularization.py +170 -0
torchzero/modules/trust_region/dogleg.py +92 -0
torchzero/modules/trust_region/levenberg_marquardt.py +128 -0
torchzero/modules/trust_region/trust_cg.py +97 -0
torchzero/modules/trust_region/trust_region.py +350 -0
torchzero/modules/variance_reduction/__init__.py +1 -0
torchzero/modules/variance_reduction/svrg.py +208 -0
torchzero/modules/weight_decay/__init__.py +1 -1
torchzero/modules/weight_decay/weight_decay.py +94 -11
torchzero/modules/wrappers/optim_wrapper.py +29 -1
torchzero/modules/zeroth_order/__init__.py +1 -0
torchzero/modules/zeroth_order/cd.py +359 -0
torchzero/optim/root.py +65 -0
torchzero/optim/utility/split.py +8 -8
torchzero/optim/wrappers/directsearch.py +39 -3
torchzero/optim/wrappers/fcmaes.py +24 -15
torchzero/optim/wrappers/mads.py +5 -6
torchzero/optim/wrappers/nevergrad.py +16 -1
torchzero/optim/wrappers/nlopt.py +0 -2
torchzero/optim/wrappers/optuna.py +3 -3
torchzero/optim/wrappers/scipy.py +86 -25
torchzero/utils/__init__.py +40 -4
torchzero/utils/compile.py +1 -1
torchzero/utils/derivatives.py +126 -114
torchzero/utils/linalg/__init__.py +9 -2
torchzero/utils/linalg/linear_operator.py +329 -0
torchzero/utils/linalg/matrix_funcs.py +2 -2
torchzero/utils/linalg/orthogonalize.py +2 -1
torchzero/utils/linalg/qr.py +2 -2
torchzero/utils/linalg/solve.py +369 -58
torchzero/utils/metrics.py +83 -0
torchzero/utils/numberlist.py +2 -0
torchzero/utils/python_tools.py +16 -0
torchzero/utils/tensorlist.py +134 -51
torchzero/utils/torch_tools.py +9 -4
torchzero-0.3.13.dist-info/METADATA +14 -0
torchzero-0.3.13.dist-info/RECORD +166 -0
{torchzero-0.3.10.dist-info → torchzero-0.3.13.dist-info}/top_level.txt +0 -1
docs/source/conf.py +0 -57
torchzero/modules/experimental/absoap.py +0 -250
torchzero/modules/experimental/adadam.py +0 -112
torchzero/modules/experimental/adamY.py +0 -125
torchzero/modules/experimental/adasoap.py +0 -172
torchzero/modules/experimental/diagonal_higher_order_newton.py +0 -225
torchzero/modules/experimental/eigendescent.py +0 -117
torchzero/modules/experimental/etf.py +0 -172
torchzero/modules/experimental/soapy.py +0 -163
torchzero/modules/experimental/structured_newton.py +0 -111
torchzero/modules/experimental/subspace_preconditioners.py +0 -138
torchzero/modules/experimental/tada.py +0 -38
torchzero/modules/line_search/trust_region.py +0 -73
torchzero/modules/lr/__init__.py +0 -2
torchzero/modules/lr/adaptive.py +0 -93
torchzero/modules/lr/lr.py +0 -63
torchzero/modules/momentum/matrix_momentum.py +0 -166
torchzero/modules/ops/debug.py +0 -25
torchzero/modules/ops/misc.py +0 -418
torchzero/modules/ops/split.py +0 -75
torchzero/modules/optimizers/__init__.py +0 -18
torchzero/modules/optimizers/adagrad.py +0 -155
torchzero/modules/optimizers/sophia_h.py +0 -129
torchzero/modules/quasi_newton/cg.py +0 -268
torchzero/modules/quasi_newton/experimental/__init__.py +0 -1
torchzero/modules/quasi_newton/experimental/modular_lbfgs.py +0 -266
torchzero/modules/quasi_newton/olbfgs.py +0 -196
torchzero/modules/smoothing/gaussian.py +0 -164
torchzero-0.3.10.dist-info/METADATA +0 -379
torchzero-0.3.10.dist-info/RECORD +0 -139
torchzero-0.3.10.dist-info/licenses/LICENSE +0 -21
{torchzero-0.3.10.dist-info → torchzero-0.3.13.dist-info}/WHEEL +0 -0

torchzero/utils/linalg/solve.py CHANGED Viewed

@@ -1,69 +1,32 @@
+# pyright: reportArgumentType=false
+import math
+from collections import deque
 from collections.abc import Callable
-from typing import overload
+from typing import Any, NamedTuple, overload
 import torch
-from .. import TensorList, generic_zeros_like, generic_vector_norm, generic_numel, generic_randn_like, generic_eq
+from .. import (
+    TensorList,
+    generic_eq,
+    generic_finfo_tiny,
+    generic_numel,
+    generic_vector_norm,
+    generic_zeros_like,
+)
-@overload
-def cg(
-    A_mm: Callable[[torch.Tensor], torch.Tensor],
-    b: torch.Tensor,
-    x0_: torch.Tensor | None = None,
-    tol: float | None = 1e-4,
-    maxiter: int | None = None,
-    reg: float = 0,
-) -> torch.Tensor: ...
-@overload
-def cg(
-    A_mm: Callable[[TensorList], TensorList],
-    b: TensorList,
-    x0_: TensorList | None = None,
-    tol: float | None = 1e-4,
-    maxiter: int | None = None,
-    reg: float | list[float] | tuple[float] = 0,
-) -> TensorList: ...
-def cg(
-    A_mm: Callable,
-    b: torch.Tensor | TensorList,
-    x0_: torch.Tensor | TensorList | None = None,
-    tol: float | None = 1e-4,
-    maxiter: int | None = None,
-    reg: float | list[float] | tuple[float] = 0,
-):
+def _make_A_mm_reg(A_mm: Callable, reg):
+    """Wrap ``A_mm`` into a callable that computes ``A_mm(x) + x*reg``.
+    When ``generic_eq(reg, 0)`` holds, the product ``A_mm(x)`` is returned without modification.
+    """
     def A_mm_reg(x): # A_mm with regularization
         Ax = A_mm(x)
+        # NOTE(review): ``+=`` updates the object returned by ``A_mm`` in place - confirm ``A_mm``
+        # never hands back ``x`` itself or a tensor it keeps cached
         if not generic_eq(reg, 0): Ax += x*reg
         return Ax
+    return A_mm_reg
-    if maxiter is None: maxiter = generic_numel(b)
-    if x0_ is None: x0_ = generic_zeros_like(b)
-    x = x0_
-    residual = b - A_mm_reg(x)
-    p = residual.clone() # search direction
-    r_norm = generic_vector_norm(residual)
-    init_norm = r_norm
-    if tol is not None and r_norm < tol: return x
-    k = 0
-    while True:
-        Ap = A_mm_reg(p)
-        step_size = (r_norm**2) / p.dot(Ap)
-        x += step_size * p # Update solution
-        residual -= step_size * Ap # Update residual
-        new_r_norm = generic_vector_norm(residual)
-        k += 1
-        if tol is not None and new_r_norm <= tol * init_norm: return x
-        if k >= maxiter: return x
-        beta = (new_r_norm**2) / (r_norm**2)
-        p = residual + beta*p
-        r_norm = new_r_norm
+def _identity(x):
+    """Return ``x`` unchanged (stand-in used when no preconditioner is supplied)."""
+    return x
-# https://arxiv.org/pdf/2110.02820 algorithm 2.1 apparently supposed to be diabolical
+# https://arxiv.org/pdf/2110.02820
 def nystrom_approximation(
     A_mm: Callable[[torch.Tensor], torch.Tensor],
     ndim: int,
@@ -85,7 +48,6 @@ def nystrom_approximation(
     lambd = (S.pow(2) - v).clip(min=0) #Remove shift, compute eigs
     return U, lambd
-# this one works worse
 def nystrom_sketch_and_solve(
     A_mm: Callable[[torch.Tensor], torch.Tensor],
     b: torch.Tensor,
@@ -111,7 +73,6 @@ def nystrom_sketch_and_solve(
     term2 = (1.0 / reg) * (b - U @ Uᵀb)
     return (term1 + term2).squeeze(-1)
-# this one is insane
 def nystrom_pcg(
     A_mm: Callable[[torch.Tensor], torch.Tensor],
     b: torch.Tensor,
@@ -131,6 +92,8 @@ def nystrom_pcg(
         generator=generator,
     )
     lambd += reg
+    eps = torch.finfo(b.dtype).tiny * 2
+    if tol is None: tol = eps
     def A_mm_reg(x): # A_mm with regularization
         Ax = A_mm(x)
@@ -150,7 +113,7 @@ def nystrom_pcg(
     p = z.clone() # search direction
     init_norm = torch.linalg.vector_norm(residual) # pylint:disable=not-callable
-    if tol is not None and init_norm < tol: return x
+    if init_norm < tol: return x
     k = 0
     while True:
         Ap = A_mm_reg(p)
@@ -160,10 +123,358 @@ def nystrom_pcg(
         residual -= step_size * Ap
         k += 1
-        if tol is not None and torch.linalg.vector_norm(residual) <= tol * init_norm: return x # pylint:disable=not-callable
+        if torch.linalg.vector_norm(residual) <= tol * init_norm: return x # pylint:disable=not-callable
         if k >= maxiter: return x
         z = P_inv @ residual
         beta = residual.dot(z) / rz
         p = z + p*beta
+def _safe_clip(x: torch.Tensor):
+    """Keep the magnitude of a single-element tensor at or above twice the ``tiny`` of its dtype.
+    A value whose absolute value is below that floor is replaced by the floor carrying the sign of ``x``;
+    any other value is returned as is.
+    """
+    assert x.numel() == 1, x.shape
+    floor = 2 * torch.finfo(x.dtype).tiny
+    too_small = x.abs() < floor
+    if too_small:
+        return torch.copysign(torch.full_like(x, floor), x)
+    return x
+def _trust_tau(x,d,trust_radius):
+    """Move from ``x`` along ``d`` onto the sphere of radius ``trust_radius``.
+    ``tau`` is the root with the plus sign of ``|x + tau*d|^2 = trust_radius^2``; a negative
+    discriminant is clipped to zero before the square root. Returns ``x + tau*d``.
+    """
+    x_dot_x = x.dot(x)
+    x_dot_d = x.dot(d)
+    # clipped away from zero because it is used as the divisor below
+    d_dot_d = _safe_clip(d.dot(d))
+    discriminant = x_dot_d**2 - d_dot_d * (x_dot_x - trust_radius**2)
+    root = discriminant.clip(min=0).sqrt()
+    step = (root - x_dot_d) / d_dot_d
+    return x + step * d
+class CG:
+    """Conjugate gradient method.
+    The solver state lives on the instance: ``step`` performs one iteration, ``solve`` loops until termination.
+    Args:
+        A_mm (Callable): Callable that returns matvec ``Ax``.
+        b (torch.Tensor | TensorList): right hand side
+        x0 (torch.Tensor | TensorList | None, optional): initial guess, defaults to zeros. Defaults to None.
+        tol (float | None, optional):
+            tolerance for convergence, compared directly against the residual norm.
+            ``None`` is replaced by ``generic_finfo_tiny(b) * 2``. Defaults to 1e-4.
+        maxiter (int | None, optional):
+            maximum number of iterations, if None sets to number of dimensions. Defaults to None.
+        reg (float, optional): regularization, ``x*reg`` is added to every matvec. Defaults to 0.
+        trust_radius (float | None, optional):
+            CG is terminated whenever solution exceeds trust region, returning a solution modified to be within it. Defaults to None.
+        npc_terminate (bool, optional):
+            whether to terminate CG whenever negative curvature is detected. Defaults to False.
+        miniter (int, optional):
+            minimal number of iterations even if tolerance is satisfied, this ensures some progress
+            is always made. Defaults to 0.
+        history_size (int, optional):
+            number of past iterations to store, to re-use them when trust radius is decreased.
+            No history is kept when this is 0. Defaults to 0.
+        P_mm (Callable | None, optional):
+            Callable that returns inverse preconditioner times vector. Defaults to None.
+    """
+    def __init__(
+        self,
+        A_mm: Callable,
+        b: torch.Tensor | TensorList,
+        x0: torch.Tensor | TensorList | None = None,
+        tol: float | None = 1e-4,
+        maxiter: int | None = None,
+        reg: float = 0,
+        trust_radius: float | None = None,
+        npc_terminate: bool=False,
+        miniter: int = 0,
+        history_size: int = 0,
+        P_mm: Callable | None = None,
+):
+        # --------------------------------- set attrs -------------------------------- #
+        # every matvec goes through the regularized operator ``A_mm(x) + x*reg``
+        self.A_mm = _make_A_mm_reg(A_mm, reg)
+        self.b = b
+        if tol is None: tol = generic_finfo_tiny(b) * 2
+        self.tol = tol
+        # threshold below which the curvature ``d.dot(Ad)`` is treated as non-positive
+        self.eps = generic_finfo_tiny(b) * 2
+        if maxiter is None: maxiter = generic_numel(b)
+        self.maxiter = maxiter
+        self.miniter = miniter
+        self.trust_radius = trust_radius
+        self.npc_terminate = npc_terminate
+        self.P_mm = P_mm if P_mm is not None else _identity
+        # history of (x, x_norm, d), or None when ``history_size`` is 0
+        if history_size > 0:
+            self.history = deque(maxlen = history_size)
+        else:
+            self.history = None
+        # -------------------------------- initialize -------------------------------- #
+        self.iter = 0
+        if x0 is None:
+            self.x = generic_zeros_like(b)
+            self.r = b
+        else:
+            self.x = x0
+            # the residual must use the same regularized operator as the iterations,
+            # otherwise it belongs to a different linear system whenever ``reg != 0``
+            self.r = b - self.A_mm(self.x)
+        self.z = self.P_mm(self.r)
+        self.d = self.z
+        if self.history is not None:
+            self.history.append((self.x, generic_vector_norm(self.x), self.d))
+    def step(self) -> tuple[Any, bool]:
+        """Perform one CG iteration, returns ``(solution, should_terminate)``.
+        When ``should_terminate`` is True the first element is the final solution.
+        """
+        x, b, d, r, z = self.x, self.b, self.d, self.r, self.z
+        if self.iter >= self.maxiter:
+            return x, True
+        Ad = self.A_mm(d)
+        dAd = d.dot(Ad)
+        # check negative curvature
+        if dAd <= self.eps:
+            # with a trust region, follow ``d`` to the boundary
+            if self.trust_radius is not None: return _trust_tau(x, d, self.trust_radius), True
+            # on the very first iteration there is no iterate to fall back to, return a rescaled ``b``
+            if self.iter == 0: return b * (b.dot(b) / dAd).abs(), True
+            if self.npc_terminate: return x, True
+            # otherwise the iteration continues with this direction
+        rz = r.dot(z)
+        alpha = rz / dAd
+        x_next = x + alpha * d
+        # check if the step exceeds the trust-region boundary
+        x_next_norm = None
+        if self.trust_radius is not None:
+            x_next_norm = generic_vector_norm(x_next)
+            if x_next_norm >= self.trust_radius:
+                return _trust_tau(x, d, self.trust_radius), True
+        # update step, residual and direction
+        r_next = r - alpha * Ad
+        # check if r is sufficiently small
+        if self.iter >= self.miniter and generic_vector_norm(r_next) < self.tol:
+            return x_next, True
+        # update d, r, z
+        z_next = self.P_mm(r_next)
+        beta = r_next.dot(z_next) / rz
+        self.d = z_next + beta * d
+        self.x = x_next
+        self.r = r_next
+        self.z = z_next
+        # update history
+        if self.history is not None:
+            if x_next_norm is None: x_next_norm = generic_vector_norm(x_next)
+            self.history.append((self.x, x_next_norm, self.d))
+        self.iter += 1
+        # NOTE(review): this is the iterate from before the update (``self.x`` already holds ``x_next``);
+        # ``solve`` ignores it while ``should_terminate`` is False - confirm no caller relies on it
+        return x, False
+    def solve(self):
+        """Run ``step`` until it signals termination and return the solution."""
+        # return initial guess if it is good enough
+        if self.miniter < 1 and generic_vector_norm(self.r) < self.tol:
+            return self.x
+        should_terminate = False
+        sol = None
+        while not should_terminate:
+            sol, should_terminate = self.step()
+        assert sol is not None
+        return sol
+def find_within_trust_radius(history, trust_radius: float):
+    """Search ``history`` from the most recent entry backwards for an ``x`` whose stored norm does not
+    exceed ``trust_radius`` and return ``_trust_tau(x, d, trust_radius)`` for it; if no such ``x`` exists,
+    returns ``None``. ``history`` is iterated as ``(x, x_norm, d)`` tuples."""
+    for x, x_norm, d in reversed(tuple(history)):
+        if x_norm <= trust_radius:
+            return _trust_tau(x, d, trust_radius)
+    return None
+class _TensorSolution(NamedTuple):
+    """Result returned by ``cg`` when ``b`` is a ``torch.Tensor``."""
+    x: torch.Tensor # value returned by ``solver.solve()``
+    solver: CG # the ``CG`` instance that produced ``x``
+class _TensorListSolution(NamedTuple):
+    """Result returned by ``cg`` when ``b`` is not a ``torch.Tensor``."""
+    x: TensorList # value returned by ``solver.solve()``
+    solver: CG # the ``CG`` instance that produced ``x``
+@overload
+def cg(
+    A_mm: Callable[[torch.Tensor], torch.Tensor],
+    b: torch.Tensor,
+    x0: torch.Tensor | None = None,
+    tol: float | None = 1e-8,
+    maxiter: int | None = None,
+    reg: float = 0,
+    trust_radius: float | None = None,
+    npc_terminate: bool = False,
+    miniter: int = 0,
+    history_size: int = 0,
+    P_mm: Callable[[torch.Tensor], torch.Tensor] | None = None
+) -> _TensorSolution: ...
+@overload
+def cg(
+    A_mm: Callable[[TensorList], TensorList],
+    b: TensorList,
+    x0: TensorList | None = None,
+    tol: float | None = 1e-8,
+    maxiter: int | None = None,
+    reg: float | list[float] | tuple[float] = 0,
+    trust_radius: float | None = None,
+    npc_terminate: bool=False,
+    miniter: int = 0,
+    history_size: int = 0,
+    P_mm: Callable[[TensorList], TensorList] | None = None
+) -> _TensorListSolution: ...
+def cg(
+    A_mm: Callable,
+    b: torch.Tensor | TensorList,
+    x0: torch.Tensor | TensorList | None = None,
+    tol: float | None = 1e-8,
+    maxiter: int | None = None,
+    reg: float | list[float] | tuple[float] = 0,
+    trust_radius: float | None = None,
+    npc_terminate: bool = False,
+    miniter: int = 0,
+    history_size:int = 0,
+    P_mm: Callable | None = None
+):
+    """Build a ``CG`` solver from the arguments, run it to termination and return ``(x, solver)``.
+    The result is a ``_TensorSolution`` when ``b`` is a ``torch.Tensor`` and a ``_TensorListSolution``
+    otherwise; all arguments are forwarded to ``CG`` unchanged.
+    """
+    settings = dict(
+        x0=x0,
+        tol=tol,
+        maxiter=maxiter,
+        reg=reg,
+        trust_radius=trust_radius,
+        npc_terminate=npc_terminate,
+        miniter=miniter,
+        history_size=history_size,
+        P_mm=P_mm,
+    )
+    solver = CG(A_mm=A_mm, b=b, **settings)
+    solution = solver.solve()
+    # the wrapper type only depends on what kind of right hand side was passed
+    result_cls = _TensorSolution if isinstance(b, torch.Tensor) else _TensorListSolution
+    return result_cls(solution, solver)
+# Liu, Yang, and Fred Roosta. "MINRES: From negative curvature detection to monotonicity properties." SIAM Journal on Optimization 32.4 (2022): 2636-2661.
+@overload
+def minres(
+    A_mm: Callable[[torch.Tensor], torch.Tensor] | torch.Tensor,
+    b: torch.Tensor,
+    x0: torch.Tensor | None = None,
+    tol: float | None = 1e-4,
+    maxiter: int | None = None,
+    reg: float = 0,
+    npc_terminate: bool=True,
+    trust_radius: float | None = None,
+) -> torch.Tensor: ...
+@overload
+def minres(
+    A_mm: Callable[[TensorList], TensorList],
+    b: TensorList,
+    x0: TensorList | None = None,
+    tol: float | None = 1e-4,
+    maxiter: int | None = None,
+    reg: float | list[float] | tuple[float] = 0,
+    npc_terminate: bool=True,
+    trust_radius: float | None = None,
+) -> TensorList: ...
+def minres(
+    A_mm,
+    b,
+    x0: torch.Tensor | TensorList | None = None,
+    tol: float | None = 1e-4,
+    maxiter: int | None = None,
+    reg: float | list[float] | tuple[float] = 0,
+    npc_terminate: bool=True,
+    trust_radius: float | None = None, #trust region is experimental
+):
+    """MINRES iteration for ``A x = b`` with optional regularization, negative-curvature and trust-region exits.
+    Args:
+        A_mm: callable that returns matvec ``Ax``; it is wrapped by ``_make_A_mm_reg`` together with ``reg``.
+            NOTE(review): the first overload also admits a ``torch.Tensor`` here, but the value is only
+            ever called - confirm whether tensors are meant to be supported.
+        b: right hand side.
+        x0: initial guess, zeros if None.
+        tol: iteration stops once ``phi / b_norm <= tol``; None is replaced by ``eps``
+            (``sqrt(generic_finfo_tiny(b) * 2)``).
+        maxiter: maximum number of iterations, ``generic_numel(b)`` if None.
+        reg: regularization passed to ``_make_A_mm_reg``.
+        npc_terminate: if True, return as soon as ``c*gamma1 >= 0`` (presumably the non-positive
+            curvature test of the referenced paper - TODO confirm); the returned value is then ``R``,
+            or ``_trust_tau(X, R, trust_radius)`` when a trust radius is set.
+        trust_radius: experimental; when set, results are passed through ``_trust_tau``.
+    Returns:
+        the final iterate ``X``, one of the early-exit values described above, or zeros when the norm of ``b``
+        is below ``eps**2``.
+    """
+    A_mm_reg = _make_A_mm_reg(A_mm, reg)
+    eps = math.sqrt(generic_finfo_tiny(b) * 2)
+    if tol is None: tol = eps
+    if maxiter is None: maxiter = generic_numel(b)
+    if x0 is None:
+        R = b
+        x0 = generic_zeros_like(b)
+    else:
+        R = b - A_mm_reg(x0)
+    X: Any = x0
+    # NOTE(review): ``beta`` and the first basis vector ``V`` below are built from ``b`` even when ``x0`` is given
+    # and ``R`` differs from ``b`` - confirm this is intended for a non-zero initial guess
+    beta = b_norm = generic_vector_norm(b)
+    if b_norm < eps**2:
+        return generic_zeros_like(b)
+    V = b / beta
+    V_prev = generic_zeros_like(b)
+    D = generic_zeros_like(b)
+    D_prev = generic_zeros_like(b)
+    c = -1
+    phi = tau = beta
+    s = delta1 = e = 0
+    for _ in range(maxiter):
+        # three-term recurrence: remove the components along the two latest basis vectors from ``A V``
+        P = A_mm_reg(V)
+        alpha = V.dot(P)
+        P -= beta*V_prev
+        P -= alpha*V
+        beta = generic_vector_norm(P)
+        delta2 = c*delta1 + s*alpha
+        gamma1 = s*delta1 - c*alpha
+        e_next = s*beta
+        delta1 = -c*beta
+        cgamma1 = c*gamma1
+        if trust_radius is not None and cgamma1 >= 0:
+            if npc_terminate: return _trust_tau(X, R, trust_radius)
+            return _trust_tau(X, D, trust_radius)
+        if npc_terminate and cgamma1 >= 0:
+            return R
+        gamma2 = (gamma1**2 + beta**2)**(1/2)
+        if abs(gamma2) <= eps: # singular system
+            # c=0; s=1; tau=0
+            if trust_radius is None: return X
+            return _trust_tau(X, D, trust_radius)
+        c = gamma1 / gamma2
+        s = beta/gamma2
+        tau = c*phi
+        phi = s*phi
+        D_prev = D
+        D = (V - delta2*D - e*D_prev) / gamma2
+        e = e_next
+        X = X + tau*D
+        if trust_radius is not None:
+            # NOTE(review): ``X`` already lies outside the radius when this branch is taken - confirm that
+            # starting ``_trust_tau`` from the new ``X`` rather than the previous iterate is intended
+            if generic_vector_norm(X) > trust_radius:
+                return _trust_tau(X, D, trust_radius)
+        if (abs(beta) < eps) or (phi / b_norm <= tol):
+            # R = zeros(R)
+            return X
+        V_prev = V
+        V = P/beta
+        R = s**2*R - phi*c*V
+    return X

torchzero/utils/metrics.py ADDED Viewed

@@ -0,0 +1,83 @@
+"""convenience submodule which allows to calculate a metric based on its string name,
+used in many places"""
+from abc import ABC, abstractmethod
+from collections.abc import Sequence
+from typing import TYPE_CHECKING, Any, Literal, overload
+import torch
+if TYPE_CHECKING:
+    from .tensorlist import TensorList
+class Metric(ABC):
+    """Abstract interface of a metric that can be evaluated on a tensor or on a ``TensorList``."""
+    @abstractmethod
+    def evaluate_global(self, x: "TensorList") -> torch.Tensor:
+        """returns a global metric for a tensorlist"""
+    @abstractmethod
+    def evaluate_tensor(self, x: torch.Tensor, dim=None, keepdim=False) -> torch.Tensor:
+        """returns metric for a tensor"""
+    def evaluate_list(self, x: "TensorList") -> "TensorList":
+        """returns list of metrics for a tensorlist (possibly vectorized)"""
+        # default implementation: apply ``evaluate_tensor`` through ``x.map``; subclasses may override
+        return x.map(self.evaluate_tensor)
+class _MAD(Metric):
+    """Mean of absolute values."""
+    def evaluate_global(self, x):
+        magnitudes = x.abs()
+        return magnitudes.global_mean()
+    def evaluate_tensor(self, x, dim=None, keepdim=False):
+        magnitudes = x.abs()
+        return magnitudes.mean(dim=dim, keepdim=keepdim)
+    def evaluate_list(self, x):
+        magnitudes = x.abs()
+        return magnitudes.mean()
+class _Std(Metric):
+    """Standard deviation, delegating to the ``std`` / ``global_std`` methods of the input."""
+    def evaluate_global(self, x):
+        return x.global_std()
+    def evaluate_tensor(self, x, dim=None, keepdim=False):
+        options = {"dim": dim, "keepdim": keepdim}
+        return x.std(**options)
+    def evaluate_list(self, x):
+        return x.std()
+class _Var(Metric):
+    """Variance, delegating to the ``var`` / ``global_var`` methods of the input."""
+    def evaluate_global(self, x):
+        return x.global_var()
+    def evaluate_tensor(self, x, dim=None, keepdim=False):
+        options = {"dim": dim, "keepdim": keepdim}
+        return x.var(**options)
+    def evaluate_list(self, x):
+        return x.var()
+class _Sum(Metric):
+    """Sum of elements, delegating to the ``sum`` / ``global_sum`` methods of the input."""
+    def evaluate_global(self, x):
+        return x.global_sum()
+    def evaluate_tensor(self, x, dim=None, keepdim=False):
+        options = {"dim": dim, "keepdim": keepdim}
+        return x.sum(**options)
+    def evaluate_list(self, x):
+        return x.sum()
+class _Norm(Metric):
+    """Vector norm of the fixed order ``ord`` given at construction."""
+    def __init__(self, ord):
+        self.ord = ord
+    def evaluate_global(self, x):
+        order = self.ord
+        return x.global_vector_norm(order)
+    def evaluate_tensor(self, x, dim=None, keepdim=False):
+        order = self.ord
+        return torch.linalg.vector_norm(x, ord=order, dim=dim, keepdim=keepdim) # pylint:disable=not-callable
+    def evaluate_list(self, x):
+        order = self.ord
+        return x.norm(order)
+# string names under which a metric can be requested
+_METRIC_KEYS = Literal['mad', 'std', 'var', 'sum', 'l0', 'l1', 'l2', 'l3', 'l4', 'linf']
+# registry with one shared ``Metric`` instance per name
+_METRICS: dict[_METRIC_KEYS, Metric] = {
+    "mad": _MAD(),
+    "std": _Std(),
+    "var": _Var(),
+    "sum": _Sum(),
+    "l0": _Norm(0),
+    "l1": _Norm(1),
+    "l2": _Norm(2),
+    "l3": _Norm(3),
+    "l4": _Norm(4),
+    "linf": _Norm(torch.inf),
+}
+# a metric is either one of the names above, or a number / tensor that the functions below use as a norm order
+Metrics = _METRIC_KEYS | float | torch.Tensor
+def evaluate_metric(x: "torch.Tensor | TensorList", metric: Metrics) -> torch.Tensor:
+    """Evaluate ``metric`` over all of ``x`` and return a single value.
+    Args:
+        x: a tensor or a ``TensorList``.
+        metric: a key of ``_METRICS``, or a number / tensor which is used as the order of a vector norm.
+    Returns:
+        for a numeric ``metric`` the vector norm of that order (``global_vector_norm`` for a ``TensorList``),
+        otherwise the result of the registered metric's ``evaluate_tensor`` / ``evaluate_global``.
+    Raises:
+        KeyError: if ``metric`` is not numeric and not a key of ``_METRICS``.
+    """
+    if isinstance(metric, (int, float, torch.Tensor)):
+        # normalize the order to a plain float for both input kinds, ``ord`` is documented to take numbers
+        order = float(metric)
+        if isinstance(x, torch.Tensor): return torch.linalg.vector_norm(x, ord=order) # pylint:disable=not-callable
+        return x.global_vector_norm(ord=order)
+    if isinstance(x, torch.Tensor): return _METRICS[metric].evaluate_tensor(x)
+    return _METRICS[metric].evaluate_global(x)
+def calculate_metric_list(x: "TensorList", metric: Metrics) -> "TensorList":
+    """Evaluate ``metric`` on ``x`` through the list-wise methods.
+    A number or tensor is used as a norm order via ``x.norm(ord=float(metric))``; anything else is looked up
+    in ``_METRICS`` and evaluated with ``evaluate_list``.
+    """
+    is_norm_order = isinstance(metric, (int, float, torch.Tensor))
+    if not is_norm_order:
+        named_metric = _METRICS[metric]
+        return named_metric.evaluate_list(x)
+    return x.norm(ord=float(metric))

torchzero/utils/numberlist.py CHANGED Viewed

@@ -129,4 +129,6 @@ class NumberList(list[int | float | Any]):
         return self.__class__(fn(i, *args, **kwargs) for i in self)
     def clamp(self, min=None, max=None):
+        """Apply ``_clamp`` with ``min`` and ``max`` through ``zipmap_args`` and return the result; ``clip`` does the same."""
+        return self.zipmap_args(_clamp, min, max)
+    def clip(self, min=None, max=None):
+        """Same behavior as ``clamp``: apply ``_clamp`` with ``min`` and ``max`` through ``zipmap_args``."""
         return self.zipmap_args(_clamp, min, max)

torchzero/utils/python_tools.py CHANGED Viewed

@@ -31,6 +31,16 @@ def generic_eq(x: int | float | Iterable[int | float], y: int | float | Iterable
         return all(i==y for i in x)
     return all(i==j for i,j in zip(x,y))
+def generic_ne(x: int | float | Iterable[int | float], y: int | float | Iterable[int | float]) -> bool:
+    """Not-equals for scalars and iterables of numbers.
+    Two scalars are compared directly; a scalar against an iterable is True if any element differs from the
+    scalar; two iterables are compared pairwise via ``zip`` and the result is True if any pair differs.
+    """
+    x_is_scalar = isinstance(x, (int, float))
+    y_is_scalar = isinstance(y, (int, float))
+    if x_is_scalar and y_is_scalar:
+        return x != y
+    if x_is_scalar:
+        return any(item != x for item in y)
+    if y_is_scalar:
+        return any(item != y for item in x)
+    return any(left != right for left, right in zip(x, y))
 def zipmap(self, fn: Callable, other: Any | list | tuple, *args, **kwargs):
     """If `other` is list/tuple, applies `fn` to self zipped with `other`.
     Otherwise applies `fn` to this sequence and `other`.
@@ -51,3 +61,9 @@ def unpack_dicts(dicts: Iterable[Mapping[str, Any]], key:str, key2: str | None =
     values = [cls(s[k] for s in dicts) for k in keys] # pyright:ignore[reportCallIssue]
     if len(values) == 1: return values[0]
     return values
+def safe_dict_update_(d1_:dict, d2:dict):
+    """Merge ``d2`` into ``d1_`` in place, refusing to overwrite existing keys.
+    Raises:
+        RuntimeError: if the two dicts share at least one key; ``d1_`` is not modified in that case.
+    """
+    clashes = set(d1_).intersection(d2)
+    if clashes:
+        raise RuntimeError(f"Duplicate keys {clashes}")
+    d1_.update(d2)

torchzero 0.3.10__py3-none-any.whl → 0.3.13__py3-none-any.whl

torchzero 0.3.10__py3-none-any.whl → 0.3.13__py3-none-any.whl