torchzero 0.3.11__py3-none-any.whl → 0.3.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161)
  1. tests/test_opts.py +95 -69
  2. tests/test_tensorlist.py +8 -7
  3. torchzero/__init__.py +1 -1
  4. torchzero/core/__init__.py +2 -2
  5. torchzero/core/module.py +225 -72
  6. torchzero/core/reformulation.py +65 -0
  7. torchzero/core/transform.py +44 -24
  8. torchzero/modules/__init__.py +13 -5
  9. torchzero/modules/{optimizers → adaptive}/__init__.py +5 -2
  10. torchzero/modules/adaptive/adagrad.py +356 -0
  11. torchzero/modules/{optimizers → adaptive}/adahessian.py +53 -52
  12. torchzero/modules/{optimizers → adaptive}/adam.py +0 -3
  13. torchzero/modules/{optimizers → adaptive}/adan.py +26 -40
  14. torchzero/modules/{optimizers → adaptive}/adaptive_heavyball.py +3 -6
  15. torchzero/modules/adaptive/aegd.py +54 -0
  16. torchzero/modules/{optimizers → adaptive}/esgd.py +1 -1
  17. torchzero/modules/{optimizers/ladagrad.py → adaptive/lmadagrad.py} +42 -39
  18. torchzero/modules/{optimizers → adaptive}/mars.py +24 -36
  19. torchzero/modules/adaptive/matrix_momentum.py +146 -0
  20. torchzero/modules/{optimizers → adaptive}/msam.py +14 -12
  21. torchzero/modules/{optimizers → adaptive}/muon.py +19 -20
  22. torchzero/modules/adaptive/natural_gradient.py +175 -0
  23. torchzero/modules/{optimizers → adaptive}/rprop.py +0 -2
  24. torchzero/modules/{optimizers → adaptive}/sam.py +1 -1
  25. torchzero/modules/{optimizers → adaptive}/shampoo.py +8 -4
  26. torchzero/modules/{optimizers → adaptive}/soap.py +27 -50
  27. torchzero/modules/{optimizers → adaptive}/sophia_h.py +2 -3
  28. torchzero/modules/clipping/clipping.py +85 -92
  29. torchzero/modules/clipping/ema_clipping.py +5 -5
  30. torchzero/modules/conjugate_gradient/__init__.py +11 -0
  31. torchzero/modules/{quasi_newton → conjugate_gradient}/cg.py +355 -369
  32. torchzero/modules/experimental/__init__.py +9 -32
  33. torchzero/modules/experimental/dct.py +2 -2
  34. torchzero/modules/experimental/fft.py +2 -2
  35. torchzero/modules/experimental/gradmin.py +4 -3
  36. torchzero/modules/experimental/l_infinity.py +111 -0
  37. torchzero/modules/{momentum/experimental.py → experimental/momentum.py} +3 -40
  38. torchzero/modules/experimental/newton_solver.py +79 -17
  39. torchzero/modules/experimental/newtonnewton.py +27 -14
  40. torchzero/modules/experimental/scipy_newton_cg.py +105 -0
  41. torchzero/modules/experimental/structural_projections.py +1 -1
  42. torchzero/modules/functional.py +50 -14
  43. torchzero/modules/grad_approximation/fdm.py +19 -20
  44. torchzero/modules/grad_approximation/forward_gradient.py +4 -2
  45. torchzero/modules/grad_approximation/grad_approximator.py +43 -47
  46. torchzero/modules/grad_approximation/rfdm.py +144 -122
  47. torchzero/modules/higher_order/__init__.py +1 -1
  48. torchzero/modules/higher_order/higher_order_newton.py +31 -23
  49. torchzero/modules/least_squares/__init__.py +1 -0
  50. torchzero/modules/least_squares/gn.py +161 -0
  51. torchzero/modules/line_search/__init__.py +2 -2
  52. torchzero/modules/line_search/_polyinterp.py +289 -0
  53. torchzero/modules/line_search/adaptive.py +69 -44
  54. torchzero/modules/line_search/backtracking.py +83 -70
  55. torchzero/modules/line_search/line_search.py +159 -68
  56. torchzero/modules/line_search/scipy.py +1 -1
  57. torchzero/modules/line_search/strong_wolfe.py +319 -218
  58. torchzero/modules/misc/__init__.py +8 -0
  59. torchzero/modules/misc/debug.py +4 -4
  60. torchzero/modules/misc/escape.py +9 -7
  61. torchzero/modules/misc/gradient_accumulation.py +88 -22
  62. torchzero/modules/misc/homotopy.py +59 -0
  63. torchzero/modules/misc/misc.py +82 -15
  64. torchzero/modules/misc/multistep.py +47 -11
  65. torchzero/modules/misc/regularization.py +5 -9
  66. torchzero/modules/misc/split.py +55 -35
  67. torchzero/modules/misc/switch.py +1 -1
  68. torchzero/modules/momentum/__init__.py +1 -5
  69. torchzero/modules/momentum/averaging.py +3 -3
  70. torchzero/modules/momentum/cautious.py +42 -47
  71. torchzero/modules/momentum/momentum.py +35 -1
  72. torchzero/modules/ops/__init__.py +9 -1
  73. torchzero/modules/ops/binary.py +9 -8
  74. torchzero/modules/{momentum/ema.py → ops/higher_level.py} +10 -33
  75. torchzero/modules/ops/multi.py +15 -15
  76. torchzero/modules/ops/reduce.py +1 -1
  77. torchzero/modules/ops/utility.py +12 -8
  78. torchzero/modules/projections/projection.py +4 -4
  79. torchzero/modules/quasi_newton/__init__.py +1 -16
  80. torchzero/modules/quasi_newton/damping.py +105 -0
  81. torchzero/modules/quasi_newton/diagonal_quasi_newton.py +167 -163
  82. torchzero/modules/quasi_newton/lbfgs.py +256 -200
  83. torchzero/modules/quasi_newton/lsr1.py +167 -132
  84. torchzero/modules/quasi_newton/quasi_newton.py +346 -446
  85. torchzero/modules/restarts/__init__.py +7 -0
  86. torchzero/modules/restarts/restars.py +252 -0
  87. torchzero/modules/second_order/__init__.py +2 -1
  88. torchzero/modules/second_order/multipoint.py +238 -0
  89. torchzero/modules/second_order/newton.py +133 -88
  90. torchzero/modules/second_order/newton_cg.py +141 -80
  91. torchzero/modules/smoothing/__init__.py +1 -1
  92. torchzero/modules/smoothing/sampling.py +300 -0
  93. torchzero/modules/step_size/__init__.py +1 -1
  94. torchzero/modules/step_size/adaptive.py +312 -47
  95. torchzero/modules/termination/__init__.py +14 -0
  96. torchzero/modules/termination/termination.py +207 -0
  97. torchzero/modules/trust_region/__init__.py +5 -0
  98. torchzero/modules/trust_region/cubic_regularization.py +170 -0
  99. torchzero/modules/trust_region/dogleg.py +92 -0
  100. torchzero/modules/trust_region/levenberg_marquardt.py +128 -0
  101. torchzero/modules/trust_region/trust_cg.py +97 -0
  102. torchzero/modules/trust_region/trust_region.py +350 -0
  103. torchzero/modules/variance_reduction/__init__.py +1 -0
  104. torchzero/modules/variance_reduction/svrg.py +208 -0
  105. torchzero/modules/weight_decay/weight_decay.py +65 -64
  106. torchzero/modules/zeroth_order/__init__.py +1 -0
  107. torchzero/modules/zeroth_order/cd.py +359 -0
  108. torchzero/optim/root.py +65 -0
  109. torchzero/optim/utility/split.py +8 -8
  110. torchzero/optim/wrappers/directsearch.py +0 -1
  111. torchzero/optim/wrappers/fcmaes.py +3 -2
  112. torchzero/optim/wrappers/nlopt.py +0 -2
  113. torchzero/optim/wrappers/optuna.py +2 -2
  114. torchzero/optim/wrappers/scipy.py +81 -22
  115. torchzero/utils/__init__.py +40 -4
  116. torchzero/utils/compile.py +1 -1
  117. torchzero/utils/derivatives.py +123 -111
  118. torchzero/utils/linalg/__init__.py +9 -2
  119. torchzero/utils/linalg/linear_operator.py +329 -0
  120. torchzero/utils/linalg/matrix_funcs.py +2 -2
  121. torchzero/utils/linalg/orthogonalize.py +2 -1
  122. torchzero/utils/linalg/qr.py +2 -2
  123. torchzero/utils/linalg/solve.py +226 -154
  124. torchzero/utils/metrics.py +83 -0
  125. torchzero/utils/python_tools.py +6 -0
  126. torchzero/utils/tensorlist.py +105 -34
  127. torchzero/utils/torch_tools.py +9 -4
  128. torchzero-0.3.13.dist-info/METADATA +14 -0
  129. torchzero-0.3.13.dist-info/RECORD +166 -0
  130. {torchzero-0.3.11.dist-info → torchzero-0.3.13.dist-info}/top_level.txt +0 -1
  131. docs/source/conf.py +0 -59
  132. docs/source/docstring template.py +0 -46
  133. torchzero/modules/experimental/absoap.py +0 -253
  134. torchzero/modules/experimental/adadam.py +0 -118
  135. torchzero/modules/experimental/adamY.py +0 -131
  136. torchzero/modules/experimental/adam_lambertw.py +0 -149
  137. torchzero/modules/experimental/adaptive_step_size.py +0 -90
  138. torchzero/modules/experimental/adasoap.py +0 -177
  139. torchzero/modules/experimental/cosine.py +0 -214
  140. torchzero/modules/experimental/cubic_adam.py +0 -97
  141. torchzero/modules/experimental/eigendescent.py +0 -120
  142. torchzero/modules/experimental/etf.py +0 -195
  143. torchzero/modules/experimental/exp_adam.py +0 -113
  144. torchzero/modules/experimental/expanded_lbfgs.py +0 -141
  145. torchzero/modules/experimental/hnewton.py +0 -85
  146. torchzero/modules/experimental/modular_lbfgs.py +0 -265
  147. torchzero/modules/experimental/parabolic_search.py +0 -220
  148. torchzero/modules/experimental/subspace_preconditioners.py +0 -145
  149. torchzero/modules/experimental/tensor_adagrad.py +0 -42
  150. torchzero/modules/line_search/polynomial.py +0 -233
  151. torchzero/modules/momentum/matrix_momentum.py +0 -193
  152. torchzero/modules/optimizers/adagrad.py +0 -165
  153. torchzero/modules/quasi_newton/trust_region.py +0 -397
  154. torchzero/modules/smoothing/gaussian.py +0 -198
  155. torchzero-0.3.11.dist-info/METADATA +0 -404
  156. torchzero-0.3.11.dist-info/RECORD +0 -159
  157. torchzero-0.3.11.dist-info/licenses/LICENSE +0 -21
  158. /torchzero/modules/{optimizers → adaptive}/lion.py +0 -0
  159. /torchzero/modules/{optimizers → adaptive}/orthograd.py +0 -0
  160. /torchzero/modules/{optimizers → adaptive}/rmsprop.py +0 -0
  161. {torchzero-0.3.11.dist-info → torchzero-0.3.13.dist-info}/WHEEL +0 -0
torchzero/utils/linalg/linear_operator.py (new file)
@@ -0,0 +1,329 @@
+ """Simplified version of https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.linalg.LinearOperator.html. This is used for trust regions."""
+ import math
+ from abc import ABC, abstractmethod
+ from functools import partial
+ from importlib.util import find_spec
+ from typing import cast, final
+
+ import torch
+
+ from ..torch_tools import tofloat, tonumpy, totensor
+
+ if find_spec('scipy') is not None:
+     from scipy.sparse.linalg import LinearOperator as _ScipyLinearOperator
+ else:
+     _ScipyLinearOperator = None
+
+ class LinearOperator(ABC):
+     """this is used for trust region"""
+     device: torch.types.Device
+     dtype: torch.dtype | None
+
+     def matvec(self, x: torch.Tensor) -> torch.Tensor:
+         raise NotImplementedError(f"{self.__class__.__name__} doesn't implement matvec")
+
+     def rmatvec(self, x: torch.Tensor) -> torch.Tensor:
+         raise NotImplementedError(f"{self.__class__.__name__} doesn't implement rmatvec")
+
+     def matmat(self, x: torch.Tensor) -> "LinearOperator":
+         raise NotImplementedError(f"{self.__class__.__name__} doesn't implement matmat")
+
+     def solve(self, b: torch.Tensor) -> torch.Tensor:
+         raise NotImplementedError(f"{self.__class__.__name__} doesn't implement solve")
+
+     def solve_bounded(self, b: torch.Tensor, bound: float, ord: float = 2) -> torch.Tensor:
+         """solve with a norm bound on x"""
+         raise NotImplementedError(f"{self.__class__.__name__} doesn't implement solve_bounded")
+
+     def update(self, *args, **kwargs) -> None:
+         raise NotImplementedError(f"{self.__class__.__name__} doesn't implement update")
+
+     def add(self, x: torch.Tensor) -> "LinearOperator":
+         raise NotImplementedError(f"{self.__class__.__name__} doesn't implement add")
+
+     def __add__(self, x: torch.Tensor) -> "LinearOperator":
+         return self.add(x)
+
+     def add_diagonal(self, x: torch.Tensor | float) -> "LinearOperator":
+         raise NotImplementedError(f"{self.__class__.__name__} doesn't implement add_diagonal")
+
+     def diagonal(self) -> torch.Tensor:
+         raise NotImplementedError(f"{self.__class__.__name__} doesn't implement diagonal")
+
+     def inv(self) -> "LinearOperator":
+         raise NotImplementedError(f"{self.__class__.__name__} doesn't implement inv")
+
+     def transpose(self) -> "LinearOperator":
+         raise NotImplementedError(f"{self.__class__.__name__} doesn't implement transpose")
+
+     @property
+     def T(self): return self.transpose()
+
+     def to_tensor(self) -> torch.Tensor:
+         raise NotImplementedError(f"{self.__class__.__name__} doesn't implement to_tensor")
+
+     def to_dense(self) -> "Dense":
+         return Dense(self) # calls to_tensor
+
+     def size(self) -> tuple[int, ...]:
+         raise NotImplementedError(f"{self.__class__.__name__} doesn't implement size")
+
+     @property
+     def shape(self) -> tuple[int, ...]:
+         return self.size()
+
+     def numel(self) -> int:
+         return math.prod(self.size())
+
+     def ndimension(self) -> int:
+         return len(self.size())
+
+     @property
+     def ndim(self) -> int:
+         return self.ndimension()
+
+     def _numpy_matvec(self, x, dtype=None):
+         """returns Ax ndarray for scipy's LinearOperator"""
+         Ax = self.matvec(totensor(x, device=self.device, dtype=self.dtype))
+         Ax = tonumpy(Ax)
+         if dtype is not None: Ax = Ax.astype(dtype)
+         return Ax
+
+     def _numpy_rmatvec(self, x, dtype=None):
+         """returns Aᵀx ndarray for scipy's LinearOperator"""
+         Ax = self.rmatvec(totensor(x, device=self.device, dtype=self.dtype))
+         Ax = tonumpy(Ax)
+         if dtype is not None: Ax = Ax.astype(dtype)
+         return Ax
+
+     def scipy_linop(self, dtype=None):
+         if _ScipyLinearOperator is None: raise ModuleNotFoundError("Scipy needs to be installed")
+         return _ScipyLinearOperator(
+             dtype=dtype,
+             shape=self.size(),
+             matvec=partial(self._numpy_matvec, dtype=dtype), # pyright:ignore[reportCallIssue]
+             rmatvec=partial(self._numpy_rmatvec, dtype=dtype), # pyright:ignore[reportCallIssue]
+         )
+
+     def is_dense(self) -> bool:
+         raise NotImplementedError(f"{self.__class__.__name__} doesn't implement is_dense")
+
+ def _solve(A: torch.Tensor, b: torch.Tensor) -> torch.Tensor: # should I keep this or separate solve and lstsq?
+     sol, info = torch.linalg.solve_ex(A, b) # pylint:disable=not-callable
+     if info == 0: return sol
+     return torch.linalg.lstsq(A, b).solution # pylint:disable=not-callable
+
+ def _inv(A: torch.Tensor) -> torch.Tensor:
+     sol, info = torch.linalg.inv_ex(A) # pylint:disable=not-callable
+     if info == 0: return sol
+     return torch.linalg.pinv(A) # pylint:disable=not-callable
+
+
+ class Dense(LinearOperator):
+     def __init__(self, A: torch.Tensor | LinearOperator):
+         if isinstance(A, LinearOperator): A = A.to_tensor()
+         self.A: torch.Tensor = A
+         self.device = self.A.device
+         self.dtype = self.A.dtype
+
+     def matvec(self, x): return self.A.mv(x)
+     def rmatvec(self, x): return self.A.mH.mv(x)
+
+     def matmat(self, x): return Dense(self.A.mm(x))
+     def rmatmat(self, x): return Dense(self.A.mH.mm(x))
+
+     def solve(self, b): return _solve(self.A, b)
+
+     def add(self, x): return Dense(self.A + x)
+     def add_diagonal(self, x):
+         if isinstance(x, torch.Tensor) and x.numel() <= 1: x = x.item()
+         if isinstance(x, (int,float)): x = torch.full((self.shape[0],), fill_value=x, device=self.A.device, dtype=self.A.dtype)
+         return Dense(self.A + torch.diag_embed(x))
+     def diagonal(self): return self.A.diagonal()
+     def inv(self): return Dense(_inv(self.A))
+     def to_tensor(self): return self.A
+     def size(self): return self.A.size()
+     def is_dense(self): return True
+     def transpose(self): return Dense(self.A.mH)
+
+ class DenseInverse(LinearOperator):
+     """Represents the inverse of a dense matrix A."""
+     def __init__(self, A_inv: torch.Tensor):
+         self.A_inv: torch.Tensor = A_inv
+         self.device = self.A_inv.device
+         self.dtype = self.A_inv.dtype
+
+     def matvec(self, x): return _solve(self.A_inv, x)
+     def rmatvec(self, x): return _solve(self.A_inv.mH, x)
+
+     def matmat(self, x): return Dense(_solve(self.A_inv, x))
+     def rmatmat(self, x): return Dense(_solve(self.A_inv.mH, x))
+
+     def solve(self, b): return self.A_inv.mv(b)
+
+     def inv(self): return Dense(self.A_inv)
+     def to_tensor(self): return _inv(self.A_inv)
+     def size(self): return self.A_inv.size()
+     def is_dense(self): return True
+     def transpose(self): return DenseInverse(self.A_inv.mH)
+
+ class DenseWithInverse(Dense):
+     """Represents a matrix where both the matrix and its inverse are known.
+
+     ``matmat``, ``rmatmat``, ``add`` and ``add_diagonal`` return a Dense matrix; the inverse is lost.
+     """
+     def __init__(self, A: torch.Tensor, A_inv: torch.Tensor):
+         super().__init__(A)
+         self.A_inv: torch.Tensor = A_inv
+
+     def solve(self, b): return self.A_inv.mv(b)
+     def inv(self): return DenseWithInverse(self.A_inv, self.A)
+     def transpose(self): return DenseWithInverse(self.A.mH, self.A_inv.mH)
+
+ class Diagonal(LinearOperator):
+     def __init__(self, x: torch.Tensor):
+         assert x.ndim == 1
+         self.A: torch.Tensor = x
+         self.device = self.A.device
+         self.dtype = self.A.dtype
+
+     def matvec(self, x): return self.A * x
+     def rmatvec(self, x): return self.A * x
+
+     def matmat(self, x): return Dense(x * self.A.unsqueeze(-1))
+     def rmatmat(self, x): return Dense(x * self.A.unsqueeze(-1))
+
+     def solve(self, b): return b / self.A
+
+     def add(self, x): return Dense(x + self.A.diag_embed())
+     def add_diagonal(self, x): return Diagonal(self.A + x)
+     def diagonal(self): return self.A
+     def inv(self): return Diagonal(1 / self.A)
+     def to_tensor(self): return self.A.diag_embed()
+     def size(self): return (self.A.numel(), self.A.numel())
+     def is_dense(self): return False
+     def transpose(self): return Diagonal(self.A)
+
+ class ScaledIdentity(LinearOperator):
+     def __init__(self, s: float | torch.Tensor = 1., shape=None, device=None, dtype=None):
+         self.device = self.dtype = None
+
+         if isinstance(s, torch.Tensor):
+             self.device = s.device
+             self.dtype = s.dtype
+
+         if device is not None: self.device = device
+         if dtype is not None: self.dtype = dtype
+
+         self.s = tofloat(s)
+         self._shape = shape
+
+     def matvec(self, x): return x * self.s
+     def rmatvec(self, x): return x * self.s
+
+     def matmat(self, x): return Dense(x * self.s)
+     def rmatmat(self, x): return Dense(x * self.s)
+
+     def solve(self, b): return b / self.s
+     def solve_bounded(self, b, bound, ord=2):
+         b_norm = torch.linalg.vector_norm(b, ord=ord) # pylint:disable=not-callable
+         sol = b / self.s
+         sol_norm = b_norm / abs(self.s)
+
+         if sol_norm > bound:
+             if not math.isfinite(sol_norm):
+                 if b_norm > bound: return b * (bound / b_norm)
+                 return b
+             return sol * (bound / sol_norm)
+
+         return sol
+
+     def add(self, x): return Dense(x).add_diagonal(self.s) # sI + X adds s on the diagonal only
+     def add_diagonal(self, x):
+         if isinstance(x, torch.Tensor) and x.numel() <= 1: x = x.item()
+         if isinstance(x, (int,float)): return ScaledIdentity(x + self.s, shape=self._shape, device=self.device, dtype=self.dtype)
+         return Diagonal(x + self.s)
+
+     def diagonal(self):
+         if self._shape is None: raise RuntimeError("Shape is None")
+         return torch.full((self._shape[0],), fill_value=self.s, device=self.device, dtype=self.dtype) # the diagonal is a vector of length shape[0]
+
+     def inv(self): return ScaledIdentity(1 / self.s, shape=self._shape, device=self.device, dtype=self.dtype)
+     def to_tensor(self):
+         if self._shape is None: raise RuntimeError("Shape is None")
+         return torch.eye(*self.shape, device=self.device, dtype=self.dtype).mul_(self.s)
+
+     def size(self):
+         if self._shape is None: raise RuntimeError("Shape is None")
+         return self._shape
+
+     def __repr__(self):
+         return f"ScaledIdentity(s={self.s}, shape={self._shape}, dtype={self.dtype}, device={self.device})"
+
+     def is_dense(self): return False
+     def transpose(self): return ScaledIdentity(self.s, shape=self._shape, device=self.device, dtype=self.dtype)
+
+ class AtA(LinearOperator):
+     def __init__(self, A: torch.Tensor):
+         self.A = A
+
+     def matvec(self, x): return self.A.mH.mv(self.A.mv(x))
+     def rmatvec(self, x): return self.matvec(x)
+
+     def matmat(self, x): return Dense(torch.linalg.multi_dot([self.A.mH, self.A, x])) # pylint:disable=not-callable
+     def rmatmat(self, x): return Dense(torch.linalg.multi_dot([self.A.mH, self.A, x])) # pylint:disable=not-callable
+
+     def is_dense(self): return False
+     def to_tensor(self): return self.A.mH @ self.A
+     def transpose(self): return AtA(self.A)
+
+     def add_diagonal(self, x):
+         if isinstance(x, torch.Tensor) and x.numel() <= 1: x = x.item()
+         if isinstance(x, (int,float)): x = torch.full((self.shape[0],), fill_value=x, device=self.A.device, dtype=self.A.dtype)
+         return Dense(self.to_tensor() + torch.diag_embed(x))
+
+     def solve(self, b):
+         return Dense(self.to_tensor()).solve(b)
+
+     def inv(self):
+         return Dense(self.to_tensor()).inv()
+
+     def diagonal(self):
+         return self.A.pow(2).sum(0) # diag(AᵀA) holds the squared column norms
+
+     def size(self):
+         n = self.A.size(1)
+         return (n, n)
+
+ class AAT(LinearOperator):
+     def __init__(self, A: torch.Tensor):
+         self.A = A
+
+     def matvec(self, x): return self.A.mv(self.A.mH.mv(x))
+     def rmatvec(self, x): return self.matvec(x)
+
+     def matmat(self, x): return Dense(torch.linalg.multi_dot([self.A, self.A.mH, x])) # pylint:disable=not-callable
+     def rmatmat(self, x): return Dense(torch.linalg.multi_dot([self.A, self.A.mH, x])) # pylint:disable=not-callable
+
+     def is_dense(self): return False
+     def to_tensor(self): return self.A @ self.A.mH
+     def transpose(self): return AAT(self.A)
+
+     def add_diagonal(self, x):
+         if isinstance(x, torch.Tensor) and x.numel() <= 1: x = x.item()
+         if isinstance(x, (int,float)): x = torch.full((self.shape[0],), fill_value=x, device=self.A.device, dtype=self.A.dtype)
+         return Dense(self.to_tensor() + torch.diag_embed(x))
+
+     def solve(self, b):
+         return Dense(self.to_tensor()).solve(b)
+
+     def inv(self):
+         return Dense(self.to_tensor()).inv()
+
+     def diagonal(self):
+         return self.A.pow(2).sum(1) # diag(AAᵀ) holds the squared row norms
+
+     def size(self):
+         m = self.A.size(0) # AAᵀ is m×m for an m×n A
+         return (m, m)
+
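The module docstring says these operators exist to back the new trust-region modules. As a rough usage sketch (not part of the diff; the import path is taken from the files-changed list above and the matrix values are invented), this is how Dense and ScaledIdentity compose into a damped-Newton-style solve:

    import torch
    from torchzero.utils.linalg.linear_operator import Dense, ScaledIdentity

    # Invented SPD "Hessian" and gradient, just to exercise the API.
    H = torch.tensor([[2.0, 0.5],
                      [0.5, 1.0]])
    g = torch.tensor([1.0, -1.0])

    op = Dense(H)
    op.matvec(g)                 # H @ g
    op.diagonal()                # tensor([2., 1.])

    # Levenberg-Marquardt-style damping: solve (H + 0.1*I) x = g.
    step = op.add_diagonal(0.1).solve(g)

    # ScaledIdentity solves trivially and supports the norm-bounded solve
    # used by trust regions: g / 0.5 is rescaled so that ||x|| <= 1.
    x = ScaledIdentity(0.5, shape=(2, 2)).solve_bounded(g, bound=1.0)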
torchzero/utils/linalg/matrix_funcs.py
@@ -15,13 +15,13 @@ def singular_vals_func(A: torch.Tensor, fn: Callable[[torch.Tensor], torch.Tenso
 
 def matrix_power_eigh(A: torch.Tensor, pow:float):
     L, Q = torch.linalg.eigh(A) # pylint:disable=not-callable
-    if pow % 2 != 0: L.clip_(min = torch.finfo(A.dtype).eps)
+    if pow % 2 != 0: L.clip_(min = torch.finfo(A.dtype).tiny * 2)
     return (Q * L.pow(pow).unsqueeze(-2)) @ Q.mH
 
 
 def inv_sqrt_2x2(A: torch.Tensor, force_pd: bool=False) -> torch.Tensor:
     """Inverse square root of a possibly batched 2x2 matrix using a general formula for 2x2 matrices so that this is way faster than torch linalg. I tried doing a hierarchical 2x2 preconditioning but it didn't work well."""
-    eps = torch.finfo(A.dtype).eps
+    eps = torch.finfo(A.dtype).tiny * 2
 
     a = A[..., 0, 0]
     b = A[..., 0, 1]
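For context on the hunk above: matrix_power_eigh computes a symmetric matrix power through an eigendecomposition, A^p = Q diag(λ^p) Qᴴ, and the clamp keeps eigenvalues positive for powers where negative values would be invalid. A quick sanity check (not part of the diff; the test matrix is invented and the import assumes the function is exported from the module shown in the files-changed list):

    import torch
    from torchzero.utils.linalg.matrix_funcs import matrix_power_eigh

    A = torch.tensor([[2.0, 0.3],
                      [0.3, 1.0]])    # an invented SPD matrix
    R = matrix_power_eigh(A, -0.5)    # inverse square root A^(-1/2)
    assert torch.allclose(R @ A @ R, torch.eye(2), atol=1e-5)  # recovers identity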
torchzero/utils/linalg/orthogonalize.py
@@ -8,4 +8,5 @@ def gram_schmidt(x: torch.Tensor, y: torch.Tensor) -> tuple[torch.Tensor, torch.
 def gram_schmidt(x: TensorList, y: TensorList) -> tuple[TensorList, TensorList]: ...
 def gram_schmidt(x, y):
     """makes two orthogonal vectors, only y is changed"""
-    return x, y - (x*y) / ((x*x) + 1e-8)
+    min = torch.finfo(x.dtype).tiny * 2
+    return x, y - (x*y) / (x*x).clip(min=min)
torchzero/utils/linalg/qr.py
@@ -20,7 +20,7 @@ def _get_w_tau(R: torch.Tensor, i: int, eps: float):
 def _qr_householder_complete(A:torch.Tensor):
     *b,m,n = A.shape
     k = min(m,n)
-    eps = torch.finfo(A.dtype).eps
+    eps = torch.finfo(A.dtype).tiny * 2
 
     Q = torch.eye(m, dtype=A.dtype, device=A.device).expand(*b, m, m).clone() # clone because expanded dims refer to same memory
     R = A.clone()
@@ -36,7 +36,7 @@ def _qr_householder_complete(A:torch.Tensor):
 def _qr_householder_reduced(A:torch.Tensor):
     *b,m,n = A.shape
     k = min(m,n)
-    eps = torch.finfo(A.dtype).eps
+    eps = torch.finfo(A.dtype).tiny * 2
 
     R = A.clone()
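The common thread in these hunks is replacing the torch.finfo(...).eps floor with torch.finfo(...).tiny * 2. The two constants differ by roughly 31 orders of magnitude in float32: .eps is the spacing between 1.0 and the next representable float, while .tiny is the smallest positive normal number, so the new floor clamps values to "barely positive" instead of discarding everything below about 1.2e-7. A quick comparison (not from the diff):

    import torch

    for dtype in (torch.float32, torch.float64):
        fi = torch.finfo(dtype)
        print(dtype, fi.eps, fi.tiny * 2)
        # float32: eps ~= 1.19e-07, tiny*2 ~= 2.35e-38
        # float64: eps ~= 2.22e-16, tiny*2 ~= 4.45e-308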