torchzero 0.3.11__py3-none-any.whl → 0.3.13__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- tests/test_opts.py +95 -69
- tests/test_tensorlist.py +8 -7
- torchzero/__init__.py +1 -1
- torchzero/core/__init__.py +2 -2
- torchzero/core/module.py +225 -72
- torchzero/core/reformulation.py +65 -0
- torchzero/core/transform.py +44 -24
- torchzero/modules/__init__.py +13 -5
- torchzero/modules/{optimizers → adaptive}/__init__.py +5 -2
- torchzero/modules/adaptive/adagrad.py +356 -0
- torchzero/modules/{optimizers → adaptive}/adahessian.py +53 -52
- torchzero/modules/{optimizers → adaptive}/adam.py +0 -3
- torchzero/modules/{optimizers → adaptive}/adan.py +26 -40
- torchzero/modules/{optimizers → adaptive}/adaptive_heavyball.py +3 -6
- torchzero/modules/adaptive/aegd.py +54 -0
- torchzero/modules/{optimizers → adaptive}/esgd.py +1 -1
- torchzero/modules/{optimizers/ladagrad.py → adaptive/lmadagrad.py} +42 -39
- torchzero/modules/{optimizers → adaptive}/mars.py +24 -36
- torchzero/modules/adaptive/matrix_momentum.py +146 -0
- torchzero/modules/{optimizers → adaptive}/msam.py +14 -12
- torchzero/modules/{optimizers → adaptive}/muon.py +19 -20
- torchzero/modules/adaptive/natural_gradient.py +175 -0
- torchzero/modules/{optimizers → adaptive}/rprop.py +0 -2
- torchzero/modules/{optimizers → adaptive}/sam.py +1 -1
- torchzero/modules/{optimizers → adaptive}/shampoo.py +8 -4
- torchzero/modules/{optimizers → adaptive}/soap.py +27 -50
- torchzero/modules/{optimizers → adaptive}/sophia_h.py +2 -3
- torchzero/modules/clipping/clipping.py +85 -92
- torchzero/modules/clipping/ema_clipping.py +5 -5
- torchzero/modules/conjugate_gradient/__init__.py +11 -0
- torchzero/modules/{quasi_newton → conjugate_gradient}/cg.py +355 -369
- torchzero/modules/experimental/__init__.py +9 -32
- torchzero/modules/experimental/dct.py +2 -2
- torchzero/modules/experimental/fft.py +2 -2
- torchzero/modules/experimental/gradmin.py +4 -3
- torchzero/modules/experimental/l_infinity.py +111 -0
- torchzero/modules/{momentum/experimental.py → experimental/momentum.py} +3 -40
- torchzero/modules/experimental/newton_solver.py +79 -17
- torchzero/modules/experimental/newtonnewton.py +27 -14
- torchzero/modules/experimental/scipy_newton_cg.py +105 -0
- torchzero/modules/experimental/structural_projections.py +1 -1
- torchzero/modules/functional.py +50 -14
- torchzero/modules/grad_approximation/fdm.py +19 -20
- torchzero/modules/grad_approximation/forward_gradient.py +4 -2
- torchzero/modules/grad_approximation/grad_approximator.py +43 -47
- torchzero/modules/grad_approximation/rfdm.py +144 -122
- torchzero/modules/higher_order/__init__.py +1 -1
- torchzero/modules/higher_order/higher_order_newton.py +31 -23
- torchzero/modules/least_squares/__init__.py +1 -0
- torchzero/modules/least_squares/gn.py +161 -0
- torchzero/modules/line_search/__init__.py +2 -2
- torchzero/modules/line_search/_polyinterp.py +289 -0
- torchzero/modules/line_search/adaptive.py +69 -44
- torchzero/modules/line_search/backtracking.py +83 -70
- torchzero/modules/line_search/line_search.py +159 -68
- torchzero/modules/line_search/scipy.py +1 -1
- torchzero/modules/line_search/strong_wolfe.py +319 -218
- torchzero/modules/misc/__init__.py +8 -0
- torchzero/modules/misc/debug.py +4 -4
- torchzero/modules/misc/escape.py +9 -7
- torchzero/modules/misc/gradient_accumulation.py +88 -22
- torchzero/modules/misc/homotopy.py +59 -0
- torchzero/modules/misc/misc.py +82 -15
- torchzero/modules/misc/multistep.py +47 -11
- torchzero/modules/misc/regularization.py +5 -9
- torchzero/modules/misc/split.py +55 -35
- torchzero/modules/misc/switch.py +1 -1
- torchzero/modules/momentum/__init__.py +1 -5
- torchzero/modules/momentum/averaging.py +3 -3
- torchzero/modules/momentum/cautious.py +42 -47
- torchzero/modules/momentum/momentum.py +35 -1
- torchzero/modules/ops/__init__.py +9 -1
- torchzero/modules/ops/binary.py +9 -8
- torchzero/modules/{momentum/ema.py → ops/higher_level.py} +10 -33
- torchzero/modules/ops/multi.py +15 -15
- torchzero/modules/ops/reduce.py +1 -1
- torchzero/modules/ops/utility.py +12 -8
- torchzero/modules/projections/projection.py +4 -4
- torchzero/modules/quasi_newton/__init__.py +1 -16
- torchzero/modules/quasi_newton/damping.py +105 -0
- torchzero/modules/quasi_newton/diagonal_quasi_newton.py +167 -163
- torchzero/modules/quasi_newton/lbfgs.py +256 -200
- torchzero/modules/quasi_newton/lsr1.py +167 -132
- torchzero/modules/quasi_newton/quasi_newton.py +346 -446
- torchzero/modules/restarts/__init__.py +7 -0
- torchzero/modules/restarts/restars.py +252 -0
- torchzero/modules/second_order/__init__.py +2 -1
- torchzero/modules/second_order/multipoint.py +238 -0
- torchzero/modules/second_order/newton.py +133 -88
- torchzero/modules/second_order/newton_cg.py +141 -80
- torchzero/modules/smoothing/__init__.py +1 -1
- torchzero/modules/smoothing/sampling.py +300 -0
- torchzero/modules/step_size/__init__.py +1 -1
- torchzero/modules/step_size/adaptive.py +312 -47
- torchzero/modules/termination/__init__.py +14 -0
- torchzero/modules/termination/termination.py +207 -0
- torchzero/modules/trust_region/__init__.py +5 -0
- torchzero/modules/trust_region/cubic_regularization.py +170 -0
- torchzero/modules/trust_region/dogleg.py +92 -0
- torchzero/modules/trust_region/levenberg_marquardt.py +128 -0
- torchzero/modules/trust_region/trust_cg.py +97 -0
- torchzero/modules/trust_region/trust_region.py +350 -0
- torchzero/modules/variance_reduction/__init__.py +1 -0
- torchzero/modules/variance_reduction/svrg.py +208 -0
- torchzero/modules/weight_decay/weight_decay.py +65 -64
- torchzero/modules/zeroth_order/__init__.py +1 -0
- torchzero/modules/zeroth_order/cd.py +359 -0
- torchzero/optim/root.py +65 -0
- torchzero/optim/utility/split.py +8 -8
- torchzero/optim/wrappers/directsearch.py +0 -1
- torchzero/optim/wrappers/fcmaes.py +3 -2
- torchzero/optim/wrappers/nlopt.py +0 -2
- torchzero/optim/wrappers/optuna.py +2 -2
- torchzero/optim/wrappers/scipy.py +81 -22
- torchzero/utils/__init__.py +40 -4
- torchzero/utils/compile.py +1 -1
- torchzero/utils/derivatives.py +123 -111
- torchzero/utils/linalg/__init__.py +9 -2
- torchzero/utils/linalg/linear_operator.py +329 -0
- torchzero/utils/linalg/matrix_funcs.py +2 -2
- torchzero/utils/linalg/orthogonalize.py +2 -1
- torchzero/utils/linalg/qr.py +2 -2
- torchzero/utils/linalg/solve.py +226 -154
- torchzero/utils/metrics.py +83 -0
- torchzero/utils/python_tools.py +6 -0
- torchzero/utils/tensorlist.py +105 -34
- torchzero/utils/torch_tools.py +9 -4
- torchzero-0.3.13.dist-info/METADATA +14 -0
- torchzero-0.3.13.dist-info/RECORD +166 -0
- {torchzero-0.3.11.dist-info → torchzero-0.3.13.dist-info}/top_level.txt +0 -1
- docs/source/conf.py +0 -59
- docs/source/docstring template.py +0 -46
- torchzero/modules/experimental/absoap.py +0 -253
- torchzero/modules/experimental/adadam.py +0 -118
- torchzero/modules/experimental/adamY.py +0 -131
- torchzero/modules/experimental/adam_lambertw.py +0 -149
- torchzero/modules/experimental/adaptive_step_size.py +0 -90
- torchzero/modules/experimental/adasoap.py +0 -177
- torchzero/modules/experimental/cosine.py +0 -214
- torchzero/modules/experimental/cubic_adam.py +0 -97
- torchzero/modules/experimental/eigendescent.py +0 -120
- torchzero/modules/experimental/etf.py +0 -195
- torchzero/modules/experimental/exp_adam.py +0 -113
- torchzero/modules/experimental/expanded_lbfgs.py +0 -141
- torchzero/modules/experimental/hnewton.py +0 -85
- torchzero/modules/experimental/modular_lbfgs.py +0 -265
- torchzero/modules/experimental/parabolic_search.py +0 -220
- torchzero/modules/experimental/subspace_preconditioners.py +0 -145
- torchzero/modules/experimental/tensor_adagrad.py +0 -42
- torchzero/modules/line_search/polynomial.py +0 -233
- torchzero/modules/momentum/matrix_momentum.py +0 -193
- torchzero/modules/optimizers/adagrad.py +0 -165
- torchzero/modules/quasi_newton/trust_region.py +0 -397
- torchzero/modules/smoothing/gaussian.py +0 -198
- torchzero-0.3.11.dist-info/METADATA +0 -404
- torchzero-0.3.11.dist-info/RECORD +0 -159
- torchzero-0.3.11.dist-info/licenses/LICENSE +0 -21
- /torchzero/modules/{optimizers → adaptive}/lion.py +0 -0
- /torchzero/modules/{optimizers → adaptive}/orthograd.py +0 -0
- /torchzero/modules/{optimizers → adaptive}/rmsprop.py +0 -0
- {torchzero-0.3.11.dist-info → torchzero-0.3.13.dist-info}/WHEEL +0 -0

torchzero/modules/quasi_newton/damping.py (new file):

@@ -0,0 +1,105 @@
+import math
+from typing import Literal, Protocol, overload
+
+import torch
+
+from ...utils import TensorList
+from ...utils.linalg.linear_operator import DenseInverse, LinearOperator
+from ..functional import safe_clip
+
+
+class DampingStrategy(Protocol):
+    def __call__(
+        self,
+        s: torch.Tensor,
+        y: torch.Tensor,
+        g: torch.Tensor,
+        H: LinearOperator,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        return s, y
+
+def _sy_Hs_sHs(s: torch.Tensor, y: torch.Tensor, H: LinearOperator):
+    if isinstance(H, DenseInverse):
+        Hs = H.solve(y)
+        sHs = y.dot(Hs)
+    else:
+        Hs = H.matvec(s)
+        sHs = s.dot(Hs)
+
+    return s.dot(y), Hs, sHs
+
+
+def powell_damping(s: torch.Tensor, y: torch.Tensor, g: torch.Tensor, H: LinearOperator, u=0.2):
+    # here H is the hessian, not the inverse
+    sy, Hs, sHs = _sy_Hs_sHs(s, y, H)
+    if sy < u*sHs:
+        phi = ((1-u) * sHs) / safe_clip(sHs - sy)
+        s = phi * s + (1 - phi) * Hs
+
+    return s, y
+
+def double_damping(s: torch.Tensor, y: torch.Tensor, g: torch.Tensor, H: LinearOperator, u1=0.2, u2=1/3):
+    # Goldfarb, Donald, Yi Ren, and Achraf Bahamou. "Practical quasi-Newton methods for training deep neural networks." Advances in Neural Information Processing Systems 33 (2020): 2386-2396.
+
+    # Powell's damping on H
+    sy, Hs, sHs = _sy_Hs_sHs(s, y, H)
+    if sy < u1*sHs:
+        phi = ((1-u1) * sHs) / safe_clip(sHs - sy)
+        s = phi * s + (1 - phi) * Hs
+
+    # Powell's damping with B = I
+    sy = s.dot(y)
+    ss = s.dot(s)
+
+    if sy < u2*ss:
+        phi = ((1-u2) * ss) / safe_clip(ss - sy)
+        y = phi * y + (1 - phi) * s
+
+    return s, y
+
+
+_DAMPING_KEYS = Literal["powell", "double"]
+_DAMPING_STRATEGIES: dict[_DAMPING_KEYS, DampingStrategy] = {
+    "powell": powell_damping,
+    "double": double_damping,
+}
+
+
+DampingStrategyType = _DAMPING_KEYS | DampingStrategy | None
+
+@overload
+def apply_damping(
+    strategy: DampingStrategyType,
+    s: torch.Tensor,
+    y: torch.Tensor,
+    g: torch.Tensor,
+    H: LinearOperator,
+) -> tuple[torch.Tensor, torch.Tensor]: ...
+@overload
+def apply_damping(
+    strategy: DampingStrategyType,
+    s: TensorList,
+    y: TensorList,
+    g: TensorList,
+    H: LinearOperator,
+) -> tuple[TensorList, TensorList]: ...
+def apply_damping(
+    strategy: DampingStrategyType,
+    s,
+    y,
+    g,
+    H: LinearOperator,
+):
+    if strategy is None: return s, y
+    if isinstance(strategy, str): strategy = _DAMPING_STRATEGIES[strategy]
+
+    if isinstance(s, TensorList):
+        assert isinstance(y, TensorList) and isinstance(g, TensorList)
+        s_vec, y_vec = strategy(s.to_vec(), y.to_vec(), g.to_vec(), H)
+        return s.from_vec(s_vec), y.from_vec(y_vec)
+
+    assert isinstance(y, torch.Tensor) and isinstance(g, torch.Tensor)
+    return strategy(s, y, g, H)
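
The second stage of `double_damping` (Powell's damping with B = I) has a checkable guarantee: whenever sᵀy < u2·sᵀs, blending y toward s with φ = (1−u2)·sᵀs / (sᵀs − sᵀy) lands the curvature exactly on the threshold, sᵀy_damped = u2·sᵀs, which is what keeps a subsequent BFGS-type update positive definite. Below is a minimal standalone sketch of just that stage (plain torch, not the package API; `damp_y_identity` is a name made up for this example):

```python
import torch

def damp_y_identity(s: torch.Tensor, y: torch.Tensor, u2: float = 1/3):
    # Powell damping against B = I, mirroring the second block of double_damping
    sy, ss = s.dot(y), s.dot(s)
    if sy < u2 * ss:
        phi = ((1 - u2) * ss) / (ss - sy)  # ss - sy > 0 whenever this branch is taken
        y = phi * y + (1 - phi) * s
    return y

torch.manual_seed(0)
s = torch.randn(16)
y = -0.5 * s + 0.1 * torch.randn(16)   # bad curvature pair: s.y < 0
y_d = damp_y_identity(s, y)
# algebra: s.y_d = phi*s.y + (1-phi)*s.s = u2 * s.s exactly
assert torch.isclose(s.dot(y_d), s.dot(s) / 3, rtol=1e-4)
```

The first stage applies the same blend to s, with curvature measured against sᵀHs through the `LinearOperator` (hence `_sy_Hs_sHs`), and `safe_clip` guards the denominators against division by zero.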

torchzero/modules/quasi_newton/diagonal_quasi_newton.py:

@@ -1,163 +1,167 @@
-[the 163 removed lines are not preserved in this view]
+from typing import Any, Literal
+from collections.abc import Callable
+
+import torch
+from ...core import Chainable
+from .quasi_newton import (
+    HessianUpdateStrategy,
+    _HessianUpdateStrategyDefaults,
+    _InverseHessianUpdateStrategyDefaults,
+)
+
+from ..functional import safe_clip
+
+
+def diagonal_bfgs_H_(H: torch.Tensor, s: torch.Tensor, y: torch.Tensor, tol: float):
+    sy = s.dot(y)
+    if sy < tol: return H
+
+    sy_sq = safe_clip(sy**2)
+
+    num1 = (sy + (y * H * y)) * s*s
+    term1 = num1.div_(sy_sq)
+    num2 = (H * y * s).add_(s * y * H)
+    term2 = num2.div_(sy)
+    H += term1.sub_(term2)
+    return H
+
+class DiagonalBFGS(_InverseHessianUpdateStrategyDefaults):
+    """Diagonal BFGS. This is simply BFGS with only the diagonal being updated and used. It doesn't satisfy the secant equation but may still be useful."""
+    def update_H(self, H, s, y, p, g, p_prev, g_prev, state, setting):
+        return diagonal_bfgs_H_(H=H, s=s, y=y, tol=setting['tol'])
+
+    def initialize_P(self, size: int, device, dtype, is_inverse: bool): return torch.ones(size, device=device, dtype=dtype)
+
+def diagonal_sr1_(H: torch.Tensor, s: torch.Tensor, y: torch.Tensor, tol: float):
+    z = s - H*y
+    denom = z.dot(y)
+
+    z_norm = torch.linalg.norm(z) # pylint:disable=not-callable
+    y_norm = torch.linalg.norm(y) # pylint:disable=not-callable
+
+    # check as in Nocedal, Wright. "Numerical Optimization" 2nd ed., p. 146
+    if denom.abs() <= tol * y_norm * z_norm: return H
+    H += (z*z).div_(safe_clip(denom))
+    return H
+
+class DiagonalSR1(_InverseHessianUpdateStrategyDefaults):
+    """Diagonal SR1. This is simply SR1 with only the diagonal being updated and used. It doesn't satisfy the secant equation but may still be useful."""
+    def update_H(self, H, s, y, p, g, p_prev, g_prev, state, setting):
+        return diagonal_sr1_(H=H, s=s, y=y, tol=setting['tol'])
+    def update_B(self, B, s, y, p, g, p_prev, g_prev, state, setting):
+        # the SR1 formula turns into its inverse form under swapping s and y
+        return diagonal_sr1_(H=B, s=y, y=s, tol=setting['tol'])
+
+    def initialize_P(self, size: int, device, dtype, is_inverse: bool): return torch.ones(size, device=device, dtype=dtype)
+
+
+# Zhu M., Nazareth J. L., Wolkowicz H. "The quasi-Cauchy relation and diagonal updating." SIAM Journal on Optimization 9.4 (1999): 1192-1204.
+def diagonal_qc_B_(B: torch.Tensor, s: torch.Tensor, y: torch.Tensor):
+    denom = safe_clip((s**4).sum())
+    num = s.dot(y) - (s*B).dot(s)
+    B += s**2 * (num/denom)
+    return B
+
+class DiagonalQuasiCauchi(_HessianUpdateStrategyDefaults):
+    """Diagonal quasi-Cauchy method.
+
+    Reference:
+        Zhu M., Nazareth J. L., Wolkowicz H. "The quasi-Cauchy relation and diagonal updating." SIAM Journal on Optimization 9.4 (1999): 1192-1204.
+    """
+    def update_B(self, B, s, y, p, g, p_prev, g_prev, state, setting):
+        return diagonal_qc_B_(B=B, s=s, y=y)
+
+    def initialize_P(self, size: int, device, dtype, is_inverse: bool): return torch.ones(size, device=device, dtype=dtype)
+
+# Leong, Wah June, Sharareh Enshaei, and Sie Long Kek. "Diagonal quasi-Newton methods via least change updating principle with weighted Frobenius norm." Numerical Algorithms 86 (2021): 1225-1241.
+def diagonal_wqc_B_(B: torch.Tensor, s: torch.Tensor, y: torch.Tensor):
+    E_sq = s**2 * B**2
+    denom = safe_clip((s*E_sq).dot(s))
+    num = s.dot(y) - (s*B).dot(s)
+    B += E_sq * (num/denom)
+    return B
+
+class DiagonalWeightedQuasiCauchi(_HessianUpdateStrategyDefaults):
+    """Diagonal weighted quasi-Cauchy method.
+
+    Reference:
+        Leong, Wah June, Sharareh Enshaei, and Sie Long Kek. "Diagonal quasi-Newton methods via least change updating principle with weighted Frobenius norm." Numerical Algorithms 86 (2021): 1225-1241.
+    """
+    def update_B(self, B, s, y, p, g, p_prev, g_prev, state, setting):
+        return diagonal_wqc_B_(B=B, s=s, y=y)
+
+    def initialize_P(self, size: int, device, dtype, is_inverse: bool): return torch.ones(size, device=device, dtype=dtype)
+
+def _truncate(B: torch.Tensor, lb, ub):
+    return torch.where((B>lb).logical_and(B<ub), B, 1)
+
+# Andrei, Neculai. "A diagonal quasi-Newton updating method for unconstrained optimization." Numerical Algorithms 81.2 (2019): 575-590.
+def dnrtr_B_(B: torch.Tensor, s: torch.Tensor, y: torch.Tensor):
+    denom = safe_clip((s**4).sum())
+    num = s.dot(y) + s.dot(s) - (s*B).dot(s)
+    B += s**2 * (num/denom) - 1
+    return B
+
+class DNRTR(HessianUpdateStrategy):
+    """Diagonal quasi-Newton method.
+
+    Reference:
+        Andrei, Neculai. "A diagonal quasi-Newton updating method for unconstrained optimization." Numerical Algorithms 81.2 (2019): 575-590.
+    """
+    def __init__(
+        self,
+        lb: float = 1e-2,
+        ub: float = 1e5,
+        init_scale: float | Literal["auto"] = "auto",
+        tol: float = 1e-32,
+        ptol: float | None = 1e-32,
+        ptol_restart: bool = False,
+        gtol: float | None = 1e-32,
+        restart_interval: int | None | Literal['auto'] = None,
+        beta: float | None = None,
+        update_freq: int = 1,
+        scale_first: bool = False,
+        concat_params: bool = True,
+        inner: Chainable | None = None,
+    ):
+        defaults = dict(lb=lb, ub=ub)
+        super().__init__(
+            defaults=defaults,
+            init_scale=init_scale,
+            tol=tol,
+            ptol=ptol,
+            ptol_restart=ptol_restart,
+            gtol=gtol,
+            restart_interval=restart_interval,
+            beta=beta,
+            update_freq=update_freq,
+            scale_first=scale_first,
+            concat_params=concat_params,
+            inverse=False,
+            inner=inner,
+        )
+
+    def update_B(self, B, s, y, p, g, p_prev, g_prev, state, setting):
+        # note: delegates to the weighted quasi-Cauchy update rather than dnrtr_B_ above
+        return diagonal_wqc_B_(B=B, s=s, y=y)
+
+    def modify_B(self, B, state, setting):
+        return _truncate(B, setting['lb'], setting['ub'])
+
+    def initialize_P(self, size: int, device, dtype, is_inverse: bool): return torch.ones(size, device=device, dtype=dtype)
+
+# Nosrati, Mahsa, and Keyvan Amini. "A new diagonal quasi-Newton algorithm for unconstrained optimization problems." Applications of Mathematics 69.4 (2024): 501-512.
+def new_dqn_B_(B: torch.Tensor, s: torch.Tensor, y: torch.Tensor):
+    denom = safe_clip((s**4).sum())
+    num = s.dot(y)
+    B += s**2 * (num/denom)
+    return B
+
+class NewDQN(DNRTR):
+    """Diagonal quasi-Newton method.
+
+    Reference:
+        Nosrati, Mahsa, and Keyvan Amini. "A new diagonal quasi-Newton algorithm for unconstrained optimization problems." Applications of Mathematics 69.4 (2024): 501-512.
+    """
+    def update_B(self, B, s, y, p, g, p_prev, g_prev, state, setting):
+        return new_dqn_B_(B=B, s=s, y=y)