torchzero 0.3.9__py3-none-any.whl → 0.3.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs/source/conf.py +6 -4
- docs/source/docstring template.py +46 -0
- tests/test_identical.py +2 -3
- tests/test_opts.py +115 -68
- tests/test_tensorlist.py +2 -2
- tests/test_vars.py +62 -61
- torchzero/core/__init__.py +2 -3
- torchzero/core/module.py +185 -53
- torchzero/core/transform.py +327 -159
- torchzero/modules/__init__.py +3 -1
- torchzero/modules/clipping/clipping.py +120 -23
- torchzero/modules/clipping/ema_clipping.py +37 -22
- torchzero/modules/clipping/growth_clipping.py +20 -21
- torchzero/modules/experimental/__init__.py +30 -4
- torchzero/modules/experimental/absoap.py +53 -156
- torchzero/modules/experimental/adadam.py +22 -15
- torchzero/modules/experimental/adamY.py +21 -25
- torchzero/modules/experimental/adam_lambertw.py +149 -0
- torchzero/modules/{line_search/trust_region.py → experimental/adaptive_step_size.py} +37 -8
- torchzero/modules/experimental/adasoap.py +24 -129
- torchzero/modules/experimental/cosine.py +214 -0
- torchzero/modules/experimental/cubic_adam.py +97 -0
- torchzero/modules/experimental/curveball.py +12 -12
- torchzero/modules/{projections → experimental}/dct.py +11 -11
- torchzero/modules/experimental/eigendescent.py +120 -0
- torchzero/modules/experimental/etf.py +195 -0
- torchzero/modules/experimental/exp_adam.py +113 -0
- torchzero/modules/experimental/expanded_lbfgs.py +141 -0
- torchzero/modules/{projections → experimental}/fft.py +10 -10
- torchzero/modules/experimental/gradmin.py +2 -2
- torchzero/modules/experimental/hnewton.py +85 -0
- torchzero/modules/{quasi_newton/experimental → experimental}/modular_lbfgs.py +49 -50
- torchzero/modules/experimental/newton_solver.py +11 -11
- torchzero/modules/experimental/newtonnewton.py +92 -0
- torchzero/modules/experimental/parabolic_search.py +220 -0
- torchzero/modules/experimental/reduce_outward_lr.py +10 -7
- torchzero/modules/{projections/structural.py → experimental/structural_projections.py} +12 -54
- torchzero/modules/experimental/subspace_preconditioners.py +20 -10
- torchzero/modules/experimental/tensor_adagrad.py +42 -0
- torchzero/modules/functional.py +12 -2
- torchzero/modules/grad_approximation/fdm.py +31 -4
- torchzero/modules/grad_approximation/forward_gradient.py +17 -7
- torchzero/modules/grad_approximation/grad_approximator.py +69 -24
- torchzero/modules/grad_approximation/rfdm.py +310 -50
- torchzero/modules/higher_order/__init__.py +1 -0
- torchzero/modules/higher_order/higher_order_newton.py +319 -0
- torchzero/modules/line_search/__init__.py +4 -4
- torchzero/modules/line_search/adaptive.py +99 -0
- torchzero/modules/line_search/backtracking.py +75 -31
- torchzero/modules/line_search/line_search.py +107 -49
- torchzero/modules/line_search/polynomial.py +233 -0
- torchzero/modules/line_search/scipy.py +20 -5
- torchzero/modules/line_search/strong_wolfe.py +52 -36
- torchzero/modules/misc/__init__.py +27 -0
- torchzero/modules/misc/debug.py +48 -0
- torchzero/modules/misc/escape.py +60 -0
- torchzero/modules/misc/gradient_accumulation.py +70 -0
- torchzero/modules/misc/misc.py +316 -0
- torchzero/modules/misc/multistep.py +158 -0
- torchzero/modules/misc/regularization.py +171 -0
- torchzero/modules/misc/split.py +103 -0
- torchzero/modules/{ops → misc}/switch.py +48 -7
- torchzero/modules/momentum/__init__.py +1 -1
- torchzero/modules/momentum/averaging.py +25 -10
- torchzero/modules/momentum/cautious.py +115 -40
- torchzero/modules/momentum/ema.py +92 -41
- torchzero/modules/momentum/experimental.py +21 -13
- torchzero/modules/momentum/matrix_momentum.py +145 -76
- torchzero/modules/momentum/momentum.py +25 -4
- torchzero/modules/ops/__init__.py +3 -31
- torchzero/modules/ops/accumulate.py +51 -25
- torchzero/modules/ops/binary.py +108 -62
- torchzero/modules/ops/multi.py +95 -34
- torchzero/modules/ops/reduce.py +31 -23
- torchzero/modules/ops/unary.py +37 -21
- torchzero/modules/ops/utility.py +53 -45
- torchzero/modules/optimizers/__init__.py +12 -3
- torchzero/modules/optimizers/adagrad.py +48 -29
- torchzero/modules/optimizers/adahessian.py +223 -0
- torchzero/modules/optimizers/adam.py +35 -37
- torchzero/modules/optimizers/adan.py +110 -0
- torchzero/modules/optimizers/adaptive_heavyball.py +57 -0
- torchzero/modules/optimizers/esgd.py +171 -0
- torchzero/modules/optimizers/ladagrad.py +183 -0
- torchzero/modules/optimizers/lion.py +4 -4
- torchzero/modules/optimizers/mars.py +91 -0
- torchzero/modules/optimizers/msam.py +186 -0
- torchzero/modules/optimizers/muon.py +32 -7
- torchzero/modules/optimizers/orthograd.py +4 -5
- torchzero/modules/optimizers/rmsprop.py +19 -19
- torchzero/modules/optimizers/rprop.py +89 -52
- torchzero/modules/optimizers/sam.py +163 -0
- torchzero/modules/optimizers/shampoo.py +55 -27
- torchzero/modules/optimizers/soap.py +40 -37
- torchzero/modules/optimizers/sophia_h.py +82 -25
- torchzero/modules/projections/__init__.py +2 -4
- torchzero/modules/projections/cast.py +51 -0
- torchzero/modules/projections/galore.py +4 -2
- torchzero/modules/projections/projection.py +212 -118
- torchzero/modules/quasi_newton/__init__.py +44 -5
- torchzero/modules/quasi_newton/cg.py +190 -39
- torchzero/modules/quasi_newton/diagonal_quasi_newton.py +163 -0
- torchzero/modules/quasi_newton/lbfgs.py +154 -97
- torchzero/modules/quasi_newton/lsr1.py +102 -58
- torchzero/modules/quasi_newton/quasi_newton.py +1032 -177
- torchzero/modules/quasi_newton/trust_region.py +397 -0
- torchzero/modules/second_order/__init__.py +2 -2
- torchzero/modules/second_order/newton.py +245 -54
- torchzero/modules/second_order/newton_cg.py +311 -21
- torchzero/modules/second_order/nystrom.py +124 -21
- torchzero/modules/smoothing/gaussian.py +55 -21
- torchzero/modules/smoothing/laplacian.py +20 -12
- torchzero/modules/step_size/__init__.py +2 -0
- torchzero/modules/step_size/adaptive.py +122 -0
- torchzero/modules/step_size/lr.py +154 -0
- torchzero/modules/weight_decay/__init__.py +1 -1
- torchzero/modules/weight_decay/weight_decay.py +126 -10
- torchzero/modules/wrappers/optim_wrapper.py +40 -12
- torchzero/optim/wrappers/directsearch.py +281 -0
- torchzero/optim/wrappers/fcmaes.py +105 -0
- torchzero/optim/wrappers/mads.py +89 -0
- torchzero/optim/wrappers/nevergrad.py +20 -5
- torchzero/optim/wrappers/nlopt.py +28 -14
- torchzero/optim/wrappers/optuna.py +70 -0
- torchzero/optim/wrappers/scipy.py +167 -16
- torchzero/utils/__init__.py +3 -7
- torchzero/utils/derivatives.py +5 -4
- torchzero/utils/linalg/__init__.py +1 -1
- torchzero/utils/linalg/solve.py +251 -12
- torchzero/utils/numberlist.py +2 -0
- torchzero/utils/optimizer.py +55 -74
- torchzero/utils/python_tools.py +27 -4
- torchzero/utils/tensorlist.py +40 -28
- {torchzero-0.3.9.dist-info → torchzero-0.3.11.dist-info}/METADATA +76 -51
- torchzero-0.3.11.dist-info/RECORD +159 -0
- {torchzero-0.3.9.dist-info → torchzero-0.3.11.dist-info}/WHEEL +1 -1
- torchzero/core/preconditioner.py +0 -138
- torchzero/modules/experimental/algebraic_newton.py +0 -145
- torchzero/modules/experimental/soapy.py +0 -290
- torchzero/modules/experimental/spectral.py +0 -288
- torchzero/modules/experimental/structured_newton.py +0 -111
- torchzero/modules/experimental/tropical_newton.py +0 -136
- torchzero/modules/lr/__init__.py +0 -2
- torchzero/modules/lr/lr.py +0 -59
- torchzero/modules/lr/step_size.py +0 -97
- torchzero/modules/ops/debug.py +0 -25
- torchzero/modules/ops/misc.py +0 -419
- torchzero/modules/ops/split.py +0 -75
- torchzero/modules/quasi_newton/experimental/__init__.py +0 -1
- torchzero/modules/quasi_newton/olbfgs.py +0 -196
- torchzero-0.3.9.dist-info/RECORD +0 -131
- {torchzero-0.3.9.dist-info → torchzero-0.3.11.dist-info}/licenses/LICENSE +0 -0
- {torchzero-0.3.9.dist-info → torchzero-0.3.11.dist-info}/top_level.txt +0 -0
torchzero/utils/linalg/solve.py
CHANGED

```diff
@@ -1,12 +1,41 @@
+# pyright: reportArgumentType=false
 from collections.abc import Callable
-from typing import overload
+from typing import Any, overload
+
 import torch
 
-from .. import
+from .. import (
+    TensorList,
+    generic_eq,
+    generic_finfo_eps,
+    generic_numel,
+    generic_randn_like,
+    generic_vector_norm,
+    generic_zeros_like,
+)
+
+
+def _make_A_mm_reg(A_mm: Callable | torch.Tensor, reg):
+    if callable(A_mm):
+        def A_mm_reg(x): # A_mm with regularization
+            Ax = A_mm(x)
+            if not generic_eq(reg, 0): Ax += x*reg
+            return Ax
+        return A_mm_reg
+
+    if not isinstance(A_mm, torch.Tensor): raise TypeError(type(A_mm))
+
+    def Ax_reg(x): # A_mm with regularization
+        if A_mm.ndim == 1: Ax = A_mm * x
+        else: Ax = A_mm @ x
+        if reg != 0: Ax += x*reg
+        return Ax
+    return Ax_reg
+
 
 @overload
 def cg(
-    A_mm: Callable[[torch.Tensor], torch.Tensor],
+    A_mm: Callable[[torch.Tensor], torch.Tensor] | torch.Tensor,
     b: torch.Tensor,
     x0_: torch.Tensor | None = None,
     tol: float | None = 1e-4,
@@ -24,17 +53,17 @@ def cg(
 ) -> TensorList: ...
 
 def cg(
-    A_mm: Callable,
+    A_mm: Callable | torch.Tensor,
     b: torch.Tensor | TensorList,
     x0_: torch.Tensor | TensorList | None = None,
     tol: float | None = 1e-4,
     maxiter: int | None = None,
     reg: float | list[float] | tuple[float] = 0,
 ):
-
-
-
-
+    A_mm_reg = _make_A_mm_reg(A_mm, reg)
+    eps = generic_finfo_eps(b)
+
+    if tol is None: tol = eps
 
     if maxiter is None: maxiter = generic_numel(b)
     if x0_ is None: x0_ = generic_zeros_like(b)
@@ -44,9 +73,10 @@ def cg(
     p = residual.clone() # search direction
     r_norm = generic_vector_norm(residual)
     init_norm = r_norm
-    if
+    if r_norm < tol: return x
    k = 0
 
+
     while True:
         Ap = A_mm_reg(p)
         step_size = (r_norm**2) / p.dot(Ap)
@@ -55,7 +85,7 @@ def cg(
         new_r_norm = generic_vector_norm(residual)
 
         k += 1
-        if
+        if new_r_norm <= tol * init_norm: return x
         if k >= maxiter: return x
 
         beta = (new_r_norm**2) / (r_norm**2)
@@ -131,6 +161,8 @@ def nystrom_pcg(
         generator=generator,
     )
     lambd += reg
+    eps = torch.finfo(b.dtype).eps ** 2
+    if tol is None: tol = eps
 
     def A_mm_reg(x): # A_mm with regularization
         Ax = A_mm(x)
@@ -150,7 +182,7 @@ def nystrom_pcg(
     p = z.clone() # search direction
 
     init_norm = torch.linalg.vector_norm(residual) # pylint:disable=not-callable
-    if
+    if init_norm < tol: return x
     k = 0
     while True:
         Ap = A_mm_reg(p)
@@ -160,10 +192,217 @@ def nystrom_pcg(
         residual -= step_size * Ap
 
         k += 1
-        if
+        if torch.linalg.vector_norm(residual) <= tol * init_norm: return x # pylint:disable=not-callable
         if k >= maxiter: return x
 
         z = P_inv @ residual
         beta = residual.dot(z) / rz
         p = z + p*beta
 
+
+def _safe_clip(x: torch.Tensor):
+    """makes sure scalar tensor x is not smaller than epsilon"""
+    assert x.numel() == 1, x.shape
+    eps = torch.finfo(x.dtype).eps
+    if x.abs() < eps: return x.new_full(x.size(), eps).copysign(x)
+    return x
+
+def _trust_tau(x,d,trust_region):
+    xx = x.dot(x)
+    xd = x.dot(d)
+    dd = _safe_clip(d.dot(d))
+
+    rad = (xd**2 - dd * (xx - trust_region**2)).clip(min=0).sqrt()
+    tau = (-xd + rad) / dd
+
+    return x + tau * d
+
+
+@overload
+def steihaug_toint_cg(
+    A_mm: Callable[[torch.Tensor], torch.Tensor] | torch.Tensor,
+    b: torch.Tensor,
+    trust_region: float,
+    x0: torch.Tensor | None = None,
+    tol: float | None = 1e-4,
+    maxiter: int | None = None,
+    reg: float = 0,
+) -> torch.Tensor: ...
+@overload
+def steihaug_toint_cg(
+    A_mm: Callable[[TensorList], TensorList],
+    b: TensorList,
+    trust_region: float,
+    x0: TensorList | None = None,
+    tol: float | None = 1e-4,
+    maxiter: int | None = None,
+    reg: float | list[float] | tuple[float] = 0,
+) -> TensorList: ...
+def steihaug_toint_cg(
+    A_mm: Callable | torch.Tensor,
+    b: torch.Tensor | TensorList,
+    trust_region: float,
+    x0: torch.Tensor | TensorList | None = None,
+    tol: float | None = 1e-4,
+    maxiter: int | None = None,
+    reg: float | list[float] | tuple[float] = 0,
+):
+    """
+    Solution is bounded to have L2 norm no larger than :code:`trust_region`. If solution exceeds :code:`trust_region`, CG is terminated early, so it is also faster.
+    """
+    A_mm_reg = _make_A_mm_reg(A_mm, reg)
+
+    x = x0
+    if x is None: x = generic_zeros_like(b)
+    r = b
+    d = r.clone()
+
+    eps = generic_finfo_eps(b)**2
+    if tol is None: tol = eps
+
+    if generic_vector_norm(r) < tol:
+        return x
+
+    if maxiter is None:
+        maxiter = generic_numel(b)
+
+    for _ in range(maxiter):
+        Ad = A_mm_reg(d)
+
+        d_Ad = d.dot(Ad)
+        if d_Ad <= eps:
+            return _trust_tau(x, d, trust_region)
+
+        alpha = r.dot(r) / d_Ad
+        p_next = x + alpha * d
+
+        # check if the step exceeds the trust-region boundary
+        if generic_vector_norm(p_next) >= trust_region:
+            return _trust_tau(x, d, trust_region)
+
+        # update step, residual and direction
+        x = p_next
+        r_next = r - alpha * Ad
+
+        if generic_vector_norm(r_next) < tol:
+            return x
+
+        beta = r_next.dot(r_next) / r.dot(r)
+        d = r_next + beta * d
+        r = r_next
+
+    return x
+
+
+
+# Liu, Yang, and Fred Roosta. "MINRES: From negative curvature detection to monotonicity properties." SIAM Journal on Optimization 32.4 (2022): 2636-2661.
+@overload
+def minres(
+    A_mm: Callable[[torch.Tensor], torch.Tensor] | torch.Tensor,
+    b: torch.Tensor,
+    x0: torch.Tensor | None = None,
+    tol: float | None = 1e-4,
+    maxiter: int | None = None,
+    reg: float = 0,
+    npc_terminate: bool=True,
+    trust_region: float | None = None,
+) -> torch.Tensor: ...
+@overload
+def minres(
+    A_mm: Callable[[TensorList], TensorList],
+    b: TensorList,
+    x0: TensorList | None = None,
+    tol: float | None = 1e-4,
+    maxiter: int | None = None,
+    reg: float | list[float] | tuple[float] = 0,
+    npc_terminate: bool=True,
+    trust_region: float | None = None,
+) -> TensorList: ...
+def minres(
+    A_mm,
+    b,
+    x0: torch.Tensor | TensorList | None = None,
+    tol: float | None = 1e-4,
+    maxiter: int | None = None,
+    reg: float | list[float] | tuple[float] = 0,
+    npc_terminate: bool=True,
+    trust_region: float | None = None,
+):
+    A_mm_reg = _make_A_mm_reg(A_mm, reg)
+    eps = generic_finfo_eps(b)
+    if tol is None: tol = eps**2
+
+    if maxiter is None: maxiter = generic_numel(b)
+    if x0 is None:
+        R = b
+        x0 = generic_zeros_like(b)
+    else:
+        R = b - A_mm_reg(x0)
+
+    X: Any = x0
+    beta = b_norm = generic_vector_norm(b)
+    if b_norm < eps**2:
+        return generic_zeros_like(b)
+
+
+    V = b / beta
+    V_prev = generic_zeros_like(b)
+    D = generic_zeros_like(b)
+    D_prev = generic_zeros_like(b)
+
+    c = -1
+    phi = tau = beta
+    s = delta1 = e = 0
+
+
+    for _ in range(maxiter):
+
+        P = A_mm_reg(V)
+        alpha = V.dot(P)
+        P -= beta*V_prev
+        P -= alpha*V
+        beta = generic_vector_norm(P)
+
+        delta2 = c*delta1 + s*alpha
+        gamma1 = s*delta1 - c*alpha
+        e_next = s*beta
+        delta1 = -c*beta
+
+        cgamma1 = c*gamma1
+        if trust_region is not None and cgamma1 >= 0:
+            if npc_terminate: return _trust_tau(X, R, trust_region)
+            return _trust_tau(X, D, trust_region)
+
+        if npc_terminate and cgamma1 >= 0:
+            return R
+
+        gamma2 = (gamma1**2 + beta**2)**(1/2)
+
+        if abs(gamma2) <= eps: # singular system
+            # c=0; s=1; tau=0
+            if trust_region is None: return X
+            return _trust_tau(X, D, trust_region)
+
+        c = gamma1 / gamma2
+        s = beta/gamma2
+        tau = c*phi
+        phi = s*phi
+
+        D_prev = D
+        D = (V - delta2*D - e*D_prev) / gamma2
+        e = e_next
+        X = X + tau*D
+
+        if trust_region is not None:
+            if generic_vector_norm(X) > trust_region:
+                return _trust_tau(X, D, trust_region)
+
+        if (abs(beta) < eps) or (phi / b_norm <= tol):
+            # R = zeros(R)
+            return X
+
+        V_prev = V
+        V = P/beta
+        R = s**2*R - phi*c*V
+
+    return X
```
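The new `_trust_tau` helper is the standard trust-region boundary step: it solves ‖x + τd‖ = Δ for the nonnegative root τ = (−xᵀd + √((xᵀd)² − dᵀd·(xᵀx − Δ²)))/dᵀd and returns x + τd; `_safe_clip` only guards the dᵀd denominator. A minimal, self-contained sketch of how `steihaug_toint_cg` uses this step, with plain tensors and illustrative names like `boundary_step` (not torchzero's API):

```python
import torch

def boundary_step(x: torch.Tensor, d: torch.Tensor, radius: float) -> torch.Tensor:
    # Solve ||x + tau*d|| = radius for tau >= 0: the quadratic
    # (d.d) tau^2 + 2 (x.d) tau + (x.x - radius^2) = 0, positive root.
    xx, xd, dd = x.dot(x), x.dot(d), d.dot(d)
    rad = (xd**2 - dd * (xx - radius**2)).clamp(min=0).sqrt()
    return x + ((-xd + rad) / dd) * d

torch.manual_seed(0)
M = torch.randn(5, 5)
A = M @ M.T + 5 * torch.eye(5)  # small SPD system
b = torch.randn(5)

# plain CG, truncated at the trust-region boundary (the Steihaug-Toint idea)
radius = 0.1
x = torch.zeros(5)
r = b.clone()
d = r.clone()
for _ in range(5):
    Ad = A @ d
    alpha = r.dot(r) / d.dot(Ad)
    x_next = x + alpha * d
    if x_next.norm() >= radius:          # step would leave the trust region:
        x = boundary_step(x, d, radius)  # walk along d only up to the boundary
        break
    r_next = r - alpha * Ad
    beta = r_next.dot(r_next) / r.dot(r)
    x, r, d = x_next, r_next, r_next + beta * d

print(x.norm())  # equals radius when the boundary was hit
```

Because the iterate is returned as soon as it crosses the boundary (or negative curvature is detected via `d_Ad <= eps`), the solver can stop well before CG converges, which is what the `steihaug_toint_cg` docstring means by "also faster".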
torchzero/utils/numberlist.py
CHANGED

```diff
@@ -129,4 +129,6 @@ class NumberList(list[int | float | Any]):
         return self.__class__(fn(i, *args, **kwargs) for i in self)
 
     def clamp(self, min=None, max=None):
+        return self.zipmap_args(_clamp, min, max)
+    def clip(self, min=None, max=None):
         return self.zipmap_args(_clamp, min, max)
```
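`NumberList` now answers to both `clamp` and `clip`, matching the `torch.Tensor.clamp`/`Tensor.clip` alias pair. A tiny sketch of the same alias pattern on a hypothetical list subclass (the real methods both delegate to `zipmap_args(_clamp, min, max)`):

```python
# Hypothetical Numbers class, not torchzero's NumberList: one clamping
# implementation exposed under both torch-style names.
class Numbers(list):
    def clamp(self, lo=None, hi=None):
        lo = -float("inf") if lo is None else lo
        hi = float("inf") if hi is None else hi
        return Numbers(min(max(v, lo), hi) for v in self)
    clip = clamp  # same behavior under both names

print(Numbers([1, 5, 9]).clip(2, 8))  # [2, 5, 8]
```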
torchzero/utils/optimizer.py
CHANGED

```diff
@@ -1,3 +1,4 @@
+from abc import ABC, abstractmethod
 from collections.abc import Callable, Iterable, Mapping, MutableSequence, Sequence, MutableMapping
 from typing import Any, Literal, TypeVar, overload
 
@@ -132,65 +133,7 @@ def get_state_vals(state: Mapping[torch.Tensor, MutableMapping[str, Any]], param
     return values
 
 
-
-def loss_at_params(closure, params: Iterable[torch.Tensor],
-                   new_params: Sequence[torch.Tensor] | Any, backward: bool, restore=False):
-    params = TensorList(params)
-
-    old_params = params.clone() if restore else None
-
-    if isinstance(new_params, Sequence) and isinstance(new_params[0], torch.Tensor):
-        # when not restoring, copy new_params to params to avoid unexpected bugs due to shared storage
-        # when restoring params will be set back to old_params so its fine
-        if restore: params.set_(new_params)
-        else: params.copy_(new_params) # type:ignore
-
-    else:
-        new_params = totensor(new_params)
-        params.from_vec_(new_params)
-
-    if backward: loss = closure()
-    else: loss = closure(False)
-
-    if restore:
-        assert old_params is not None
-        params.set_(old_params)
-
-    return tofloat(loss)
-
-def loss_grad_at_params(closure, params: Iterable[torch.Tensor], new_params: Sequence[torch.Tensor], restore=False):
-    params = TensorList(params)
-    old_params = params.clone() if restore else None
-    loss = loss_at_params(closure, params, new_params, backward=True, restore=False)
-    grad = params.ensure_grad_().grad
-
-    if restore:
-        assert old_params is not None
-        params.set_(old_params)
-
-    return loss, grad
-
-def grad_at_params(closure, params: Iterable[torch.Tensor], new_params: Sequence[torch.Tensor], restore=False):
-    return loss_grad_at_params(closure=closure,params=params,new_params=new_params,restore=restore)[1]
-
-def loss_grad_vec_at_params(closure, params: Iterable[torch.Tensor], new_params: Any, restore=False):
-    params = TensorList(params)
-    old_params = params.clone() if restore else None
-    loss = loss_at_params(closure, params, new_params, backward=True, restore=False)
-    grad = params.ensure_grad_().grad.to_vec()
-
-    if restore:
-        assert old_params is not None
-        params.set_(old_params)
-
-    return loss, grad
-
-def grad_vec_at_params(closure, params: Iterable[torch.Tensor], new_params: Any, restore=False):
-    return loss_grad_vec_at_params(closure=closure,params=params,new_params=new_params,restore=restore)[1]
-
-
-
-class Optimizer(torch.optim.Optimizer):
+class Optimizer(torch.optim.Optimizer, ABC):
     """subclass of torch.optim.Optimizer with some helper methods for fast experimentation, it's not used anywhere in torchzero.
 
     Args:
@@ -251,21 +194,10 @@ class Optimizer(torch.optim.Optimizer):
 
         return get_state_vals(self.state, params, key, key2, *keys, init = init, cls = cls) # type:ignore[reportArgumentType]
 
-    def loss_at_params(self, closure, params: Sequence[torch.Tensor] | Any, backward: bool, restore=False):
-        return loss_at_params(closure=closure,params=self.get_params(),new_params=params,backward=backward,restore=restore)
-
-    def loss_grad_at_params(self, closure, params: Sequence[torch.Tensor] | Any, restore=False):
-        return loss_grad_at_params(closure=closure,params=self.get_params(),new_params=params,restore=restore)
-
-    def grad_at_params(self, closure, new_params: Sequence[torch.Tensor], restore=False):
-        return self.loss_grad_at_params(closure=closure,params=new_params,restore=restore)[1]
-
-    def loss_grad_vec_at_params(self, closure, params: Any, restore=False):
-        return loss_grad_vec_at_params(closure=closure,params=self.get_params(),new_params=params,restore=restore)
-
-    def grad_vec_at_params(self, closure, params: Any, restore=False):
-        return self.loss_grad_vec_at_params(closure=closure,params=params,restore=restore)[1]
 
+    # shut up pylance
+    @abstractmethod
+    def step(self, closure) -> Any: ... # pylint:disable=signature-differs # pyright:ignore[reportIncompatibleMethodOverride]
 
 def zero_grad_(params: Iterable[torch.Tensor], set_to_none):
     if set_to_none:
@@ -281,4 +213,53 @@ def zero_grad_(params: Iterable[torch.Tensor], set_to_none):
     else:
         grad.requires_grad_(False)
 
-    torch._foreach_zero_(grads)
+    torch._foreach_zero_(grads)
+
+
+@overload
+def unpack_states(states: Sequence[MutableMapping[str, Any]], tensors: Sequence[torch.Tensor],
+                  key: str, *,
+                  must_exist: bool = False, init: Init = torch.zeros_like,
+                  cls: type[ListLike] = list) -> ListLike: ...
+@overload
+def unpack_states(states: Sequence[MutableMapping[str, Any]], tensors: Sequence[torch.Tensor],
+                  key: list[str] | tuple[str,...], *,
+                  must_exist: bool = False, init: Init | Sequence[Init] = torch.zeros_like,
+                  cls: type[ListLike] = list) -> list[ListLike]: ...
+@overload
+def unpack_states(states: Sequence[MutableMapping[str, Any]], tensors: Sequence[torch.Tensor],
+                  key: str, key2: str, *keys: str,
+                  must_exist: bool = False, init: Init | Sequence[Init] = torch.zeros_like,
+                  cls: type[ListLike] = list) -> list[ListLike]: ...
+
+def unpack_states(states: Sequence[MutableMapping[str, Any]], tensors: Sequence[torch.Tensor],
+                  key: str | list[str] | tuple[str,...], key2: str | None = None, *keys: str,
+                  must_exist: bool = False, init: Init | Sequence[Init] = torch.zeros_like,
+                  cls: type[ListLike] = list) -> ListLike | list[ListLike]:
+
+    # single key, return single cls
+    if isinstance(key, str) and key2 is None:
+        values = cls()
+        for i,s in enumerate(states):
+            if key not in s:
+                if must_exist: raise KeyError(f"Key {key} doesn't exist in state with keys {tuple(s.keys())}")
+                s[key] = _make_initial_state_value(tensors[i], init, i)
+            values.append(s[key])
+        return values
+
+    # multiple keys
+    k1 = (key,) if isinstance(key, str) else tuple(key)
+    k2 = () if key2 is None else (key2,)
+    keys = k1 + k2 + keys
+
+    values = [cls() for _ in keys]
+    for i,s in enumerate(states):
+        for k_i, key in enumerate(keys):
+            if key not in s:
+                if must_exist: raise KeyError(f"Key {key} doesn't exist in state with keys {tuple(s.keys())}")
+                k_init = init[k_i] if isinstance(init, (list,tuple)) else init
+                s[key] = _make_initial_state_value(tensors[i], k_init, i)
+            values[k_i].append(s[key])
+
+    return values
+
```
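`unpack_states` gathers one state buffer per parameter out of a sequence of per-tensor state dicts, lazily creating missing entries from `init`; with several keys it returns one list per key. A self-contained sketch of the single-key path, with a plain callable standing in for torchzero's `Init`/`_make_initial_state_value` machinery:

```python
import torch

# Sketch of the single-key unpack_states path above (simplified: init is just
# a callable applied to the matching tensor, no Init/ListLike typing).
def unpack_states_sketch(states, tensors, key, init=torch.zeros_like, must_exist=False):
    values = []
    for s, t in zip(states, tensors):
        if key not in s:
            if must_exist:
                raise KeyError(f"Key {key} doesn't exist in state with keys {tuple(s.keys())}")
            s[key] = init(t)  # create the buffer on first use
        values.append(s[key])
    return values

params = [torch.randn(3), torch.randn(2, 2)]
states = [{} for _ in params]                 # one state dict per parameter
exp_avg = unpack_states_sketch(states, params, "exp_avg")
print([tuple(v.shape) for v in exp_avg])      # [(3,), (2, 2)]: buffers match parameters
print("exp_avg" in states[0])                 # True: stored in place, reused on the next step
```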
torchzero/utils/python_tools.py
CHANGED

```diff
@@ -1,7 +1,7 @@
 import functools
 import operator
-from typing import Any, TypeVar
-from collections.abc import Iterable, Callable
+from typing import Any, TypeVar, overload
+from collections.abc import Iterable, Callable, Mapping, MutableSequence
 from collections import UserDict
 
 
@@ -17,8 +17,8 @@ def flatten(iterable: Iterable) -> list[Any]:
     raise TypeError(f'passed object is not an iterable, {type(iterable) = }')
 
 X = TypeVar("X")
-# def reduce_dim[X](x:Iterable[Iterable[X]]) -> list[X]:
-def reduce_dim(x:Iterable[Iterable[X]]) -> list[X]:
+# def reduce_dim[X](x:Iterable[Iterable[X]]) -> list[X]:
+def reduce_dim(x:Iterable[Iterable[X]]) -> list[X]:
     """Reduces one level of nesting. Takes an iterable of iterables of X, and returns an iterable of X."""
     return functools.reduce(operator.iconcat, x, [])
 
@@ -31,6 +31,16 @@ def generic_eq(x: int | float | Iterable[int | float], y: int | float | Iterable
         return all(i==y for i in x)
     return all(i==j for i,j in zip(x,y))
 
+def generic_ne(x: int | float | Iterable[int | float], y: int | float | Iterable[int | float]) -> bool:
+    """generic not equals function that supports scalars and lists of numbers. Faster than not generic_eq"""
+    if isinstance(x, (int,float)):
+        if isinstance(y, (int,float)): return x!=y
+        return any(i!=x for i in y)
+    if isinstance(y, (int,float)):
+        return any(i!=y for i in x)
+    return any(i!=j for i,j in zip(x,y))
+
+
 def zipmap(self, fn: Callable, other: Any | list | tuple, *args, **kwargs):
     """If `other` is list/tuple, applies `fn` to self zipped with `other`.
     Otherwise applies `fn` to this sequence and `other`.
@@ -38,3 +48,16 @@ def zipmap(self, fn: Callable, other: Any | list | tuple, *args, **kwargs):
     if isinstance(other, (list, tuple)): return self.__class__(fn(i, j, *args, **kwargs) for i, j in zip(self, other))
     return self.__class__(fn(i, other, *args, **kwargs) for i in self)
 
+ListLike = TypeVar('ListLike', bound=MutableSequence)
+@overload
+def unpack_dicts(dicts: Iterable[Mapping[str, Any]], key:str, *, cls:type[ListLike]=list) -> ListLike: ...
+@overload
+def unpack_dicts(dicts: Iterable[Mapping[str, Any]], key:str, key2: str, *keys:str, cls:type[ListLike]=list) -> list[ListLike]: ...
+def unpack_dicts(dicts: Iterable[Mapping[str, Any]], key:str, key2: str | None = None, *keys:str, cls:type[ListLike]=list) -> ListLike | list[ListLike]:
+    k1 = (key,) if isinstance(key, str) else tuple(key)
+    k2 = () if key2 is None else (key2,)
+    keys = k1 + k2 + keys
+
+    values = [cls(s[k] for s in dicts) for k in keys] # pyright:ignore[reportCallIssue]
+    if len(values) == 1: return values[0]
+    return values
```