torchzero-0.3.9-py3-none-any.whl → torchzero-0.3.11-py3-none-any.whl
This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- docs/source/conf.py +6 -4
- docs/source/docstring template.py +46 -0
- tests/test_identical.py +2 -3
- tests/test_opts.py +115 -68
- tests/test_tensorlist.py +2 -2
- tests/test_vars.py +62 -61
- torchzero/core/__init__.py +2 -3
- torchzero/core/module.py +185 -53
- torchzero/core/transform.py +327 -159
- torchzero/modules/__init__.py +3 -1
- torchzero/modules/clipping/clipping.py +120 -23
- torchzero/modules/clipping/ema_clipping.py +37 -22
- torchzero/modules/clipping/growth_clipping.py +20 -21
- torchzero/modules/experimental/__init__.py +30 -4
- torchzero/modules/experimental/absoap.py +53 -156
- torchzero/modules/experimental/adadam.py +22 -15
- torchzero/modules/experimental/adamY.py +21 -25
- torchzero/modules/experimental/adam_lambertw.py +149 -0
- torchzero/modules/{line_search/trust_region.py → experimental/adaptive_step_size.py} +37 -8
- torchzero/modules/experimental/adasoap.py +24 -129
- torchzero/modules/experimental/cosine.py +214 -0
- torchzero/modules/experimental/cubic_adam.py +97 -0
- torchzero/modules/experimental/curveball.py +12 -12
- torchzero/modules/{projections → experimental}/dct.py +11 -11
- torchzero/modules/experimental/eigendescent.py +120 -0
- torchzero/modules/experimental/etf.py +195 -0
- torchzero/modules/experimental/exp_adam.py +113 -0
- torchzero/modules/experimental/expanded_lbfgs.py +141 -0
- torchzero/modules/{projections → experimental}/fft.py +10 -10
- torchzero/modules/experimental/gradmin.py +2 -2
- torchzero/modules/experimental/hnewton.py +85 -0
- torchzero/modules/{quasi_newton/experimental → experimental}/modular_lbfgs.py +49 -50
- torchzero/modules/experimental/newton_solver.py +11 -11
- torchzero/modules/experimental/newtonnewton.py +92 -0
- torchzero/modules/experimental/parabolic_search.py +220 -0
- torchzero/modules/experimental/reduce_outward_lr.py +10 -7
- torchzero/modules/{projections/structural.py → experimental/structural_projections.py} +12 -54
- torchzero/modules/experimental/subspace_preconditioners.py +20 -10
- torchzero/modules/experimental/tensor_adagrad.py +42 -0
- torchzero/modules/functional.py +12 -2
- torchzero/modules/grad_approximation/fdm.py +31 -4
- torchzero/modules/grad_approximation/forward_gradient.py +17 -7
- torchzero/modules/grad_approximation/grad_approximator.py +69 -24
- torchzero/modules/grad_approximation/rfdm.py +310 -50
- torchzero/modules/higher_order/__init__.py +1 -0
- torchzero/modules/higher_order/higher_order_newton.py +319 -0
- torchzero/modules/line_search/__init__.py +4 -4
- torchzero/modules/line_search/adaptive.py +99 -0
- torchzero/modules/line_search/backtracking.py +75 -31
- torchzero/modules/line_search/line_search.py +107 -49
- torchzero/modules/line_search/polynomial.py +233 -0
- torchzero/modules/line_search/scipy.py +20 -5
- torchzero/modules/line_search/strong_wolfe.py +52 -36
- torchzero/modules/misc/__init__.py +27 -0
- torchzero/modules/misc/debug.py +48 -0
- torchzero/modules/misc/escape.py +60 -0
- torchzero/modules/misc/gradient_accumulation.py +70 -0
- torchzero/modules/misc/misc.py +316 -0
- torchzero/modules/misc/multistep.py +158 -0
- torchzero/modules/misc/regularization.py +171 -0
- torchzero/modules/misc/split.py +103 -0
- torchzero/modules/{ops → misc}/switch.py +48 -7
- torchzero/modules/momentum/__init__.py +1 -1
- torchzero/modules/momentum/averaging.py +25 -10
- torchzero/modules/momentum/cautious.py +115 -40
- torchzero/modules/momentum/ema.py +92 -41
- torchzero/modules/momentum/experimental.py +21 -13
- torchzero/modules/momentum/matrix_momentum.py +145 -76
- torchzero/modules/momentum/momentum.py +25 -4
- torchzero/modules/ops/__init__.py +3 -31
- torchzero/modules/ops/accumulate.py +51 -25
- torchzero/modules/ops/binary.py +108 -62
- torchzero/modules/ops/multi.py +95 -34
- torchzero/modules/ops/reduce.py +31 -23
- torchzero/modules/ops/unary.py +37 -21
- torchzero/modules/ops/utility.py +53 -45
- torchzero/modules/optimizers/__init__.py +12 -3
- torchzero/modules/optimizers/adagrad.py +48 -29
- torchzero/modules/optimizers/adahessian.py +223 -0
- torchzero/modules/optimizers/adam.py +35 -37
- torchzero/modules/optimizers/adan.py +110 -0
- torchzero/modules/optimizers/adaptive_heavyball.py +57 -0
- torchzero/modules/optimizers/esgd.py +171 -0
- torchzero/modules/optimizers/ladagrad.py +183 -0
- torchzero/modules/optimizers/lion.py +4 -4
- torchzero/modules/optimizers/mars.py +91 -0
- torchzero/modules/optimizers/msam.py +186 -0
- torchzero/modules/optimizers/muon.py +32 -7
- torchzero/modules/optimizers/orthograd.py +4 -5
- torchzero/modules/optimizers/rmsprop.py +19 -19
- torchzero/modules/optimizers/rprop.py +89 -52
- torchzero/modules/optimizers/sam.py +163 -0
- torchzero/modules/optimizers/shampoo.py +55 -27
- torchzero/modules/optimizers/soap.py +40 -37
- torchzero/modules/optimizers/sophia_h.py +82 -25
- torchzero/modules/projections/__init__.py +2 -4
- torchzero/modules/projections/cast.py +51 -0
- torchzero/modules/projections/galore.py +4 -2
- torchzero/modules/projections/projection.py +212 -118
- torchzero/modules/quasi_newton/__init__.py +44 -5
- torchzero/modules/quasi_newton/cg.py +190 -39
- torchzero/modules/quasi_newton/diagonal_quasi_newton.py +163 -0
- torchzero/modules/quasi_newton/lbfgs.py +154 -97
- torchzero/modules/quasi_newton/lsr1.py +102 -58
- torchzero/modules/quasi_newton/quasi_newton.py +1032 -177
- torchzero/modules/quasi_newton/trust_region.py +397 -0
- torchzero/modules/second_order/__init__.py +2 -2
- torchzero/modules/second_order/newton.py +245 -54
- torchzero/modules/second_order/newton_cg.py +311 -21
- torchzero/modules/second_order/nystrom.py +124 -21
- torchzero/modules/smoothing/gaussian.py +55 -21
- torchzero/modules/smoothing/laplacian.py +20 -12
- torchzero/modules/step_size/__init__.py +2 -0
- torchzero/modules/step_size/adaptive.py +122 -0
- torchzero/modules/step_size/lr.py +154 -0
- torchzero/modules/weight_decay/__init__.py +1 -1
- torchzero/modules/weight_decay/weight_decay.py +126 -10
- torchzero/modules/wrappers/optim_wrapper.py +40 -12
- torchzero/optim/wrappers/directsearch.py +281 -0
- torchzero/optim/wrappers/fcmaes.py +105 -0
- torchzero/optim/wrappers/mads.py +89 -0
- torchzero/optim/wrappers/nevergrad.py +20 -5
- torchzero/optim/wrappers/nlopt.py +28 -14
- torchzero/optim/wrappers/optuna.py +70 -0
- torchzero/optim/wrappers/scipy.py +167 -16
- torchzero/utils/__init__.py +3 -7
- torchzero/utils/derivatives.py +5 -4
- torchzero/utils/linalg/__init__.py +1 -1
- torchzero/utils/linalg/solve.py +251 -12
- torchzero/utils/numberlist.py +2 -0
- torchzero/utils/optimizer.py +55 -74
- torchzero/utils/python_tools.py +27 -4
- torchzero/utils/tensorlist.py +40 -28
- {torchzero-0.3.9.dist-info → torchzero-0.3.11.dist-info}/METADATA +76 -51
- torchzero-0.3.11.dist-info/RECORD +159 -0
- {torchzero-0.3.9.dist-info → torchzero-0.3.11.dist-info}/WHEEL +1 -1
- torchzero/core/preconditioner.py +0 -138
- torchzero/modules/experimental/algebraic_newton.py +0 -145
- torchzero/modules/experimental/soapy.py +0 -290
- torchzero/modules/experimental/spectral.py +0 -288
- torchzero/modules/experimental/structured_newton.py +0 -111
- torchzero/modules/experimental/tropical_newton.py +0 -136
- torchzero/modules/lr/__init__.py +0 -2
- torchzero/modules/lr/lr.py +0 -59
- torchzero/modules/lr/step_size.py +0 -97
- torchzero/modules/ops/debug.py +0 -25
- torchzero/modules/ops/misc.py +0 -419
- torchzero/modules/ops/split.py +0 -75
- torchzero/modules/quasi_newton/experimental/__init__.py +0 -1
- torchzero/modules/quasi_newton/olbfgs.py +0 -196
- torchzero-0.3.9.dist-info/RECORD +0 -131
- {torchzero-0.3.9.dist-info → torchzero-0.3.11.dist-info}/licenses/LICENSE +0 -0
- {torchzero-0.3.9.dist-info → torchzero-0.3.11.dist-info}/top_level.txt +0 -0

--- a/torchzero/modules/optimizers/soap.py
+++ b/torchzero/modules/optimizers/soap.py
@@ -2,7 +2,7 @@ from operator import itemgetter

 import torch

-from ...core import Chainable, Transform,
+from ...core import Chainable, Transform, apply_transform
 from ...modules.optimizers.shampoo import _merge_small_dims, _unmerge_small_dims

 @torch.no_grad
@@ -24,11 +24,9 @@ def project(tensors: torch.Tensor, Q: list[torch.Tensor | None]):
     Projects the gradient to the eigenbases of the preconditioner.
     """
     for mat in Q:
-        if mat is None:
-        if len(mat) > 0:
+        if mat is not None and len(mat) > 0:
             tensors = torch.tensordot(tensors, mat, dims=[[0], [0]]) # pyright:ignore[reportArgumentType]
         else:
-            # I don't understand this part but it is in https://github.com/nikhilvyas/SOAP/blob/main/soap.py
             permute_order = list(range(1, len(tensors.shape))) + [0]
             tensors = tensors.permute(permute_order)

@@ -40,8 +38,7 @@ def project_back(tensors: torch.Tensor, Q: list[torch.Tensor| None]):
     Projects the gradient back to the original space.
     """
     for mat in Q:
-        if mat is None:
-        if len(mat) > 0:
+        if mat is not None and len(mat) > 0:
             tensors = torch.tensordot(tensors, mat,dims=[[0], [1]]) # pyright:ignore[reportArgumentType]
         else:
             permute_order = list(range(1, len(tensors.shape))) + [0]
@@ -59,8 +56,7 @@ def get_orthogonal_matrix(mat: list[torch.Tensor | None]):
     float_data = False
     original_type = original_device = None
     for m in mat:
-        if m is None:
-        if len(m) == 0:
+        if m is None or len(m) == 0:
             matrix.append([])
             continue
         if m.dtype != torch.float:
@@ -100,13 +96,11 @@ def get_orthogonal_matrix_QR(exp_avg_sq: torch.Tensor, GG: list[torch.Tensor | N
     float_data = False
     original_type = original_device = None
     for m,o in zip(GG, Q_list):
-        if m is None:
-            assert o is not None
-
-        if len(m) == 0:
+        if m is None or len(m) == 0:
             matrix.append([])
             orth_matrix.append([])
             continue
+        assert o is not None
         if m.data.dtype != torch.float:
             original_type = m.data.dtype
             original_device = m.data.device
@@ -152,11 +146,28 @@ class SOAP(Transform):
             epsilon for dividing first momentum by second. Defaults to 1e-8.
         decay (float | None, optional):
             Decays covariance matrix accumulators, this may be useful if `shampoo_beta` is None. Defaults to None.
-
-
-            results but True usually works better. Defaults to True.
+        alpha (float, optional):
+            learning rate. Defaults to 1.
         bias_correction (bool, optional):
             enables adam bias correction. Defaults to True.
+
+    Examples:
+        SOAP:
+
+        .. code-block:: python
+
+            opt = tz.Modular(model.parameters(), tz.m.SOAP(), tz.m.LR(1e-3))
+
+        Stabilized SOAP:
+
+        .. code-block:: python
+
+            opt = tz.Modular(
+                model.parameters(),
+                tz.m.SOAP(),
+                tz.m.NormalizeByEMA(max_ema_growth=1.2),
+                tz.m.LR(1e-2)
+            )
     """
     def __init__(
         self,
@@ -170,7 +181,6 @@ class SOAP(Transform):
         eps: float = 1e-8,
         decay: float | None = None,
         alpha: float = 1,
-        unprojected_exp_avg: bool = True,
         bias_correction: bool = True,
     ):
         defaults = dict(
@@ -183,21 +193,18 @@ class SOAP(Transform):
             precondition_1d=precondition_1d,
             eps=eps,
             decay=decay,
-            unprojected_exp_avg=unprojected_exp_avg,
             bias_correction=bias_correction,
             alpha=alpha,
         )
         super().__init__(defaults, uses_grad=False)

     @torch.no_grad
-    def
+    def apply_tensors(self, tensors, params, grads, loss, states, settings):
         updates = []
         # update preconditioners
-        for i,(p,t) in enumerate(zip(params, tensors)):
-
-
-            beta1, beta2, shampoo_beta, merge_small, max_dim, precondition_1d, eps, unprojected_exp_avg,alpha = itemgetter(
-                'beta1', 'beta2', 'shampoo_beta', 'merge_small', 'max_dim', 'precondition_1d', 'eps', 'unprojected_exp_avg','alpha')(settings)
+        for i,(p,t, state, setting) in enumerate(zip(params, tensors, states, settings)):
+            beta1, beta2, shampoo_beta, merge_small, max_dim, precondition_1d, eps,alpha = itemgetter(
+                'beta1', 'beta2', 'shampoo_beta', 'merge_small', 'max_dim', 'precondition_1d', 'eps','alpha')(setting)

             if merge_small:
                 t, state['flat_sizes'], state['sort_idxs'] = _merge_small_dims(t, max_dim)
@@ -205,7 +212,7 @@ class SOAP(Transform):
             # initialize state on 1st step
             if 'GG' not in state:
                 state["exp_avg"] = torch.zeros_like(t)
-                state["
+                state["exp_avg_sq_projected"] = torch.zeros_like(t)

                 if not precondition_1d and t.ndim <= 1:
                     state['GG'] = []
@@ -235,35 +242,31 @@ class SOAP(Transform):
             # exponential moving averages
             # this part could be foreached but I will do that at some point its not a big difference compared to preconditioning
             exp_avg: torch.Tensor = state["exp_avg"]
-
+            exp_avg_sq_projected: torch.Tensor = state["exp_avg_sq_projected"]

-
-                exp_avg.lerp_(t, 1-beta1)
-            else:
-                exp_avg.lerp_(t_projected, 1-beta1)
+            exp_avg.lerp_(t, 1-beta1)

             if t_projected is None:
-
+                exp_avg_sq_projected.mul_(beta2).addcmul_(t, t, value=1-beta2)
             else:
-
+                exp_avg_sq_projected.mul_(beta2).addcmul_(t_projected, t_projected, value=1-beta2)

             # project exponential moving averages if they are accumulated unprojected
             exp_avg_projected = exp_avg
-            if
+            if t_projected is not None:
                 exp_avg_projected = project(exp_avg, state['Q'])

-            exp_avg_sq_projected = exp_avg_sq
-
             denom = exp_avg_sq_projected.sqrt().add_(eps)
             # print(f'{t_projected = }, {exp_avg = }, {exp_avg_projected = }, {exp_avg_sq = }, {exp_avg_sq_projected = }, {denom = }')

             # Projecting back the preconditioned (by Adam) exponential moving average of gradients
             # to the original space
             update = exp_avg_projected / denom
+
             if t_projected is not None:
                 update = project_back(update, state["Q"])

-            if
+            if setting['bias_correction']:
                 bias_correction1 = 1.0 - beta1 ** (state["step"]+1)
                 bias_correction2 = 1.0 - beta2 ** (state["step"]+1)
                 update *= ((bias_correction2 ** .5) / bias_correction1) * alpha
@@ -279,7 +282,7 @@ class SOAP(Transform):
             # Update is done after the gradient step to avoid using current gradients in the projection.
             if state['GG'] is not None:
                 update_soap_covariances_(t, state['GG'], shampoo_beta)
-                if state['step'] %
-                    state['Q'], state['
+                if state['step'] % setting['precond_freq'] == 0:
+                    state['Q'], state['exp_avg_sq_projected'] = get_orthogonal_matrix_QR(exp_avg_sq_projected, state['GG'], state['Q'])

         return updates
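
The hunks above collapse the paired `if mat is None:` / `if len(mat) > 0:` guards into a single check, drop the `unprojected_exp_avg` option, and move SOAP onto the per-parameter `apply_tensors(tensors, params, grads, loss, states, settings)` interface, reading each parameter's settings from its own `setting` dict. For readers who want to see the eigenbasis projection these functions implement in isolation, here is a minimal, self-contained sketch of the same `tensordot`-based rotation (standalone names, not the package's internal API):

```python
import torch

def project(tensors: torch.Tensor, Q: list[torch.Tensor | None]) -> torch.Tensor:
    """Rotate `tensors` into the eigenbases stored in Q, one factor per dimension."""
    for mat in Q:
        if mat is not None and len(mat) > 0:
            # contract the leading dimension of `tensors` with the rows of the factor
            tensors = torch.tensordot(tensors, mat, dims=([0], [0]))
        else:
            # no factor for this dimension: cycle it to the back unchanged
            permute_order = list(range(1, tensors.ndim)) + [0]
            tensors = tensors.permute(permute_order)
    return tensors

# toy usage: an eigenbasis only for the second dimension of a 2x3 tensor
t = torch.randn(2, 3)
Q = [None, torch.linalg.qr(torch.randn(3, 3)).Q]
print(project(t, Q).shape)  # torch.Size([2, 3])
```

Each factor in `Q` is contracted against the current leading dimension in turn; a `None` or empty entry simply cycles that dimension to the back, which is exactly what the consolidated guard in the diff checks for.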

--- a/torchzero/modules/optimizers/sophia_h.py
+++ b/torchzero/modules/optimizers/sophia_h.py
@@ -2,7 +2,7 @@ from typing import Literal
 from collections.abc import Callable
 import torch

-from ...core import Module, Target, Transform, Chainable,
+from ...core import Module, Target, Transform, Chainable, apply_transform
 from ...utils import NumberList, TensorList, as_tensorlist
 from ...utils.derivatives import hvp, hvp_fd_forward, hvp_fd_central

@@ -35,6 +35,74 @@ def sophia_H(


 class SophiaH(Module):
+    """SophiaH optimizer from https://arxiv.org/abs/2305.14342
+
+    This is similar to Adam, but the second momentum is replaced by an exponential moving average of randomized hessian diagonal estimates, and the update is agressively clipped.
+
+    .. note::
+        In most cases SophiaH should be the first module in the chain because it relies on autograd. Use the :code:`inner` argument if you wish to apply SophiaH preconditioning to another module's output.
+
+    .. note::
+        If you are using gradient estimators or reformulations, set :code:`hvp_method` to "forward" or "central".
+
+    .. note::
+        This module requires the a closure passed to the optimizer step,
+        as it needs to re-evaluate the loss and gradients for calculating HVPs.
+        The closure must accept a ``backward`` argument (refer to documentation).
+
+    Args:
+        beta1 (float, optional): first momentum. Defaults to 0.96.
+        beta2 (float, optional): momentum for hessian diagonal estimate. Defaults to 0.99.
+        update_freq (int, optional):
+            frequency of updating hessian diagonal estimate via a hessian-vector product. Defaults to 10.
+        precond_scale (float, optional):
+            scale of the preconditioner. Defaults to 1.
+        clip (float, optional):
+            clips update to (-clip, clip). Defaults to 1.
+        eps (float, optional):
+            clips hessian diagonal esimate to be no less than this value. Defaults to 1e-12.
+        hvp_method (str, optional):
+            Determines how Hessian-vector products are evaluated.
+
+            - ``"autograd"``: Use PyTorch's autograd to calculate exact HVPs.
+              This requires creating a graph for the gradient.
+            - ``"forward"``: Use a forward finite difference formula to
+              approximate the HVP. This requires one extra gradient evaluation.
+            - ``"central"``: Use a central finite difference formula for a
+              more accurate HVP approximation. This requires two extra
+              gradient evaluations.
+            Defaults to "autograd".
+        h (float, optional): finite difference step size if :code:`hvp_method` is "forward" or "central". Defaults to 1e-3.
+        n_samples (int, optional):
+            number of hessian-vector products with random vectors to evaluate each time when updating
+            the preconditioner. Larger values may lead to better hessian diagonal estimate. Defaults to 1.
+        seed (int | None, optional): seed for random vectors. Defaults to None.
+        inner (Chainable | None, optional): preconditioning is applied to the output of this module. Defaults to None.
+
+    Examples:
+        Using SophiaH:
+
+        .. code-block:: python
+
+            opt = tz.Modular(
+                model.parameters(),
+                tz.m.SophiaH(),
+                tz.m.LR(0.1)
+            )
+
+        SophiaH preconditioner can be applied to any other module by passing it to the :code:`inner` argument.
+        Turn off SophiaH's first momentum to get just the preconditioning. Here is an example of applying
+        SophiaH preconditioning to nesterov momentum (:code:`tz.m.NAG`):
+
+        .. code-block:: python
+
+            opt = tz.Modular(
+                model.parameters(),
+                tz.m.SophiaH(beta1=0, inner=tz.m.NAG(0.96)),
+                tz.m.LR(0.1)
+            )
+
+    """
     def __init__(
         self,
         beta1: float = 0.96,
@@ -56,8 +124,8 @@ class SophiaH(Module):
         self.set_child('inner', inner)

     @torch.no_grad
-    def step(self,
-        params =
+    def step(self, var):
+        params = var.params
         settings = self.settings[params[0]]
         hvp_method = settings['hvp_method']
         fd_h = settings['fd_h']
@@ -71,37 +139,26 @@ class SophiaH(Module):
                 self.global_state['generator'] = torch.Generator(params[0].device).manual_seed(seed)
             generator = self.global_state['generator']

-        beta1, beta2, precond_scale, clip, eps = self.get_settings(
-            'beta1', 'beta2', 'precond_scale', 'clip', 'eps',
+        beta1, beta2, precond_scale, clip, eps = self.get_settings(params,
+            'beta1', 'beta2', 'precond_scale', 'clip', 'eps', cls=NumberList)

-        exp_avg, h_exp_avg = self.get_state('exp_avg', 'h_exp_avg',
+        exp_avg, h_exp_avg = self.get_state(params, 'exp_avg', 'h_exp_avg', cls=TensorList)

         step = self.global_state.get('step', 0)
         self.global_state['step'] = step + 1

-        closure =
+        closure = var.closure
         assert closure is not None

         h = None
         if step % update_freq == 0:

-
+            rgrad=None
             for i in range(n_samples):
                 u = [torch.randn(p.shape, device=p.device, dtype=p.dtype, generator=generator) for p in params]

-
-
-                    assert grad is not None
-                    Hvp = hvp(params, grad, u, retain_graph=i < n_samples-1)
-
-                elif hvp_method == 'forward':
-                    loss, Hvp = hvp_fd_forward(closure, params, u, h=fd_h, g_0=vars.get_grad(), normalize=True)
-
-                elif hvp_method == 'central':
-                    loss, Hvp = hvp_fd_central(closure, params, u, h=fd_h, normalize=True)
-
-                else:
-                    raise ValueError(hvp_method)
+                Hvp, rgrad = self.Hvp(u, at_x0=True, var=var, rgrad=rgrad, hvp_method=hvp_method,
+                                      h=fd_h, normalize=True, retain_grad=i < n_samples-1)

                 if h is None: h = Hvp
                 else: torch._foreach_add_(h, Hvp)
@@ -109,11 +166,11 @@ class SophiaH(Module):
            assert h is not None
            if n_samples > 1: torch._foreach_div_(h, n_samples)

-        update =
+        update = var.get_update()
        if 'inner' in self.children:
-            update =
+            update = apply_transform(self.children['inner'], tensors=update, params=params, grads=var.grad, var=var)

-
+        var.update = sophia_H(
            tensors=TensorList(update),
            h=TensorList(h) if h is not None else None,
            exp_avg_=exp_avg,
@@ -126,4 +183,4 @@ class SophiaH(Module):
            eps=eps,
            step=step,
        )
-        return
+        return var
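
The new SophiaH docstring requires a closure that accepts a `backward` argument, because the module re-evaluates the loss and gradients to form Hessian-vector products. Below is a hedged sketch of that closure pattern, built around the `tz.Modular` examples embedded in the docstring above; the model, data, and loss are placeholders, and the exact `zero_grad`/`backward` handling should be checked against the library's documentation:

```python
import torch
import torchzero as tz

model = torch.nn.Linear(8, 1)
X, y = torch.randn(32, 8), torch.randn(32, 1)

opt = tz.Modular(
    model.parameters(),
    tz.m.SophiaH(hvp_method="central"),  # finite-difference HVPs, per the docstring note
    tz.m.LR(0.1),
)

def closure(backward=True):
    loss = torch.nn.functional.mse_loss(model(X), y)
    if backward:
        opt.zero_grad()
        loss.backward()
    return loss

for _ in range(20):
    opt.step(closure)
```

Passing `hvp_method="central"` follows the docstring's note for setups where exact autograd HVPs are unavailable; the default `"autograd"` avoids the two extra gradient evaluations per Hessian-vector product.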

--- a/torchzero/modules/projections/__init__.py
+++ b/torchzero/modules/projections/__init__.py
@@ -1,5 +1,3 @@
-from .projection import
-from .
-from .structural import VectorProjection, TensorizeProjection, BlockPartition, TensorNormsProjection
-
+from .projection import ProjectionBase, VectorProjection, ScalarProjection
+from .cast import To, ViewAsReal
 # from .galore import GaLore

--- /dev/null
+++ b/torchzero/modules/projections/cast.py
@@ -0,0 +1,51 @@
+import torch
+from .projection import ProjectionBase
+from ...core import Chainable
+
+class To(ProjectionBase):
+    """Cast modules to specified device and dtype"""
+    def __init__(self, modules: Chainable, dtype: torch.dtype | None, device:torch.types.Device | None = None):
+        defaults = dict(dtype=dtype, device=device)
+        super().__init__(modules, project_update=True, project_params=True, project_grad=True, defaults=defaults)
+
+    @torch.no_grad
+    def project(self, tensors, params, grads, loss, states, settings, current):
+        casted = []
+        for tensor, state, setting in zip(tensors,states, settings):
+            state['dtype'] = tensor.dtype
+            state['device'] = tensor.device
+            tensor = tensor.to(dtype=setting['dtype'], device=setting['device'])
+            casted.append(tensor)
+        return casted
+
+    @torch.no_grad
+    def unproject(self, projected_tensors, params, grads, loss, states, settings, current):
+        uncasted = []
+        for tensor, state in zip(projected_tensors, states):
+            tensor = tensor.to(dtype=state['dtype'], device=state['device'])
+            uncasted.append(tensor)
+        return uncasted
+
+
+class ViewAsReal(ProjectionBase):
+    """View complex tensors as real tensors. Doesn't affect tensors that are already."""
+    def __init__(self, modules: Chainable):
+        super().__init__(modules, project_update=True, project_params=True, project_grad=True, defaults=None)
+
+    @torch.no_grad
+    def project(self, tensors, params, grads, loss, states, settings, current):
+        views = []
+        for tensor, state in zip(tensors,states):
+            is_complex = torch.is_complex(tensor)
+            state['is_complex'] = is_complex
+            if is_complex: tensor = torch.view_as_real(tensor)
+            views.append(tensor)
+        return views
+
+    @torch.no_grad
+    def unproject(self, projected_tensors, params, grads, loss, states, settings, current):
+        un_views = []
+        for tensor, state in zip(projected_tensors, states):
+            if state['is_complex']: tensor = torch.view_as_complex(tensor)
+            un_views.append(tensor)
+        return un_views
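
The new `cast.py` adds two wrapper projections: `To` records each tensor's dtype and device, casts it to the configured ones before the wrapped modules run, and restores the original dtype/device in `unproject`; `ViewAsReal` flags complex tensors and views them as real so that modules written for real tensors can process them. A hedged usage sketch, assuming both classes are re-exported under `tz.m` like the modules in the docstring examples above (the models and the choice of `tz.m.Adam` as the wrapped module are placeholders):

```python
import torch
import torchzero as tz

model = torch.nn.Linear(16, 4)

# run the wrapped module's statistics in bfloat16, casting the update back afterwards
opt_bf16 = tz.Modular(
    model.parameters(),
    tz.m.To(tz.m.Adam(), dtype=torch.bfloat16),
    tz.m.LR(1e-3),
)

# view complex parameters as real tensors for modules that assume real inputs
complex_model = torch.nn.Linear(16, 4).to(torch.complex64)
opt_complex = tz.Modular(
    complex_model.parameters(),
    tz.m.ViewAsReal(tz.m.Adam()),
    tz.m.LR(1e-3),
)
```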