torchzero 0.3.9__py3-none-any.whl → 0.3.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs/source/conf.py +6 -4
- docs/source/docstring template.py +46 -0
- tests/test_identical.py +2 -3
- tests/test_opts.py +115 -68
- tests/test_tensorlist.py +2 -2
- tests/test_vars.py +62 -61
- torchzero/core/__init__.py +2 -3
- torchzero/core/module.py +185 -53
- torchzero/core/transform.py +327 -159
- torchzero/modules/__init__.py +3 -1
- torchzero/modules/clipping/clipping.py +120 -23
- torchzero/modules/clipping/ema_clipping.py +37 -22
- torchzero/modules/clipping/growth_clipping.py +20 -21
- torchzero/modules/experimental/__init__.py +30 -4
- torchzero/modules/experimental/absoap.py +53 -156
- torchzero/modules/experimental/adadam.py +22 -15
- torchzero/modules/experimental/adamY.py +21 -25
- torchzero/modules/experimental/adam_lambertw.py +149 -0
- torchzero/modules/{line_search/trust_region.py → experimental/adaptive_step_size.py} +37 -8
- torchzero/modules/experimental/adasoap.py +24 -129
- torchzero/modules/experimental/cosine.py +214 -0
- torchzero/modules/experimental/cubic_adam.py +97 -0
- torchzero/modules/experimental/curveball.py +12 -12
- torchzero/modules/{projections → experimental}/dct.py +11 -11
- torchzero/modules/experimental/eigendescent.py +120 -0
- torchzero/modules/experimental/etf.py +195 -0
- torchzero/modules/experimental/exp_adam.py +113 -0
- torchzero/modules/experimental/expanded_lbfgs.py +141 -0
- torchzero/modules/{projections → experimental}/fft.py +10 -10
- torchzero/modules/experimental/gradmin.py +2 -2
- torchzero/modules/experimental/hnewton.py +85 -0
- torchzero/modules/{quasi_newton/experimental → experimental}/modular_lbfgs.py +49 -50
- torchzero/modules/experimental/newton_solver.py +11 -11
- torchzero/modules/experimental/newtonnewton.py +92 -0
- torchzero/modules/experimental/parabolic_search.py +220 -0
- torchzero/modules/experimental/reduce_outward_lr.py +10 -7
- torchzero/modules/{projections/structural.py → experimental/structural_projections.py} +12 -54
- torchzero/modules/experimental/subspace_preconditioners.py +20 -10
- torchzero/modules/experimental/tensor_adagrad.py +42 -0
- torchzero/modules/functional.py +12 -2
- torchzero/modules/grad_approximation/fdm.py +31 -4
- torchzero/modules/grad_approximation/forward_gradient.py +17 -7
- torchzero/modules/grad_approximation/grad_approximator.py +69 -24
- torchzero/modules/grad_approximation/rfdm.py +310 -50
- torchzero/modules/higher_order/__init__.py +1 -0
- torchzero/modules/higher_order/higher_order_newton.py +319 -0
- torchzero/modules/line_search/__init__.py +4 -4
- torchzero/modules/line_search/adaptive.py +99 -0
- torchzero/modules/line_search/backtracking.py +75 -31
- torchzero/modules/line_search/line_search.py +107 -49
- torchzero/modules/line_search/polynomial.py +233 -0
- torchzero/modules/line_search/scipy.py +20 -5
- torchzero/modules/line_search/strong_wolfe.py +52 -36
- torchzero/modules/misc/__init__.py +27 -0
- torchzero/modules/misc/debug.py +48 -0
- torchzero/modules/misc/escape.py +60 -0
- torchzero/modules/misc/gradient_accumulation.py +70 -0
- torchzero/modules/misc/misc.py +316 -0
- torchzero/modules/misc/multistep.py +158 -0
- torchzero/modules/misc/regularization.py +171 -0
- torchzero/modules/misc/split.py +103 -0
- torchzero/modules/{ops → misc}/switch.py +48 -7
- torchzero/modules/momentum/__init__.py +1 -1
- torchzero/modules/momentum/averaging.py +25 -10
- torchzero/modules/momentum/cautious.py +115 -40
- torchzero/modules/momentum/ema.py +92 -41
- torchzero/modules/momentum/experimental.py +21 -13
- torchzero/modules/momentum/matrix_momentum.py +145 -76
- torchzero/modules/momentum/momentum.py +25 -4
- torchzero/modules/ops/__init__.py +3 -31
- torchzero/modules/ops/accumulate.py +51 -25
- torchzero/modules/ops/binary.py +108 -62
- torchzero/modules/ops/multi.py +95 -34
- torchzero/modules/ops/reduce.py +31 -23
- torchzero/modules/ops/unary.py +37 -21
- torchzero/modules/ops/utility.py +53 -45
- torchzero/modules/optimizers/__init__.py +12 -3
- torchzero/modules/optimizers/adagrad.py +48 -29
- torchzero/modules/optimizers/adahessian.py +223 -0
- torchzero/modules/optimizers/adam.py +35 -37
- torchzero/modules/optimizers/adan.py +110 -0
- torchzero/modules/optimizers/adaptive_heavyball.py +57 -0
- torchzero/modules/optimizers/esgd.py +171 -0
- torchzero/modules/optimizers/ladagrad.py +183 -0
- torchzero/modules/optimizers/lion.py +4 -4
- torchzero/modules/optimizers/mars.py +91 -0
- torchzero/modules/optimizers/msam.py +186 -0
- torchzero/modules/optimizers/muon.py +32 -7
- torchzero/modules/optimizers/orthograd.py +4 -5
- torchzero/modules/optimizers/rmsprop.py +19 -19
- torchzero/modules/optimizers/rprop.py +89 -52
- torchzero/modules/optimizers/sam.py +163 -0
- torchzero/modules/optimizers/shampoo.py +55 -27
- torchzero/modules/optimizers/soap.py +40 -37
- torchzero/modules/optimizers/sophia_h.py +82 -25
- torchzero/modules/projections/__init__.py +2 -4
- torchzero/modules/projections/cast.py +51 -0
- torchzero/modules/projections/galore.py +4 -2
- torchzero/modules/projections/projection.py +212 -118
- torchzero/modules/quasi_newton/__init__.py +44 -5
- torchzero/modules/quasi_newton/cg.py +190 -39
- torchzero/modules/quasi_newton/diagonal_quasi_newton.py +163 -0
- torchzero/modules/quasi_newton/lbfgs.py +154 -97
- torchzero/modules/quasi_newton/lsr1.py +102 -58
- torchzero/modules/quasi_newton/quasi_newton.py +1032 -177
- torchzero/modules/quasi_newton/trust_region.py +397 -0
- torchzero/modules/second_order/__init__.py +2 -2
- torchzero/modules/second_order/newton.py +245 -54
- torchzero/modules/second_order/newton_cg.py +311 -21
- torchzero/modules/second_order/nystrom.py +124 -21
- torchzero/modules/smoothing/gaussian.py +55 -21
- torchzero/modules/smoothing/laplacian.py +20 -12
- torchzero/modules/step_size/__init__.py +2 -0
- torchzero/modules/step_size/adaptive.py +122 -0
- torchzero/modules/step_size/lr.py +154 -0
- torchzero/modules/weight_decay/__init__.py +1 -1
- torchzero/modules/weight_decay/weight_decay.py +126 -10
- torchzero/modules/wrappers/optim_wrapper.py +40 -12
- torchzero/optim/wrappers/directsearch.py +281 -0
- torchzero/optim/wrappers/fcmaes.py +105 -0
- torchzero/optim/wrappers/mads.py +89 -0
- torchzero/optim/wrappers/nevergrad.py +20 -5
- torchzero/optim/wrappers/nlopt.py +28 -14
- torchzero/optim/wrappers/optuna.py +70 -0
- torchzero/optim/wrappers/scipy.py +167 -16
- torchzero/utils/__init__.py +3 -7
- torchzero/utils/derivatives.py +5 -4
- torchzero/utils/linalg/__init__.py +1 -1
- torchzero/utils/linalg/solve.py +251 -12
- torchzero/utils/numberlist.py +2 -0
- torchzero/utils/optimizer.py +55 -74
- torchzero/utils/python_tools.py +27 -4
- torchzero/utils/tensorlist.py +40 -28
- {torchzero-0.3.9.dist-info → torchzero-0.3.11.dist-info}/METADATA +76 -51
- torchzero-0.3.11.dist-info/RECORD +159 -0
- {torchzero-0.3.9.dist-info → torchzero-0.3.11.dist-info}/WHEEL +1 -1
- torchzero/core/preconditioner.py +0 -138
- torchzero/modules/experimental/algebraic_newton.py +0 -145
- torchzero/modules/experimental/soapy.py +0 -290
- torchzero/modules/experimental/spectral.py +0 -288
- torchzero/modules/experimental/structured_newton.py +0 -111
- torchzero/modules/experimental/tropical_newton.py +0 -136
- torchzero/modules/lr/__init__.py +0 -2
- torchzero/modules/lr/lr.py +0 -59
- torchzero/modules/lr/step_size.py +0 -97
- torchzero/modules/ops/debug.py +0 -25
- torchzero/modules/ops/misc.py +0 -419
- torchzero/modules/ops/split.py +0 -75
- torchzero/modules/quasi_newton/experimental/__init__.py +0 -1
- torchzero/modules/quasi_newton/olbfgs.py +0 -196
- torchzero-0.3.9.dist-info/RECORD +0 -131
- {torchzero-0.3.9.dist-info → torchzero-0.3.11.dist-info}/licenses/LICENSE +0 -0
- {torchzero-0.3.9.dist-info → torchzero-0.3.11.dist-info}/top_level.txt +0 -0
```diff
@@ -69,7 +69,7 @@ def _ensure_tensor(x):
 inf = float('inf')
 Closure = Callable[[bool], Any]
 
-class NLOptOptimizer(Optimizer):
+class NLOptWrapper(Optimizer):
     """Use nlopt as pytorch optimizer, with gradient supplied by pytorch autograd.
     Note that this performs full minimization on each step,
     so usually you would want to perform a single step, although performing multiple steps will refine the
@@ -96,9 +96,9 @@ class NLOptOptimizer(Optimizer):
         self,
         params,
         algorithm: int | _ALGOS_LITERAL,
-        maxeval: int | None,
         lb: float | None = None,
         ub: float | None = None,
+        maxeval: int | None = 10000, # None can stall on some algos and because they are threaded C you can't even interrupt them
         stopval: float | None = None,
         ftol_rel: float | None = None,
         ftol_abs: float | None = None,
@@ -122,22 +122,33 @@ class NLOptOptimizer(Optimizer):
         self._last_loss = None
 
     def _f(self, x: np.ndarray, grad: np.ndarray, closure, params: TensorList):
-
-        if t is None:
+        if self.raised:
             if self.opt is not None: self.opt.force_stop()
-            return
-
-
-
-
-
+            return np.inf
+        try:
+            t = _ensure_tensor(x)
+            if t is None:
+                if self.opt is not None: self.opt.force_stop()
+                return None
+            params.from_vec_(t.to(params[0], copy=False))
+            if grad.size > 0:
+                with torch.enable_grad(): loss = closure()
+                self._last_loss = _ensure_float(loss)
+                grad[:] = params.ensure_grad_().grad.to_vec().reshape(grad.shape).detach().cpu().numpy()
+                return self._last_loss
+
+            self._last_loss = _ensure_float(closure(False))
             return self._last_loss
-
-
-
+        except Exception as e:
+            self.e = e
+            self.raised = True
+            if self.opt is not None: self.opt.force_stop()
+            return np.inf
 
     @torch.no_grad
     def step(self, closure: Closure): # pylint: disable = signature-differs # pyright:ignore[reportIncompatibleMethodOverride]
+        self.e = None
+        self.raised = False
         params = self.get_params()
 
         # make bounds
@@ -175,6 +186,9 @@ class NLOptOptimizer(Optimizer):
         except Exception as e:
             raise e from None
 
+        if x is not None: params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+        if self.e is not None: raise self.e from None
+
         if self._last_loss is None or x is None: return closure(False)
-
+
         return self._last_loss
```
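Since the wrapper's docstring notes that a single `step(closure)` runs a full nlopt minimization, typical usage is one step with a closure that optionally computes gradients. A minimal sketch, assuming an illustrative model, data, and the gradient-based `nlopt.LD_LBFGS` algorithm (the closure convention, where `closure(False)` returns the loss without gradients, follows the `Closure` type above):

```python
import nlopt
import torch
from torchzero.optim.wrappers.nlopt import NLOptWrapper  # module path taken from the file list above

model = torch.nn.Linear(4, 1)
X, y = torch.randn(64, 4), torch.randn(64, 1)

def closure(backward: bool = True):
    loss = torch.nn.functional.mse_loss(model(X), y)
    if backward:
        model.zero_grad()
        loss.backward()
    return loss

opt = NLOptWrapper(model.parameters(), algorithm=nlopt.LD_LBFGS, maxeval=1000)
opt.step(closure)  # one step = one full nlopt run
```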
torchzero/optim/wrappers/optuna.py
ADDED
```diff
@@ -0,0 +1,70 @@
+import typing
+from collections import abc
+
+import numpy as np
+import torch
+
+import optuna
+
+from ...utils import Optimizer
+
+def silence_optuna():
+    optuna.logging.set_verbosity(optuna.logging.WARNING)
+
+def _ensure_float(x) -> float:
+    if isinstance(x, torch.Tensor): return x.detach().cpu().item()
+    if isinstance(x, np.ndarray): return float(x.item())
+    return float(x)
+
+
+class OptunaSampler(Optimizer):
+    """Optimize your next SOTA model using hyperparameter optimization.
+
+    Note - optuna is surprisingly scalable to large number of parameters (up to 10,000), despite literally requiring a for-loop because it only supports scalars. Default TPESampler is good for BBO. Maybe not for NNs...
+
+    Args:
+        params: iterable of parameters to optimize or dicts defining parameter groups.
+        lb (float): lower bounds.
+        ub (float): upper bounds.
+        sampler (optuna.samplers.BaseSampler | type[optuna.samplers.BaseSampler] | None, optional): sampler. Defaults to None.
+        silence (bool, optional): makes optuna not write a lot of very useful information to console. Defaults to True.
+    """
+    def __init__(
+        self,
+        params,
+        lb: float,
+        ub: float,
+        sampler: "optuna.samplers.BaseSampler | type[optuna.samplers.BaseSampler] | None" = None,
+        silence: bool = True,
+    ):
+        if silence: silence_optuna()
+        super().__init__(params, lb=lb, ub=ub)
+
+        if isinstance(sampler, type): sampler = sampler()
+        self.sampler = sampler
+        self.study = None
+
+    @torch.no_grad
+    def step(self, closure):
+
+        params = self.get_params()
+        if self.study is None:
+            self.study = optuna.create_study(sampler=self.sampler)
+
+        # some optuna samplers use torch
+        with torch.enable_grad():
+            trial = self.study.ask()
+
+            suggested = []
+            for gi,g in enumerate(self.param_groups):
+                for pi,p in enumerate(g['params']):
+                    lb, ub = g['lb'], g['ub']
+                    suggested.extend(trial.suggest_float(f'g{gi}_p{pi}_w{i}', lb, ub) for i in range(p.numel()))
+
+            vec = torch.as_tensor(suggested).to(params[0])
+            params.from_vec_(vec)
+
+        loss = closure()
+        with torch.enable_grad(): self.study.tell(trial, loss)
+
+        return loss
```
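As the docstring notes, every parameter element becomes its own `suggest_float` variable, so each `step` asks the study for one trial and reports the closure value back. A minimal usage sketch, assuming a toy quadratic objective and an explicit `TPESampler` (both illustrative, not part of the diff):

```python
import optuna
import torch
from torchzero.optim.wrappers.optuna import OptunaSampler  # module path taken from the file list above

x = torch.nn.Parameter(torch.randn(10))

def closure():
    # black-box objective: only the scalar value is reported to the study
    return float((x ** 2).sum())

opt = OptunaSampler([x], lb=-5.0, ub=5.0, sampler=optuna.samplers.TPESampler())
for _ in range(100):
    loss = opt.step(closure)  # one trial per step
```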
torchzero/optim/wrappers/scipy.py
CHANGED
```diff
@@ -11,9 +11,9 @@ from ...utils import Optimizer, TensorList
 from ...utils.derivatives import jacobian_and_hessian_mat_wrt, jacobian_wrt
 from ...modules.second_order.newton import tikhonov_
 
-def _ensure_float(x):
+def _ensure_float(x) -> float:
     if isinstance(x, torch.Tensor): return x.detach().cpu().item()
-    if isinstance(x, np.ndarray): return x.item()
+    if isinstance(x, np.ndarray): return float(x.item())
     return float(x)
 
 def _ensure_numpy(x):
@@ -139,9 +139,11 @@ class ScipyMinimize(Optimizer):
 
         # make bounds
         lb, ub = self.group_vals('lb', 'ub', cls=list)
-        bounds =
-
-        bounds
+        bounds = None
+        if any(b is not None for b in lb) or any(b is not None for b in ub):
+            bounds = []
+            for p, l, u in zip(params, lb, ub):
+                bounds.extend([(l, u)] * p.numel())
 
         if self.method is not None and (self.method.lower() == 'tnc' or self.method.lower() == 'slsqp'):
             x0 = x0.astype(np.float64) # those methods error without this
```
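The new bounds handling turns per-group scalar `lb`/`ub` values into one `(lb, ub)` pair per flattened parameter element, which is the format scipy's `bounds` argument expects. A small standalone sketch of the same expansion (the tensors and bound values are illustrative):

```python
import torch

params = [torch.zeros(2, 3), torch.zeros(4)]   # two parameter tensors
lb, ub = [-1.0, None], [1.0, None]             # per-group bounds; second group unbounded

bounds = None
if any(b is not None for b in lb) or any(b is not None for b in ub):
    bounds = []
    for p, l, u in zip(params, lb, ub):
        bounds.extend([(l, u)] * p.numel())    # one pair per element

print(len(bounds))            # 10 = 6 elements of the 2x3 tensor + 4 of the vector
print(bounds[0], bounds[-1])  # (-1.0, 1.0) (None, None)
```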
```diff
@@ -265,7 +267,8 @@ class ScipyDE(Optimizer):
     def __init__(
         self,
         params,
-
+        lb: float,
+        ub: float,
         strategy: Literal['best1bin', 'best1exp', 'rand1bin', 'rand1exp', 'rand2bin', 'rand2exp',
                           'randtobest1bin', 'randtobest1exp', 'currenttobest1bin', 'currenttobest1exp',
                           'best2exp', 'best2bin'] = 'best1bin',
@@ -287,12 +290,11 @@ class ScipyDE(Optimizer):
         integrality = None,
 
     ):
-        super().__init__(params,
+        super().__init__(params, lb=lb, ub=ub)
 
         kwargs = locals().copy()
-        del kwargs['self'], kwargs['params'], kwargs['
+        del kwargs['self'], kwargs['params'], kwargs['lb'], kwargs['ub'], kwargs['__class__']
         self._kwargs = kwargs
-        self._lb, self._ub = bounds
 
     def _objective(self, x: np.ndarray, params: TensorList, closure):
         params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
@@ -303,7 +305,11 @@ class ScipyDE(Optimizer):
         params = self.get_params()
 
         x0 = params.to_vec().detach().cpu().numpy()
-
+
+        lb, ub = self.group_vals('lb', 'ub', cls=list)
+        bounds = []
+        for p, l, u in zip(params, lb, ub):
+            bounds.extend([(l, u)] * p.numel())
 
         res = scipy.optimize.differential_evolution(
             partial(self._objective, params = params, closure = closure),
@@ -321,7 +327,8 @@ class ScipyDualAnnealing(Optimizer):
     def __init__(
         self,
         params,
-
+        lb: float,
+        ub: float,
         maxiter=1000,
         minimizer_kwargs=None,
         initial_temp=5230.0,
@@ -332,23 +339,25 @@ class ScipyDualAnnealing(Optimizer):
         rng=None,
         no_local_search=False,
     ):
-        super().__init__(params,
+        super().__init__(params, lb=lb, ub=ub)
 
         kwargs = locals().copy()
-        del kwargs['self'], kwargs['params'], kwargs['
+        del kwargs['self'], kwargs['params'], kwargs['lb'], kwargs['ub'], kwargs['__class__']
         self._kwargs = kwargs
-        self._lb, self._ub = bounds
 
     def _objective(self, x: np.ndarray, params: TensorList, closure):
         params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
         return _ensure_float(closure(False))
 
     @torch.no_grad
-    def step(self, closure: Closure)
+    def step(self, closure: Closure):
         params = self.get_params()
 
         x0 = params.to_vec().detach().cpu().numpy()
-
+        lb, ub = self.group_vals('lb', 'ub', cls=list)
+        bounds = []
+        for p, l, u in zip(params, lb, ub):
+            bounds.extend([(l, u)] * p.numel())
 
         res = scipy.optimize.dual_annealing(
             partial(self._objective, params = params, closure = closure),
@@ -360,3 +369,145 @@ class ScipyDualAnnealing(Optimizer):
         params.from_vec_(torch.from_numpy(res.x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
         return res.fun
 
+
+
+class ScipySHGO(Optimizer):
+    def __init__(
+        self,
+        params,
+        lb: float,
+        ub: float,
+        constraints = None,
+        n: int = 100,
+        iters: int = 1,
+        callback = None,
+        minimizer_kwargs = None,
+        options = None,
+        sampling_method: str = 'simplicial',
+    ):
+        super().__init__(params, lb=lb, ub=ub)
+
+        kwargs = locals().copy()
+        del kwargs['self'], kwargs['params'], kwargs['lb'], kwargs['ub'], kwargs['__class__']
+        self._kwargs = kwargs
+
+    def _objective(self, x: np.ndarray, params: TensorList, closure):
+        params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+        return _ensure_float(closure(False))
+
+    @torch.no_grad
+    def step(self, closure: Closure):
+        params = self.get_params()
+
+        lb, ub = self.group_vals('lb', 'ub', cls=list)
+        bounds = []
+        for p, l, u in zip(params, lb, ub):
+            bounds.extend([(l, u)] * p.numel())
+
+        res = scipy.optimize.shgo(
+            partial(self._objective, params = params, closure = closure),
+            bounds=bounds,
+            **self._kwargs
+        )
+
+        params.from_vec_(torch.from_numpy(res.x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+        return res.fun
+
+
+class ScipyDIRECT(Optimizer):
+    def __init__(
+        self,
+        params,
+        lb: float,
+        ub: float,
+        maxfun: int | None = 1000,
+        maxiter: int = 1000,
+        eps: float = 0.0001,
+        locally_biased: bool = True,
+        f_min: float = -np.inf,
+        f_min_rtol: float = 0.0001,
+        vol_tol: float = 1e-16,
+        len_tol: float = 0.000001,
+        callback = None,
+    ):
+        super().__init__(params, lb=lb, ub=ub)
+
+        kwargs = locals().copy()
+        del kwargs['self'], kwargs['params'], kwargs['lb'], kwargs['ub'], kwargs['__class__']
+        self._kwargs = kwargs
+
+    def _objective(self, x: np.ndarray, params: TensorList, closure) -> float:
+        if self.raised: return np.inf
+        try:
+            params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+            return _ensure_float(closure(False))
+        except Exception as e:
+            # he he he ha, I found a way to make exceptions work in fcmaes and scipy direct
+            self.e = e
+            self.raised = True
+            return np.inf
+
+    @torch.no_grad
+    def step(self, closure: Closure):
+        self.raised = False
+        self.e = None
+
+        params = self.get_params()
+
+        lb, ub = self.group_vals('lb', 'ub', cls=list)
+        bounds = []
+        for p, l, u in zip(params, lb, ub):
+            bounds.extend([(l, u)] * p.numel())
+
+        res = scipy.optimize.direct(
+            partial(self._objective, params=params, closure=closure),
+            bounds=bounds,
+            **self._kwargs
+        )
+
+        params.from_vec_(torch.from_numpy(res.x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+
+        if self.e is not None: raise self.e from None
+        return res.fun
+
+
+
+
+class ScipyBrute(Optimizer):
+    def __init__(
+        self,
+        params,
+        lb: float,
+        ub: float,
+        Ns: int = 20,
+        full_output: int = 0,
+        finish = scipy.optimize.fmin,
+        disp: bool = False,
+        workers: int = 1
+    ):
+        super().__init__(params, lb=lb, ub=ub)
+
+        kwargs = locals().copy()
+        del kwargs['self'], kwargs['params'], kwargs['lb'], kwargs['ub'], kwargs['__class__']
+        self._kwargs = kwargs
+
+    def _objective(self, x: np.ndarray, params: TensorList, closure):
+        params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+        return _ensure_float(closure(False))
+
+    @torch.no_grad
+    def step(self, closure: Closure):
+        params = self.get_params()
+
+        lb, ub = self.group_vals('lb', 'ub', cls=list)
+        bounds = []
+        for p, l, u in zip(params, lb, ub):
+            bounds.extend([(l, u)] * p.numel())
+
+        x0 = scipy.optimize.brute(
+            partial(self._objective, params = params, closure = closure),
+            ranges=bounds,
+            **self._kwargs
+        )
+        params.from_vec_(torch.from_numpy(x0).to(device = params[0].device, dtype=params[0].dtype, copy=False))
+        return None
```
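All of the new global-optimization wrappers above follow the same pattern: box bounds are given per parameter group via `lb`/`ub`, `_objective` writes each candidate vector back into the parameters and evaluates `closure(False)`, and a single `step` runs the full scipy routine. A minimal usage sketch with `ScipyDIRECT`, assuming an illustrative derivative-free 2D Rosenbrock closure and evaluation budget:

```python
import torch
from torchzero.optim.wrappers.scipy import ScipyDIRECT  # module path taken from the file list above

x = torch.nn.Parameter(torch.zeros(2))

def closure(backward: bool = False):
    # derivative-free objective: these wrappers never request backward
    return float((1 - x[0])**2 + 100 * (x[1] - x[0]**2)**2)

opt = ScipyDIRECT([x], lb=-2.0, ub=2.0, maxfun=2000)
loss = opt.step(closure)  # one step = one full DIRECT search over the box [-2, 2]^2
```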
torchzero/utils/__init__.py
CHANGED
```diff
@@ -9,11 +9,7 @@ from .optimizer import (
     get_group_vals,
     get_params,
     get_state_vals,
-
-    grad_vec_at_params,
-    loss_at_params,
-    loss_grad_at_params,
-    loss_grad_vec_at_params,
+    unpack_states,
 )
 from .params import (
     Params,
@@ -22,6 +18,6 @@ from .params import (
     _copy_param_groups,
     _make_param_groups,
 )
-from .python_tools import flatten, generic_eq, reduce_dim
-from .tensorlist import TensorList, as_tensorlist, Distributions, generic_clamp, generic_numel, generic_vector_norm, generic_zeros_like, generic_randn_like
+from .python_tools import flatten, generic_eq, generic_ne, reduce_dim, unpack_dicts
+from .tensorlist import TensorList, as_tensorlist, Distributions, generic_clamp, generic_numel, generic_vector_norm, generic_zeros_like, generic_randn_like, generic_finfo_eps
 from .torch_tools import tofloat, tolist, tonumpy, totensor, vec_to_tensors, vec_to_tensors_, set_storage_
```
torchzero/utils/derivatives.py
CHANGED
```diff
@@ -2,6 +2,7 @@ from collections.abc import Iterable, Sequence
 
 import torch
 import torch.autograd.forward_ad as fwAD
+from typing import Literal
 
 from .torch_tools import swap_tensors_no_use_count_check, vec_to_tensors
 
@@ -157,7 +158,7 @@ def hessian_mat(
     method="func",
     vectorize=False,
     outer_jacobian_strategy="reverse-mode",
-):
+) -> torch.Tensor:
     """
     returns hessian matrix for parameters (as if they were flattened and concatenated into a vector).
 
@@ -189,7 +190,7 @@ def hessian_mat(
         return loss
 
     if method == 'func':
-        return torch.func.hessian(func)(torch.cat([p.view(-1) for p in params]).detach().requires_grad_(create_graph))
+        return torch.func.hessian(func)(torch.cat([p.view(-1) for p in params]).detach().requires_grad_(create_graph)) # pyright:ignore[reportReturnType]
 
     if method == 'autograd.functional':
         return torch.autograd.functional.hessian(
@@ -198,7 +199,7 @@ def hessian_mat(
             create_graph=create_graph,
             vectorize=vectorize,
             outer_jacobian_strategy=outer_jacobian_strategy,
-        )
+        ) # pyright:ignore[reportReturnType]
     raise ValueError(method)
 
 def jvp(fn, params: Iterable[torch.Tensor], tangent: Iterable[torch.Tensor]) -> tuple[torch.Tensor, torch.Tensor]:
@@ -510,4 +511,4 @@ def hvp_fd_forward(
     torch._foreach_div_(hvp_, h)
 
     if normalize: torch._foreach_mul_(hvp_, vec_norm)
-    return loss, hvp_
+    return loss, hvp_
```
torchzero/utils/linalg/__init__.py
CHANGED
```diff
@@ -2,4 +2,4 @@ from .matrix_funcs import inv_sqrt_2x2, eigvals_func, singular_vals_func, matrix
 from .orthogonalize import gram_schmidt
 from .qr import qr_householder
 from .svd import randomized_svd
-from .solve import cg, nystrom_approximation, nystrom_sketch_and_solve
+from .solve import cg, nystrom_approximation, nystrom_sketch_and_solve, steihaug_toint_cg
```