torchzero 0.3.14__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in the public registry.
Files changed (169)
  1. tests/test_identical.py +2 -2
  2. tests/test_module_autograd.py +586 -0
  3. tests/test_objective.py +188 -0
  4. tests/test_opts.py +47 -36
  5. tests/test_tensorlist.py +0 -8
  6. tests/test_utils_optimizer.py +0 -1
  7. torchzero/__init__.py +1 -1
  8. torchzero/core/__init__.py +8 -2
  9. torchzero/core/chain.py +47 -0
  10. torchzero/core/functional.py +103 -0
  11. torchzero/core/modular.py +233 -0
  12. torchzero/core/module.py +132 -643
  13. torchzero/core/objective.py +948 -0
  14. torchzero/core/reformulation.py +56 -23
  15. torchzero/core/transform.py +261 -365
  16. torchzero/linalg/__init__.py +10 -0
  17. torchzero/linalg/eigh.py +34 -0
  18. torchzero/linalg/linalg_utils.py +14 -0
  19. torchzero/{utils/linalg → linalg}/linear_operator.py +132 -34
  20. torchzero/linalg/matrix_power.py +28 -0
  21. torchzero/linalg/orthogonalize.py +95 -0
  22. torchzero/{utils/linalg → linalg}/qr.py +4 -2
  23. torchzero/{utils/linalg → linalg}/solve.py +76 -88
  24. torchzero/linalg/svd.py +20 -0
  25. torchzero/linalg/torch_linalg.py +168 -0
  26. torchzero/modules/__init__.py +0 -1
  27. torchzero/modules/adaptive/__init__.py +1 -1
  28. torchzero/modules/adaptive/adagrad.py +163 -213
  29. torchzero/modules/adaptive/adahessian.py +74 -103
  30. torchzero/modules/adaptive/adam.py +53 -76
  31. torchzero/modules/adaptive/adan.py +49 -30
  32. torchzero/modules/adaptive/adaptive_heavyball.py +11 -6
  33. torchzero/modules/adaptive/aegd.py +12 -12
  34. torchzero/modules/adaptive/esgd.py +98 -119
  35. torchzero/modules/adaptive/lion.py +5 -10
  36. torchzero/modules/adaptive/lmadagrad.py +87 -32
  37. torchzero/modules/adaptive/mars.py +5 -5
  38. torchzero/modules/adaptive/matrix_momentum.py +47 -51
  39. torchzero/modules/adaptive/msam.py +70 -52
  40. torchzero/modules/adaptive/muon.py +59 -124
  41. torchzero/modules/adaptive/natural_gradient.py +33 -28
  42. torchzero/modules/adaptive/orthograd.py +11 -15
  43. torchzero/modules/adaptive/rmsprop.py +83 -75
  44. torchzero/modules/adaptive/rprop.py +48 -47
  45. torchzero/modules/adaptive/sam.py +55 -45
  46. torchzero/modules/adaptive/shampoo.py +123 -129
  47. torchzero/modules/adaptive/soap.py +207 -143
  48. torchzero/modules/adaptive/sophia_h.py +106 -130
  49. torchzero/modules/clipping/clipping.py +15 -18
  50. torchzero/modules/clipping/ema_clipping.py +31 -25
  51. torchzero/modules/clipping/growth_clipping.py +14 -17
  52. torchzero/modules/conjugate_gradient/cg.py +26 -37
  53. torchzero/modules/experimental/__init__.py +3 -6
  54. torchzero/modules/experimental/coordinate_momentum.py +36 -0
  55. torchzero/modules/experimental/curveball.py +25 -41
  56. torchzero/modules/experimental/gradmin.py +2 -2
  57. torchzero/modules/{higher_order → experimental}/higher_order_newton.py +14 -40
  58. torchzero/modules/experimental/newton_solver.py +22 -53
  59. torchzero/modules/experimental/newtonnewton.py +20 -17
  60. torchzero/modules/experimental/reduce_outward_lr.py +7 -7
  61. torchzero/modules/experimental/scipy_newton_cg.py +21 -24
  62. torchzero/modules/experimental/spsa1.py +5 -5
  63. torchzero/modules/experimental/structural_projections.py +1 -4
  64. torchzero/modules/functional.py +8 -1
  65. torchzero/modules/grad_approximation/forward_gradient.py +7 -7
  66. torchzero/modules/grad_approximation/grad_approximator.py +23 -16
  67. torchzero/modules/grad_approximation/rfdm.py +20 -17
  68. torchzero/modules/least_squares/gn.py +90 -42
  69. torchzero/modules/line_search/__init__.py +1 -1
  70. torchzero/modules/line_search/_polyinterp.py +3 -1
  71. torchzero/modules/line_search/adaptive.py +3 -3
  72. torchzero/modules/line_search/backtracking.py +3 -3
  73. torchzero/modules/line_search/interpolation.py +160 -0
  74. torchzero/modules/line_search/line_search.py +42 -51
  75. torchzero/modules/line_search/strong_wolfe.py +5 -5
  76. torchzero/modules/misc/debug.py +12 -12
  77. torchzero/modules/misc/escape.py +10 -10
  78. torchzero/modules/misc/gradient_accumulation.py +10 -78
  79. torchzero/modules/misc/homotopy.py +16 -8
  80. torchzero/modules/misc/misc.py +120 -122
  81. torchzero/modules/misc/multistep.py +63 -61
  82. torchzero/modules/misc/regularization.py +49 -44
  83. torchzero/modules/misc/split.py +30 -28
  84. torchzero/modules/misc/switch.py +37 -32
  85. torchzero/modules/momentum/averaging.py +14 -14
  86. torchzero/modules/momentum/cautious.py +34 -28
  87. torchzero/modules/momentum/momentum.py +11 -11
  88. torchzero/modules/ops/__init__.py +4 -4
  89. torchzero/modules/ops/accumulate.py +21 -21
  90. torchzero/modules/ops/binary.py +67 -66
  91. torchzero/modules/ops/higher_level.py +19 -19
  92. torchzero/modules/ops/multi.py +44 -41
  93. torchzero/modules/ops/reduce.py +26 -23
  94. torchzero/modules/ops/unary.py +53 -53
  95. torchzero/modules/ops/utility.py +47 -46
  96. torchzero/modules/projections/galore.py +1 -1
  97. torchzero/modules/projections/projection.py +43 -43
  98. torchzero/modules/quasi_newton/__init__.py +2 -0
  99. torchzero/modules/quasi_newton/damping.py +1 -1
  100. torchzero/modules/quasi_newton/lbfgs.py +7 -7
  101. torchzero/modules/quasi_newton/lsr1.py +7 -7
  102. torchzero/modules/quasi_newton/quasi_newton.py +25 -16
  103. torchzero/modules/quasi_newton/sg2.py +292 -0
  104. torchzero/modules/restarts/restars.py +26 -24
  105. torchzero/modules/second_order/__init__.py +6 -3
  106. torchzero/modules/second_order/ifn.py +58 -0
  107. torchzero/modules/second_order/inm.py +101 -0
  108. torchzero/modules/second_order/multipoint.py +40 -80
  109. torchzero/modules/second_order/newton.py +105 -228
  110. torchzero/modules/second_order/newton_cg.py +102 -154
  111. torchzero/modules/second_order/nystrom.py +158 -178
  112. torchzero/modules/second_order/rsn.py +237 -0
  113. torchzero/modules/smoothing/laplacian.py +13 -12
  114. torchzero/modules/smoothing/sampling.py +11 -10
  115. torchzero/modules/step_size/adaptive.py +23 -23
  116. torchzero/modules/step_size/lr.py +15 -15
  117. torchzero/modules/termination/termination.py +32 -30
  118. torchzero/modules/trust_region/cubic_regularization.py +2 -2
  119. torchzero/modules/trust_region/levenberg_marquardt.py +25 -28
  120. torchzero/modules/trust_region/trust_cg.py +1 -1
  121. torchzero/modules/trust_region/trust_region.py +27 -22
  122. torchzero/modules/variance_reduction/svrg.py +21 -18
  123. torchzero/modules/weight_decay/__init__.py +2 -1
  124. torchzero/modules/weight_decay/reinit.py +83 -0
  125. torchzero/modules/weight_decay/weight_decay.py +12 -13
  126. torchzero/modules/wrappers/optim_wrapper.py +57 -50
  127. torchzero/modules/zeroth_order/cd.py +9 -6
  128. torchzero/optim/root.py +3 -3
  129. torchzero/optim/utility/split.py +2 -1
  130. torchzero/optim/wrappers/directsearch.py +27 -63
  131. torchzero/optim/wrappers/fcmaes.py +14 -35
  132. torchzero/optim/wrappers/mads.py +11 -31
  133. torchzero/optim/wrappers/moors.py +66 -0
  134. torchzero/optim/wrappers/nevergrad.py +4 -4
  135. torchzero/optim/wrappers/nlopt.py +31 -25
  136. torchzero/optim/wrappers/optuna.py +6 -13
  137. torchzero/optim/wrappers/pybobyqa.py +124 -0
  138. torchzero/optim/wrappers/scipy/__init__.py +7 -0
  139. torchzero/optim/wrappers/scipy/basin_hopping.py +117 -0
  140. torchzero/optim/wrappers/scipy/brute.py +48 -0
  141. torchzero/optim/wrappers/scipy/differential_evolution.py +80 -0
  142. torchzero/optim/wrappers/scipy/direct.py +69 -0
  143. torchzero/optim/wrappers/scipy/dual_annealing.py +115 -0
  144. torchzero/optim/wrappers/scipy/experimental.py +141 -0
  145. torchzero/optim/wrappers/scipy/minimize.py +151 -0
  146. torchzero/optim/wrappers/scipy/sgho.py +111 -0
  147. torchzero/optim/wrappers/wrapper.py +121 -0
  148. torchzero/utils/__init__.py +7 -25
  149. torchzero/utils/compile.py +2 -2
  150. torchzero/utils/derivatives.py +112 -88
  151. torchzero/utils/optimizer.py +4 -77
  152. torchzero/utils/python_tools.py +31 -0
  153. torchzero/utils/tensorlist.py +11 -5
  154. torchzero/utils/thoad_tools.py +68 -0
  155. {torchzero-0.3.14.dist-info → torchzero-0.4.0.dist-info}/METADATA +1 -1
  156. torchzero-0.4.0.dist-info/RECORD +191 -0
  157. tests/test_vars.py +0 -185
  158. torchzero/modules/experimental/momentum.py +0 -160
  159. torchzero/modules/higher_order/__init__.py +0 -1
  160. torchzero/optim/wrappers/scipy.py +0 -572
  161. torchzero/utils/linalg/__init__.py +0 -12
  162. torchzero/utils/linalg/matrix_funcs.py +0 -87
  163. torchzero/utils/linalg/orthogonalize.py +0 -12
  164. torchzero/utils/linalg/svd.py +0 -20
  165. torchzero/utils/ops.py +0 -10
  166. torchzero-0.3.14.dist-info/RECORD +0 -167
  167. /torchzero/{utils/linalg → linalg}/benchmark.py +0 -0
  168. {torchzero-0.3.14.dist-info → torchzero-0.4.0.dist-info}/WHEEL +0 -0
  169. {torchzero-0.3.14.dist-info → torchzero-0.4.0.dist-info}/top_level.txt +0 -0
torchzero/optim/wrappers/scipy.py
@@ -1,572 +0,0 @@
- from collections import abc
- from collections.abc import Callable
- from functools import partial
- from typing import Any, Literal
-
- import numpy as np
- import torch
-
- import scipy.optimize
-
- from ...utils import Optimizer, TensorList
- from ...utils.derivatives import (
-     flatten_jacobian,
-     jacobian_and_hessian_mat_wrt,
-     jacobian_wrt,
- )
-
-
- def _ensure_float(x) -> float:
-     if isinstance(x, torch.Tensor): return x.detach().cpu().item()
-     if isinstance(x, np.ndarray): return float(x.item())
-     return float(x)
-
- def _ensure_numpy(x):
-     if isinstance(x, torch.Tensor): return x.detach().cpu()
-     if isinstance(x, np.ndarray): return x
-     return np.array(x)
-
- Closure = Callable[[bool], Any]
-
- class ScipyMinimize(Optimizer):
-     """Use scipy.minimize.optimize as pytorch optimizer. Note that this performs full minimization on each step,
-     so usually you would want to perform a single step, although performing multiple steps will refine the
-     solution.
-
-     Please refer to https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html
-     for a detailed description of args.
-
-     Args:
-         params: iterable of parameters to optimize or dicts defining parameter groups.
-         method (str | None, optional): type of solver.
-             If None, scipy will select one of BFGS, L-BFGS-B, SLSQP,
-             depending on whether or not the problem has constraints or bounds.
-             Defaults to None.
-         bounds (optional): bounds on variables. Defaults to None.
-         constraints (tuple, optional): constraints definition. Defaults to ().
-         tol (float | None, optional): Tolerance for termination. Defaults to None.
-         callback (Callable | None, optional): A callable called after each iteration. Defaults to None.
-         options (dict | None, optional): A dictionary of solver options. Defaults to None.
-         jac (str, optional): Method for computing the gradient vector.
-             Only for CG, BFGS, Newton-CG, L-BFGS-B, TNC, SLSQP, dogleg, trust-ncg, trust-krylov, trust-exact and trust-constr.
-             In addition to scipy options, this supports 'autograd', which uses pytorch autograd.
-             This setting is ignored for methods that don't require gradient. Defaults to 'autograd'.
-         hess (str, optional):
-             Method for computing the Hessian matrix.
-             Only for Newton-CG, dogleg, trust-ncg, trust-krylov, trust-exact and trust-constr.
-             This setting is ignored for methods that don't require hessian. Defaults to 'autograd'.
-         tikhonov (float, optional):
-             optional hessian regularizer value. Only has effect for methods that require hessian.
-     """
-     def __init__(
-         self,
-         params,
-         method: Literal['nelder-mead', 'powell', 'cg', 'bfgs', 'newton-cg',
-                         'l-bfgs-b', 'tnc', 'cobyla', 'cobyqa', 'slsqp',
-                         'trust-constr', 'dogleg', 'trust-ncg', 'trust-exact',
-                         'trust-krylov'] | str | None = None,
-         lb = None,
-         ub = None,
-         constraints = (),
-         tol: float | None = None,
-         callback = None,
-         options = None,
-         jac: Literal['2-point', '3-point', 'cs', 'autograd'] = 'autograd',
-         hess: Literal['2-point', '3-point', 'cs', 'autograd'] | scipy.optimize.HessianUpdateStrategy = 'autograd',
-     ):
-         defaults = dict(lb=lb, ub=ub)
-         super().__init__(params, defaults)
-         self.method = method
-         self.constraints = constraints
-         self.tol = tol
-         self.callback = callback
-         self.options = options
-
-         self.jac = jac
-         self.hess = hess
-
-         self.use_jac_autograd = jac.lower() == 'autograd' and (method is None or method.lower() in [
-             'cg', 'bfgs', 'newton-cg', 'l-bfgs-b', 'tnc', 'slsqp', 'dogleg',
-             'trust-ncg', 'trust-krylov', 'trust-exact', 'trust-constr',
-         ])
-         self.use_hess_autograd = isinstance(hess, str) and hess.lower() == 'autograd' and method is not None and method.lower() in [
-             'newton-cg', 'dogleg', 'trust-ncg', 'trust-krylov', 'trust-exact'
-         ]
-
-         # jac in scipy is '2-point', '3-point', 'cs', True or None.
-         if self.jac == 'autograd':
-             if self.use_jac_autograd: self.jac = True
-             else: self.jac = None
-
-
-     def _hess(self, x: np.ndarray, params: TensorList, closure):
-         params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
-         with torch.enable_grad():
-             value = closure(False)
-             _, H = jacobian_and_hessian_mat_wrt([value], wrt = params)
-         return H.numpy(force=True)
-
-     def _objective(self, x: np.ndarray, params: TensorList, closure):
-         # set params to x
-         params.from_vec_(torch.from_numpy(x).to(params[0], copy=False))
-
-         # return value and maybe gradients
-         if self.use_jac_autograd:
-             with torch.enable_grad(): value = _ensure_float(closure())
-             grad = params.ensure_grad_().grad.to_vec().numpy(force=True)
-             # slsqp requires float64
-             if self.method.lower() == 'slsqp': grad = grad.astype(np.float64)
-             return value, grad
-         return _ensure_float(closure(False))
-
-     @torch.no_grad
-     def step(self, closure: Closure):# pylint:disable = signature-differs # pyright:ignore[reportIncompatibleMethodOverride]
-         params = self.get_params()
-
-         # determine hess argument
-         if self.hess == 'autograd':
-             if self.use_hess_autograd: hess = partial(self._hess, params = params, closure = closure)
-             else: hess = None
-         else: hess = self.hess
-
-         x0 = params.to_vec().numpy(force=True)
-
-         # make bounds
-         lb, ub = self.group_vals('lb', 'ub', cls=list)
-         bounds = None
-         if any(b is not None for b in lb) or any(b is not None for b in ub):
-             bounds = []
-             for p, l, u in zip(params, lb, ub):
-                 bounds.extend([(l, u)] * p.numel())
-
-         if self.method is not None and (self.method.lower() == 'tnc' or self.method.lower() == 'slsqp'):
-             x0 = x0.astype(np.float64) # those methods error without this
-
-         res = scipy.optimize.minimize(
-             partial(self._objective, params = params, closure = closure),
-             x0 = x0,
-             method=self.method,
-             bounds=bounds,
-             constraints=self.constraints,
-             tol=self.tol,
-             callback=self.callback,
-             options=self.options,
-             jac = self.jac,
-             hess = hess,
-         )
-
-         params.from_vec_(torch.from_numpy(res.x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
-         return res.fun
-
-
-
- class ScipyRootOptimization(Optimizer):
-     """Optimization via using scipy.optimize.root on gradients, mainly for experimenting!
-
-     Args:
-         params: iterable of parameters to optimize or dicts defining parameter groups.
-         method (str | None, optional): _description_. Defaults to None.
-         tol (float | None, optional): _description_. Defaults to None.
-         callback (_type_, optional): _description_. Defaults to None.
-         options (_type_, optional): _description_. Defaults to None.
-         jac (T.Literal['2, optional): _description_. Defaults to 'autograd'.
-     """
-     def __init__(
-         self,
-         params,
-         method: Literal[
-             "hybr",
-             "lm",
-             "broyden1",
-             "broyden2",
-             "anderson",
-             "linearmixing",
-             "diagbroyden",
-             "excitingmixing",
-             "krylov",
-             "df-sane",
-         ] = 'hybr',
-         tol: float | None = None,
-         callback = None,
-         options = None,
-         jac: Literal['2-point', '3-point', 'cs', 'autograd'] = 'autograd',
-     ):
-         super().__init__(params, {})
-         self.method = method
-         self.tol = tol
-         self.callback = callback
-         self.options = options
-
-         self.jac = jac
-         if self.jac == 'autograd': self.jac = True
-
-         # those don't require jacobian
-         if self.method.lower() in ('broyden1', 'broyden2', 'anderson', 'linearmixing', 'diagbroyden', 'excitingmixing', 'krylov', 'df-sane'):
-             self.jac = None
-
-     def _objective(self, x: np.ndarray, params: TensorList, closure):
-         # set params to x
-         params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
-
-         # return gradients and maybe hessian
-         if self.jac:
-             with torch.enable_grad():
-                 self.value = closure(False)
-                 if not isinstance(self.value, torch.Tensor):
-                     raise TypeError(f"Autograd jacobian requires closure to return torch.Tensor, got {type(self.value)}")
-                 g, H = jacobian_and_hessian_mat_wrt([self.value], wrt=params)
-             return g.detach().cpu().numpy(), H.detach().cpu().numpy()
-
-         # return the gradients
-         with torch.enable_grad(): self.value = closure()
-         jac = params.ensure_grad_().grad.to_vec()
-         return jac.detach().cpu().numpy()
-
-     @torch.no_grad
-     def step(self, closure: Closure): # pylint:disable = signature-differs # pyright:ignore[reportIncompatibleMethodOverride]
-         params = self.get_params()
-
-         x0 = params.to_vec().detach().cpu().numpy()
-
-         res = scipy.optimize.root(
-             partial(self._objective, params = params, closure = closure),
-             x0 = x0,
-             method=self.method,
-             tol=self.tol,
-             callback=self.callback,
-             options=self.options,
-             jac = self.jac,
-         )
-
-         params.from_vec_(torch.from_numpy(res.x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
-         return res.fun
-
-
- class ScipyLeastSquaresOptimization(Optimizer):
-     """Optimization via using scipy.optimize.least_squares on gradients, mainly for experimenting!
-
-     Args:
-         params: iterable of parameters to optimize or dicts defining parameter groups.
-         method (str | None, optional): _description_. Defaults to None.
-         tol (float | None, optional): _description_. Defaults to None.
-         callback (_type_, optional): _description_. Defaults to None.
-         options (_type_, optional): _description_. Defaults to None.
-         jac (T.Literal['2, optional): _description_. Defaults to 'autograd'.
-     """
-     def __init__(
-         self,
-         params,
-         method='trf',
-         jac='autograd',
-         bounds=(-np.inf, np.inf),
-         ftol=1e-8, xtol=1e-8, gtol=1e-8, x_scale=1.0, loss='linear',
-         f_scale=1.0, diff_step=None, tr_solver=None, tr_options=None,
-         jac_sparsity=None, max_nfev=None, verbose=0
-     ):
-         super().__init__(params, {})
-         kwargs = locals().copy()
-         del kwargs['self'], kwargs['params'], kwargs['__class__'], kwargs['jac']
-         self._kwargs = kwargs
-
-         self.jac = jac
-
-
-     def _objective(self, x: np.ndarray, params: TensorList, closure):
-         # set params to x
-         params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
-
-         # return the gradients
-         with torch.enable_grad(): self.value = closure()
-         jac = params.ensure_grad_().grad.to_vec()
-         return jac.numpy(force=True)
-
-     def _hess(self, x: np.ndarray, params: TensorList, closure):
-         params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
-         with torch.enable_grad():
-             value = closure(False)
-             _, H = jacobian_and_hessian_mat_wrt([value], wrt = params)
-         return H.numpy(force=True)
-
-     @torch.no_grad
-     def step(self, closure: Closure): # pylint:disable = signature-differs # pyright:ignore[reportIncompatibleMethodOverride]
-         params = self.get_params()
-
-         x0 = params.to_vec().detach().cpu().numpy()
-
-         if self.jac == 'autograd': jac = partial(self._hess, params = params, closure = closure)
-         else: jac = self.jac
-
-         res = scipy.optimize.least_squares(
-             partial(self._objective, params = params, closure = closure),
-             x0 = x0,
-             jac=jac, # type:ignore
-             **self._kwargs
-         )
-
-         params.from_vec_(torch.from_numpy(res.x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
-         return res.fun
-
-
-
-
- class ScipyDE(Optimizer):
-     """Use scipy.minimize.differential_evolution as pytorch optimizer. Note that this performs full minimization on each step,
-     so usually you would want to perform a single step. This also requires bounds to be specified.
-
-     Please refer to https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.differential_evolution.html
-     for all other args.
-
-     Args:
-         params: iterable of parameters to optimize or dicts defining parameter groups.
-         bounds (tuple[float,float], optional): tuple with lower and upper bounds.
-             DE requires bounds to be specified. Defaults to None.
-
-         other args:
-             refer to https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.differential_evolution.html
-     """
-     def __init__(
-         self,
-         params,
-         lb: float,
-         ub: float,
-         strategy: Literal['best1bin', 'best1exp', 'rand1bin', 'rand1exp', 'rand2bin', 'rand2exp',
-                           'randtobest1bin', 'randtobest1exp', 'currenttobest1bin', 'currenttobest1exp',
-                           'best2exp', 'best2bin'] = 'best1bin',
-         maxiter: int = 1000,
-         popsize: int = 15,
-         tol: float = 0.01,
-         mutation = (0.5, 1),
-         recombination: float = 0.7,
-         seed = None,
-         callback = None,
-         disp: bool = False,
-         polish: bool = False,
-         init: str = 'latinhypercube',
-         atol: int = 0,
-         updating: str = 'immediate',
-         workers: int = 1,
-         constraints = (),
-         *,
-         integrality = None,
-
-     ):
-         super().__init__(params, lb=lb, ub=ub)
-
-         kwargs = locals().copy()
-         del kwargs['self'], kwargs['params'], kwargs['lb'], kwargs['ub'], kwargs['__class__']
-         self._kwargs = kwargs
-
-     def _objective(self, x: np.ndarray, params: TensorList, closure):
-         params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
-         return _ensure_float(closure(False))
-
-     @torch.no_grad
-     def step(self, closure: Closure): # pylint:disable = signature-differs # pyright:ignore[reportIncompatibleMethodOverride]
-         params = self.get_params()
-
-         x0 = params.to_vec().detach().cpu().numpy()
-
-         lb, ub = self.group_vals('lb', 'ub', cls=list)
-         bounds = []
-         for p, l, u in zip(params, lb, ub):
-             bounds.extend([(l, u)] * p.numel())
-
-         res = scipy.optimize.differential_evolution(
-             partial(self._objective, params = params, closure = closure),
-             x0 = x0,
-             bounds=bounds,
-             **self._kwargs
-         )
-
-         params.from_vec_(torch.from_numpy(res.x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
-         return res.fun
-
-
-
- class ScipyDualAnnealing(Optimizer):
-     def __init__(
-         self,
-         params,
-         lb: float,
-         ub: float,
-         maxiter=1000,
-         minimizer_kwargs=None,
-         initial_temp=5230.0,
-         restart_temp_ratio=2.0e-5,
-         visit=2.62,
-         accept=-5.0,
-         maxfun=1e7,
-         rng=None,
-         no_local_search=False,
-     ):
-         super().__init__(params, lb=lb, ub=ub)
-
-         kwargs = locals().copy()
-         del kwargs['self'], kwargs['params'], kwargs['lb'], kwargs['ub'], kwargs['__class__']
-         self._kwargs = kwargs
-
-     def _objective(self, x: np.ndarray, params: TensorList, closure):
-         params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
-         return _ensure_float(closure(False))
-
-     @torch.no_grad
-     def step(self, closure: Closure):
-         params = self.get_params()
-
-         x0 = params.to_vec().detach().cpu().numpy()
-         lb, ub = self.group_vals('lb', 'ub', cls=list)
-         bounds = []
-         for p, l, u in zip(params, lb, ub):
-             bounds.extend([(l, u)] * p.numel())
-
-         res = scipy.optimize.dual_annealing(
-             partial(self._objective, params = params, closure = closure),
-             x0 = x0,
-             bounds=bounds,
-             **self._kwargs
-         )
-
-         params.from_vec_(torch.from_numpy(res.x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
-         return res.fun
-
-
-
- class ScipySHGO(Optimizer):
-     def __init__(
-         self,
-         params,
-         lb: float,
-         ub: float,
-         constraints = None,
-         n: int = 100,
-         iters: int = 1,
-         callback = None,
-         minimizer_kwargs = None,
-         options = None,
-         sampling_method: str = 'simplicial',
-     ):
-         super().__init__(params, lb=lb, ub=ub)
-
-         kwargs = locals().copy()
-         del kwargs['self'], kwargs['params'], kwargs['lb'], kwargs['ub'], kwargs['__class__']
-         self._kwargs = kwargs
-
-     def _objective(self, x: np.ndarray, params: TensorList, closure):
-         params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
-         return _ensure_float(closure(False))
-
-     @torch.no_grad
-     def step(self, closure: Closure):
-         params = self.get_params()
-
-         lb, ub = self.group_vals('lb', 'ub', cls=list)
-         bounds = []
-         for p, l, u in zip(params, lb, ub):
-             bounds.extend([(l, u)] * p.numel())
-
-         res = scipy.optimize.shgo(
-             partial(self._objective, params = params, closure = closure),
-             bounds=bounds,
-             **self._kwargs
-         )
-
-         params.from_vec_(torch.from_numpy(res.x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
-         return res.fun
-
-
- class ScipyDIRECT(Optimizer):
-     def __init__(
-         self,
-         params,
-         lb: float,
-         ub: float,
-         maxfun: int | None = 1000,
-         maxiter: int = 1000,
-         eps: float = 0.0001,
-         locally_biased: bool = True,
-         f_min: float = -np.inf,
-         f_min_rtol: float = 0.0001,
-         vol_tol: float = 1e-16,
-         len_tol: float = 0.000001,
-         callback = None,
-     ):
-         super().__init__(params, lb=lb, ub=ub)
-
-         kwargs = locals().copy()
-         del kwargs['self'], kwargs['params'], kwargs['lb'], kwargs['ub'], kwargs['__class__']
-         self._kwargs = kwargs
-
-     def _objective(self, x: np.ndarray, params: TensorList, closure) -> float:
-         if self.raised: return np.inf
-         try:
-             params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
-             return _ensure_float(closure(False))
-         except Exception as e:
-             # he he he ha, I found a way to make exceptions work in fcmaes and scipy direct
-             self.e = e
-             self.raised = True
-             return np.inf
-
-     @torch.no_grad
-     def step(self, closure: Closure):
-         self.raised = False
-         self.e = None
-
-         params = self.get_params()
-
-         lb, ub = self.group_vals('lb', 'ub', cls=list)
-         bounds = []
-         for p, l, u in zip(params, lb, ub):
-             bounds.extend([(l, u)] * p.numel())
-
-         res = scipy.optimize.direct(
-             partial(self._objective, params=params, closure=closure),
-             bounds=bounds,
-             **self._kwargs
-         )
-
-         params.from_vec_(torch.from_numpy(res.x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
-
-         if self.e is not None: raise self.e from None
-         return res.fun
-
-
-
-
- class ScipyBrute(Optimizer):
-     def __init__(
-         self,
-         params,
-         lb: float,
-         ub: float,
-         Ns: int = 20,
-         full_output: int = 0,
-         finish = scipy.optimize.fmin,
-         disp: bool = False,
-         workers: int = 1
-     ):
-         super().__init__(params, lb=lb, ub=ub)
-
-         kwargs = locals().copy()
-         del kwargs['self'], kwargs['params'], kwargs['lb'], kwargs['ub'], kwargs['__class__']
-         self._kwargs = kwargs
-
-     def _objective(self, x: np.ndarray, params: TensorList, closure):
-         params.from_vec_(torch.from_numpy(x).to(device = params[0].device, dtype=params[0].dtype, copy=False))
-         return _ensure_float(closure(False))
-
-     @torch.no_grad
-     def step(self, closure: Closure):
-         params = self.get_params()
-
-         lb, ub = self.group_vals('lb', 'ub', cls=list)
-         bounds = []
-         for p, l, u in zip(params, lb, ub):
-             bounds.extend([(l, u)] * p.numel())
-
-         x0 = scipy.optimize.brute(
-             partial(self._objective, params = params, closure = closure),
-             ranges=bounds,
-             **self._kwargs
-         )
-         params.from_vec_(torch.from_numpy(x0).to(device = params[0].device, dtype=params[0].dtype, copy=False))
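All of the removed wrappers above share the closure-based pattern described in the ScipyMinimize docstring: step() flattens the parameters into a NumPy vector, builds an objective from the closure, hands it to the corresponding scipy.optimize routine, and writes the result back into the parameters. A minimal usage sketch of that pattern, assuming the 0.3.x import path (in 0.4.0 these wrappers move into the new torchzero/optim/wrappers/scipy/ package listed above, whose exact re-exports are not shown here):

    import torch
    from torchzero.optim.wrappers.scipy import ScipyMinimize  # 0.3.x location, removed in 0.4.0

    model = torch.nn.Linear(4, 1)
    X, y = torch.randn(64, 4), torch.randn(64, 1)

    opt = ScipyMinimize(model.parameters(), method='l-bfgs-b')

    def closure(backward=True):
        # the wrapper calls closure(False) for value-only evaluations
        loss = torch.nn.functional.mse_loss(model(X), y)
        if backward:
            model.zero_grad()
            loss.backward()
        return loss

    # a single step runs a full scipy.optimize.minimize solve
    final_loss = opt.step(closure)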
torchzero/utils/linalg/__init__.py
@@ -1,12 +0,0 @@
- from . import linear_operator
- from .matrix_funcs import (
-     eigvals_func,
-     inv_sqrt_2x2,
-     matrix_power_eigh,
-     singular_vals_func,
-     x_inv,
- )
- from .orthogonalize import gram_schmidt
- from .qr import qr_householder
- from .solve import cg, nystrom_approximation, nystrom_sketch_and_solve
- from .svd import randomized_svd
torchzero/utils/linalg/matrix_funcs.py
@@ -1,87 +0,0 @@
- import warnings
- from collections.abc import Callable
-
- import torch
-
- def eigvals_func(A: torch.Tensor, fn: Callable[[torch.Tensor], torch.Tensor]) -> torch.Tensor:
-     L, Q = torch.linalg.eigh(A) # pylint:disable=not-callable
-     L = fn(L)
-     return (Q * L.unsqueeze(-2)) @ Q.mH
-
- def singular_vals_func(A: torch.Tensor, fn: Callable[[torch.Tensor], torch.Tensor]) -> torch.Tensor:
-     U, S, V = torch.linalg.svd(A) # pylint:disable=not-callable
-     S = fn(S)
-     return (U * S.unsqueeze(-2)) @ V.mT
-
- def matrix_power_eigh(A: torch.Tensor, pow:float):
-     L, Q = torch.linalg.eigh(A) # pylint:disable=not-callable
-     if pow % 2 != 0: L.clip_(min = torch.finfo(A.dtype).tiny * 2)
-     return (Q * L.pow(pow).unsqueeze(-2)) @ Q.mH
-
-
- def inv_sqrt_2x2(A: torch.Tensor, force_pd: bool=False) -> torch.Tensor:
-     """Inverse square root of a possibly batched 2x2 matrix using a general formula for 2x2 matrices so that this is way faster than torch linalg. I tried doing a hierarchical 2x2 preconditioning but it didn't work well."""
-     eps = torch.finfo(A.dtype).tiny * 2
-
-     a = A[..., 0, 0]
-     b = A[..., 0, 1]
-     c = A[..., 1, 0]
-     d = A[..., 1, 1]
-
-     det = (a * d).sub_(b * c)
-     trace = a + d
-
-     if force_pd:
-         # add smallest eigenvalue magnitude to diagonal to force PD
-         # could also abs or clip eigenvalues bc there is a formula for eigenvectors
-         term1 = trace/2
-         term2 = (trace.pow(2).div_(4).sub_(det)).clamp_(min=eps).sqrt_()
-         y1 = term1 + term2
-         y2 = term1 - term2
-         smallest_eigval = torch.minimum(y1, y2).neg_().clamp_(min=0) + eps
-         a = a+smallest_eigval
-         d = d+smallest_eigval
-
-         # recalculate det and trace witg new a and b
-         det = (a * d).sub_(b * c)
-         trace = a + d
-
-     s = (det.clamp(min=eps)).sqrt_()
-
-     tau_squared = trace + 2 * s
-     tau = (tau_squared.clamp(min=eps)).sqrt_()
-
-     denom = s * tau
-
-     coeff = (denom.clamp(min=eps)).reciprocal_().unsqueeze(-1).unsqueeze(-1)
-
-     row1 = torch.stack([d + s, -b], dim=-1)
-     row2 = torch.stack([-c, a + s], dim=-1)
-     M = torch.stack([row1, row2], dim=-2)
-
-     return coeff * M
-
-
- def x_inv(diag: torch.Tensor,antidiag: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
-     """invert a matrix with diagonal and anti-diagonal non zero elements, with no checks that it is invertible"""
-     n = diag.shape[0]
-     if diag.dim() != 1 or antidiag.dim() != 1 or antidiag.shape[0] != n:
-         raise ValueError("Input tensors must be 1D and have the same size.")
-     if n == 0:
-         return torch.empty_like(diag), torch.empty_like(antidiag)
-
-     # opposite indexes
-     diag_rev = torch.flip(diag, dims=[0])
-     antidiag_rev = torch.flip(antidiag, dims=[0])
-
-     # determinants
-     # det_i = d[i] * d[n-1-i] - a[i] * a[n-1-i]
-     determinant_vec = diag * diag_rev - antidiag * antidiag_rev
-
-     # inverse diagonal elements: y_d[i] = d[n-1-i] / det_i
-     inv_diag_vec = diag_rev / determinant_vec
-
-     # inverse anti-diagonal elements: y_a[i] = -a[i] / det_i
-     inv_anti_diag_vec = -antidiag / determinant_vec
-
-     return inv_diag_vec, inv_anti_diag_vec
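The removed eigvals_func and matrix_power_eigh helpers follow the same recipe: factor A = Q diag(L) Qᴴ with torch.linalg.eigh and apply a scalar function to the eigenvalues. A small self-contained check of that recipe (my own sketch, not the replacement code under torchzero/linalg/): pushing the inverse square root through the eigenvalues of an SPD matrix should recover the identity when sandwiched around A.

    import torch

    def apply_to_eigvals(A, fn):
        # same idea as the removed eigvals_func: A = Q diag(L) Q^H  ->  Q diag(fn(L)) Q^H
        L, Q = torch.linalg.eigh(A)
        return (Q * fn(L).unsqueeze(-2)) @ Q.mH

    A = torch.randn(5, 5, dtype=torch.float64)
    A = A @ A.mT + 5 * torch.eye(5, dtype=torch.float64)  # SPD test matrix

    inv_sqrt = apply_to_eigvals(A, lambda L: L.rsqrt())
    # A^{-1/2} @ A @ A^{-1/2} should be (approximately) the identity
    print(torch.allclose(inv_sqrt @ A @ inv_sqrt, torch.eye(5, dtype=torch.float64), atol=1e-8))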
torchzero/utils/linalg/orthogonalize.py
@@ -1,12 +0,0 @@
- from typing import overload
- import torch
- from ..tensorlist import TensorList
-
- @overload
- def gram_schmidt(x: torch.Tensor, y: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: ...
- @overload
- def gram_schmidt(x: TensorList, y: TensorList) -> tuple[TensorList, TensorList]: ...
- def gram_schmidt(x, y):
-     """makes two orthogonal vectors, only y is changed"""
-     min = torch.finfo(x.dtype).tiny * 2
-     return x, y - (x*y) / (x*x).clip(min=min)
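For reference, a textbook single Gram-Schmidt step on flat 1-D vectors subtracts the projection of y onto x using the inner product ⟨x, y⟩. A standalone sketch of that projection (my own illustration; it is not the replacement implementation in torchzero/linalg/orthogonalize.py, which this diff does not show):

    import torch

    def project_out(x, y, eps=None):
        # single Gram-Schmidt step: remove the component of y along x,
        # i.e. y - x * <x, y> / <x, x>
        if eps is None:
            eps = torch.finfo(x.dtype).tiny * 2
        return y - x * (x @ y) / (x @ x).clamp(min=eps)

    x = torch.tensor([1.0, 2.0, 3.0])
    y = torch.tensor([4.0, 5.0, 6.0])
    y_orth = project_out(x, y)
    print(torch.dot(x, y_orth))  # ~0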