torchzero 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. tests/test_identical.py +22 -22
  2. tests/test_opts.py +199 -198
  3. torchzero/__init__.py +1 -1
  4. torchzero/core/__init__.py +1 -1
  5. torchzero/core/functional.py +1 -1
  6. torchzero/core/modular.py +5 -5
  7. torchzero/core/module.py +2 -2
  8. torchzero/core/objective.py +10 -10
  9. torchzero/core/transform.py +1 -1
  10. torchzero/linalg/__init__.py +3 -2
  11. torchzero/linalg/eigh.py +223 -4
  12. torchzero/linalg/orthogonalize.py +2 -4
  13. torchzero/linalg/qr.py +12 -0
  14. torchzero/linalg/solve.py +1 -3
  15. torchzero/linalg/svd.py +47 -20
  16. torchzero/modules/__init__.py +4 -3
  17. torchzero/modules/adaptive/__init__.py +11 -3
  18. torchzero/modules/adaptive/adagrad.py +10 -10
  19. torchzero/modules/adaptive/adahessian.py +2 -2
  20. torchzero/modules/adaptive/adam.py +1 -1
  21. torchzero/modules/adaptive/adan.py +1 -1
  22. torchzero/modules/adaptive/adaptive_heavyball.py +1 -1
  23. torchzero/modules/adaptive/esgd.py +2 -2
  24. torchzero/modules/adaptive/ggt.py +186 -0
  25. torchzero/modules/adaptive/lion.py +2 -1
  26. torchzero/modules/adaptive/lre_optimizers.py +299 -0
  27. torchzero/modules/adaptive/mars.py +2 -2
  28. torchzero/modules/adaptive/matrix_momentum.py +1 -1
  29. torchzero/modules/adaptive/msam.py +4 -4
  30. torchzero/modules/adaptive/muon.py +9 -6
  31. torchzero/modules/adaptive/natural_gradient.py +32 -15
  32. torchzero/modules/adaptive/psgd/__init__.py +5 -0
  33. torchzero/modules/adaptive/psgd/_psgd_utils.py +37 -0
  34. torchzero/modules/adaptive/psgd/psgd.py +1390 -0
  35. torchzero/modules/adaptive/psgd/psgd_dense_newton.py +174 -0
  36. torchzero/modules/adaptive/psgd/psgd_kron_newton.py +203 -0
  37. torchzero/modules/adaptive/psgd/psgd_kron_whiten.py +185 -0
  38. torchzero/modules/adaptive/psgd/psgd_lra_newton.py +118 -0
  39. torchzero/modules/adaptive/psgd/psgd_lra_whiten.py +116 -0
  40. torchzero/modules/adaptive/rprop.py +2 -2
  41. torchzero/modules/adaptive/sam.py +4 -4
  42. torchzero/modules/adaptive/shampoo.py +28 -3
  43. torchzero/modules/adaptive/soap.py +3 -3
  44. torchzero/modules/adaptive/sophia_h.py +2 -2
  45. torchzero/modules/clipping/clipping.py +7 -7
  46. torchzero/modules/conjugate_gradient/cg.py +2 -2
  47. torchzero/modules/experimental/__init__.py +5 -0
  48. torchzero/modules/experimental/adanystrom.py +258 -0
  49. torchzero/modules/experimental/common_directions_whiten.py +142 -0
  50. torchzero/modules/experimental/cubic_adam.py +160 -0
  51. torchzero/modules/experimental/eigen_sr1.py +182 -0
  52. torchzero/modules/experimental/eigengrad.py +207 -0
  53. torchzero/modules/experimental/l_infinity.py +1 -1
  54. torchzero/modules/experimental/matrix_nag.py +122 -0
  55. torchzero/modules/experimental/newton_solver.py +2 -2
  56. torchzero/modules/experimental/newtonnewton.py +34 -40
  57. torchzero/modules/grad_approximation/fdm.py +2 -2
  58. torchzero/modules/grad_approximation/rfdm.py +4 -4
  59. torchzero/modules/least_squares/gn.py +68 -45
  60. torchzero/modules/line_search/backtracking.py +2 -2
  61. torchzero/modules/line_search/line_search.py +1 -1
  62. torchzero/modules/line_search/strong_wolfe.py +2 -2
  63. torchzero/modules/misc/escape.py +1 -1
  64. torchzero/modules/misc/gradient_accumulation.py +1 -1
  65. torchzero/modules/misc/misc.py +1 -1
  66. torchzero/modules/misc/multistep.py +4 -7
  67. torchzero/modules/misc/regularization.py +2 -2
  68. torchzero/modules/misc/split.py +1 -1
  69. torchzero/modules/misc/switch.py +2 -2
  70. torchzero/modules/momentum/cautious.py +3 -3
  71. torchzero/modules/momentum/momentum.py +1 -1
  72. torchzero/modules/ops/higher_level.py +1 -1
  73. torchzero/modules/ops/multi.py +1 -1
  74. torchzero/modules/projections/projection.py +5 -2
  75. torchzero/modules/quasi_newton/__init__.py +1 -1
  76. torchzero/modules/quasi_newton/damping.py +1 -1
  77. torchzero/modules/quasi_newton/diagonal_quasi_newton.py +1 -1
  78. torchzero/modules/quasi_newton/lbfgs.py +3 -3
  79. torchzero/modules/quasi_newton/lsr1.py +3 -3
  80. torchzero/modules/quasi_newton/quasi_newton.py +44 -29
  81. torchzero/modules/quasi_newton/sg2.py +69 -205
  82. torchzero/modules/restarts/restars.py +17 -17
  83. torchzero/modules/second_order/inm.py +33 -25
  84. torchzero/modules/second_order/newton.py +132 -130
  85. torchzero/modules/second_order/newton_cg.py +3 -3
  86. torchzero/modules/second_order/nystrom.py +83 -32
  87. torchzero/modules/second_order/rsn.py +41 -44
  88. torchzero/modules/smoothing/laplacian.py +1 -1
  89. torchzero/modules/smoothing/sampling.py +2 -3
  90. torchzero/modules/step_size/adaptive.py +6 -6
  91. torchzero/modules/step_size/lr.py +2 -2
  92. torchzero/modules/trust_region/cubic_regularization.py +1 -1
  93. torchzero/modules/trust_region/levenberg_marquardt.py +2 -2
  94. torchzero/modules/trust_region/trust_cg.py +1 -1
  95. torchzero/modules/variance_reduction/svrg.py +4 -5
  96. torchzero/modules/weight_decay/reinit.py +2 -2
  97. torchzero/modules/weight_decay/weight_decay.py +5 -5
  98. torchzero/modules/wrappers/optim_wrapper.py +4 -4
  99. torchzero/modules/zeroth_order/cd.py +1 -1
  100. torchzero/optim/mbs.py +291 -0
  101. torchzero/optim/wrappers/nevergrad.py +0 -9
  102. torchzero/optim/wrappers/optuna.py +2 -0
  103. torchzero/utils/benchmarks/__init__.py +0 -0
  104. torchzero/utils/benchmarks/logistic.py +122 -0
  105. torchzero/utils/derivatives.py +4 -4
  106. {torchzero-0.4.0.dist-info → torchzero-0.4.1.dist-info}/METADATA +1 -1
  107. torchzero-0.4.1.dist-info/RECORD +209 -0
  108. torchzero/modules/adaptive/lmadagrad.py +0 -241
  109. torchzero-0.4.0.dist-info/RECORD +0 -191
  110. /torchzero/modules/{functional.py → opt_utils.py} +0 -0
  111. {torchzero-0.4.0.dist-info → torchzero-0.4.1.dist-info}/WHEEL +0 -0
  112. {torchzero-0.4.0.dist-info → torchzero-0.4.1.dist-info}/top_level.txt +0 -0
@@ -6,10 +6,10 @@ from typing import Literal
 import torch
 
 from ...core import Chainable, Transform, HVPMethod
-from ...utils import vec_to_tensors
+from ...utils import vec_to_tensors_
 from ...linalg.linear_operator import Sketched
 
-from .newton import _newton_step
+from .newton import _newton_update_state_, _newton_solve
 
 def _qr_orthonormalize(A:torch.Tensor):
     m,n = A.shape
@@ -20,12 +20,10 @@ def _qr_orthonormalize(A:torch.Tensor):
     q, _ = torch.linalg.qr(A) # pylint:disable=not-callable
     return q
 
+
 def _orthonormal_sketch(m, n, dtype, device, generator):
     return _qr_orthonormalize(torch.randn(m, n, dtype=dtype, device=device, generator=generator))
 
-def _gaussian_sketch(m, n, dtype, device, generator):
-    return torch.randn(m, n, dtype=dtype, device=device, generator=generator) / math.sqrt(m)
-
 def _rademacher_sketch(m, n, dtype, device, generator):
     rademacher = torch.bernoulli(torch.full((m,n), 0.5), generator = generator).mul_(2).sub_(1)
     return rademacher.mul_(1 / math.sqrt(m))
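For context, the remaining sketch helpers differ mainly in how close their columns are to orthonormal: `_qr_orthonormalize` yields an exactly orthonormal basis, while the scaled Rademacher sketch is only approximately orthonormal in high dimension. A minimal standalone check in plain PyTorch (the function names here are illustrative, not torchzero's):

```python
import math
import torch

def rademacher_sketch(m, n, generator=None):
    # entries are +/- 1/sqrt(m), so S.T @ S approximates the identity for large m
    r = torch.bernoulli(torch.full((m, n), 0.5), generator=generator).mul_(2).sub_(1)
    return r / math.sqrt(m)

def orthonormal_sketch(m, n, generator=None):
    # QR of a Gaussian matrix gives exactly orthonormal columns
    q, _ = torch.linalg.qr(torch.randn(m, n, generator=generator))
    return q

m, n = 10_000, 16
S_rad, S_orth = rademacher_sketch(m, n), orthonormal_sketch(m, n)
eye = torch.eye(n)
print((S_rad.T @ S_rad - eye).abs().max())    # small but nonzero: approximately orthonormal
print((S_orth.T @ S_orth - eye).abs().max())  # ~ machine precision: exactly orthonormal
```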
@@ -37,11 +35,10 @@ class SubspaceNewton(Transform):
         sketch_size (int):
             size of the random sketch. This many hessian-vector products will need to be evaluated each step.
         sketch_type (str, optional):
+            - "common_directions" - uses history steepest descent directions as the basis[2]. It is orthonormalized on-line using Gram-Schmidt (default).
             - "orthonormal" - random orthonormal basis. Orthonormality is necessary to use linear operator based modules such as trust region, but it can be slower to compute.
-            - "rademacher" - approximately orthonormal scaled random rademacher basis.
-            - "gaussian" - random gaussian (not orthonormal) basis.
-            - "common_directions" - uses history steepest descent directions as the basis[2]. It is orthonormalized on-line using Gram-Schmidt.
-            - "mixed" - random orthonormal basis but with four directions set to gradient, slow and fast gradient EMAs, and previous update direction (default).
+            - "rademacher" - approximately orthonormal (if dimension is large) scaled random rademacher basis. It is recommended to use at least "orthonormal" - it requires QR but it is still very cheap.
+            - "mixed" - random orthonormal basis but with four directions set to gradient, slow and fast gradient EMAs, and previous update direction.
         damping (float, optional): hessian damping (scale of identity matrix added to hessian). Defaults to 0.
         hvp_method (str, optional):
             How to compute hessian-matrix product:
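The new "common_directions" default says the basis of past steepest-descent directions is orthonormalized on-line with Gram-Schmidt. As a rough illustration of what that entails (plain PyTorch, hypothetical helper, not torchzero's implementation):

```python
import torch

def append_direction(Q, g, tol=1e-10):
    """Append direction g to the orthonormal basis Q (stored as columns), Gram-Schmidt style."""
    g = g.clone()
    if Q is not None:
        g -= Q @ (Q.T @ g)   # remove components along the existing basis
        g -= Q @ (Q.T @ g)   # second pass for numerical stability
    norm = g.norm()
    if norm < tol:           # direction already (nearly) in the span, skip it
        return Q
    g = (g / norm).unsqueeze(1)
    return g if Q is None else torch.cat([Q, g], dim=1)

Q = None
for _ in range(5):
    Q = append_direction(Q, torch.randn(100))
print((Q.T @ Q - torch.eye(Q.shape[1])).abs().max())  # columns stay orthonormal
```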
@@ -73,7 +70,7 @@ class SubspaceNewton(Transform):
 
     RSN with line search
     ```python
-    opt = tz.Modular(
+    opt = tz.Optimizer(
         model.parameters(),
         tz.m.RSN(),
         tz.m.Backtracking()
@@ -82,7 +79,7 @@ class SubspaceNewton(Transform):
 
     RSN with trust region
     ```python
-    opt = tz.Modular(
+    opt = tz.Optimizer(
         model.parameters(),
         tz.m.LevenbergMarquardt(tz.m.RSN()),
     )
@@ -97,14 +94,14 @@ class SubspaceNewton(Transform):
     def __init__(
         self,
         sketch_size: int,
-        sketch_type: Literal["orthonormal", "gaussian", "common_directions", "mixed"] = "mixed",
+        sketch_type: Literal["orthonormal", "common_directions", "mixed", "rademacher"] = "common_directions",
         damping:float=0,
+        eigval_fn: Callable[[torch.Tensor], torch.Tensor] | None = None,
+        update_freq: int = 1,
+        precompute_inverse: bool = False,
+        use_lstsq: bool = True,
         hvp_method: HVPMethod = "batched_autograd",
         h: float = 1e-2,
-        use_lstsq: bool = True,
-        update_freq: int = 1,
-        H_tfm: Callable[[torch.Tensor, torch.Tensor], tuple[torch.Tensor, bool]] | Callable[[torch.Tensor, torch.Tensor], torch.Tensor] | None = None,
-        eigval_fn: Callable[[torch.Tensor], torch.Tensor] | None = None,
         seed: int | None = None,
         inner: Chainable | None = None,
     ):
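A hedged usage sketch of the 0.4.1 signature, assuming ``tz.m.RSN`` is the exported alias of ``SubspaceNewton`` (as the docstring examples suggest) and that it forwards these keyword arguments unchanged:

```python
import torch
import torchzero as tz

# hypothetical example; argument names follow the __init__ shown above
opt = tz.Optimizer(
    model.parameters(),
    tz.m.RSN(
        sketch_size=32,
        sketch_type="common_directions",  # new default in 0.4.1
        eigval_fn=torch.abs,              # e.g. force the sketched Hessian to be positive definite
        update_freq=1,
        precompute_inverse=False,
        use_lstsq=True,
    ),
    tz.m.Backtracking(),
)
```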
@@ -128,10 +125,7 @@ class SubspaceNewton(Transform):
         sketch_type = fs["sketch_type"]
         hvp_method = fs["hvp_method"]
 
-        if sketch_type in ('normal', 'gaussian'):
-            S = _gaussian_sketch(ndim, sketch_size, device=device, dtype=dtype, generator=generator)
-
-        elif sketch_type == "rademacher":
+        if sketch_type == "rademacher":
             S = _rademacher_sketch(ndim, sketch_size, device=device, dtype=dtype, generator=generator)
 
         elif sketch_type == 'orthonormal':
@@ -187,7 +181,7 @@ class SubspaceNewton(Transform):
         # form and orthogonalize sketching matrix
         S = torch.stack([g, slow_ema, fast_ema, prev_dir], dim=1)
         if sketch_size > 4:
-            S_random = _gaussian_sketch(ndim, sketch_size - 3, device=device, dtype=dtype, generator=generator)
+            S_random = torch.randn(ndim, sketch_size - 3, device=device, dtype=dtype, generator=generator) / math.sqrt(ndim)
             S = torch.cat([S, S_random], dim=1)
 
         S = _qr_orthonormalize(S)
@@ -200,38 +194,41 @@ class SubspaceNewton(Transform):
                                hvp_method=fs["hvp_method"], h=fs["h"])
         H_sketched = S.T @ HS
 
-        self.global_state["H_sketched"] = H_sketched
+        # update state
+        _newton_update_state_(
+            state = self.global_state,
+            H = H_sketched,
+            damping = fs["damping"],
+            eigval_fn = fs["eigval_fn"],
+            precompute_inverse = fs["precompute_inverse"],
+            use_lstsq = fs["use_lstsq"]
+
+        )
+
         self.global_state["S"] = S
 
     def apply_states(self, objective, states, settings):
-        S: torch.Tensor = self.global_state["S"]
+        updates = objective.get_updates()
+        fs = settings[0]
 
-        d_proj = _newton_step(
-            objective=objective,
-            H=self.global_state["H_sketched"],
-            damping=self.defaults["damping"],
-            H_tfm=self.defaults["H_tfm"],
-            eigval_fn=self.defaults["eigval_fn"],
-            use_lstsq=self.defaults["use_lstsq"],
-            g_proj = lambda g: S.T @ g
-        )
+        S = self.global_state["S"]
+        b = torch.cat([t.ravel() for t in updates])
+        b_proj = S.T @ b
+
+        d_proj = _newton_solve(b=b_proj, state=self.global_state, use_lstsq=fs["use_lstsq"])
 
         d = S @ d_proj
-        objective.updates = vec_to_tensors(d, objective.params)
+        vec_to_tensors_(d, updates)
         return objective
 
     def get_H(self, objective=...):
-        eigval_fn = self.defaults["eigval_fn"]
-        H_sketched: torch.Tensor = self.global_state["H_sketched"]
-        S: torch.Tensor = self.global_state["S"]
-
-        if eigval_fn is not None:
-            try:
-                L, Q = torch.linalg.eigh(H_sketched) # pylint:disable=not-callable
-                L: torch.Tensor = eigval_fn(L)
-                H_sketched = Q @ L.diag_embed() @ Q.mH
+        if "H" in self.global_state:
+            H_sketched = self.global_state["H"]
 
-            except torch.linalg.LinAlgError:
-                pass
+        else:
+            L = self.global_state["L"]
+            Q = self.global_state["Q"]
+            H_sketched = Q @ L.diag_embed() @ Q.mH
 
+        S: torch.Tensor = self.global_state["S"]
         return Sketched(S, H_sketched)
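The rewritten ``apply_states`` now projects the flattened update into the sketch basis, solves the small system stored by ``_newton_update_state_``, and lifts the result back to parameter space. A self-contained sketch of that algebra in plain tensors (not the torchzero helpers), assuming an orthonormal sketch ``S``:

```python
import torch

n, k = 50, 8
H = torch.randn(n, n); H = H @ H.T + torch.eye(n)  # stand-in Hessian (SPD for the example)
g = torch.randn(n)                                 # flattened gradient / update

S, _ = torch.linalg.qr(torch.randn(n, k))          # orthonormal sketch: columns span the subspace
H_sketched = S.T @ (H @ S)                         # k x k projected Hessian (k Hessian-vector products)
b_proj = S.T @ g                                   # projected right-hand side

# solve the small system; lstsq mirrors the use_lstsq=True default
d_proj = torch.linalg.lstsq(H_sketched, b_proj.unsqueeze(1)).solution.squeeze(1)
d = S @ d_proj                                     # lift the subspace Newton step back to R^n
print(d.shape)                                     # torch.Size([50])
```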
@@ -74,7 +74,7 @@ class LaplacianSmoothing(TensorTransform):
 
     ```python
 
-    opt = tz.Modular(
+    opt = tz.Optimizer(
         model.parameters(),
         tz.m.LaplacianSmoothing(),
         tz.m.LR(1e-2),
@@ -7,15 +7,14 @@ from typing import Literal, cast
 
 import torch
 
-from ...core import Chainable, Modular, Module, Objective
+from ...core import Chainable, Optimizer, Module, Objective
 from ...core.reformulation import Reformulation
 from ...utils import Distributions, NumberList, TensorList
 from ..termination import TerminationCriteriaBase, make_termination_criteria
 
 
 def _reset_except_self(objective: Objective, modules, self: Module):
-    assert objective.modular is not None
-    for m in objective.modular.flat_modules:
+    for m in modules:
         if m is not self:
             m.reset()
 
@@ -8,7 +8,7 @@ import torch
 from ...core import Chainable, TensorTransform
 from ...utils import NumberList, TensorList, tofloat, unpack_dicts, unpack_states
 from ...linalg.linear_operator import ScaledIdentity
-from ..functional import epsilon_step_size
+from ..opt_utils import epsilon_step_size
 
 def _acceptable_alpha(alpha, param:torch.Tensor):
     finfo = torch.finfo(param.dtype)
@@ -16,7 +16,7 @@ def _acceptable_alpha(alpha, param:torch.Tensor):
         return False
     return True
 
-def _get_H(self: TensorTransform, var):
+def _get_scaled_identity_H(self: TensorTransform, var):
     n = sum(p.numel() for p in var.params)
     p = var.params[0]
     alpha = self.global_state.get('alpha', 1)
@@ -87,7 +87,7 @@ class PolyakStepSize(TensorTransform):
         return tensors
 
     def get_H(self, objective):
-        return _get_H(self, objective)
+        return _get_scaled_identity_H(self, objective)
 
 
 def _bb_short(s: TensorList, y: TensorList, sy, eps):
@@ -176,7 +176,7 @@ class BarzilaiBorwein(TensorTransform):
         prev_g.copy_(g)
 
     def get_H(self, objective):
-        return _get_H(self, objective)
+        return _get_scaled_identity_H(self, objective)
 
     @torch.no_grad
     def multi_tensor_apply(self, tensors, params, grads, loss, states, settings):
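For reference, the standard Barzilai-Borwein step sizes (presumably what ``_bb_short`` and its "long" counterpart compute) are, with ``s = x_k - x_{k-1}`` and ``y = g_k - g_{k-1}``, the long step ``(s·s)/(s·y)`` and the short step ``(s·y)/(y·y)``. A tiny numeric illustration on a quadratic (plain PyTorch, not the torchzero helpers):

```python
import torch

# toy quadratic f(x) = 0.5 * x^T A x, so the gradient is A @ x
A = torch.diag(torch.tensor([1.0, 10.0, 100.0]))
x_prev = torch.tensor([1.0, 1.0, 1.0])
x      = torch.tensor([0.9, 0.5, 0.1])
g_prev, g = A @ x_prev, A @ x

s, y = x - x_prev, g - g_prev
bb_long  = (s @ s) / (s @ y)   # "long" BB step
bb_short = (s @ y) / (y @ y)   # "short" BB step
print(bb_long.item(), bb_short.item())  # both lie within the inverse-curvature range [1/100, 1]
```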
@@ -288,7 +288,7 @@ class BBStab(TensorTransform):
         prev_g.copy_(g)
 
     def get_H(self, objective):
-        return _get_H(self, objective)
+        return _get_scaled_identity_H(self, objective)
 
     @torch.no_grad
     def multi_tensor_apply(self, tensors, params, grads, loss, states, settings):
@@ -384,4 +384,4 @@ class AdGD(TensorTransform):
         return tensors
 
     def get_H(self, objective):
-        return _get_H(self, objective)
+        return _get_scaled_identity_H(self, objective)
@@ -51,7 +51,7 @@ class Warmup(TensorTransform):
 
     .. code-block:: python
 
-        opt = tz.Modular(
+        opt = tz.Optimizer(
             model.parameters(),
             tz.m.Adam(),
             tz.m.LR(1e-2),
@@ -90,7 +90,7 @@ class WarmupNormClip(TensorTransform):
 
     .. code-block:: python
 
-        opt = tz.Modular(
+        opt = tz.Optimizer(
             model.parameters(),
             tz.m.Adam(),
             tz.m.WarmupNormClip(steps=1000)
@@ -109,7 +109,7 @@ class CubicRegularization(TrustRegionBase):
 
     .. code-block:: python
 
-        opt = tz.Modular(
+        opt = tz.Optimizer(
             model.parameters(),
             tz.m.CubicRegularization(tz.m.Newton()),
         )
@@ -44,7 +44,7 @@ class LevenbergMarquardt(TrustRegionBase):
     Gauss-Newton with Levenberg-Marquardt trust-region
 
     ```python
-    opt = tz.Modular(
+    opt = tz.Optimizer(
         model.parameters(),
         tz.m.LevenbergMarquardt(tz.m.GaussNewton()),
     )
@@ -52,7 +52,7 @@ class LevenbergMarquardt(TrustRegionBase):
 
     LM-SR1
     ```python
-    opt = tz.Modular(
+    opt = tz.Optimizer(
         model.parameters(),
         tz.m.LevenbergMarquardt(tz.m.SR1(inverse=False)),
     )
@@ -47,7 +47,7 @@ class TrustCG(TrustRegionBase):
 
     .. code-block:: python
 
-        opt = tz.Modular(
+        opt = tz.Optimizer(
             model.parameters(),
             tz.m.TrustCG(hess_module=tz.m.SR1(inverse=False)),
         )
@@ -8,8 +8,7 @@ from ...utils import tofloat
 
 
 def _reset_except_self(objective: Objective, modules, self: Module):
-    assert objective.modular is not None
-    for m in objective.modular.flat_modules:
+    for m in modules:
         if m is not self:
             m.reset()
 
@@ -45,7 +44,7 @@ class SVRG(Module):
     ## Examples:
     SVRG-LBFGS
     ```python
-    opt = tz.Modular(
+    opt = tz.Optimizer(
         model.parameters(),
         tz.m.SVRG(len(dataloader)),
         tz.m.LBFGS(),
@@ -55,7 +54,7 @@ class SVRG(Module):
 
     For extra variance reduction one can use Online versions of algorithms, although it won't always help.
     ```python
-    opt = tz.Modular(
+    opt = tz.Optimizer(
         model.parameters(),
         tz.m.SVRG(len(dataloader)),
         tz.m.Online(tz.m.LBFGS()),
@@ -64,7 +63,7 @@ class SVRG(Module):
 
     Variance reduction can also be applied to gradient estimators.
     ```python
-    opt = tz.Modular(
+    opt = tz.Optimizer(
         model.parameters(),
         tz.m.SPSA(),
         tz.m.SVRG(100),
@@ -6,8 +6,8 @@ from ...core import Module
 from ...utils import NumberList, TensorList
 
 
-def _reset_except_self(optimizer, var, self: Module):
-    for m in optimizer.unrolled_modules:
+def _reset_except_self(objective, modules, self: Module):
+    for m in modules:
         if m is not self:
             m.reset()
 
@@ -33,7 +33,7 @@ class WeightDecay(TensorTransform):
 
     Adam with non-decoupled weight decay
     ```python
-    opt = tz.Modular(
+    opt = tz.Optimizer(
         model.parameters(),
         tz.m.WeightDecay(1e-3),
         tz.m.Adam(),
@@ -44,7 +44,7 @@ class WeightDecay(TensorTransform):
     Adam with decoupled weight decay that still scales with learning rate
     ```python
 
-    opt = tz.Modular(
+    opt = tz.Optimizer(
         model.parameters(),
         tz.m.Adam(),
         tz.m.WeightDecay(1e-3),
@@ -54,7 +54,7 @@ class WeightDecay(TensorTransform):
 
     Adam with fully decoupled weight decay that doesn't scale with learning rate
     ```python
-    opt = tz.Modular(
+    opt = tz.Optimizer(
         model.parameters(),
         tz.m.Adam(),
         tz.m.LR(1e-3),
@@ -93,7 +93,7 @@ class RelativeWeightDecay(TensorTransform):
 
     Adam with non-decoupled relative weight decay
     ```python
-    opt = tz.Modular(
+    opt = tz.Optimizer(
         model.parameters(),
         tz.m.RelativeWeightDecay(1e-1),
         tz.m.Adam(),
@@ -103,7 +103,7 @@ class RelativeWeightDecay(TensorTransform):
 
     Adam with decoupled relative weight decay
     ```python
-    opt = tz.Modular(
+    opt = tz.Optimizer(
         model.parameters(),
         tz.m.Adam(),
         tz.m.RelativeWeightDecay(1e-1),
@@ -11,7 +11,7 @@ class Wrap(Module):
     Wraps a pytorch optimizer to use it as a module.
 
     Note:
-        Custom param groups are supported only by ``set_param_groups``, settings passed to Modular will be applied to all parameters.
+        Custom param groups are supported only by ``set_param_groups``, settings passed to Optimizer will be applied to all parameters.
 
     Args:
         opt_fn (Callable[..., torch.optim.Optimizer] | torch.optim.Optimizer):
@@ -21,7 +21,7 @@ class Wrap(Module):
         **kwargs:
             Extra args to be passed to opt_fn. The function is called as ``opt_fn(parameters, *args, **kwargs)``.
         use_param_groups:
-            Whether to pass settings passed to Modular to the wrapped optimizer.
+            Whether to pass settings passed to Optimizer to the wrapped optimizer.
 
             Note that settings to the first parameter are used for all parameters,
             so if you specified per-parameter settings, they will be ignored.
@@ -32,7 +32,7 @@ class Wrap(Module):
     ```python
 
     from pytorch_optimizer import StableAdamW
-    opt = tz.Modular(
+    opt = tz.Optimizer(
         model.parameters(),
         tz.m.Wrap(StableAdamW, lr=1),
         tz.m.Cautious(),
@@ -83,7 +83,7 @@ class Wrap(Module):
 
         # settings passed in `set_param_groups` are the highest priority
         # schedulers will override defaults but not settings passed in `set_param_groups`
-        # this is consistent with how Modular does it.
+        # this is consistent with how Optimizer does it.
         if self._custom_param_groups is not None:
             setting = {k:v for k,v in setting if k not in self._custom_param_groups[0]}
 
@@ -29,7 +29,7 @@ class CD(Module):
         whether to use three points (three function evaluatins) to determine descent direction.
         if False, uses two points, but then ``adaptive`` can't be used. Defaults to True.
     """
-    def __init__(self, h:float=1e-3, grad:bool=True, adaptive:bool=True, index:Literal['cyclic', 'cyclic2', 'random']="cyclic2", threepoint:bool=True,):
+    def __init__(self, h:float=1e-3, grad:bool=False, adaptive:bool=True, index:Literal['cyclic', 'cyclic2', 'random']="cyclic2", threepoint:bool=True,):
         defaults = dict(h=h, grad=grad, adaptive=adaptive, index=index, threepoint=threepoint)
         super().__init__(defaults)