torchzero-0.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. torchzero/__init__.py +4 -0
  2. torchzero/core/__init__.py +13 -0
  3. torchzero/core/module.py +471 -0
  4. torchzero/core/tensorlist_optimizer.py +219 -0
  5. torchzero/modules/__init__.py +21 -0
  6. torchzero/modules/adaptive/__init__.py +4 -0
  7. torchzero/modules/adaptive/adaptive.py +192 -0
  8. torchzero/modules/experimental/__init__.py +19 -0
  9. torchzero/modules/experimental/experimental.py +294 -0
  10. torchzero/modules/experimental/quad_interp.py +104 -0
  11. torchzero/modules/experimental/subspace.py +259 -0
  12. torchzero/modules/gradient_approximation/__init__.py +7 -0
  13. torchzero/modules/gradient_approximation/_fd_formulas.py +3 -0
  14. torchzero/modules/gradient_approximation/base_approximator.py +110 -0
  15. torchzero/modules/gradient_approximation/fdm.py +125 -0
  16. torchzero/modules/gradient_approximation/forward_gradient.py +163 -0
  17. torchzero/modules/gradient_approximation/newton_fdm.py +198 -0
  18. torchzero/modules/gradient_approximation/rfdm.py +125 -0
  19. torchzero/modules/line_search/__init__.py +30 -0
  20. torchzero/modules/line_search/armijo.py +56 -0
  21. torchzero/modules/line_search/base_ls.py +139 -0
  22. torchzero/modules/line_search/directional_newton.py +217 -0
  23. torchzero/modules/line_search/grid_ls.py +158 -0
  24. torchzero/modules/line_search/scipy_minimize_scalar.py +62 -0
  25. torchzero/modules/meta/__init__.py +12 -0
  26. torchzero/modules/meta/alternate.py +65 -0
  27. torchzero/modules/meta/grafting.py +195 -0
  28. torchzero/modules/meta/optimizer_wrapper.py +173 -0
  29. torchzero/modules/meta/return_overrides.py +46 -0
  30. torchzero/modules/misc/__init__.py +10 -0
  31. torchzero/modules/misc/accumulate.py +43 -0
  32. torchzero/modules/misc/basic.py +115 -0
  33. torchzero/modules/misc/lr.py +96 -0
  34. torchzero/modules/misc/multistep.py +51 -0
  35. torchzero/modules/misc/on_increase.py +53 -0
  36. torchzero/modules/momentum/__init__.py +4 -0
  37. torchzero/modules/momentum/momentum.py +106 -0
  38. torchzero/modules/operations/__init__.py +29 -0
  39. torchzero/modules/operations/multi.py +298 -0
  40. torchzero/modules/operations/reduction.py +134 -0
  41. torchzero/modules/operations/singular.py +113 -0
  42. torchzero/modules/optimizers/__init__.py +10 -0
  43. torchzero/modules/optimizers/adagrad.py +49 -0
  44. torchzero/modules/optimizers/adam.py +118 -0
  45. torchzero/modules/optimizers/lion.py +28 -0
  46. torchzero/modules/optimizers/rmsprop.py +51 -0
  47. torchzero/modules/optimizers/rprop.py +99 -0
  48. torchzero/modules/optimizers/sgd.py +54 -0
  49. torchzero/modules/orthogonalization/__init__.py +2 -0
  50. torchzero/modules/orthogonalization/newtonschulz.py +159 -0
  51. torchzero/modules/orthogonalization/svd.py +86 -0
  52. torchzero/modules/quasi_newton/__init__.py +4 -0
  53. torchzero/modules/regularization/__init__.py +22 -0
  54. torchzero/modules/regularization/dropout.py +34 -0
  55. torchzero/modules/regularization/noise.py +77 -0
  56. torchzero/modules/regularization/normalization.py +328 -0
  57. torchzero/modules/regularization/ortho_grad.py +78 -0
  58. torchzero/modules/regularization/weight_decay.py +92 -0
  59. torchzero/modules/scheduling/__init__.py +2 -0
  60. torchzero/modules/scheduling/lr_schedulers.py +131 -0
  61. torchzero/modules/scheduling/step_size.py +80 -0
  62. torchzero/modules/second_order/__init__.py +4 -0
  63. torchzero/modules/second_order/newton.py +165 -0
  64. torchzero/modules/smoothing/__init__.py +5 -0
  65. torchzero/modules/smoothing/gaussian_smoothing.py +90 -0
  66. torchzero/modules/smoothing/laplacian_smoothing.py +128 -0
  67. torchzero/modules/weight_averaging/__init__.py +2 -0
  68. torchzero/modules/weight_averaging/ema.py +72 -0
  69. torchzero/modules/weight_averaging/swa.py +171 -0
  70. torchzero/optim/__init__.py +10 -0
  71. torchzero/optim/experimental/__init__.py +20 -0
  72. torchzero/optim/experimental/experimental.py +343 -0
  73. torchzero/optim/experimental/ray_search.py +83 -0
  74. torchzero/optim/first_order/__init__.py +18 -0
  75. torchzero/optim/first_order/cautious.py +158 -0
  76. torchzero/optim/first_order/forward_gradient.py +70 -0
  77. torchzero/optim/first_order/optimizers.py +570 -0
  78. torchzero/optim/modular.py +132 -0
  79. torchzero/optim/quasi_newton/__init__.py +1 -0
  80. torchzero/optim/quasi_newton/directional_newton.py +58 -0
  81. torchzero/optim/second_order/__init__.py +1 -0
  82. torchzero/optim/second_order/newton.py +94 -0
  83. torchzero/optim/wrappers/__init__.py +0 -0
  84. torchzero/optim/wrappers/nevergrad.py +113 -0
  85. torchzero/optim/wrappers/nlopt.py +165 -0
  86. torchzero/optim/wrappers/scipy.py +439 -0
  87. torchzero/optim/zeroth_order/__init__.py +4 -0
  88. torchzero/optim/zeroth_order/fdm.py +87 -0
  89. torchzero/optim/zeroth_order/newton_fdm.py +146 -0
  90. torchzero/optim/zeroth_order/rfdm.py +217 -0
  91. torchzero/optim/zeroth_order/rs.py +85 -0
  92. torchzero/random/__init__.py +1 -0
  93. torchzero/random/random.py +46 -0
  94. torchzero/tensorlist.py +819 -0
  95. torchzero/utils/__init__.py +0 -0
  96. torchzero/utils/compile.py +39 -0
  97. torchzero/utils/derivatives.py +99 -0
  98. torchzero/utils/python_tools.py +25 -0
  99. torchzero/utils/torch_tools.py +92 -0
  100. torchzero-0.0.1.dist-info/LICENSE +21 -0
  101. torchzero-0.0.1.dist-info/METADATA +118 -0
  102. torchzero-0.0.1.dist-info/RECORD +104 -0
  103. torchzero-0.0.1.dist-info/WHEEL +5 -0
  104. torchzero-0.0.1.dist-info/top_level.txt +1 -0
torchzero/utils/compile.py
@@ -0,0 +1,39 @@
+ """Experimental and I need to test this on Windows."""
+ import warnings
+ import functools
+ import torch
+
+ ENABLE_COMPILING = True
+
+ def _try_compiling(warn=False):
+     def add(x, y): return x + y
+     compiled_add = torch.compile(add)
+     try:
+         res = compiled_add(torch.tensor(1.), torch.tensor(2.))
+     except Exception as e:
+         if warn: warnings.warn(f'Compiling failed so no further functions will be compiled:\n{e}')
+         return False
+     if res == 3: return True
+     return False
+
+ class _Compiler:
+     def __init__(self, warn=False):
+         self.can_compile = None
+         self.warn = warn
+
+     def maybe_compile(self, fn, **kwargs):
+         if self.can_compile is None: self.can_compile = _try_compiling(self.warn)
+         if self.can_compile: return torch.compile(fn, **kwargs)
+         return fn
+
+ _COMPILER = _Compiler(False)
+
+ @functools.wraps(torch.compile)
+ def maybe_compile(*args, **kwargs):
+     """Compiles a function if possible. Same usage as `torch.compile`.
+
+     On the first call this will attempt to compile a simple test function. If that fails, no further functions will be compiled.
+     I still need to test this on Windows.
+     """
+     if ENABLE_COMPILING: return _COMPILER.maybe_compile(*args, **kwargs)
+     return args[0]
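A minimal usage sketch for `maybe_compile` above. The import path `torchzero.utils.compile` is assumed from the file listing, and `fused_update` is a hypothetical example function.

```py
# Sketch, assuming `maybe_compile` is importable from torchzero.utils.compile
# (per the file listing); `fused_update` is a hypothetical example function.
import torch
from torchzero.utils.compile import maybe_compile

@maybe_compile
def fused_update(param: torch.Tensor, grad: torch.Tensor, lr: float) -> torch.Tensor:
    # plain SGD-style update; compiled only if torch.compile works on this machine
    return param - lr * grad

p, g = torch.randn(10), torch.randn(10)
p = fused_update(p, g, 1e-3)  # silently falls back to eager execution if compilation failed
```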
torchzero/utils/derivatives.py
@@ -0,0 +1,99 @@
+ from collections.abc import Sequence, Iterable
+
+ import torch
+
+ def _jacobian(input: Sequence[torch.Tensor], wrt: Sequence[torch.Tensor], create_graph=False):
+     flat_input = torch.cat([i.reshape(-1) for i in input])
+     grad_outputs = torch.eye(len(flat_input), device=input[0].device, dtype=input[0].dtype)
+     jac = []
+     for i in range(flat_input.numel()):
+         jac.append(torch.autograd.grad(
+             flat_input,
+             wrt,
+             grad_outputs[i],
+             retain_graph=True,
+             create_graph=create_graph,
+             allow_unused=True,
+             is_grads_batched=False,
+         ))
+     return [torch.stack(z) for z in zip(*jac)]
+
+
+ def _jacobian_batched(input: Sequence[torch.Tensor], wrt: Sequence[torch.Tensor], create_graph=False):
+     flat_input = torch.cat([i.reshape(-1) for i in input])
+     return torch.autograd.grad(
+         flat_input,
+         wrt,
+         torch.eye(len(flat_input), device=input[0].device, dtype=input[0].dtype),
+         retain_graph=True,
+         create_graph=create_graph,
+         allow_unused=True,
+         is_grads_batched=True,
+     )
+
+ def jacobian(input: Sequence[torch.Tensor], wrt: Sequence[torch.Tensor], create_graph=False, batched=True) -> Sequence[torch.Tensor]:
+     """Calculate the jacobian of a sequence of tensors w.r.t. another sequence of tensors.
+     Returns a sequence of tensors with the same length as `wrt`.
+     Each tensor will have the shape `(*input.shape, *wrt[i].shape)`.
+
+     Args:
+         input (Sequence[torch.Tensor]): input sequence of tensors.
+         wrt (Sequence[torch.Tensor]): sequence of tensors to differentiate w.r.t.
+         create_graph (bool, optional):
+             pytorch option; if True, the graph of the derivative will be constructed,
+             allowing higher order derivative products to be computed. Default: False.
+         batched (bool, optional): use the faster but experimental pytorch batched jacobian.
+             This only has an effect when `input` has more than 1 element. Defaults to True.
+
+     Returns:
+         sequence of tensors with the same length as `wrt`.
+     """
+     if batched: return _jacobian_batched(input, wrt, create_graph)
+     return _jacobian(input, wrt, create_graph)
+
+ def hessian(input: Sequence[torch.Tensor], wrt: Sequence[torch.Tensor], create_graph=False, batched=True):
+     """Calculate the hessian of a sequence of tensors w.r.t. another sequence of tensors.
+     Returns a sequence of tensors with the same length as `wrt`.
+     If you need a hessian matrix out of that sequence, pass it to `hessian_list_to_mat`.
+
+     Args:
+         input (Sequence[torch.Tensor]): input sequence of tensors.
+         wrt (Sequence[torch.Tensor]): sequence of tensors to differentiate w.r.t.
+         create_graph (bool, optional):
+             pytorch option; if True, the graph of the derivative will be constructed,
+             allowing higher order derivative products to be computed. Default: False.
+         batched (bool, optional): use the faster but experimental pytorch batched grad. Defaults to True.
+
+     Returns:
+         sequence of tensors with the same length as `wrt`.
+     """
+     return jacobian(jacobian(input, wrt, create_graph=True, batched=batched), wrt, create_graph=create_graph, batched=batched)
+
+ def jacobian_and_hessian(input: Sequence[torch.Tensor], wrt: Sequence[torch.Tensor], create_graph=False, batched=True):
+     """Calculate the jacobian and hessian of a sequence of tensors w.r.t. another sequence of tensors.
+     Calculating the hessian requires calculating the jacobian, so this function is more efficient than
+     calling `jacobian` and `hessian` separately, which would calculate the jacobian twice.
+
+     Args:
+         input (Sequence[torch.Tensor]): input sequence of tensors.
+         wrt (Sequence[torch.Tensor]): sequence of tensors to differentiate w.r.t.
+         create_graph (bool, optional):
+             pytorch option; if True, the graph of the derivative will be constructed,
+             allowing higher order derivative products to be computed. Default: False.
+         batched (bool, optional): use the faster but experimental pytorch batched grad. Defaults to True.
+
+     Returns:
+         tuple with the jacobians sequence and the hessians sequence.
+     """
+     jac = jacobian(input, wrt, create_graph=True, batched=batched)
+     return jac, jacobian(jac, wrt, batched=batched, create_graph=create_graph)
+
+ def jacobian_list_to_vec(jacobians: Iterable[torch.Tensor]):
+     """Flattens and concatenates a sequence of tensors."""
+     return torch.cat([i.ravel() for i in jacobians], 0)
+
+ def hessian_list_to_mat(hessians: Sequence[torch.Tensor]):
+     """Takes the output of `hessian` and returns the 2D hessian matrix.
+     Note - I have only tested this for cases where the input is a scalar."""
+     return torch.cat([h.reshape(h.size(0), h[1].numel()) for h in hessians], 1)
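A hedged usage sketch for the helpers above. The import path `torchzero.utils.derivatives` is assumed from the file listing, and the tiny quadratic loss is illustrative only.

```py
# Sketch, assuming the functions above are importable from torchzero.utils.derivatives.
import torch
from torchzero.utils.derivatives import jacobian_and_hessian, jacobian_list_to_vec, hessian_list_to_mat

w = torch.randn(3, requires_grad=True)
b = torch.randn(1, requires_grad=True)
loss = (w ** 2).sum() + (b ** 2).sum()  # scalar loss, so `input` is a single-element sequence

# gradient (jacobian of the scalar loss) and hessian w.r.t. both parameter tensors
jac, hess = jacobian_and_hessian([loss], [w, b])

grad_vec = jacobian_list_to_vec(jac)   # flat gradient of length 4
hess_mat = hessian_list_to_mat(hess)   # full 4x4 hessian matrix
```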
torchzero/utils/python_tools.py
@@ -0,0 +1,25 @@
+ import functools
+ import operator
+ from typing import Any, TypeVar
+ from collections.abc import Iterable
+
+ import torch
+
+ def _flatten_no_check(iterable: Iterable) -> list[Any]:
+     """Flatten an iterable of iterables and return a flattened list. Note that if `iterable` is not Iterable, this will return `[iterable]`."""
+     if isinstance(iterable, Iterable):
+         return [a for i in iterable for a in _flatten_no_check(i)]
+     return [iterable]
+
+ def flatten(iterable: Iterable) -> list[Any]:
+     """Flatten an iterable of iterables and return a flattened list. If `iterable` is not iterable, raises a TypeError."""
+     if isinstance(iterable, Iterable): return [a for i in iterable for a in _flatten_no_check(i)]
+     raise TypeError(f'passed object is not an iterable, {type(iterable) = }')
+
+ X = TypeVar("X")
+ # def reduce_dim[X](x: Iterable[Iterable[X]]) -> list[X]: # pylint:disable=E0602
+ def reduce_dim(x: Iterable[Iterable[X]]) -> list[X]: # pylint:disable=E0602
+     """Reduces one level of nesting. Takes an iterable of iterables of X, and returns an iterable of X."""
+     return functools.reduce(operator.iconcat, x, [])
+
+ _ScalarLoss = int | float | bool | torch.Tensor
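A quick illustrative example of `flatten` and `reduce_dim` above (the import path `torchzero.utils.python_tools` is assumed from the file listing).

```py
# Sketch, assuming the helpers are importable from torchzero.utils.python_tools.
from torchzero.utils.python_tools import flatten, reduce_dim

print(flatten([[1, 2], [3, [4, 5]], 6]))  # recursive flattening -> [1, 2, 3, 4, 5, 6]
print(reduce_dim([[1, 2], [3, 4]]))       # removes one nesting level -> [1, 2, 3, 4]
```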
torchzero/utils/torch_tools.py
@@ -0,0 +1,92 @@
+ import copyreg
+ import weakref
+
+ import torch
+ import numpy as np
+
+ def swap_tensors_no_use_count_check(t1, t2):
+     """
+     Copy of the pytorch function without the use count check.
+
+     This function swaps the content of the two Tensor objects.
+     At a high level, this will make t1 have the content of t2 while preserving
+     its identity.
+
+     This will not work if t1 and t2 have different slots.
+     """
+     # Ensure there are no weakrefs
+     if weakref.getweakrefs(t1):
+         raise RuntimeError("Cannot swap t1 because it has weakref associated with it")
+     if weakref.getweakrefs(t2):
+         raise RuntimeError("Cannot swap t2 because it has weakref associated with it")
+     t1_slots = set(copyreg._slotnames(t1.__class__))  # type: ignore[attr-defined]
+     t2_slots = set(copyreg._slotnames(t2.__class__))  # type: ignore[attr-defined]
+     if t1_slots != t2_slots:
+         raise RuntimeError("Cannot swap t1 and t2 if they have different slots")
+
+     def swap_attr(name):
+         tmp = getattr(t1, name)
+         setattr(t1, name, (getattr(t2, name)))
+         setattr(t2, name, tmp)
+
+     # def error_pre_hook(grad_outputs):
+     #     raise RuntimeError(
+     #         "Trying to execute AccumulateGrad node that was poisoned by swap_tensors "
+     #         "this can happen when you try to run backward on a tensor that was swapped. "
+     #         "For a module m with `torch.__future__.set_swap_module_params_on_conversion(True)` "
+     #         "you should not change the device or dtype of the module (e.g. `m.cpu()` or `m.half()`) "
+     #         "between running forward and backward. To resolve this, please only change the "
+     #         "device/dtype before running forward (or after both forward and backward)."
+     #     )
+
+     # def check_use_count(t, name="t1"):
+     #     use_count = t._use_count()
+     #     error_str = (
+     #         f"Expected use_count of {name} to be 1 or 2 with an AccumulateGrad node but got {use_count} "
+     #         f"make sure you are not holding references to the tensor in other places."
+     #     )
+     #     if use_count > 1:
+     #         if use_count == 2 and t.is_leaf:
+     #             accum_grad_node = torch.autograd.graph.get_gradient_edge(t).node
+     #             # Make sure that the accumulate_grad node was not lazy_init-ed by get_gradient_edge
+     #             if t._use_count() == 2:
+     #                 accum_grad_node.register_prehook(error_pre_hook)
+     #             else:
+     #                 raise RuntimeError(error_str)
+     #         else:
+     #             raise RuntimeError(error_str)
+
+     # check_use_count(t1, "t1")
+     # check_use_count(t2, "t2")
+
+     # Swap the types
+     # Note that this will fail if there are mismatched slots
+     swap_attr("__class__")
+
+     # Swap the dynamic attributes
+     swap_attr("__dict__")
+
+     # Swap the slots
+     for slot in t1_slots:
+         if hasattr(t1, slot) and hasattr(t2, slot):
+             swap_attr(slot)
+         elif hasattr(t1, slot):
+             setattr(t2, slot, (getattr(t1, slot)))
+             delattr(t1, slot)
+         elif hasattr(t2, slot):
+             setattr(t1, slot, (getattr(t2, slot)))
+             delattr(t2, slot)
+
+     # Swap the at::Tensor they point to
+     torch._C._swap_tensor_impl(t1, t2)
+
+
+ def totensor(x) -> torch.Tensor:
+     if isinstance(x, torch.Tensor): return x
+     if isinstance(x, np.ndarray): return torch.from_numpy(x)
+     return torch.from_numpy(np.asarray(x))
+
+ def tofloat(x) -> float:
+     if isinstance(x, torch.Tensor): return x.detach().cpu().item()
+     if isinstance(x, np.ndarray): return x.item()
+     return float(x)
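A short sketch of the conversion helpers above (the import path `torchzero.utils.torch_tools` is assumed from the file listing).

```py
# Sketch, assuming the helpers are importable from torchzero.utils.torch_tools.
import numpy as np
import torch
from torchzero.utils.torch_tools import totensor, tofloat

t = totensor(np.arange(3))        # ndarray -> tensor via torch.from_numpy
u = totensor([1.0, 2.0, 3.0])     # anything else is routed through np.asarray first
x = tofloat(torch.tensor(2.5))    # single-element tensor -> python float 2.5
```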
torchzero-0.0.1.dist-info/LICENSE
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2024 inikishev
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
torchzero-0.0.1.dist-info/METADATA
@@ -0,0 +1,118 @@
+ Metadata-Version: 2.2
+ Name: torchzero
+ Version: 0.0.1
+ Author-email: Ivan Nikishev <nkshv2@gmail.com>
+ License: MIT License
+
+ Copyright (c) 2024 inikishev
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+
+ Project-URL: Homepage, https://github.com/inikishev/torchzero
+ Project-URL: Repository, https://github.com/inikishev/torchzero
+ Project-URL: Issues, https://github.com/inikishev/torchzero/isses
+ Requires-Python: >=3.10
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: torch
+ Requires-Dist: numpy
+ Requires-Dist: typing_extensions
+
+ ![example workflow](https://github.com/inikishev/torchzero/actions/workflows/tests.yml/badge.svg)
+
+ # torchzero
+
+ This is a work-in-progress optimizer library for pytorch with composable zeroth, first, second order and quasi-newton methods, gradient approximation, line searches and a whole lot of other stuff.
+
+ Most optimizers are modular, meaning you can chain them like this:
+
+ ```py
+ optimizer = torchzero.optim.Modular(model.parameters(), [*list of modules*])
+ ```
+
+ For example, you might use `[ClipNorm(4), LR(1e-3), NesterovMomentum(0.9)]` for standard SGD with gradient clipping and nesterov momentum. Move `ClipNorm` to the end to clip the update instead of the gradients. If you don't have access to gradients, add a `RandomizedFDM()` at the beginning to approximate them via randomized finite differences. Add `Cautious()` to make the optimizer cautious.
+
+ Each module takes the update produced by the previous module and works on it. That way there is no need to reimplement things like laplacian smoothing for every optimizer, and it is easy to experiment with grafting, interpolation between different optimizers, and perhaps some weirder combinations like nested momentum.
+
+ # How to use
+
+ All modules are defined in `torchzero.modules`. You can generally mix and match them however you want. Some pre-made optimizers are available in `torchzero.optim`.
+
+ Some optimizers require a closure, which should look like this:
+
+ ```py
+ def closure(backward = True):
+     preds = model(inputs)
+     loss = loss_fn(preds, targets)
+
+     # if you can't call loss.backward() and instead use gradient-free methods,
+     # they always call closure with backward=False,
+     # so you can remove the part below, but keep the unused backward argument.
+     if backward:
+         optimizer.zero_grad()
+         loss.backward()
+     return loss
+
+ optimizer.step(closure)
+ ```
+
+ This closure will also work with all built-in pytorch optimizers, including LBFGS, with all optimizers in this library, as well as with most custom ones.
+
+ # Contents
+
+ Docs are available at [torchzero.readthedocs.io](https://torchzero.readthedocs.io/en/latest/). A preliminary list of all modules is available here <https://torchzero.readthedocs.io/en/latest/autoapi/torchzero/modules/index.html#classes>. Some of the implemented algorithms:
+
+ - SGD/Rprop/RMSProp/AdaGrad/Adam as composable modules. They are also tested to exactly match the built-in pytorch versions.
+ - Cautious Optimizers (<https://huggingface.co/papers/2411.16085>)
+ - Optimizer grafting (<https://openreview.net/forum?id=FpKgG31Z_i9>)
+ - Laplacian smoothing (<https://arxiv.org/abs/1806.06317>)
+ - Polyak momentum, nesterov momentum
+ - Gradient norm and value clipping, gradient normalization
+ - Gradient centralization (<https://arxiv.org/abs/2004.01461>)
+ - Learning rate dropout (<https://pubmed.ncbi.nlm.nih.gov/35286266/>)
+ - Forward gradient (<https://arxiv.org/abs/2202.08587>)
+ - Gradient approximation via finite differences or randomized finite differences, which includes SPSA, RDSA, FDSA and Gaussian smoothing (<https://arxiv.org/abs/2211.13566v3>)
+ - Various line searches
+ - Exact Newton's method (with Levenberg-Marquardt regularization), Newton with hessian approximation via finite differences, subspace finite-differences Newton
+ - Directional Newton via one additional forward pass
+
+ All modules should be quite fast, especially on models with many different parameters, due to `_foreach` operations.
+
+ I am getting to the point where I can start focusing on good docs and tests. As of now, the code should be considered experimental, untested and subject to change, so feel free to try it, but be careful if using it for an actual project.
+
+ # Wrappers
+
+ ### scipy.optimize.minimize wrapper
+
+ A scipy.optimize.minimize wrapper with support for both the gradient and the hessian via batched autograd:
+
+ ```py
+ from torchzero.optim.wrappers.scipy import ScipyMinimize
+ opt = ScipyMinimize(model.parameters(), method = 'trust-krylov')
+ ```
+
+ Use it like any other optimizer (make sure the closure accepts a `backward` argument like the one from **How to use**). Note that it performs a full minimization on each step.
+
+ ### Nevergrad wrapper
+
+ ```py
+ opt = NevergradOptimizer(bench.parameters(), ng.optimizers.NGOptBase, budget = 1000)
+ ```
+
+ Use it like any other optimizer (make sure the closure accepts a `backward` argument like the one from **How to use**).
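The README above describes chaining modules with `Modular`; a hedged sketch of what that could look like in a training step is given below. The module names and arguments `ClipNorm(4)`, `LR(1e-3)`, `NesterovMomentum(0.9)` are taken verbatim from the README, and `torchzero.modules` is assumed to be their import location as the README states; treat the rest as illustrative.

```py
# Sketch based on the README's own example list [ClipNorm(4), LR(1e-3), NesterovMomentum(0.9)];
# import locations are assumed from the README ("All modules are defined in torchzero.modules").
import torch
import torchzero
from torchzero.modules import ClipNorm, LR, NesterovMomentum

model = torch.nn.Linear(10, 1)
opt = torchzero.optim.Modular(model.parameters(), [ClipNorm(4), LR(1e-3), NesterovMomentum(0.9)])

inputs, targets = torch.randn(8, 10), torch.randn(8, 1)
loss = torch.nn.functional.mse_loss(model(inputs), targets)
loss.backward()
opt.step()  # each module transforms the update of the previous one: clip -> lr -> momentum
```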
torchzero-0.0.1.dist-info/RECORD
@@ -0,0 +1,104 @@
+ torchzero/__init__.py,sha256=CCIYfhGNYMnRP_cdXL7DgocxkEWYUZYgB3Sf1T5tdYY,203
+ torchzero/tensorlist.py,sha256=5XvutWeZdyUcAVVkoEXbgSjvTiDe0f1806wsINfzlRg,41387
+ torchzero/core/__init__.py,sha256=aw2p6Gt0qrtUM8x2msspAG8JHMaD2ma11sMhXm-rC90,267
+ torchzero/core/module.py,sha256=msvh-e7cE3vArXb5PLrrsG3j7d-94GG3V4aBXYKFZTo,21254
+ torchzero/core/tensorlist_optimizer.py,sha256=hIQIW7uBwddlJxXhR4-xnll0ixY9ZJY0i0oH-wqxfX0,9963
+ torchzero/modules/__init__.py,sha256=5f8kt2mMn1eo9YcjXc3ESW-bqMQIRf646V3zlr8UAO4,571
+ torchzero/modules/adaptive/__init__.py,sha256=YBVDXCosr4-C-GFCoreHS3DFyHiYMhCbOWgdhVVaZ_E,161
+ torchzero/modules/adaptive/adaptive.py,sha256=msQkLlxqt3fFWMLGyr1Pi77bGPv-QAyfDfQ1Oipl5Yo,6473
+ torchzero/modules/experimental/__init__.py,sha256=dRLPbhTO8efkRQHB-8Z29dKS3BEleCso1dsXxgccFgM,647
+ torchzero/modules/experimental/experimental.py,sha256=Z7g3AhZ5udtjIF_3a5-GdvtMOSt_uMmyJF8AKshw9i0,9853
+ torchzero/modules/experimental/quad_interp.py,sha256=iAP6r2uHp2BDsGNFYhpIHuv1l5z7ZXQZLZU1E-VO7eE,4117
+ torchzero/modules/experimental/subspace.py,sha256=KgTIdzv75hlmPFs9nCMV0M3NWa7zcp9IK0OLFsl5UrU,11801
+ torchzero/modules/gradient_approximation/__init__.py,sha256=q8rNkk2PK6Y9zk42Mq8pY2YF6fHt5QuJd7BK-RTFKpg,179
+ torchzero/modules/gradient_approximation/_fd_formulas.py,sha256=mXqRwsDYjXi3pnI4mkpwwtJE85omYEvcmtbJAOfpg9o,82
+ torchzero/modules/gradient_approximation/base_approximator.py,sha256=lNDrbGrsfhru7u40w2EDd5Xccv9oQxRaEDOLoVnBJdk,3975
+ torchzero/modules/gradient_approximation/fdm.py,sha256=yBvWmkKJqVMd0TUSZcEr6_ulNTtlZbE7rl0uLI7BADY,4977
+ torchzero/modules/gradient_approximation/forward_gradient.py,sha256=3CC-tcUIAL0d422WHUQLPx0UMcU5URQ5uYCNbgpi19M,6456
+ torchzero/modules/gradient_approximation/newton_fdm.py,sha256=foEjnOUn-9Til6IP6x6E7-wVoAnKGMfbyk1hfmuRYcI,7116
+ torchzero/modules/gradient_approximation/rfdm.py,sha256=iA2ydwkk9iRVPYcaECqUupprj6nrgmRKPVN0936gYf4,4967
+ torchzero/modules/line_search/__init__.py,sha256=hYysFi5b9a5g0jcRNkgZYGRcZ1V7_JacBVWMR7idP38,1380
+ torchzero/modules/line_search/armijo.py,sha256=2-tErIpO8p3XhY9uKrwGsaohEAN6h5tZ847_hXTPjxs,1966
+ torchzero/modules/line_search/base_ls.py,sha256=uRHg6n9lcLrBrIAUZLuEiJuWaZDQR-rqNO0ZxZYGAXo,5330
+ torchzero/modules/line_search/directional_newton.py,sha256=LLMjDu9nzy-WiqXb2Kzc4kRzhCoQOFc-1j_9hOyxt00,9168
+ torchzero/modules/line_search/grid_ls.py,sha256=PLpi8R_KIc8xZ6IxJmeLgKPJQPSgd5M4T-pj33ykLnw,5614
+ torchzero/modules/line_search/scipy_minimize_scalar.py,sha256=6JS603_sphNxj4Ji2Ia4gWcyqIM326MVGMHLWaQDXBA,2201
+ torchzero/modules/meta/__init__.py,sha256=ARVR3Vzvq50n-3uFMNxcGUDd2r4Euamay5UYtpIxXNg,407
+ torchzero/modules/meta/alternate.py,sha256=6LOYJI6_Q2X5MKAnFcymoJP8i4lcarhaPRp9Sm0eQS0,2124
+ torchzero/modules/meta/grafting.py,sha256=tWazkxlqw2brkJjkaugasoh3XGaAsR4xOqyjQMEv5uQ,7583
+ torchzero/modules/meta/optimizer_wrapper.py,sha256=cVe63uXMLLIbuj4wkSQyj0mBmKVx9SqSVzqsgcczbzA,6448
+ torchzero/modules/meta/return_overrides.py,sha256=6bveA6P0jgNiWu-P2NumAjfrAtpOL_uoIHBljOu-aYs,2031
+ torchzero/modules/misc/__init__.py,sha256=P43XRz1nnOuJbpq_bQboLJ7hip80SQmvhua2baPdJ-c,390
+ torchzero/modules/misc/accumulate.py,sha256=qN0xJ-wnhH6pyr8OY5g0N-ObYJrPyiYgVjX11Sss10s,1330
+ torchzero/modules/misc/basic.py,sha256=dWLMkj32bp4FN44wFrF5VP1_fyN7Xxb9FTVtWP_4EWw,3492
+ torchzero/modules/misc/lr.py,sha256=V6W5AU9upjpSVpFtlXVar-zJ-qZAgY2Bts-ibeKf4bk,3525
+ torchzero/modules/misc/multistep.py,sha256=L526iSNWg8UbXdulRT09r4qcOm6jHXi4v3Ho8PjkCPQ,1781
+ torchzero/modules/misc/on_increase.py,sha256=XoMzB6VWOIKpujL030fpwQcVyW_QSls-ipCwjoveMF0,2012
+ torchzero/modules/momentum/__init__.py,sha256=Cj_3KJ76RLX-WQ7xsOoLY9mucQvnkyudTeVH8fnvdwc,138
+ torchzero/modules/momentum/momentum.py,sha256=Tywb6g0PNY4gIfXRHxEIYxgH56qoAnAtE5MzPAJh7VU,3935
+ torchzero/modules/operations/__init__.py,sha256=4SxIQMh-ixEqEDXWdizeGOzfhFw-af4Wz0A-00ypmg0,378
+ torchzero/modules/operations/multi.py,sha256=XValEBe90w1uXY94kX_DItWvjchmOrAfpUa6hsi0sxk,10317
+ torchzero/modules/operations/reduction.py,sha256=uIRrqG2V-NOvljZVrJegjfjcCSQ4pI1rN9bQnZt-EW4,4652
+ torchzero/modules/operations/singular.py,sha256=Y6ImnsUB3CJPpe_7UkT1vq9diGWXf7XKpuA7Ev7Hq2g,3569
+ torchzero/modules/optimizers/__init__.py,sha256=QZu8yvqy7ouY45Xju5Z2oHWJiFa1CslknodhWWRZRms,247
+ torchzero/modules/optimizers/adagrad.py,sha256=20r1ghs67NfCED0Z0xPZflen1gLaG3tOLiTg0WEYsNU,2015
+ torchzero/modules/optimizers/adam.py,sha256=6Lq69rsyE_UI54z8T0HRyHqo0nXLwjCv35u6BsnpVSg,4722
+ torchzero/modules/optimizers/lion.py,sha256=LkXedRYK_IxJ1Xebn9dzOOMOnozM_OXVD20_wqOIB2w,905
+ torchzero/modules/optimizers/rmsprop.py,sha256=pqeaGxec-IY1i4Io5_iMZaef8nOKVZPVblVdibaWy40,2153
+ torchzero/modules/optimizers/rprop.py,sha256=nvv-PPvEpGJs0imJNr0BPRV0X_bMrEY2TaUeF986sa0,3604
+ torchzero/modules/optimizers/sgd.py,sha256=dCSQ1UwiycAR8-nSTLnk0G8sieiNAgUyChZsCwgvEOY,2246
+ torchzero/modules/orthogonalization/__init__.py,sha256=brvrj57U_1qKKU8AUqbe9lyY9jsfzZvUGnvsU4wjDSQ,151
+ torchzero/modules/orthogonalization/newtonschulz.py,sha256=IEqA_Udi2E5AbG2K_XZEz0O8GbGuKjNmQ1GVdEZP0xs,7520
+ torchzero/modules/orthogonalization/svd.py,sha256=SsuPFjODwM7uJzOxXW8LbLAbU-4hlLBmp1Fh5xWDCW4,3652
+ torchzero/modules/quasi_newton/__init__.py,sha256=G5lW-q0pI47SJ2AZuY4qkjbqfYzJS0qATDo7V0BGzD4,124
+ torchzero/modules/regularization/__init__.py,sha256=FD_KERcYY4bdVR22OuKXUUVt63jyfE9V84evwDC1edo,498
+ torchzero/modules/regularization/dropout.py,sha256=YlJmmYOVaYIoeQQW2z8kXZfRyXntZfg4tX0m6_w6JDo,1004
+ torchzero/modules/regularization/noise.py,sha256=Z_BrotV5QE5HY4E6DhIpoSjsqejaCNm_n393euTtA3o,3014
+ torchzero/modules/regularization/normalization.py,sha256=FE51Ww-aDXTQSJr-qj2YxBHRCOjluZC7TqCmXF3Xagc,12142
+ torchzero/modules/regularization/ortho_grad.py,sha256=DnUYXAc8VCMSOS5NoZSf1XrU9TStdyt8QpU8bhBzYqE,3127
+ torchzero/modules/regularization/weight_decay.py,sha256=4QeTpTra52MLyTrgCSaeaB8JxN-l8gVDq4JIwNoL41k,3625
+ torchzero/modules/scheduling/__init__.py,sha256=NxR1cpKXtZSbVqPRlzzzgH3_JBMuxQCf3nUhmxBN2Cc,89
+ torchzero/modules/scheduling/lr_schedulers.py,sha256=8zEK_wtE9IqnhHtS3FYNdh_f83q8V90YqLa1zWVzEW4,4965
+ torchzero/modules/scheduling/step_size.py,sha256=UOE2ZIcVTX7FHlO8BUqtMy31_jmOKQMpgkkc-WgLfZs,3674
+ torchzero/modules/second_order/__init__.py,sha256=oRyRy8mCjurMINHNdsxjlptLbQNU0VnstkDm1Ccv_80,182
+ torchzero/modules/second_order/newton.py,sha256=RPn0kHg6ZCAZLQLFW82eQAh7B1-U6d70xTb-CHbJLUs,6765
+ torchzero/modules/smoothing/__init__.py,sha256=-mxey48zc72aGV0dv4TLHeFpf98QZjlxMu5Pct1LI_Y,195
+ torchzero/modules/smoothing/gaussian_smoothing.py,sha256=9oxVMv--B0ESzOrhEaqQQeTWaVrSIqJXcU77VaRB2KE,3835
+ torchzero/modules/smoothing/laplacian_smoothing.py,sha256=TXy2IgVqZehH97PQWn655mK7nDlNEr3EeeCkKEVT0tA,5553
+ torchzero/modules/weight_averaging/__init__.py,sha256=nJJRs68AV2G4rGwiiHNRfm6XmtM-xUev1pCtzNIVfa8,66
+ torchzero/modules/weight_averaging/ema.py,sha256=tun6TNOMQWeAZyvkbJEDLf3tGgvJPhhWAAA5ScBsT08,2857
+ torchzero/modules/weight_averaging/swa.py,sha256=A4nRGQyMnZ2CaOW20iVfAs_iqV3lnULt7t--mjs9-TY,6772
+ torchzero/optim/__init__.py,sha256=vk6pIYJHWAGYJMdtJ1otsmVph-pdL5HwBg-CTeBCGso,253
+ torchzero/optim/modular.py,sha256=B1ypLnbGY87nUdylPcbukdNoXvKa5GHCl-_14XRqLWs,6066
+ torchzero/optim/experimental/__init__.py,sha256=RqNzJu5mVl3T0u7cf4TBzSiA20M1kxTZVYWjSVhEHuU,585
+ torchzero/optim/experimental/experimental.py,sha256=tMHZVbEXm3s6mMr7unFSvk_Jks3uAaAG0fzsH6gr098,10928
+ torchzero/optim/experimental/ray_search.py,sha256=GYyssL64D6RiImrZ2tchoZJ04x9rX-Bp1y2nQXEGxX0,2662
+ torchzero/optim/first_order/__init__.py,sha256=CRT4farcwi8sO1qqDGxXv1856zOwuKlJKBIAIvpL2Z0,336
+ torchzero/optim/first_order/cautious.py,sha256=XBeqrLQ4gFKVUYnJI5ROmF9wQJGhY90HR6UG5IS7vYk,6610
+ torchzero/optim/first_order/forward_gradient.py,sha256=EM6W8MezS6iUtW36lxozdo2U4aqlDKE7Zf-0s1LACXQ,3066
+ torchzero/optim/first_order/optimizers.py,sha256=jYmU6YDsYRGMRsCNkYc6AlvOf3wlU7Uv1xUrzl0o8zo,24501
+ torchzero/optim/quasi_newton/__init__.py,sha256=0X83dl-85_j3ck8itWxJR49ZbFeOcWurW6FI8J12F1w,49
+ torchzero/optim/quasi_newton/directional_newton.py,sha256=oZ-If8SRcFXTDFKS_zlAcJnif-v5dTCR9HXqmfvsvNA,2595
+ torchzero/optim/second_order/__init__.py,sha256=3Gt0dR4NzBK07TV0NF8KZImUGHbI8E2zncDmhIC377I,31
+ torchzero/optim/second_order/newton.py,sha256=-DqJrS8JPea6Y9jp5lDV14KyP8A24YKiPxJ32Vsfiv4,3848
+ torchzero/optim/wrappers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ torchzero/optim/wrappers/nevergrad.py,sha256=4PLqfs2L9XJhveyX6l7kJu1cIPl6uv7_UD76amIlP7I,4733
+ torchzero/optim/wrappers/nlopt.py,sha256=fGDOZ82sRI2VLH3hKIAhZY4EuKSdu_g217c-NZvD_rs,7104
+ torchzero/optim/wrappers/scipy.py,sha256=_BQwFDQ7SBqIA5i1SJ29Xj0jDXVV8MQ_9RcsPT3U6VQ,18047
+ torchzero/optim/zeroth_order/__init__.py,sha256=_6T0znO6V63Niq7DMhJPgUuMc_nPvAGxjCjMdf-r64U,218
+ torchzero/optim/zeroth_order/fdm.py,sha256=5iJc_F_tRR4cGQfy2Jr8PmAnCGrPva89ZWczSdcBkFk,3686
+ torchzero/optim/zeroth_order/newton_fdm.py,sha256=-5E1FGzeJMr8_IougzE_FEOPFt9pEjQxID4Y89Hpmh0,6537
+ torchzero/optim/zeroth_order/rfdm.py,sha256=_Y7yiF1bsVRlXt5IK-3zQccwVl95JF0-Xw-fl8Q_7y4,10529
+ torchzero/optim/zeroth_order/rs.py,sha256=3w2nnPGWPecourEdUG583vchcqdNxC6Q_PBL3l0PvCk,3333
+ torchzero/random/__init__.py,sha256=8EowQhC4yTZuF8w1ZDl73YZtLej8SuhxCk1Bkifbkms,93
+ torchzero/random/random.py,sha256=Oq4GvM_6AOsabg5ke6b8h51V9npyHVxp8ob_wC5D-Aw,2829
+ torchzero/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ torchzero/utils/compile.py,sha256=pYEyX8P26iCb_hFqAXC8IP2SSQrRfC7ZDhXS0vVCsfY,1257
+ torchzero/utils/derivatives.py,sha256=koLmuUcVcX41SrH_9rvfJyMXyHyocNLuZ-C8Kr2B7hk,4844
+ torchzero/utils/python_tools.py,sha256=kkyDhoP695HhapfKrdjcrRbRAbcvB0ArP1pkxuVUlf0,1192
+ torchzero/utils/torch_tools.py,sha256=sSBY5Bmk9LOAgPtaq-6TK4wDgPXsg6FIWxv8CVDx82k,3580
+ torchzero-0.0.1.dist-info/LICENSE,sha256=r9ZciAoZoqKC_FNADE0ORukj1p1XhLXEbegdsAyqhJs,1087
+ torchzero-0.0.1.dist-info/METADATA,sha256=WKhVATagGeS8DLjdTEc8nyxBfOO_MswXKjL5c49joVw,5963
+ torchzero-0.0.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ torchzero-0.0.1.dist-info/top_level.txt,sha256=isztuDR1ZGo8p2tORLa-vNuomcbLj7Xd208lhd-pVPs,10
+ torchzero-0.0.1.dist-info/RECORD,,
torchzero-0.0.1.dist-info/WHEEL
@@ -0,0 +1,5 @@
+ Wheel-Version: 1.0
+ Generator: setuptools (75.8.0)
+ Root-Is-Purelib: true
+ Tag: py3-none-any
+
torchzero-0.0.1.dist-info/top_level.txt
@@ -0,0 +1 @@
+ torchzero