torchzero-0.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. torchzero/__init__.py +4 -0
  2. torchzero/core/__init__.py +13 -0
  3. torchzero/core/module.py +471 -0
  4. torchzero/core/tensorlist_optimizer.py +219 -0
  5. torchzero/modules/__init__.py +21 -0
  6. torchzero/modules/adaptive/__init__.py +4 -0
  7. torchzero/modules/adaptive/adaptive.py +192 -0
  8. torchzero/modules/experimental/__init__.py +19 -0
  9. torchzero/modules/experimental/experimental.py +294 -0
  10. torchzero/modules/experimental/quad_interp.py +104 -0
  11. torchzero/modules/experimental/subspace.py +259 -0
  12. torchzero/modules/gradient_approximation/__init__.py +7 -0
  13. torchzero/modules/gradient_approximation/_fd_formulas.py +3 -0
  14. torchzero/modules/gradient_approximation/base_approximator.py +110 -0
  15. torchzero/modules/gradient_approximation/fdm.py +125 -0
  16. torchzero/modules/gradient_approximation/forward_gradient.py +163 -0
  17. torchzero/modules/gradient_approximation/newton_fdm.py +198 -0
  18. torchzero/modules/gradient_approximation/rfdm.py +125 -0
  19. torchzero/modules/line_search/__init__.py +30 -0
  20. torchzero/modules/line_search/armijo.py +56 -0
  21. torchzero/modules/line_search/base_ls.py +139 -0
  22. torchzero/modules/line_search/directional_newton.py +217 -0
  23. torchzero/modules/line_search/grid_ls.py +158 -0
  24. torchzero/modules/line_search/scipy_minimize_scalar.py +62 -0
  25. torchzero/modules/meta/__init__.py +12 -0
  26. torchzero/modules/meta/alternate.py +65 -0
  27. torchzero/modules/meta/grafting.py +195 -0
  28. torchzero/modules/meta/optimizer_wrapper.py +173 -0
  29. torchzero/modules/meta/return_overrides.py +46 -0
  30. torchzero/modules/misc/__init__.py +10 -0
  31. torchzero/modules/misc/accumulate.py +43 -0
  32. torchzero/modules/misc/basic.py +115 -0
  33. torchzero/modules/misc/lr.py +96 -0
  34. torchzero/modules/misc/multistep.py +51 -0
  35. torchzero/modules/misc/on_increase.py +53 -0
  36. torchzero/modules/momentum/__init__.py +4 -0
  37. torchzero/modules/momentum/momentum.py +106 -0
  38. torchzero/modules/operations/__init__.py +29 -0
  39. torchzero/modules/operations/multi.py +298 -0
  40. torchzero/modules/operations/reduction.py +134 -0
  41. torchzero/modules/operations/singular.py +113 -0
  42. torchzero/modules/optimizers/__init__.py +10 -0
  43. torchzero/modules/optimizers/adagrad.py +49 -0
  44. torchzero/modules/optimizers/adam.py +118 -0
  45. torchzero/modules/optimizers/lion.py +28 -0
  46. torchzero/modules/optimizers/rmsprop.py +51 -0
  47. torchzero/modules/optimizers/rprop.py +99 -0
  48. torchzero/modules/optimizers/sgd.py +54 -0
  49. torchzero/modules/orthogonalization/__init__.py +2 -0
  50. torchzero/modules/orthogonalization/newtonschulz.py +159 -0
  51. torchzero/modules/orthogonalization/svd.py +86 -0
  52. torchzero/modules/quasi_newton/__init__.py +4 -0
  53. torchzero/modules/regularization/__init__.py +22 -0
  54. torchzero/modules/regularization/dropout.py +34 -0
  55. torchzero/modules/regularization/noise.py +77 -0
  56. torchzero/modules/regularization/normalization.py +328 -0
  57. torchzero/modules/regularization/ortho_grad.py +78 -0
  58. torchzero/modules/regularization/weight_decay.py +92 -0
  59. torchzero/modules/scheduling/__init__.py +2 -0
  60. torchzero/modules/scheduling/lr_schedulers.py +131 -0
  61. torchzero/modules/scheduling/step_size.py +80 -0
  62. torchzero/modules/second_order/__init__.py +4 -0
  63. torchzero/modules/second_order/newton.py +165 -0
  64. torchzero/modules/smoothing/__init__.py +5 -0
  65. torchzero/modules/smoothing/gaussian_smoothing.py +90 -0
  66. torchzero/modules/smoothing/laplacian_smoothing.py +128 -0
  67. torchzero/modules/weight_averaging/__init__.py +2 -0
  68. torchzero/modules/weight_averaging/ema.py +72 -0
  69. torchzero/modules/weight_averaging/swa.py +171 -0
  70. torchzero/optim/__init__.py +10 -0
  71. torchzero/optim/experimental/__init__.py +20 -0
  72. torchzero/optim/experimental/experimental.py +343 -0
  73. torchzero/optim/experimental/ray_search.py +83 -0
  74. torchzero/optim/first_order/__init__.py +18 -0
  75. torchzero/optim/first_order/cautious.py +158 -0
  76. torchzero/optim/first_order/forward_gradient.py +70 -0
  77. torchzero/optim/first_order/optimizers.py +570 -0
  78. torchzero/optim/modular.py +132 -0
  79. torchzero/optim/quasi_newton/__init__.py +1 -0
  80. torchzero/optim/quasi_newton/directional_newton.py +58 -0
  81. torchzero/optim/second_order/__init__.py +1 -0
  82. torchzero/optim/second_order/newton.py +94 -0
  83. torchzero/optim/wrappers/__init__.py +0 -0
  84. torchzero/optim/wrappers/nevergrad.py +113 -0
  85. torchzero/optim/wrappers/nlopt.py +165 -0
  86. torchzero/optim/wrappers/scipy.py +439 -0
  87. torchzero/optim/zeroth_order/__init__.py +4 -0
  88. torchzero/optim/zeroth_order/fdm.py +87 -0
  89. torchzero/optim/zeroth_order/newton_fdm.py +146 -0
  90. torchzero/optim/zeroth_order/rfdm.py +217 -0
  91. torchzero/optim/zeroth_order/rs.py +85 -0
  92. torchzero/random/__init__.py +1 -0
  93. torchzero/random/random.py +46 -0
  94. torchzero/tensorlist.py +819 -0
  95. torchzero/utils/__init__.py +0 -0
  96. torchzero/utils/compile.py +39 -0
  97. torchzero/utils/derivatives.py +99 -0
  98. torchzero/utils/python_tools.py +25 -0
  99. torchzero/utils/torch_tools.py +92 -0
  100. torchzero-0.0.1.dist-info/LICENSE +21 -0
  101. torchzero-0.0.1.dist-info/METADATA +118 -0
  102. torchzero-0.0.1.dist-info/RECORD +104 -0
  103. torchzero-0.0.1.dist-info/WHEEL +5 -0
  104. torchzero-0.0.1.dist-info/top_level.txt +1 -0
torchzero/utils/compile.py
@@ -0,0 +1,39 @@
+ """Experimental and I need to test this on Windows."""
+ import warnings
+ import functools
+ import torch
+
+ ENABLE_COMPILING = True
+
+ def _try_compiling(warn=False):
+     def add(x, y): return x + y
+     compiled_add = torch.compile(add)
+     try:
+         res = compiled_add(torch.tensor(1.), torch.tensor(2.))
+     except Exception as e:
+         if warn: warnings.warn(f'Compiling failed so no further functions will be compiled:\n{e}')
+         return False
+     if res == 3: return True
+     return False
+
+ class _Compiler:
+     def __init__(self, warn=False):
+         self.can_compile = None
+         self.warn = warn
+
+     def maybe_compile(self, fn, **kwargs):
+         if self.can_compile is None: self.can_compile = _try_compiling(self.warn)
+         if self.can_compile: return torch.compile(fn, **kwargs)
+         return fn
+
+ _COMPILER = _Compiler(False)
+
+ @functools.wraps(torch.compile)
+ def maybe_compile(*args, **kwargs):
+     """Compiles a function if possible. Same usage as `torch.compile`.
+
+     On the first call this will attempt to compile a simple test function. If that fails, no further functions will be compiled.
+     I still need to test this on Windows.
+     """
+     if ENABLE_COMPILING: return _COMPILER.maybe_compile(*args, **kwargs)
+     return args[0]
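A minimal usage sketch for `maybe_compile` above. The import path `torchzero.utils.compile` is assumed from the file listing, and `fused_update` is a hypothetical example function.

```py
# Sketch, assuming `maybe_compile` is importable from torchzero.utils.compile
# (per the file listing); `fused_update` is a hypothetical example function.
import torch
from torchzero.utils.compile import maybe_compile

@maybe_compile
def fused_update(param: torch.Tensor, grad: torch.Tensor, lr: float) -> torch.Tensor:
    # plain SGD-style update; compiled only if torch.compile works on this machine
    return param - lr * grad

p, g = torch.randn(10), torch.randn(10)
p = fused_update(p, g, 1e-3)  # silently falls back to eager execution if compilation failed
```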
torchzero/utils/derivatives.py
@@ -0,0 +1,99 @@
+ from collections.abc import Sequence, Iterable
+
+ import torch
+
+ def _jacobian(input: Sequence[torch.Tensor], wrt: Sequence[torch.Tensor], create_graph=False):
+     flat_input = torch.cat([i.reshape(-1) for i in input])
+     grad_outputs = torch.eye(len(flat_input), device=input[0].device, dtype=input[0].dtype)
+     jac = []
+     for i in range(flat_input.numel()):
+         jac.append(torch.autograd.grad(
+             flat_input,
+             wrt,
+             grad_outputs[i],
+             retain_graph=True,
+             create_graph=create_graph,
+             allow_unused=True,
+             is_grads_batched=False,
+         ))
+     return [torch.stack(z) for z in zip(*jac)]
+
+
+ def _jacobian_batched(input: Sequence[torch.Tensor], wrt: Sequence[torch.Tensor], create_graph=False):
+     flat_input = torch.cat([i.reshape(-1) for i in input])
+     return torch.autograd.grad(
+         flat_input,
+         wrt,
+         torch.eye(len(flat_input), device=input[0].device, dtype=input[0].dtype),
+         retain_graph=True,
+         create_graph=create_graph,
+         allow_unused=True,
+         is_grads_batched=True,
+     )
+
+ def jacobian(input: Sequence[torch.Tensor], wrt: Sequence[torch.Tensor], create_graph=False, batched=True) -> Sequence[torch.Tensor]:
+     """Calculate the jacobian of a sequence of tensors w.r.t. another sequence of tensors.
+     Returns a sequence of tensors with the same length as `wrt`.
+     Each tensor will have the shape `(*input.shape, *wrt[i].shape)`.
+
+     Args:
+         input (Sequence[torch.Tensor]): input sequence of tensors.
+         wrt (Sequence[torch.Tensor]): sequence of tensors to differentiate w.r.t.
+         create_graph (bool, optional):
+             pytorch option; if True, the graph of the derivative will be constructed,
+             allowing higher order derivative products to be computed. Default: False.
+         batched (bool, optional): use the faster but experimental pytorch batched jacobian.
+             This only has an effect when `input` has more than 1 element. Defaults to True.
+
+     Returns:
+         sequence of tensors with the same length as `wrt`.
+     """
+     if batched: return _jacobian_batched(input, wrt, create_graph)
+     return _jacobian(input, wrt, create_graph)
+
+ def hessian(input: Sequence[torch.Tensor], wrt: Sequence[torch.Tensor], create_graph=False, batched=True):
+     """Calculate the hessian of a sequence of tensors w.r.t. another sequence of tensors.
+     Returns a sequence of tensors with the same length as `wrt`.
+     If you need a hessian matrix out of that sequence, pass it to `hessian_list_to_mat`.
+
+     Args:
+         input (Sequence[torch.Tensor]): input sequence of tensors.
+         wrt (Sequence[torch.Tensor]): sequence of tensors to differentiate w.r.t.
+         create_graph (bool, optional):
+             pytorch option; if True, the graph of the derivative will be constructed,
+             allowing higher order derivative products to be computed. Default: False.
+         batched (bool, optional): use the faster but experimental pytorch batched grad. Defaults to True.
+
+     Returns:
+         sequence of tensors with the same length as `wrt`.
+     """
+     return jacobian(jacobian(input, wrt, create_graph=True, batched=batched), wrt, create_graph=create_graph, batched=batched)
+
+ def jacobian_and_hessian(input: Sequence[torch.Tensor], wrt: Sequence[torch.Tensor], create_graph=False, batched=True):
+     """Calculate the jacobian and hessian of a sequence of tensors w.r.t. another sequence of tensors.
+     Calculating the hessian requires calculating the jacobian, so this function is more efficient than
+     calling `jacobian` and `hessian` separately, which would calculate the jacobian twice.
+
+     Args:
+         input (Sequence[torch.Tensor]): input sequence of tensors.
+         wrt (Sequence[torch.Tensor]): sequence of tensors to differentiate w.r.t.
+         create_graph (bool, optional):
+             pytorch option; if True, the graph of the derivative will be constructed,
+             allowing higher order derivative products to be computed. Default: False.
+         batched (bool, optional): use the faster but experimental pytorch batched grad. Defaults to True.
+
+     Returns:
+         tuple with the jacobians sequence and the hessians sequence.
+     """
+     jac = jacobian(input, wrt, create_graph=True, batched=batched)
+     return jac, jacobian(jac, wrt, batched=batched, create_graph=create_graph)
+
+ def jacobian_list_to_vec(jacobians: Iterable[torch.Tensor]):
+     """Flattens and concatenates a sequence of tensors."""
+     return torch.cat([i.ravel() for i in jacobians], 0)
+
+ def hessian_list_to_mat(hessians: Sequence[torch.Tensor]):
+     """Takes the output of `hessian` and returns the 2D hessian matrix.
+     Note - I have only tested this for cases where the input is a scalar."""
+     return torch.cat([h.reshape(h.size(0), h[1].numel()) for h in hessians], 1)
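A hedged usage sketch for the helpers above. The import path `torchzero.utils.derivatives` is assumed from the file listing, and the tiny quadratic loss is illustrative only.

```py
# Sketch, assuming the functions above are importable from torchzero.utils.derivatives.
import torch
from torchzero.utils.derivatives import jacobian_and_hessian, jacobian_list_to_vec, hessian_list_to_mat

w = torch.randn(3, requires_grad=True)
b = torch.randn(1, requires_grad=True)
loss = (w ** 2).sum() + (b ** 2).sum()  # scalar loss, so `input` is a single-element sequence

# gradient (jacobian of the scalar loss) and hessian w.r.t. both parameter tensors
jac, hess = jacobian_and_hessian([loss], [w, b])

grad_vec = jacobian_list_to_vec(jac)   # flat gradient of length 4
hess_mat = hessian_list_to_mat(hess)   # full 4x4 hessian matrix
```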
torchzero/utils/python_tools.py
@@ -0,0 +1,25 @@
+ import functools
+ import operator
+ from typing import Any, TypeVar
+ from collections.abc import Iterable
+
+ import torch
+
+ def _flatten_no_check(iterable: Iterable) -> list[Any]:
+     """Flatten an iterable of iterables and return a flattened list. Note that if `iterable` is not Iterable, this will return `[iterable]`."""
+     if isinstance(iterable, Iterable):
+         return [a for i in iterable for a in _flatten_no_check(i)]
+     return [iterable]
+
+ def flatten(iterable: Iterable) -> list[Any]:
+     """Flatten an iterable of iterables and return a flattened list. If `iterable` is not iterable, raises a TypeError."""
+     if isinstance(iterable, Iterable): return [a for i in iterable for a in _flatten_no_check(i)]
+     raise TypeError(f'passed object is not an iterable, {type(iterable) = }')
+
+ X = TypeVar("X")
+ # def reduce_dim[X](x: Iterable[Iterable[X]]) -> list[X]: # pylint:disable=E0602
+ def reduce_dim(x: Iterable[Iterable[X]]) -> list[X]: # pylint:disable=E0602
+     """Reduces one level of nesting. Takes an iterable of iterables of X, and returns an iterable of X."""
+     return functools.reduce(operator.iconcat, x, [])
+
+ _ScalarLoss = int | float | bool | torch.Tensor
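A quick illustrative example of `flatten` and `reduce_dim` above (the import path `torchzero.utils.python_tools` is assumed from the file listing).

```py
# Sketch, assuming the helpers are importable from torchzero.utils.python_tools.
from torchzero.utils.python_tools import flatten, reduce_dim

print(flatten([[1, 2], [3, [4, 5]], 6]))  # recursive flattening -> [1, 2, 3, 4, 5, 6]
print(reduce_dim([[1, 2], [3, 4]]))       # removes one nesting level -> [1, 2, 3, 4]
```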
torchzero/utils/torch_tools.py
@@ -0,0 +1,92 @@
+ import copyreg
+ import weakref
+
+ import torch
+ import numpy as np
+
+ def swap_tensors_no_use_count_check(t1, t2):
+     """
+     Copy of the pytorch function without the use count check.
+
+     This function swaps the content of the two Tensor objects.
+     At a high level, this will make t1 have the content of t2 while preserving
+     its identity.
+
+     This will not work if t1 and t2 have different slots.
+     """
+     # Ensure there are no weakrefs
+     if weakref.getweakrefs(t1):
+         raise RuntimeError("Cannot swap t1 because it has weakref associated with it")
+     if weakref.getweakrefs(t2):
+         raise RuntimeError("Cannot swap t2 because it has weakref associated with it")
+     t1_slots = set(copyreg._slotnames(t1.__class__))  # type: ignore[attr-defined]
+     t2_slots = set(copyreg._slotnames(t2.__class__))  # type: ignore[attr-defined]
+     if t1_slots != t2_slots:
+         raise RuntimeError("Cannot swap t1 and t2 if they have different slots")
+
+     def swap_attr(name):
+         tmp = getattr(t1, name)
+         setattr(t1, name, (getattr(t2, name)))
+         setattr(t2, name, tmp)
+
+     # def error_pre_hook(grad_outputs):
+     #     raise RuntimeError(
+     #         "Trying to execute AccumulateGrad node that was poisoned by swap_tensors "
+     #         "this can happen when you try to run backward on a tensor that was swapped. "
+     #         "For a module m with `torch.__future__.set_swap_module_params_on_conversion(True)` "
+     #         "you should not change the device or dtype of the module (e.g. `m.cpu()` or `m.half()`) "
+     #         "between running forward and backward. To resolve this, please only change the "
+     #         "device/dtype before running forward (or after both forward and backward)."
+     #     )
+
+     # def check_use_count(t, name="t1"):
+     #     use_count = t._use_count()
+     #     error_str = (
+     #         f"Expected use_count of {name} to be 1 or 2 with an AccumulateGrad node but got {use_count} "
+     #         f"make sure you are not holding references to the tensor in other places."
+     #     )
+     #     if use_count > 1:
+     #         if use_count == 2 and t.is_leaf:
+     #             accum_grad_node = torch.autograd.graph.get_gradient_edge(t).node
+     #             # Make sure that the accumulate_grad node was not lazy_init-ed by get_gradient_edge
+     #             if t._use_count() == 2:
+     #                 accum_grad_node.register_prehook(error_pre_hook)
+     #             else:
+     #                 raise RuntimeError(error_str)
+     #         else:
+     #             raise RuntimeError(error_str)
+
+     # check_use_count(t1, "t1")
+     # check_use_count(t2, "t2")
+
+     # Swap the types
+     # Note that this will fail if there are mismatched slots
+     swap_attr("__class__")
+
+     # Swap the dynamic attributes
+     swap_attr("__dict__")
+
+     # Swap the slots
+     for slot in t1_slots:
+         if hasattr(t1, slot) and hasattr(t2, slot):
+             swap_attr(slot)
+         elif hasattr(t1, slot):
+             setattr(t2, slot, (getattr(t1, slot)))
+             delattr(t1, slot)
+         elif hasattr(t2, slot):
+             setattr(t1, slot, (getattr(t2, slot)))
+             delattr(t2, slot)
+
+     # Swap the at::Tensor they point to
+     torch._C._swap_tensor_impl(t1, t2)
+
+
+ def totensor(x) -> torch.Tensor:
+     if isinstance(x, torch.Tensor): return x
+     if isinstance(x, np.ndarray): return torch.from_numpy(x)
+     return torch.from_numpy(np.asarray(x))
+
+ def tofloat(x) -> float:
+     if isinstance(x, torch.Tensor): return x.detach().cpu().item()
+     if isinstance(x, np.ndarray): return x.item()
+     return float(x)
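A short sketch of the conversion helpers above (the import path `torchzero.utils.torch_tools` is assumed from the file listing).

```py
# Sketch, assuming the helpers are importable from torchzero.utils.torch_tools.
import numpy as np
import torch
from torchzero.utils.torch_tools import totensor, tofloat

t = totensor(np.arange(3))        # ndarray -> tensor via torch.from_numpy
u = totensor([1.0, 2.0, 3.0])     # anything else is routed through np.asarray first
x = tofloat(torch.tensor(2.5))    # single-element tensor -> python float 2.5
```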
torchzero-0.0.1.dist-info/LICENSE
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2024 inikishev
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
torchzero-0.0.1.dist-info/METADATA
@@ -0,0 +1,118 @@
+ Metadata-Version: 2.2
+ Name: torchzero
+ Version: 0.0.1
+ Author-email: Ivan Nikishev <nkshv2@gmail.com>
+ License: MIT License
+
+ Copyright (c) 2024 inikishev
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+
+ Project-URL: Homepage, https://github.com/inikishev/torchzero
+ Project-URL: Repository, https://github.com/inikishev/torchzero
+ Project-URL: Issues, https://github.com/inikishev/torchzero/isses
+ Requires-Python: >=3.10
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: torch
+ Requires-Dist: numpy
+ Requires-Dist: typing_extensions
+
+ ![example workflow](https://github.com/inikishev/torchzero/actions/workflows/tests.yml/badge.svg)
+
+ # torchzero
+
+ This is a work-in-progress optimizer library for pytorch with composable zeroth, first, second order and quasi-newton methods, gradient approximation, line searches and a whole lot of other stuff.
+
+ Most optimizers are modular, meaning you can chain them like this:
+
+ ```py
+ optimizer = torchzero.optim.Modular(model.parameters(), [*list of modules*])
+ ```
+
+ For example, you might use `[ClipNorm(4), LR(1e-3), NesterovMomentum(0.9)]` for standard SGD with gradient clipping and nesterov momentum. Move `ClipNorm` to the end to clip the update instead of the gradients. If you don't have access to gradients, add a `RandomizedFDM()` at the beginning to approximate them via randomized finite differences. Add `Cautious()` to make the optimizer cautious.
+
+ Each module takes the update produced by the previous module and works on it. That way there is no need to reimplement things like laplacian smoothing for every optimizer, and it is easy to experiment with grafting, interpolation between different optimizers, and perhaps some weirder combinations like nested momentum.
+
+ # How to use
+
+ All modules are defined in `torchzero.modules`. You can generally mix and match them however you want. Some pre-made optimizers are available in `torchzero.optim`.
+
+ Some optimizers require a closure, which should look like this:
+
+ ```py
+ def closure(backward = True):
+     preds = model(inputs)
+     loss = loss_fn(preds, targets)
+
+     # if you can't call loss.backward() and instead use gradient-free methods,
+     # they always call closure with backward=False,
+     # so you can remove the part below, but keep the unused backward argument.
+     if backward:
+         optimizer.zero_grad()
+         loss.backward()
+     return loss
+
+ optimizer.step(closure)
+ ```
+
+ This closure will also work with all built-in pytorch optimizers, including LBFGS, with all optimizers in this library, as well as with most custom ones.
+
+ # Contents
+
+ Docs are available at [torchzero.readthedocs.io](https://torchzero.readthedocs.io/en/latest/). A preliminary list of all modules is available here <https://torchzero.readthedocs.io/en/latest/autoapi/torchzero/modules/index.html#classes>. Some of the implemented algorithms:
+
+ - SGD/Rprop/RMSProp/AdaGrad/Adam as composable modules. They are also tested to exactly match the built-in pytorch versions.
+ - Cautious Optimizers (<https://huggingface.co/papers/2411.16085>)
+ - Optimizer grafting (<https://openreview.net/forum?id=FpKgG31Z_i9>)
+ - Laplacian smoothing (<https://arxiv.org/abs/1806.06317>)
+ - Polyak momentum, nesterov momentum
+ - Gradient norm and value clipping, gradient normalization
+ - Gradient centralization (<https://arxiv.org/abs/2004.01461>)
+ - Learning rate dropout (<https://pubmed.ncbi.nlm.nih.gov/35286266/>)
+ - Forward gradient (<https://arxiv.org/abs/2202.08587>)
+ - Gradient approximation via finite differences or randomized finite differences, which includes SPSA, RDSA, FDSA and Gaussian smoothing (<https://arxiv.org/abs/2211.13566v3>)
+ - Various line searches
+ - Exact Newton's method (with Levenberg-Marquardt regularization), Newton with hessian approximation via finite differences, subspace finite-differences Newton
+ - Directional Newton via one additional forward pass
+
+ All modules should be quite fast, especially on models with many different parameters, due to `_foreach` operations.
+
+ I am getting to the point where I can start focusing on good docs and tests. As of now, the code should be considered experimental, untested and subject to change, so feel free to try it, but be careful if using it for an actual project.
+
+ # Wrappers
+
+ ### scipy.optimize.minimize wrapper
+
+ A scipy.optimize.minimize wrapper with support for both the gradient and the hessian via batched autograd:
+
+ ```py
+ from torchzero.optim.wrappers.scipy import ScipyMinimize
+ opt = ScipyMinimize(model.parameters(), method = 'trust-krylov')
+ ```
+
+ Use it like any other optimizer (make sure the closure accepts a `backward` argument like the one from **How to use**). Note that it performs a full minimization on each step.
+
+ ### Nevergrad wrapper
+
+ ```py
+ opt = NevergradOptimizer(bench.parameters(), ng.optimizers.NGOptBase, budget = 1000)
+ ```
+
+ Use it like any other optimizer (make sure the closure accepts a `backward` argument like the one from **How to use**).
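The README above describes chaining modules with `Modular`; a hedged sketch of what that could look like in a training step is given below. The module names and arguments `ClipNorm(4)`, `LR(1e-3)`, `NesterovMomentum(0.9)` are taken verbatim from the README, and `torchzero.modules` is assumed to be their import location as the README states; treat the rest as illustrative.

```py
# Sketch based on the README's own example list [ClipNorm(4), LR(1e-3), NesterovMomentum(0.9)];
# import locations are assumed from the README ("All modules are defined in torchzero.modules").
import torch
import torchzero
from torchzero.modules import ClipNorm, LR, NesterovMomentum

model = torch.nn.Linear(10, 1)
opt = torchzero.optim.Modular(model.parameters(), [ClipNorm(4), LR(1e-3), NesterovMomentum(0.9)])

inputs, targets = torch.randn(8, 10), torch.randn(8, 1)
loss = torch.nn.functional.mse_loss(model(inputs), targets)
loss.backward()
opt.step()  # each module transforms the update of the previous one: clip -> lr -> momentum
```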
torchzero-0.0.1.dist-info/RECORD
@@ -0,0 +1,104 @@
+ torchzero/__init__.py,sha256=CCIYfhGNYMnRP_cdXL7DgocxkEWYUZYgB3Sf1T5tdYY,203
+ torchzero/tensorlist.py,sha256=5XvutWeZdyUcAVVkoEXbgSjvTiDe0f1806wsINfzlRg,41387
+ torchzero/core/__init__.py,sha256=aw2p6Gt0qrtUM8x2msspAG8JHMaD2ma11sMhXm-rC90,267
+ torchzero/core/module.py,sha256=msvh-e7cE3vArXb5PLrrsG3j7d-94GG3V4aBXYKFZTo,21254
+ torchzero/core/tensorlist_optimizer.py,sha256=hIQIW7uBwddlJxXhR4-xnll0ixY9ZJY0i0oH-wqxfX0,9963
+ torchzero/modules/__init__.py,sha256=5f8kt2mMn1eo9YcjXc3ESW-bqMQIRf646V3zlr8UAO4,571
+ torchzero/modules/adaptive/__init__.py,sha256=YBVDXCosr4-C-GFCoreHS3DFyHiYMhCbOWgdhVVaZ_E,161
+ torchzero/modules/adaptive/adaptive.py,sha256=msQkLlxqt3fFWMLGyr1Pi77bGPv-QAyfDfQ1Oipl5Yo,6473
+ torchzero/modules/experimental/__init__.py,sha256=dRLPbhTO8efkRQHB-8Z29dKS3BEleCso1dsXxgccFgM,647
+ torchzero/modules/experimental/experimental.py,sha256=Z7g3AhZ5udtjIF_3a5-GdvtMOSt_uMmyJF8AKshw9i0,9853
+ torchzero/modules/experimental/quad_interp.py,sha256=iAP6r2uHp2BDsGNFYhpIHuv1l5z7ZXQZLZU1E-VO7eE,4117
+ torchzero/modules/experimental/subspace.py,sha256=KgTIdzv75hlmPFs9nCMV0M3NWa7zcp9IK0OLFsl5UrU,11801
+ torchzero/modules/gradient_approximation/__init__.py,sha256=q8rNkk2PK6Y9zk42Mq8pY2YF6fHt5QuJd7BK-RTFKpg,179
+ torchzero/modules/gradient_approximation/_fd_formulas.py,sha256=mXqRwsDYjXi3pnI4mkpwwtJE85omYEvcmtbJAOfpg9o,82
+ torchzero/modules/gradient_approximation/base_approximator.py,sha256=lNDrbGrsfhru7u40w2EDd5Xccv9oQxRaEDOLoVnBJdk,3975
+ torchzero/modules/gradient_approximation/fdm.py,sha256=yBvWmkKJqVMd0TUSZcEr6_ulNTtlZbE7rl0uLI7BADY,4977
+ torchzero/modules/gradient_approximation/forward_gradient.py,sha256=3CC-tcUIAL0d422WHUQLPx0UMcU5URQ5uYCNbgpi19M,6456
+ torchzero/modules/gradient_approximation/newton_fdm.py,sha256=foEjnOUn-9Til6IP6x6E7-wVoAnKGMfbyk1hfmuRYcI,7116
+ torchzero/modules/gradient_approximation/rfdm.py,sha256=iA2ydwkk9iRVPYcaECqUupprj6nrgmRKPVN0936gYf4,4967
+ torchzero/modules/line_search/__init__.py,sha256=hYysFi5b9a5g0jcRNkgZYGRcZ1V7_JacBVWMR7idP38,1380
+ torchzero/modules/line_search/armijo.py,sha256=2-tErIpO8p3XhY9uKrwGsaohEAN6h5tZ847_hXTPjxs,1966
+ torchzero/modules/line_search/base_ls.py,sha256=uRHg6n9lcLrBrIAUZLuEiJuWaZDQR-rqNO0ZxZYGAXo,5330
+ torchzero/modules/line_search/directional_newton.py,sha256=LLMjDu9nzy-WiqXb2Kzc4kRzhCoQOFc-1j_9hOyxt00,9168
+ torchzero/modules/line_search/grid_ls.py,sha256=PLpi8R_KIc8xZ6IxJmeLgKPJQPSgd5M4T-pj33ykLnw,5614
+ torchzero/modules/line_search/scipy_minimize_scalar.py,sha256=6JS603_sphNxj4Ji2Ia4gWcyqIM326MVGMHLWaQDXBA,2201
+ torchzero/modules/meta/__init__.py,sha256=ARVR3Vzvq50n-3uFMNxcGUDd2r4Euamay5UYtpIxXNg,407
+ torchzero/modules/meta/alternate.py,sha256=6LOYJI6_Q2X5MKAnFcymoJP8i4lcarhaPRp9Sm0eQS0,2124
+ torchzero/modules/meta/grafting.py,sha256=tWazkxlqw2brkJjkaugasoh3XGaAsR4xOqyjQMEv5uQ,7583
+ torchzero/modules/meta/optimizer_wrapper.py,sha256=cVe63uXMLLIbuj4wkSQyj0mBmKVx9SqSVzqsgcczbzA,6448
+ torchzero/modules/meta/return_overrides.py,sha256=6bveA6P0jgNiWu-P2NumAjfrAtpOL_uoIHBljOu-aYs,2031
+ torchzero/modules/misc/__init__.py,sha256=P43XRz1nnOuJbpq_bQboLJ7hip80SQmvhua2baPdJ-c,390
+ torchzero/modules/misc/accumulate.py,sha256=qN0xJ-wnhH6pyr8OY5g0N-ObYJrPyiYgVjX11Sss10s,1330
+ torchzero/modules/misc/basic.py,sha256=dWLMkj32bp4FN44wFrF5VP1_fyN7Xxb9FTVtWP_4EWw,3492
+ torchzero/modules/misc/lr.py,sha256=V6W5AU9upjpSVpFtlXVar-zJ-qZAgY2Bts-ibeKf4bk,3525
+ torchzero/modules/misc/multistep.py,sha256=L526iSNWg8UbXdulRT09r4qcOm6jHXi4v3Ho8PjkCPQ,1781
+ torchzero/modules/misc/on_increase.py,sha256=XoMzB6VWOIKpujL030fpwQcVyW_QSls-ipCwjoveMF0,2012
+ torchzero/modules/momentum/__init__.py,sha256=Cj_3KJ76RLX-WQ7xsOoLY9mucQvnkyudTeVH8fnvdwc,138
+ torchzero/modules/momentum/momentum.py,sha256=Tywb6g0PNY4gIfXRHxEIYxgH56qoAnAtE5MzPAJh7VU,3935
+ torchzero/modules/operations/__init__.py,sha256=4SxIQMh-ixEqEDXWdizeGOzfhFw-af4Wz0A-00ypmg0,378
+ torchzero/modules/operations/multi.py,sha256=XValEBe90w1uXY94kX_DItWvjchmOrAfpUa6hsi0sxk,10317
+ torchzero/modules/operations/reduction.py,sha256=uIRrqG2V-NOvljZVrJegjfjcCSQ4pI1rN9bQnZt-EW4,4652
+ torchzero/modules/operations/singular.py,sha256=Y6ImnsUB3CJPpe_7UkT1vq9diGWXf7XKpuA7Ev7Hq2g,3569
+ torchzero/modules/optimizers/__init__.py,sha256=QZu8yvqy7ouY45Xju5Z2oHWJiFa1CslknodhWWRZRms,247
+ torchzero/modules/optimizers/adagrad.py,sha256=20r1ghs67NfCED0Z0xPZflen1gLaG3tOLiTg0WEYsNU,2015
+ torchzero/modules/optimizers/adam.py,sha256=6Lq69rsyE_UI54z8T0HRyHqo0nXLwjCv35u6BsnpVSg,4722
+ torchzero/modules/optimizers/lion.py,sha256=LkXedRYK_IxJ1Xebn9dzOOMOnozM_OXVD20_wqOIB2w,905
+ torchzero/modules/optimizers/rmsprop.py,sha256=pqeaGxec-IY1i4Io5_iMZaef8nOKVZPVblVdibaWy40,2153
+ torchzero/modules/optimizers/rprop.py,sha256=nvv-PPvEpGJs0imJNr0BPRV0X_bMrEY2TaUeF986sa0,3604
+ torchzero/modules/optimizers/sgd.py,sha256=dCSQ1UwiycAR8-nSTLnk0G8sieiNAgUyChZsCwgvEOY,2246
+ torchzero/modules/orthogonalization/__init__.py,sha256=brvrj57U_1qKKU8AUqbe9lyY9jsfzZvUGnvsU4wjDSQ,151
+ torchzero/modules/orthogonalization/newtonschulz.py,sha256=IEqA_Udi2E5AbG2K_XZEz0O8GbGuKjNmQ1GVdEZP0xs,7520
+ torchzero/modules/orthogonalization/svd.py,sha256=SsuPFjODwM7uJzOxXW8LbLAbU-4hlLBmp1Fh5xWDCW4,3652
+ torchzero/modules/quasi_newton/__init__.py,sha256=G5lW-q0pI47SJ2AZuY4qkjbqfYzJS0qATDo7V0BGzD4,124
+ torchzero/modules/regularization/__init__.py,sha256=FD_KERcYY4bdVR22OuKXUUVt63jyfE9V84evwDC1edo,498
+ torchzero/modules/regularization/dropout.py,sha256=YlJmmYOVaYIoeQQW2z8kXZfRyXntZfg4tX0m6_w6JDo,1004
+ torchzero/modules/regularization/noise.py,sha256=Z_BrotV5QE5HY4E6DhIpoSjsqejaCNm_n393euTtA3o,3014
+ torchzero/modules/regularization/normalization.py,sha256=FE51Ww-aDXTQSJr-qj2YxBHRCOjluZC7TqCmXF3Xagc,12142
+ torchzero/modules/regularization/ortho_grad.py,sha256=DnUYXAc8VCMSOS5NoZSf1XrU9TStdyt8QpU8bhBzYqE,3127
+ torchzero/modules/regularization/weight_decay.py,sha256=4QeTpTra52MLyTrgCSaeaB8JxN-l8gVDq4JIwNoL41k,3625
+ torchzero/modules/scheduling/__init__.py,sha256=NxR1cpKXtZSbVqPRlzzzgH3_JBMuxQCf3nUhmxBN2Cc,89
+ torchzero/modules/scheduling/lr_schedulers.py,sha256=8zEK_wtE9IqnhHtS3FYNdh_f83q8V90YqLa1zWVzEW4,4965
+ torchzero/modules/scheduling/step_size.py,sha256=UOE2ZIcVTX7FHlO8BUqtMy31_jmOKQMpgkkc-WgLfZs,3674
+ torchzero/modules/second_order/__init__.py,sha256=oRyRy8mCjurMINHNdsxjlptLbQNU0VnstkDm1Ccv_80,182
+ torchzero/modules/second_order/newton.py,sha256=RPn0kHg6ZCAZLQLFW82eQAh7B1-U6d70xTb-CHbJLUs,6765
+ torchzero/modules/smoothing/__init__.py,sha256=-mxey48zc72aGV0dv4TLHeFpf98QZjlxMu5Pct1LI_Y,195
+ torchzero/modules/smoothing/gaussian_smoothing.py,sha256=9oxVMv--B0ESzOrhEaqQQeTWaVrSIqJXcU77VaRB2KE,3835
+ torchzero/modules/smoothing/laplacian_smoothing.py,sha256=TXy2IgVqZehH97PQWn655mK7nDlNEr3EeeCkKEVT0tA,5553
+ torchzero/modules/weight_averaging/__init__.py,sha256=nJJRs68AV2G4rGwiiHNRfm6XmtM-xUev1pCtzNIVfa8,66
+ torchzero/modules/weight_averaging/ema.py,sha256=tun6TNOMQWeAZyvkbJEDLf3tGgvJPhhWAAA5ScBsT08,2857
+ torchzero/modules/weight_averaging/swa.py,sha256=A4nRGQyMnZ2CaOW20iVfAs_iqV3lnULt7t--mjs9-TY,6772
+ torchzero/optim/__init__.py,sha256=vk6pIYJHWAGYJMdtJ1otsmVph-pdL5HwBg-CTeBCGso,253
+ torchzero/optim/modular.py,sha256=B1ypLnbGY87nUdylPcbukdNoXvKa5GHCl-_14XRqLWs,6066
+ torchzero/optim/experimental/__init__.py,sha256=RqNzJu5mVl3T0u7cf4TBzSiA20M1kxTZVYWjSVhEHuU,585
+ torchzero/optim/experimental/experimental.py,sha256=tMHZVbEXm3s6mMr7unFSvk_Jks3uAaAG0fzsH6gr098,10928
+ torchzero/optim/experimental/ray_search.py,sha256=GYyssL64D6RiImrZ2tchoZJ04x9rX-Bp1y2nQXEGxX0,2662
+ torchzero/optim/first_order/__init__.py,sha256=CRT4farcwi8sO1qqDGxXv1856zOwuKlJKBIAIvpL2Z0,336
+ torchzero/optim/first_order/cautious.py,sha256=XBeqrLQ4gFKVUYnJI5ROmF9wQJGhY90HR6UG5IS7vYk,6610
+ torchzero/optim/first_order/forward_gradient.py,sha256=EM6W8MezS6iUtW36lxozdo2U4aqlDKE7Zf-0s1LACXQ,3066
+ torchzero/optim/first_order/optimizers.py,sha256=jYmU6YDsYRGMRsCNkYc6AlvOf3wlU7Uv1xUrzl0o8zo,24501
+ torchzero/optim/quasi_newton/__init__.py,sha256=0X83dl-85_j3ck8itWxJR49ZbFeOcWurW6FI8J12F1w,49
+ torchzero/optim/quasi_newton/directional_newton.py,sha256=oZ-If8SRcFXTDFKS_zlAcJnif-v5dTCR9HXqmfvsvNA,2595
+ torchzero/optim/second_order/__init__.py,sha256=3Gt0dR4NzBK07TV0NF8KZImUGHbI8E2zncDmhIC377I,31
+ torchzero/optim/second_order/newton.py,sha256=-DqJrS8JPea6Y9jp5lDV14KyP8A24YKiPxJ32Vsfiv4,3848
+ torchzero/optim/wrappers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ torchzero/optim/wrappers/nevergrad.py,sha256=4PLqfs2L9XJhveyX6l7kJu1cIPl6uv7_UD76amIlP7I,4733
+ torchzero/optim/wrappers/nlopt.py,sha256=fGDOZ82sRI2VLH3hKIAhZY4EuKSdu_g217c-NZvD_rs,7104
+ torchzero/optim/wrappers/scipy.py,sha256=_BQwFDQ7SBqIA5i1SJ29Xj0jDXVV8MQ_9RcsPT3U6VQ,18047
+ torchzero/optim/zeroth_order/__init__.py,sha256=_6T0znO6V63Niq7DMhJPgUuMc_nPvAGxjCjMdf-r64U,218
+ torchzero/optim/zeroth_order/fdm.py,sha256=5iJc_F_tRR4cGQfy2Jr8PmAnCGrPva89ZWczSdcBkFk,3686
+ torchzero/optim/zeroth_order/newton_fdm.py,sha256=-5E1FGzeJMr8_IougzE_FEOPFt9pEjQxID4Y89Hpmh0,6537
+ torchzero/optim/zeroth_order/rfdm.py,sha256=_Y7yiF1bsVRlXt5IK-3zQccwVl95JF0-Xw-fl8Q_7y4,10529
+ torchzero/optim/zeroth_order/rs.py,sha256=3w2nnPGWPecourEdUG583vchcqdNxC6Q_PBL3l0PvCk,3333
+ torchzero/random/__init__.py,sha256=8EowQhC4yTZuF8w1ZDl73YZtLej8SuhxCk1Bkifbkms,93
+ torchzero/random/random.py,sha256=Oq4GvM_6AOsabg5ke6b8h51V9npyHVxp8ob_wC5D-Aw,2829
+ torchzero/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ torchzero/utils/compile.py,sha256=pYEyX8P26iCb_hFqAXC8IP2SSQrRfC7ZDhXS0vVCsfY,1257
+ torchzero/utils/derivatives.py,sha256=koLmuUcVcX41SrH_9rvfJyMXyHyocNLuZ-C8Kr2B7hk,4844
+ torchzero/utils/python_tools.py,sha256=kkyDhoP695HhapfKrdjcrRbRAbcvB0ArP1pkxuVUlf0,1192
+ torchzero/utils/torch_tools.py,sha256=sSBY5Bmk9LOAgPtaq-6TK4wDgPXsg6FIWxv8CVDx82k,3580
+ torchzero-0.0.1.dist-info/LICENSE,sha256=r9ZciAoZoqKC_FNADE0ORukj1p1XhLXEbegdsAyqhJs,1087
+ torchzero-0.0.1.dist-info/METADATA,sha256=WKhVATagGeS8DLjdTEc8nyxBfOO_MswXKjL5c49joVw,5963
+ torchzero-0.0.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ torchzero-0.0.1.dist-info/top_level.txt,sha256=isztuDR1ZGo8p2tORLa-vNuomcbLj7Xd208lhd-pVPs,10
+ torchzero-0.0.1.dist-info/RECORD,,
torchzero-0.0.1.dist-info/WHEEL
@@ -0,0 +1,5 @@
+ Wheel-Version: 1.0
+ Generator: setuptools (75.8.0)
+ Root-Is-Purelib: true
+ Tag: py3-none-any
+
torchzero-0.0.1.dist-info/top_level.txt
@@ -0,0 +1 @@
+ torchzero