torchzero 0.1.8__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. docs/source/conf.py +57 -0
  2. tests/test_identical.py +230 -0
  3. tests/test_module.py +50 -0
  4. tests/test_opts.py +884 -0
  5. tests/test_tensorlist.py +1787 -0
  6. tests/test_utils_optimizer.py +170 -0
  7. tests/test_vars.py +184 -0
  8. torchzero/__init__.py +4 -4
  9. torchzero/core/__init__.py +3 -13
  10. torchzero/core/module.py +629 -510
  11. torchzero/core/preconditioner.py +137 -0
  12. torchzero/core/transform.py +252 -0
  13. torchzero/modules/__init__.py +13 -21
  14. torchzero/modules/clipping/__init__.py +3 -0
  15. torchzero/modules/clipping/clipping.py +320 -0
  16. torchzero/modules/clipping/ema_clipping.py +135 -0
  17. torchzero/modules/clipping/growth_clipping.py +187 -0
  18. torchzero/modules/experimental/__init__.py +13 -18
  19. torchzero/modules/experimental/absoap.py +350 -0
  20. torchzero/modules/experimental/adadam.py +111 -0
  21. torchzero/modules/experimental/adamY.py +135 -0
  22. torchzero/modules/experimental/adasoap.py +282 -0
  23. torchzero/modules/experimental/algebraic_newton.py +145 -0
  24. torchzero/modules/experimental/curveball.py +89 -0
  25. torchzero/modules/experimental/dsoap.py +290 -0
  26. torchzero/modules/experimental/gradmin.py +85 -0
  27. torchzero/modules/experimental/reduce_outward_lr.py +35 -0
  28. torchzero/modules/experimental/spectral.py +286 -0
  29. torchzero/modules/experimental/subspace_preconditioners.py +128 -0
  30. torchzero/modules/experimental/tropical_newton.py +136 -0
  31. torchzero/modules/functional.py +209 -0
  32. torchzero/modules/grad_approximation/__init__.py +4 -0
  33. torchzero/modules/grad_approximation/fdm.py +120 -0
  34. torchzero/modules/grad_approximation/forward_gradient.py +81 -0
  35. torchzero/modules/grad_approximation/grad_approximator.py +66 -0
  36. torchzero/modules/grad_approximation/rfdm.py +259 -0
  37. torchzero/modules/line_search/__init__.py +5 -30
  38. torchzero/modules/line_search/backtracking.py +186 -0
  39. torchzero/modules/line_search/line_search.py +181 -0
  40. torchzero/modules/line_search/scipy.py +37 -0
  41. torchzero/modules/line_search/strong_wolfe.py +260 -0
  42. torchzero/modules/line_search/trust_region.py +61 -0
  43. torchzero/modules/lr/__init__.py +2 -0
  44. torchzero/modules/lr/lr.py +59 -0
  45. torchzero/modules/lr/step_size.py +97 -0
  46. torchzero/modules/momentum/__init__.py +14 -4
  47. torchzero/modules/momentum/averaging.py +78 -0
  48. torchzero/modules/momentum/cautious.py +181 -0
  49. torchzero/modules/momentum/ema.py +173 -0
  50. torchzero/modules/momentum/experimental.py +189 -0
  51. torchzero/modules/momentum/matrix_momentum.py +124 -0
  52. torchzero/modules/momentum/momentum.py +43 -106
  53. torchzero/modules/ops/__init__.py +103 -0
  54. torchzero/modules/ops/accumulate.py +65 -0
  55. torchzero/modules/ops/binary.py +240 -0
  56. torchzero/modules/ops/debug.py +25 -0
  57. torchzero/modules/ops/misc.py +419 -0
  58. torchzero/modules/ops/multi.py +137 -0
  59. torchzero/modules/ops/reduce.py +149 -0
  60. torchzero/modules/ops/split.py +75 -0
  61. torchzero/modules/ops/switch.py +68 -0
  62. torchzero/modules/ops/unary.py +115 -0
  63. torchzero/modules/ops/utility.py +112 -0
  64. torchzero/modules/optimizers/__init__.py +18 -10
  65. torchzero/modules/optimizers/adagrad.py +146 -49
  66. torchzero/modules/optimizers/adam.py +112 -118
  67. torchzero/modules/optimizers/lion.py +18 -11
  68. torchzero/modules/optimizers/muon.py +222 -0
  69. torchzero/modules/optimizers/orthograd.py +55 -0
  70. torchzero/modules/optimizers/rmsprop.py +103 -51
  71. torchzero/modules/optimizers/rprop.py +342 -99
  72. torchzero/modules/optimizers/shampoo.py +197 -0
  73. torchzero/modules/optimizers/soap.py +286 -0
  74. torchzero/modules/optimizers/sophia_h.py +129 -0
  75. torchzero/modules/projections/__init__.py +5 -0
  76. torchzero/modules/projections/dct.py +73 -0
  77. torchzero/modules/projections/fft.py +73 -0
  78. torchzero/modules/projections/galore.py +10 -0
  79. torchzero/modules/projections/projection.py +218 -0
  80. torchzero/modules/projections/structural.py +151 -0
  81. torchzero/modules/quasi_newton/__init__.py +7 -4
  82. torchzero/modules/quasi_newton/cg.py +218 -0
  83. torchzero/modules/quasi_newton/experimental/__init__.py +1 -0
  84. torchzero/modules/quasi_newton/experimental/modular_lbfgs.py +265 -0
  85. torchzero/modules/quasi_newton/lbfgs.py +228 -0
  86. torchzero/modules/quasi_newton/lsr1.py +170 -0
  87. torchzero/modules/quasi_newton/olbfgs.py +196 -0
  88. torchzero/modules/quasi_newton/quasi_newton.py +475 -0
  89. torchzero/modules/second_order/__init__.py +3 -4
  90. torchzero/modules/second_order/newton.py +142 -165
  91. torchzero/modules/second_order/newton_cg.py +84 -0
  92. torchzero/modules/second_order/nystrom.py +168 -0
  93. torchzero/modules/smoothing/__init__.py +2 -5
  94. torchzero/modules/smoothing/gaussian.py +164 -0
  95. torchzero/modules/smoothing/{laplacian_smoothing.py → laplacian.py} +115 -128
  96. torchzero/modules/weight_decay/__init__.py +1 -0
  97. torchzero/modules/weight_decay/weight_decay.py +52 -0
  98. torchzero/modules/wrappers/__init__.py +1 -0
  99. torchzero/modules/wrappers/optim_wrapper.py +91 -0
  100. torchzero/optim/__init__.py +2 -10
  101. torchzero/optim/utility/__init__.py +1 -0
  102. torchzero/optim/utility/split.py +45 -0
  103. torchzero/optim/wrappers/nevergrad.py +2 -28
  104. torchzero/optim/wrappers/nlopt.py +31 -16
  105. torchzero/optim/wrappers/scipy.py +79 -156
  106. torchzero/utils/__init__.py +27 -0
  107. torchzero/utils/compile.py +175 -37
  108. torchzero/utils/derivatives.py +513 -99
  109. torchzero/utils/linalg/__init__.py +5 -0
  110. torchzero/utils/linalg/matrix_funcs.py +87 -0
  111. torchzero/utils/linalg/orthogonalize.py +11 -0
  112. torchzero/utils/linalg/qr.py +71 -0
  113. torchzero/utils/linalg/solve.py +168 -0
  114. torchzero/utils/linalg/svd.py +20 -0
  115. torchzero/utils/numberlist.py +132 -0
  116. torchzero/utils/ops.py +10 -0
  117. torchzero/utils/optimizer.py +284 -0
  118. torchzero/utils/optuna_tools.py +40 -0
  119. torchzero/utils/params.py +149 -0
  120. torchzero/utils/python_tools.py +40 -25
  121. torchzero/utils/tensorlist.py +1081 -0
  122. torchzero/utils/torch_tools.py +48 -12
  123. torchzero-0.3.1.dist-info/METADATA +379 -0
  124. torchzero-0.3.1.dist-info/RECORD +128 -0
  125. {torchzero-0.1.8.dist-info → torchzero-0.3.1.dist-info}/WHEEL +1 -1
  126. {torchzero-0.1.8.dist-info → torchzero-0.3.1.dist-info/licenses}/LICENSE +0 -0
  127. torchzero-0.3.1.dist-info/top_level.txt +3 -0
  128. torchzero/core/tensorlist_optimizer.py +0 -219
  129. torchzero/modules/adaptive/__init__.py +0 -4
  130. torchzero/modules/adaptive/adaptive.py +0 -192
  131. torchzero/modules/experimental/experimental.py +0 -294
  132. torchzero/modules/experimental/quad_interp.py +0 -104
  133. torchzero/modules/experimental/subspace.py +0 -259
  134. torchzero/modules/gradient_approximation/__init__.py +0 -7
  135. torchzero/modules/gradient_approximation/_fd_formulas.py +0 -3
  136. torchzero/modules/gradient_approximation/base_approximator.py +0 -105
  137. torchzero/modules/gradient_approximation/fdm.py +0 -125
  138. torchzero/modules/gradient_approximation/forward_gradient.py +0 -163
  139. torchzero/modules/gradient_approximation/newton_fdm.py +0 -198
  140. torchzero/modules/gradient_approximation/rfdm.py +0 -125
  141. torchzero/modules/line_search/armijo.py +0 -56
  142. torchzero/modules/line_search/base_ls.py +0 -139
  143. torchzero/modules/line_search/directional_newton.py +0 -217
  144. torchzero/modules/line_search/grid_ls.py +0 -158
  145. torchzero/modules/line_search/scipy_minimize_scalar.py +0 -62
  146. torchzero/modules/meta/__init__.py +0 -12
  147. torchzero/modules/meta/alternate.py +0 -65
  148. torchzero/modules/meta/grafting.py +0 -195
  149. torchzero/modules/meta/optimizer_wrapper.py +0 -173
  150. torchzero/modules/meta/return_overrides.py +0 -46
  151. torchzero/modules/misc/__init__.py +0 -10
  152. torchzero/modules/misc/accumulate.py +0 -43
  153. torchzero/modules/misc/basic.py +0 -115
  154. torchzero/modules/misc/lr.py +0 -96
  155. torchzero/modules/misc/multistep.py +0 -51
  156. torchzero/modules/misc/on_increase.py +0 -53
  157. torchzero/modules/operations/__init__.py +0 -29
  158. torchzero/modules/operations/multi.py +0 -298
  159. torchzero/modules/operations/reduction.py +0 -134
  160. torchzero/modules/operations/singular.py +0 -113
  161. torchzero/modules/optimizers/sgd.py +0 -54
  162. torchzero/modules/orthogonalization/__init__.py +0 -2
  163. torchzero/modules/orthogonalization/newtonschulz.py +0 -159
  164. torchzero/modules/orthogonalization/svd.py +0 -86
  165. torchzero/modules/regularization/__init__.py +0 -22
  166. torchzero/modules/regularization/dropout.py +0 -34
  167. torchzero/modules/regularization/noise.py +0 -77
  168. torchzero/modules/regularization/normalization.py +0 -328
  169. torchzero/modules/regularization/ortho_grad.py +0 -78
  170. torchzero/modules/regularization/weight_decay.py +0 -92
  171. torchzero/modules/scheduling/__init__.py +0 -2
  172. torchzero/modules/scheduling/lr_schedulers.py +0 -131
  173. torchzero/modules/scheduling/step_size.py +0 -80
  174. torchzero/modules/smoothing/gaussian_smoothing.py +0 -90
  175. torchzero/modules/weight_averaging/__init__.py +0 -2
  176. torchzero/modules/weight_averaging/ema.py +0 -72
  177. torchzero/modules/weight_averaging/swa.py +0 -171
  178. torchzero/optim/experimental/__init__.py +0 -20
  179. torchzero/optim/experimental/experimental.py +0 -343
  180. torchzero/optim/experimental/ray_search.py +0 -83
  181. torchzero/optim/first_order/__init__.py +0 -18
  182. torchzero/optim/first_order/cautious.py +0 -158
  183. torchzero/optim/first_order/forward_gradient.py +0 -70
  184. torchzero/optim/first_order/optimizers.py +0 -570
  185. torchzero/optim/modular.py +0 -148
  186. torchzero/optim/quasi_newton/__init__.py +0 -1
  187. torchzero/optim/quasi_newton/directional_newton.py +0 -58
  188. torchzero/optim/second_order/__init__.py +0 -1
  189. torchzero/optim/second_order/newton.py +0 -94
  190. torchzero/optim/zeroth_order/__init__.py +0 -4
  191. torchzero/optim/zeroth_order/fdm.py +0 -87
  192. torchzero/optim/zeroth_order/newton_fdm.py +0 -146
  193. torchzero/optim/zeroth_order/rfdm.py +0 -217
  194. torchzero/optim/zeroth_order/rs.py +0 -85
  195. torchzero/random/__init__.py +0 -1
  196. torchzero/random/random.py +0 -46
  197. torchzero/tensorlist.py +0 -826
  198. torchzero-0.1.8.dist-info/METADATA +0 -130
  199. torchzero-0.1.8.dist-info/RECORD +0 -104
  200. torchzero-0.1.8.dist-info/top_level.txt +0 -1
@@ -1,8 +1,55 @@
1
1
  import copyreg
2
2
  import weakref
3
+ from collections.abc import Iterable
4
+ from typing import Any
3
5
 
4
- import torch
5
6
  import numpy as np
7
+ import torch
8
+
9
+
10
def totensor(x) -> torch.Tensor:
    """Convert *x* to a ``torch.Tensor``.

    Tensors are returned unchanged (no copy). Numpy arrays are wrapped with
    ``torch.from_numpy``, which shares memory with the source array. Anything
    else is first converted with ``np.asarray`` and then wrapped the same way.

    Note: the ``-> torch.Tensor`` return annotation matches the previous
    release of this helper; it was dropped in this version by accident.
    """
    if isinstance(x, torch.Tensor):
        return x
    if isinstance(x, np.ndarray):
        return torch.from_numpy(x)
    return torch.from_numpy(np.asarray(x))
14
+
15
def tonumpy(x):
    """Return *x* as a numpy array.

    Arrays pass through unchanged; tensors are detached, moved to CPU and
    converted; everything else goes through ``np.asarray``.
    """
    if isinstance(x, np.ndarray):
        return x
    if isinstance(x, torch.Tensor):
        return x.detach().cpu().numpy()
    return np.asarray(x)
19
+
20
def tofloat(x) -> float:
    """Coerce *x* (number, 0-d/1-element tensor, or array scalar) to a python float."""
    if isinstance(x, torch.Tensor):
        return x.detach().cpu().item()
    if isinstance(x, np.ndarray):
        return x.item()  # type:ignore
    if isinstance(x, float):
        return x
    return float(x)
25
+
26
def tolist(x):
    """Return *x* as a plain python list (lists pass through unchanged)."""
    if isinstance(x, list):
        return x
    if isinstance(x, torch.Tensor):
        # detach + move to CPU before converting, so grad/device don't matter
        return x.detach().cpu().tolist()
    if isinstance(x, np.ndarray):
        return x.tolist()
    return np.asarray(x).tolist()
31
+
32
def vec_to_tensors(vec: torch.Tensor, reference: Iterable[torch.Tensor]) -> list[torch.Tensor]:
    """Un-flatten *vec* into a list of views shaped like the *reference* tensors.

    Each returned tensor is a view into *vec* (no copy); *reference* is
    consumed in a single pass, so a generator is fine.
    """
    out: list[torch.Tensor] = []
    offset = 0
    for ref in reference:
        n = ref.numel()
        out.append(vec[offset:offset + n].view_as(ref))
        offset += n
    return out
40
+
41
def vec_to_tensors_(vec: torch.Tensor, tensors_: Iterable[torch.Tensor]):
    """In-place counterpart of ``vec_to_tensors``.

    Repoints each tensor in *tensors_* (via ``set_``) at the matching slice
    of *vec*, so they become views into *vec* rather than copies.
    """
    offset = 0
    for t in tensors_:
        n = t.numel()
        t.set_(vec[offset:offset + n].view_as(t))  # pyright: ignore[reportArgumentType]
        offset += n
47
+
48
def set_storage_(tensor: torch.Tensor, storage: torch.Tensor):
    """Thin wrapper over ``tensor.set_(storage)`` with a signature the type checker accepts."""
    return tensor.set_(storage) # pyright:ignore[reportArgumentType]
51
+
52
+
6
53
 
7
54
  def swap_tensors_no_use_count_check(t1, t2):
8
55
  """
@@ -79,14 +126,3 @@ def swap_tensors_no_use_count_check(t1, t2):
79
126
 
80
127
  # Swap the at::Tensor they point to
81
128
  torch._C._swap_tensor_impl(t1, t2)
82
-
83
-
84
- def totensor(x) -> torch.Tensor:
85
- if isinstance(x, torch.Tensor): return x
86
- if isinstance(x, np.ndarray): return torch.from_numpy(x)
87
- return torch.from_numpy(np.asarray(x))
88
-
89
- def tofloat(x) -> float:
90
- if isinstance(x, torch.Tensor): return x.detach().cpu().item()
91
- if isinstance(x, np.ndarray): return x.item()
92
- return float()
@@ -0,0 +1,379 @@
1
+ Metadata-Version: 2.4
2
+ Name: torchzero
3
+ Version: 0.3.1
4
+ Summary: Modular optimization library for PyTorch.
5
+ Author-email: Ivan Nikishev <nkshv2@gmail.com>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2024 inikishev
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/inikishev/torchzero
29
+ Project-URL: Repository, https://github.com/inikishev/torchzero
30
+ Project-URL: Issues, https://github.com/inikishev/torchzero/issues
31
+ Keywords: optimization,optimizers,torch,neural networks,zeroth order,second order
32
+ Requires-Python: >=3.10
33
+ Description-Content-Type: text/markdown
34
+ License-File: LICENSE
35
+ Requires-Dist: torch
36
+ Requires-Dist: numpy
37
+ Requires-Dist: typing_extensions
38
+ Dynamic: license-file
39
+
40
+ # torchzero
41
+
42
+ **Modular optimization library for PyTorch**
43
+
44
+ <!-- [![PyPI version](https://img.shields.io/pypi/v/torchzero.svg)](https://pypi.org/project/torchzero/)
45
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
46
+ [![Build Status](https://img.shields.io/github/actions/workflow/status/torchzero/torchzero/ci.yml?branch=main)](https://github.com/torchzero/torchzero/actions)
47
+ [![Documentation Status](https://readthedocs.org/projects/torchzero/badge/?version=latest)](https://torchzero.readthedocs.io/en/latest/?badge=latest) -->
48
+
49
+ `torchzero` is a Python library providing a highly modular framework for creating and experimenting with optimization algorithms in PyTorch. It allows users to easily combine and customize various components of optimizers, such as momentum techniques, gradient clipping, line searches and more.
50
+
51
+ NOTE: torchzero is in active development, currently docs are in a state of flux and pip version is extremely outdated.
52
+
53
+ ## Installation
54
+
55
+ ```bash
56
+ pip install git+https://github.com/inikishev/torchzero
57
+ ```
58
+
59
+ (please don't use pip version yet, it is very outdated)
60
+
61
+ **Dependencies:**
62
+
63
+ * Python >= 3.10
64
+ * `torch`
65
+ * `numpy`
66
+ * `typing_extensions`
67
+
68
+ ## Core Concepts
69
+
70
+ <!-- ### Modular Design
71
+
72
+ `torchzero` is built around a few key abstractions:
73
+
74
+ * **`Module`**: The base class for all components in `torchzero`. Each `Module` implements a `step(vars)` method that processes the optimization variables.
75
+ * **`Modular`**: The main optimizer class that chains together a sequence of `Module`s. It orchestrates the flow of data through the modules in the order they are provided.
76
+ * **`Transform`**: A special type of `Module` designed for tensor transformations. These are often used for operations like applying momentum or scaling gradients.
77
+ * **`Preconditioner`**: A subclass of `Transform`, typically used for preconditioning gradients (e.g., Adam, RMSprop).
78
+
79
+ ### `Vars` Object
80
+
81
+ The `Vars` object is a data carrier that passes essential information between modules during an optimization step. It typically holds:
82
+
83
+ * `params`: The model parameters.
84
+ * `grad`: Gradients of the parameters.
85
+ * `update`: The update to be applied to the parameters.
86
+ * `loss`: The current loss value.
87
+ * `closure`: A function to re-evaluate the model and loss (used by some line search algorithms and other modules that might need to recompute gradients or loss).
88
+
89
+ ### `TensorList`
90
+
91
+ `torchzero` uses a custom `TensorList` class for efficient batched operations on lists of tensors. This allows for optimized performance when dealing with multiple parameter groups or complex update rules. -->
92
+
93
+ ## Quick Start / Usage Example
94
+
95
+ Here's a basic example of how to use `torchzero`:
96
+
97
+ ```python
98
+ import torch
99
+ from torch import nn
100
+ import torchzero as tz
101
+
102
+ # Define a simple model
103
+ model = nn.Linear(10, 1)
104
+ criterion = nn.MSELoss()
105
+ inputs = torch.randn(5, 10)
106
+ targets = torch.randn(5, 1)
107
+
108
+ # Create an optimizer
109
+ # The order of modules matters:
110
+ # 1. SOAP: Computes the update.
111
+ # 2. NormalizeByEMA: stabilizes the update by normalizing to an exponential moving average of past updates.
112
+ # 3. WeightDecay - semi-decoupled, because it is applied after SOAP, but before LR
113
+ # 4. LR: Scales the computed update by the learning rate (supports LR schedulers).
114
+ optimizer = tz.Modular(
115
+ model.parameters(),
116
+ tz.m.SOAP(),
117
+ tz.m.NormalizeByEMA(max_ema_growth=1.1),
118
+ tz.m.WeightDecay(1e-4),
119
+ tz.m.LR(1e-1),
120
+ )
121
+
122
+ # Standard training loop
123
+ for epoch in range(100):
124
+ optimizer.zero_grad()
125
+ output = model(inputs)
126
+ loss = criterion(output, targets)
127
+ loss.backward()
128
+ optimizer.step()
129
+ if (epoch+1) % 10 == 0: print(f"Epoch {epoch+1}, Loss: {loss.item()}")
130
+ ```
131
+
132
+ ## Overview of Available Modules
133
+
134
+ `torchzero` provides a rich set of pre-built modules. Here are some key categories and examples:
135
+
136
+ * **Optimizers (`torchzero/modules/optimizers/`)**: Optimization algorithms.
137
+ * `Adam`.
138
+ * `Shampoo`.
139
+ * `SOAP` (my current recommendation).
140
+ * `Muon`.
141
+ * `SophiaH`.
142
+ * `Adagrad` and `FullMatrixAdagrad`.
143
+ * `Lion`.
144
+ * `RMSprop`.
145
+ * `OrthoGrad`.
146
+ * `Rprop`.
147
+
148
+ Additionally many other optimizers can be easily defined via modules:
149
+ * Grams: `[tz.m.Adam(), tz.m.GradSign()]`
150
+ * LaProp: `[tz.m.RMSprop(), tz.m.EMA(0.9)]`
151
+ * Signum: `[tz.m.HeavyBall(), tz.m.Sign()]`
152
+ * Full matrix version of any diagonal optimizer, like Adam: `tz.m.FullMatrixAdagrad(beta=0.999, inner=tz.m.EMA(0.9))`
153
+ * Cautious version of any optimizer, like SOAP: `[tz.m.SOAP(), tz.m.Cautious()]`
154
+
155
+ * **Clipping (`torchzero/modules/clipping/`)**: Gradient clipping techniques.
156
+ * `ClipNorm`: Clips gradient L2 norm.
157
+ * `ClipValue`: Clips gradient values element-wise.
158
+ * `Normalize`: Normalizes gradients to unit norm.
159
+ * `Centralize`: Centralizes gradients by subtracting the mean.
160
+ * `ClipNormByEMA`, `NormalizeByEMA`, `ClipValueByEMA`: Clipping/Normalization based on EMA of past values.
161
+ * `ClipNormGrowth`, `ClipValueGrowth`: Limits norm or value growth.
162
+ * **Gradient Approximation (`torchzero/modules/grad_approximation/`)**: Methods for approximating gradients.
163
+ * `FDM`: Finite Difference Method.
164
+ * `RandomizedFDM` (`MeZO`, `SPSA`, `RDSA`, `Gaussian smoothing`): Randomized Finite Difference Methods (also subspaces).
165
+ * `ForwardGradient`: Randomized gradient approximation via forward mode automatic differentiation.
166
+ * **Line Search (`torchzero/modules/line_search/`)**: Techniques for finding optimal step sizes.
167
+ * `Backtracking`, `AdaptiveBacktracking`: Backtracking line searches.
168
+ * `StrongWolfe`: Cubic interpolation line search satisfying strong Wolfe conditions.
169
+ * `ScipyMinimizeScalar`: Wrapper for SciPy's scalar minimization for line search.
170
+ * `TrustRegion`: First order trust region method.
171
+ * **Learning Rate (`torchzero/modules/lr/`)**: Learning rate control.
172
+ * `LR`: Applies a fixed learning rate.
173
+ * `PolyakStepSize`: Polyak's method.
174
+ * `Warmup`: Learning rate warmup.
175
+ * **Momentum (`torchzero/modules/momentum/`)**: Momentum-based update modifications.
176
+ * `NAG`: Nesterov Accelerated Gradient.
177
+ * `HeavyBall`: Classic momentum (Polyak's momentum).
178
+ * `EMA`: Exponential moving average.
179
+ * `Averaging` (`Medianveraging`, `WeightedAveraging`): Simple, median, or weighted averaging of updates.
180
+ * `Cautious`, `ScaleByGradCosineSimilarity`: Momentum cautioning.
181
+ * `MatrixMomentum`, `AdaptiveMatrixMomentum`: Second order momentum.
182
+ <!-- * `CoordinateMomentum`: Momentum via random coordinates. -->
183
+ * **Projections (`torchzero/modules/projections/`)**: Gradient projection techniques.
184
+ * `FFTProjection`, `DCTProjection`: Use any update rule in Fourier or DCT domain.
185
+ * `VectorProjection`, `TensorizeProjection`, `BlockPartition`, `TensorNormsProjection`: Structural projection methods.
186
+ <!-- * *(Note: DCT and Galore were commented out in the `__init__.py` I read, might be experimental or moved).* -->
187
+ * **Quasi-Newton (`torchzero/modules/quasi_newton/`)**: Approximate second-order optimization methods.
188
+ * `LBFGS`: Limited-memory BFGS.
189
+ * `LSR1`: Limited-memory SR1.
190
+ * `OnlineLBFGS`: Online LBFGS.
191
+ <!-- * `ModularLBFGS`: A modular L-BFGS implementation (from experimental). -->
192
+ * `BFGS`, `SR1`, `DFP`, `BroydenGood`, `BroydenBad`, `Greenstadt1`, `Greenstadt2`, `ColumnUpdatingMethod`, `ThomasOptimalMethod`, `PSB`, `Pearson2`, `SSVM`: Classic full-matrix Quasi-Newton update formulas.
193
+ * Conjugate Gradient methods: `PolakRibiere`, `FletcherReeves`, `HestenesStiefel`, `DaiYuan`, `LiuStorey`, `ConjugateDescent`, `HagerZhang`, `HybridHS_DY`.
194
+ * **Second Order (`torchzero/modules/second_order/`)**: Second order methods.
195
+ * `Newton`: Classic Newton's method.
196
+ * `NewtonCG`: Matrix-free newton's method with conjugate gradient solver.
197
+ * `NystromSketchAndSolve`: Nyström sketch-and-solve method.
198
+ * `NystromPCG`: NewtonCG with Nyström preconditioning.
199
+ * **Smoothing (`torchzero/modules/smoothing/`)**: Techniques for smoothing the loss landscape or gradients.
200
+ * `LaplacianSmoothing`: Laplacian smoothing for gradients.
201
+ * `GaussianHomotopy`: Smoothing via randomized Gaussian homotopy.
202
+ * **Weight Decay (`torchzero/modules/weight_decay/`)**: Weight decay implementations.
203
+ * `WeightDecay`: Standard L2 or L1 weight decay.
204
+ <!-- * `DirectWeightDecay`: Applies weight decay directly to weights.
205
+ * `decay_weights_`: Functional form for decaying weights. -->
206
+ * **Ops (`torchzero/modules/ops/`)**: Various tensor operations and utilities.
207
+ * `GradientAccumulation`: easy way to add gradient accumulation.
208
+ * `Unary*` (e.g., `Abs`, `Sqrt`, `Sign`): Unary operations.
209
+ * `Binary*` (e.g., `Add`, `Mul`, `Graft`): Binary operations.
210
+ * `Multi*` (e.g., `ClipModules`, `LerpModules`): Operations on multiple module outputs.
211
+ * `Reduce*` (e.g., `Mean`, `Sum`, `WeightedMean`): Reduction operations on multiple module outputs.
212
+
213
+ * **Wrappers (`torchzero/modules/wrappers/`)**.
214
+ * `Wrap`: Wraps any PyTorch optimizer, allowing to use it as a module.
215
+
216
+ <!-- * **Experimental (`torchzero/modules/experimental/`)**: Experimental modules.
217
+ * `GradMin`: Attempts to minimize gradient norm.
218
+ * `ReduceOutwardLR`: Reduces learning rate for parameters with outward pointing gradients.
219
+ * `RandomSubspacePreconditioning`, `HistorySubspacePreconditioning`: Preconditioning techniques using random or historical subspaces. -->
220
+
221
+ ## Advanced Usage
222
+
223
+ ### Closure
224
+
225
+ Certain modules, particularly line searches and gradient approximations require a closure, similar to L-BFGS in PyTorch. In TorchZero closure accepts an additional `backward` argument, refer to example below:
226
+
227
+ ```python
228
+ # basic training loop
229
+ for inputs, targets in dataloader:
230
+
231
+ def closure(backward=True): # make sure it is True by default
232
+ preds = model(inputs)
233
+ loss = criterion(preds, targets)
234
+
235
+ if backward:
236
+ optimizer.zero_grad()
237
+ loss.backward()
238
+
239
+ return loss
240
+
241
+ loss = optimizer.step(closure)
242
+ ```
243
+
244
+ Also the closure above works with all PyTorch optimizers and most custom ones, so there is no need to rewrite the training loop.
245
+
246
+ Non-batched example (rosenbrock):
247
+
248
+ ```py
249
+ import torchzero as tz
250
+
251
+ def rosen(x, y):
252
+ return (1 - x) ** 2 + 100 * (y - x ** 2) ** 2
253
+
254
+ W = torch.tensor([-1.1, 2.5], requires_grad=True)
255
+
256
+ def closure(backward=True):
257
+ loss = rosen(*W)
258
+ if backward:
259
+ W.grad = None # same as opt.zero_grad()
260
+ loss.backward()
261
+ return loss
262
+
263
+ opt = tz.Modular([W], tz.m.NewtonCG(), tz.m.StrongWolfe())
264
+ for step in range(20):
265
+ loss = opt.step(closure)
266
+ print(f'{step} - {loss}')
267
+ ```
268
+
269
+ ### Low level modules
270
+
271
+ TorchZero provides a lot of low-level modules that can be used to recreate update rules, or combine existing update rules
272
+ in new ways. Here are some equivalent ways to make Adam in order of their involvement:
273
+
274
+ ```python
275
+ tz.m.Adam()
276
+ ```
277
+
278
+ ```python
279
+ tz.m.RMSprop(0.999, debiased=True, init='zeros', inner=tz.m.EMA(0.9))
280
+ ```
281
+
282
+ ```python
283
+ tz.m.DivModules(
284
+ tz.m.EMA(0.9, debiased=True),
285
+ [tz.m.SqrtEMASquared(0.999, debiased=True, amsgrad=amsgrad), tz.m.Add(1e-8)]
286
+ )
287
+ ```
288
+
289
+ ```python
290
+ tz.m.DivModules(
291
+ [tz.m.EMA(0.9), tz.m.Debias(beta1=0.9, beta2=0.999)],
292
+ [tz.m.EMASquared(0.999, amsgrad=amsgrad), tz.m.Sqrt(), tz.m.Add(1e-8)]
293
+ )
294
+ ```
295
+
296
+ ```python
297
+ tz.m.DivModules(
298
+ [tz.m.EMA(0.9), tz.m.Debias(beta1=0.9)],
299
+ [
300
+ tz.m.Pow(2),
301
+ tz.m.EMA(0.999),
302
+ tz.m.AccumulateMaximum() if amsgrad else tz.m.Identity(),
303
+ tz.m.Sqrt(),
304
+ tz.m.Debias2(beta=0.999),
305
+ tz.m.Add(1e-8)]
306
+ )
307
+ ```
308
+
309
+ There are practically no rules to the ordering of the modules - anything will work, even line search after line search or nested gaussian homotopy.
310
+
311
+ ### Quick guide to implementing new modules
312
+
313
+ Modules are quite similar to torch.optim.Optimizer, the main difference is that everything is stored in the Vars object,
314
+ not in the module itself. Also both per-parameter settings and state are stored in per-parameter dictionaries. Feel free to modify the example below.
315
+
316
+ ```python
317
+ import torch
318
+ from torchzero.core import Module, Vars
319
+
320
+ class HeavyBall(Module):
321
+ def __init__(self, momentum: float = 0.9, dampening: float = 0):
322
+ defaults = dict(momentum=momentum, dampening=dampening)
323
+ super().__init__(defaults)
324
+
325
+ def step(self, vars: Vars):
326
+ # a module takes a Vars object, modifies it or creates a new one, and returns it
327
+ # Vars has a bunch of attributes, including parameters, gradients, update, closure, loss
328
+ # for now we are only interested in update, and we will apply the heavyball rule to it.
329
+
330
+ params = vars.params
331
+ update = vars.get_update() # list of tensors
332
+
333
+ exp_avg_list = []
334
+ for p, u in zip(params, update):
335
+ state = self.state[p]
336
+ settings = self.settings[p]
337
+ momentum = settings['momentum']
338
+ dampening = settings['dampening']
339
+
340
+ if 'momentum_buffer' not in state:
341
+ state['momentum_buffer'] = torch.zeros_like(p)
342
+
343
+ buf = state['momentum_buffer']
344
+ u *= 1 - dampening
345
+
346
+ buf.mul_(momentum).add_(u)
347
+
348
+ # clone because further modules might modify exp_avg in-place
349
+ # and it is part of self.state
350
+ exp_avg_list.append(buf.clone())
351
+
352
+ # set new update to vars
353
+ vars.update = exp_avg_list
354
+ return vars
355
+ ```
356
+
357
+ There are a some specialized base modules.
358
+
359
+ * `GradApproximator` for gradient approximations
360
+ * `LineSearch` for line searches
361
+ * `Preconditioner` for gradient preconditioners
362
+ * `QuasiNewtonH` for full-matrix quasi-newton methods that update hessian inverse approximation (because they are all very similar)
363
+ * `ConguateGradientBase` for conjugate gradient methods, basically the only difference is how beta is calculated.
364
+
365
+ ## License
366
+
367
+ This project is licensed under the MIT License
368
+
369
+ ## Project Links
370
+
371
+ TODO (there are docs but from very old version)
372
+ <!-- * **Homepage**: `https://torchzero.github.io/torchzero/` (Placeholder - update if available)
373
+ * **Repository**: `https://github.com/torchzero/torchzero` (Assuming this is the correct path) -->
374
+
375
+ ## Other stuff
376
+
377
+ There are also wrappers providing `torch.optim.Optimizer` interface for `scipy.optimize`, NLOpt and Nevergrad.
378
+
379
+ They are in `torchzero.optim.wrappers.scipy.ScipyMinimize`, `torchzero.optim.wrappers.nlopt.NLOptOptimizer`, and `torchzero.optim.wrappers.nevergrad.NevergradOptimizer`. Make sure closure has `backward` argument as described in **Advanced Usage**.
@@ -0,0 +1,128 @@
1
+ docs/source/conf.py,sha256=jd80ZT2IdCx7nlQrpOTJL8UhGBNm6KYyXlpp0jmRiAw,1849
2
+ tests/test_identical.py,sha256=NZ7A8Rm1U9Q16d-cG2G_wccpPtNALyoKYJt9qMownMc,11568
3
+ tests/test_module.py,sha256=qX3rjdSJsbA8JO17bPTUIDspe7bg2dogqxMw__KV7SU,2039
4
+ tests/test_opts.py,sha256=_SX23TW1PBsnJUbVNFdxqEXvyEByuTrK6eU95rrM5XU,41126
5
+ tests/test_tensorlist.py,sha256=6JTbhvABzXLpbYD-1m3YyPk_KHREMEOTSg4gGpJLuNc,72427
6
+ tests/test_utils_optimizer.py,sha256=bvC0Ehvs2L8fohpyIF5Vfr9OKTycpnODWLPflXilU1c,8414
7
+ tests/test_vars.py,sha256=3p9dsHk7SJpMd-WRD0ziBNq5FEHRBJGSxbMLD8ES4J0,6815
8
+ torchzero/__init__.py,sha256=L7IJ1qZ3o8E9oRwlJZBK2_2yII_eeGEk57Of6EfVbrk,112
9
+ torchzero/core/__init__.py,sha256=2JRyeGZprTexAeEPQOIl9fLFGBwzvya-AwKyt7XAmGQ,210
10
+ torchzero/core/module.py,sha256=Razw3c71Kfegznm0vQxsii1KuTUCPBC9UGyq2v-KX4M,27568
11
+ torchzero/core/preconditioner.py,sha256=rMYusKbaypm5K0Ii9VdjKhxi2YWNQbBk9f6AV_MJulY,6191
12
+ torchzero/core/transform.py,sha256=ajNJcX45ds-_lc5CqxgLfEFGil6_BYLerB0WvoTi8rM,10303
13
+ torchzero/modules/__init__.py,sha256=BDeyuSd2s1WFUUXIo3tGTNp4aYp4A2B94cydpPW24nY,332
14
+ torchzero/modules/functional.py,sha256=HXNzmPe7LsPadryEm7zrcEKqGej16QDwSgBkbEvggFM,6492
15
+ torchzero/modules/clipping/__init__.py,sha256=ZaffMF7mIRK6hZSfuZadgjNTX6hF5ANiLBny2w3S7I8,250
16
+ torchzero/modules/clipping/clipping.py,sha256=I-5utyrqdKtF5yaH-9m2F3UqdfpPmA2bSSFUAZ_d60Q,12544
17
+ torchzero/modules/clipping/ema_clipping.py,sha256=pLeNuEBLpJ74io2sHn_ZVYaQ6ydEfhpVfVEX2bFttd0,5947
18
+ torchzero/modules/clipping/growth_clipping.py,sha256=OD-kdia2Rn-DvYlYV6EZlGPDVTh9tj-W9mpiZPc3hOQ,6772
19
+ torchzero/modules/experimental/__init__.py,sha256=sJ6URgX35P3zJ2ugBKgAcwBWmdBmAPDW3vXHQ0sK-ro,443
20
+ torchzero/modules/experimental/absoap.py,sha256=XUHr5SeLdhLW2kMvWea5xAqZeuJBDQoO4zprDxs4bgU,13317
21
+ torchzero/modules/experimental/adadam.py,sha256=W7rRXYJ9tGrzqD_FdFX00HBLuWOEr2tHtfshf6lDFYE,4049
22
+ torchzero/modules/experimental/adamY.py,sha256=FoSn-qMI5_BdqZH10WGKkl-zYTPESBdGZ9lfhyqnbB0,4591
23
+ torchzero/modules/experimental/adasoap.py,sha256=07gPdEdBIKtmdmSzTGtTO0c2ZkS_otVLufQ76okBjHY,11239
24
+ torchzero/modules/experimental/algebraic_newton.py,sha256=_XFYR6bdHWgA5bozxc9AJYteBIAnHrSLgo_bSaZ13eg,5193
25
+ torchzero/modules/experimental/curveball.py,sha256=Nw9jtSp5QNj7-FN3qshjYEDHc68LwRLha-Co78mfR5w,3242
26
+ torchzero/modules/experimental/dsoap.py,sha256=BEZDw3_n5VDhu7VLgkoSN4rI9JeBdGoO9gFZfqsh74M,10983
27
+ torchzero/modules/experimental/gradmin.py,sha256=55dpBDNyrYJusluFhw-v1BXuj1UxER7pNEPTtwYKD4E,3648
28
+ torchzero/modules/experimental/reduce_outward_lr.py,sha256=kjtRwepBGBca77ToM-lw3b8ywptMtmSdC_jQfjJAwlY,1184
29
+ torchzero/modules/experimental/spectral.py,sha256=D3_nCI8teFirCdnnLprNnZ3G1gsOB6RUBWCeDbwi7P0,12043
30
+ torchzero/modules/experimental/subspace_preconditioners.py,sha256=4SRJOyTG-fJCGunHR62aRrzw3qFmeI6fRQAYHIadhWw,4682
31
+ torchzero/modules/experimental/tropical_newton.py,sha256=uq66ouhgrgc8iYGozDQ3_rtbubj8rKRwb1jfcdnlpHg,4903
32
+ torchzero/modules/grad_approximation/__init__.py,sha256=DVFjf0cXuF70NA0nJ2WklpP01PQgrRZxUjUQjjQeSos,195
33
+ torchzero/modules/grad_approximation/fdm.py,sha256=2PNNBIMup1xlOwLFAwAS3xAVd-7GGVyerMeKH1ug9LQ,3591
34
+ torchzero/modules/grad_approximation/forward_gradient.py,sha256=Kb8RNGAIb2tKzgofnEn4pQjS7TPq824B_P14idyy8e0,3564
35
+ torchzero/modules/grad_approximation/grad_approximator.py,sha256=Pa1Lv52T7WawUJUUA3IHm7mVypBQXLbjc5_15FkVwnQ,2938
36
+ torchzero/modules/grad_approximation/rfdm.py,sha256=s7OSMFnIEr43WKCT0TXdgzz_6odOkRN0BcKWkFbbPAE,10189
37
+ torchzero/modules/line_search/__init__.py,sha256=nkOUPLe88wE91ICEhprl2pJsvaKtbI3KzYOdT83AGsg,253
38
+ torchzero/modules/line_search/backtracking.py,sha256=FG_-KAN9whvBNZyhDa5-ta46IQFm8hagVvaPTXCCV88,6307
39
+ torchzero/modules/line_search/line_search.py,sha256=4z0fHJAGAZT2IVAOUxZetAszPtNuXfXdFzs1_WUWT2c,7296
40
+ torchzero/modules/line_search/scipy.py,sha256=7tfxXT8RAIHpRv-e5w9C8RNvkvgwgxHZaWI25RjTYy0,1156
41
+ torchzero/modules/line_search/strong_wolfe.py,sha256=Y6UXd2Br30YWta1phZx1wiSsFQC6wbgmvOpVITcmJpw,7504
42
+ torchzero/modules/line_search/trust_region.py,sha256=_zOje00BLvIMi0d5H9qZavqf3MWeB48Q-WosgXu3Ef4,2349
43
+ torchzero/modules/lr/__init__.py,sha256=pNxbBUGzDp24O6g7pu1bRg1tzh4eh-mSxVbhOItKHpc,90
44
+ torchzero/modules/lr/lr.py,sha256=wlubixzgxnm4ucyiEtGWzQOskaLXLInvSaR0sGKxto8,2161
45
+ torchzero/modules/lr/step_size.py,sha256=0HWYAYhVqWCCYe_-guBnMaOpqLbsMm4-F6bRFjltBsc,4036
46
+ torchzero/modules/momentum/__init__.py,sha256=pSD7vxu8PySrYOSHQMi3C9heYdcQr8y6WC_rwMybZm0,544
47
+ torchzero/modules/momentum/averaging.py,sha256=hyH5jzvYTbB1Vcjx0j_v4dtPp54GUUDOZYVDADGjcfE,2672
48
+ torchzero/modules/momentum/cautious.py,sha256=QCoBXpYcIUOrgY6XXHA30m0-MVy7iGCGxZGFLyDwqkc,5841
49
+ torchzero/modules/momentum/ema.py,sha256=4ubPpq9TL0oQZ5_eXBwU5oRbxV3faHMEM1a_kv8vRqI,7733
50
+ torchzero/modules/momentum/experimental.py,sha256=ze9oxqxdmqRFQyVdG7iBA-hICft5mxeAM6GCTQ4ewes,6352
51
+ torchzero/modules/momentum/matrix_momentum.py,sha256=IQjCp2Kb53bCaReM7fHBil_pwH9oiH029YkWFq0OIDw,4894
52
+ torchzero/modules/momentum/momentum.py,sha256=hcmmYysGItb3b7MBBVhoODh7p4Fyit68cZzD0NUBmvA,1540
53
+ torchzero/modules/ops/__init__.py,sha256=hxMZFSXX7xvitXkuBiYykVGX3p03Xprm_QA2CMg4eW8,1601
54
+ torchzero/modules/ops/accumulate.py,sha256=YGI11YxgTWvIBq5maDRWiSA-v-FS-XoaSYPU2SSrBY8,2759
55
+ torchzero/modules/ops/binary.py,sha256=-b0yvKvfDx9-HcaaxLWzg5C6rUl24oP3OltSF-iXi6w,9731
56
+ torchzero/modules/ops/debug.py,sha256=9sJOHRMwTMaOgOi2QFwCH7g2WPF1o3oyouPJO-MQQg4,862
57
+ torchzero/modules/ops/misc.py,sha256=xdxnGbRArWBqzyufUdrCQH-mAI9utRF0zxcvWCkEfZc,16383
58
+ torchzero/modules/ops/multi.py,sha256=P7mSG0LnDMkuZNSgtpHRNgqglqksrdxITCzkhmEjqxU,5742
59
+ torchzero/modules/ops/reduce.py,sha256=xvFHZG5Wf7KxfFLkynFGBOK6xywyTXsbCasW6h2OYAU,5695
60
+ torchzero/modules/ops/split.py,sha256=fFcDnJZ-e46cx_fx_TkGlVsFYOL1Y8UAp_pUPJOOdm4,2303
61
+ torchzero/modules/ops/switch.py,sha256=5idKd9xBP-KbqZjWBcr6ZDjso8BRpTNQYJg4xKWwmng,2511
62
+ torchzero/modules/ops/unary.py,sha256=h3MXS6jydZjfFetjaBCWCUWTXdQcNKnxEC6uGS6yh3c,4794
63
+ torchzero/modules/ops/utility.py,sha256=p-mc2j1mQEMLxp4brnAnzgmK6VKbSnYd2U8vkAwTKd8,3117
64
+ torchzero/modules/optimizers/__init__.py,sha256=BbT2nhIt4p74t1cO8ziQgzqZHaLvyuleXQbccugd06M,554
65
+ torchzero/modules/optimizers/adagrad.py,sha256=1DIBJ_7gJ35qidXMK4IkHYF_37Bl9Ptl9mAgfOq6YAk,4834
66
+ torchzero/modules/optimizers/adam.py,sha256=xctnENJ9rcpv2sis4zAGPGoy-ccJC1iVl8SvBynaG50,4093
67
+ torchzero/modules/optimizers/lion.py,sha256=eceNfITCozqYob0thWbIV7AdY1yAIJMqb4GJfB8a1SA,1087
68
+ torchzero/modules/optimizers/muon.py,sha256=m3LpwD6AF7E-1v3VVPHAN8S_tPTTFKZ5RpkzKea4K4g,9598
69
+ torchzero/modules/optimizers/orthograd.py,sha256=5BLnNJTYuGUClHmlxaXZ1jNvBR4zSFDGG8nM20lZdhk,2046
70
+ torchzero/modules/optimizers/rmsprop.py,sha256=d10Y9Ck-391tVysO3xMHg3g2Pe0UEZplgebEyDYi3Z4,4333
71
+ torchzero/modules/optimizers/rprop.py,sha256=n4k5-9F3ppH0Xl-4l4vNXfqVf2r67vMPCkstUaQKPLw,10974
72
+ torchzero/modules/optimizers/shampoo.py,sha256=AHHV6d71DqKDPCg52ShWIPIRSGtWkMc1v1XwXgDG3qY,8606
73
+ torchzero/modules/optimizers/soap.py,sha256=HL1YrfiEiRMh6aW9D5UEZXBjo3yMTqnpKPHXVD8fOa8,11590
74
+ torchzero/modules/optimizers/sophia_h.py,sha256=8pSlYVm66xWplzdP8MX3MCTzzIYHsxGzDEXJKA03Zgg,4279
75
+ torchzero/modules/projections/__init__.py,sha256=OCxlh_-Tx-xpl31X03CeFJH9XveH563oEsWc8rUvX0A,196
76
+ torchzero/modules/projections/dct.py,sha256=wxaEV6dTNiOqW_n2UHX0De6mMXTKDXK6UNcMNI4Rogk,2373
77
+ torchzero/modules/projections/fft.py,sha256=OpCcEM1-A2dgk1umwRsBsvK7ObiHtsBKlkkcw0IX83Q,2961
78
+ torchzero/modules/projections/galore.py,sha256=c9CZ0kHxpKEoyfc_lnmeHOkNp55jCppb7onN5YmWnN8,242
79
+ torchzero/modules/projections/projection.py,sha256=tvUBZ4XGY1GkOg6jrKS7FvpIpjUc2FJL_SMRpoROT1E,9330
80
+ torchzero/modules/projections/structural.py,sha256=QaCGHmzHCXj46sM-XZ5XlYU9BnuRKI2ReR3LE8y2R4g,5740
81
+ torchzero/modules/quasi_newton/__init__.py,sha256=0iOlX73PHj9lQS3_2cJ5lyCdas904MnFfIvR8Popvzw,402
82
+ torchzero/modules/quasi_newton/cg.py,sha256=h-di1oKKP1tDoh-LogBRIRCp2UF9GA6XjEJPlX6xXf4,9322
83
+ torchzero/modules/quasi_newton/lbfgs.py,sha256=jtO5ldbx66yUWv-20c-4mvq6HhCMuomCwJK8A8bjcYA,9168
84
+ torchzero/modules/quasi_newton/lsr1.py,sha256=F_DtMQZfQSjmSLjnx4nw16AV7qCdNxT9ITQbfNFrPdM,5879
85
+ torchzero/modules/quasi_newton/olbfgs.py,sha256=2YAOXlMnPGw22sNcIMH1hmggzAXQRbN59RSPUZNKUZY,8352
86
+ torchzero/modules/quasi_newton/quasi_newton.py,sha256=jwQkzlnozIaxHW9kuDAAlME0YuQdrdZX9OZZoTmej4Q,17384
87
+ torchzero/modules/quasi_newton/experimental/__init__.py,sha256=3qpZGgdsx6wpoafWaNWx-eamRl1FuxVCWQZq8Y7Cl98,39
88
+ torchzero/modules/quasi_newton/experimental/modular_lbfgs.py,sha256=PlyuIH2pFazIR89OGTrZESt752GkbArh_Zb8mtVCOi0,10731
89
+ torchzero/modules/second_order/__init__.py,sha256=5lRmwIU53eRc1owpOZ5FMDc7u1Z48I3PDc0NyCBaJNM,113
90
+ torchzero/modules/second_order/newton.py,sha256=XNhscAuWwxOUwps3sUrxc2ExgkNFbilnAdszrCvQxFg,5845
91
+ torchzero/modules/second_order/newton_cg.py,sha256=lUVn4-ZoW3qAxqEy8i7yz_aN7sZDoQChd-A_Ubrz-Ag,2871
92
+ torchzero/modules/second_order/nystrom.py,sha256=ZyCWrde-_-Ednj46jafuvBOzG3nC-3cPYGr-HytZbsE,6073
93
+ torchzero/modules/smoothing/__init__.py,sha256=tUTGN0A-EQC7xuLV2AuHFWk-t7D6jIJlpV_3qyfRqLk,80
94
+ torchzero/modules/smoothing/gaussian.py,sha256=YlT_G4MqAVkiWG56RHAwgt5SSPISpvQZQbSLh8mhF3I,6153
95
+ torchzero/modules/smoothing/laplacian.py,sha256=Bfrs7D59SfdU7j-97UBKD1hs0obC-ZgjJvG7oKwaa0o,5065
96
+ torchzero/modules/weight_decay/__init__.py,sha256=VdJfEx3uk8wYGCpMjYSeudXyGX8ONqsQYoBCE3cdM1U,72
97
+ torchzero/modules/weight_decay/weight_decay.py,sha256=p6jGD3hgC_rmZXiWYr7_IZWHMdVJJaT_bcHHzcdXSxU,1912
98
+ torchzero/modules/wrappers/__init__.py,sha256=6b5Ac-8u18IVp_Jnw1T1xQExwpQhpQ0JwNV9GyC_Yj8,31
99
+ torchzero/modules/wrappers/optim_wrapper.py,sha256=mcoQCUJwpMJuCDv03nDa0jZIb3Y0CyaeE1kNcJQozfo,3582
100
+ torchzero/optim/__init__.py,sha256=aXf7EkywqYiR50I4QeeVXro9aBhKiqfbY_BCia59sgU,46
101
+ torchzero/optim/utility/__init__.py,sha256=pUacok4XmebfxofE-QWZLgViajsU-3JkXcWi9OS-Jrw,24
102
+ torchzero/optim/utility/split.py,sha256=ZbazNuMTYunm75V_5ard0A_LletGaYAg-Pm2rANJKrE,1610
103
+ torchzero/optim/wrappers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
104
+ torchzero/optim/wrappers/nevergrad.py,sha256=2jHWQiWGjaffAqhJotMwOt03OtW-L57p8OesD2gVVow,3949
105
+ torchzero/optim/wrappers/nlopt.py,sha256=ZoHBf51OhwgAaExxmoFtvP8GqO9uBHdEsc4HLm0wcic,7588
106
+ torchzero/optim/wrappers/scipy.py,sha256=0BNBlHCbeTslXkXhnKvhuvJfNO7_CHFa2AXruYySnzM,14561
107
+ torchzero/utils/__init__.py,sha256=By___ngB1bcnrSZiJanvtKk8QFrPmLRhTOrkFYP2MU4,929
108
+ torchzero/utils/compile.py,sha256=N8AWLv_7oBUHYornmvvx_L4uynjiD-x5Hj1tBwei3-w,5127
109
+ torchzero/utils/derivatives.py,sha256=S4Vh2cwE2h6yvhqu799AjR4GVHOEg7yApH3SataKxnA,16881
110
+ torchzero/utils/numberlist.py,sha256=cbG0UsSb9WCRxVhw8sd7Yf0bDy_gSqtghiJtkUxIO6U,6139
111
+ torchzero/utils/ops.py,sha256=n4Su1sbgTzlHczuPEHkuWenTtNBCa_MvlQ_hCZkIPnQ,314
112
+ torchzero/utils/optimizer.py,sha256=-vuOZNu4luSZA5YtwC_7s-G2FvHKnM2k5KqC6bC_hcM,13097
113
+ torchzero/utils/optuna_tools.py,sha256=F-1Xg0n_29MVEb6lqgUFFNIl9BNJ6MOdIJPduoNH4JU,1325
114
+ torchzero/utils/params.py,sha256=nQo270aOURU7rJ_D102y2pSXbzhJPK0Z_ehx4mZBMes,5784
115
+ torchzero/utils/python_tools.py,sha256=RFBqNj8w52dpJ983pUPPDbg2x1MX_-SsBnBMffWGGIk,2066
116
+ torchzero/utils/tensorlist.py,sha256=qSbiliVo1euFAksdHHHRbPUdYYxfkw1dvhpXj71wGy0,53162
117
+ torchzero/utils/torch_tools.py,sha256=ohqnnZRlqdfp5PAfMSbQDIEKygW0_ARjxSEBp3Zo9nU,4756
118
+ torchzero/utils/linalg/__init__.py,sha256=Dzbho3_z7JDdKzYD-QdLArg0ZEoC2BVGdlE3JoAnXHQ,272
119
+ torchzero/utils/linalg/matrix_funcs.py,sha256=-LecWrPWbJvfeCgIzUhfWARa2aSZvJ12lHX7Jno38O4,3099
120
+ torchzero/utils/linalg/orthogonalize.py,sha256=mDCkET7qgDZqf_y6oPYAK3d2L5HrB8gzOFPl0YoONaY,399
121
+ torchzero/utils/linalg/qr.py,sha256=L-RXuYV-SIHI-Llq4y1rQ_Tz-yamds0_QNZeHapbjNE,2507
122
+ torchzero/utils/linalg/solve.py,sha256=hN450ONzAirYOvWF2g0E0Wy2n1bCw4X-KXWi6p4jvDM,5136
123
+ torchzero/utils/linalg/svd.py,sha256=wBxl-JSciINV-N6zvM4SGdveqMr6idq51h68LyQQRYg,660
124
+ torchzero-0.3.1.dist-info/licenses/LICENSE,sha256=r9ZciAoZoqKC_FNADE0ORukj1p1XhLXEbegdsAyqhJs,1087
125
+ torchzero-0.3.1.dist-info/METADATA,sha256=qj20d3Y50kCnIoeS_B5KFq_PQSclE3dqH5XPtKbx-wU,16267
126
+ torchzero-0.3.1.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
127
+ torchzero-0.3.1.dist-info/top_level.txt,sha256=YDdpIOb7HyKV9THOtOYsFFMTbxvCO0kiol4-83tDj-A,21
128
+ torchzero-0.3.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.0)
2
+ Generator: setuptools (80.8.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -0,0 +1,3 @@
1
+ docs
2
+ tests
3
+ torchzero