torchzero 0.3.11__py3-none-any.whl → 0.3.14__py3-none-any.whl
- tests/test_opts.py +95 -76
- tests/test_tensorlist.py +8 -7
- torchzero/__init__.py +1 -1
- torchzero/core/__init__.py +2 -2
- torchzero/core/module.py +229 -72
- torchzero/core/reformulation.py +65 -0
- torchzero/core/transform.py +44 -24
- torchzero/modules/__init__.py +13 -5
- torchzero/modules/{optimizers → adaptive}/__init__.py +5 -2
- torchzero/modules/adaptive/adagrad.py +356 -0
- torchzero/modules/{optimizers → adaptive}/adahessian.py +53 -52
- torchzero/modules/{optimizers → adaptive}/adam.py +0 -3
- torchzero/modules/{optimizers → adaptive}/adan.py +26 -40
- torchzero/modules/{optimizers → adaptive}/adaptive_heavyball.py +3 -6
- torchzero/modules/adaptive/aegd.py +54 -0
- torchzero/modules/{optimizers → adaptive}/esgd.py +1 -1
- torchzero/modules/{optimizers/ladagrad.py → adaptive/lmadagrad.py} +42 -39
- torchzero/modules/{optimizers → adaptive}/mars.py +24 -36
- torchzero/modules/adaptive/matrix_momentum.py +146 -0
- torchzero/modules/{optimizers → adaptive}/msam.py +14 -12
- torchzero/modules/{optimizers → adaptive}/muon.py +19 -20
- torchzero/modules/adaptive/natural_gradient.py +175 -0
- torchzero/modules/{optimizers → adaptive}/rprop.py +0 -2
- torchzero/modules/{optimizers → adaptive}/sam.py +1 -1
- torchzero/modules/{optimizers → adaptive}/shampoo.py +8 -4
- torchzero/modules/{optimizers → adaptive}/soap.py +27 -50
- torchzero/modules/{optimizers → adaptive}/sophia_h.py +2 -3
- torchzero/modules/clipping/clipping.py +85 -92
- torchzero/modules/clipping/ema_clipping.py +5 -5
- torchzero/modules/conjugate_gradient/__init__.py +11 -0
- torchzero/modules/{quasi_newton → conjugate_gradient}/cg.py +355 -369
- torchzero/modules/experimental/__init__.py +9 -32
- torchzero/modules/experimental/dct.py +2 -2
- torchzero/modules/experimental/fft.py +2 -2
- torchzero/modules/experimental/gradmin.py +4 -3
- torchzero/modules/experimental/l_infinity.py +111 -0
- torchzero/modules/{momentum/experimental.py → experimental/momentum.py} +3 -40
- torchzero/modules/experimental/newton_solver.py +79 -17
- torchzero/modules/experimental/newtonnewton.py +27 -14
- torchzero/modules/experimental/scipy_newton_cg.py +105 -0
- torchzero/modules/experimental/spsa1.py +93 -0
- torchzero/modules/experimental/structural_projections.py +1 -1
- torchzero/modules/functional.py +50 -14
- torchzero/modules/grad_approximation/__init__.py +1 -1
- torchzero/modules/grad_approximation/fdm.py +19 -20
- torchzero/modules/grad_approximation/forward_gradient.py +6 -7
- torchzero/modules/grad_approximation/grad_approximator.py +43 -47
- torchzero/modules/grad_approximation/rfdm.py +114 -175
- torchzero/modules/higher_order/__init__.py +1 -1
- torchzero/modules/higher_order/higher_order_newton.py +31 -23
- torchzero/modules/least_squares/__init__.py +1 -0
- torchzero/modules/least_squares/gn.py +161 -0
- torchzero/modules/line_search/__init__.py +2 -2
- torchzero/modules/line_search/_polyinterp.py +289 -0
- torchzero/modules/line_search/adaptive.py +69 -44
- torchzero/modules/line_search/backtracking.py +83 -70
- torchzero/modules/line_search/line_search.py +159 -68
- torchzero/modules/line_search/scipy.py +16 -4
- torchzero/modules/line_search/strong_wolfe.py +319 -220
- torchzero/modules/misc/__init__.py +8 -0
- torchzero/modules/misc/debug.py +4 -4
- torchzero/modules/misc/escape.py +9 -7
- torchzero/modules/misc/gradient_accumulation.py +88 -22
- torchzero/modules/misc/homotopy.py +59 -0
- torchzero/modules/misc/misc.py +82 -15
- torchzero/modules/misc/multistep.py +47 -11
- torchzero/modules/misc/regularization.py +5 -9
- torchzero/modules/misc/split.py +55 -35
- torchzero/modules/misc/switch.py +1 -1
- torchzero/modules/momentum/__init__.py +1 -5
- torchzero/modules/momentum/averaging.py +3 -3
- torchzero/modules/momentum/cautious.py +42 -47
- torchzero/modules/momentum/momentum.py +35 -1
- torchzero/modules/ops/__init__.py +9 -1
- torchzero/modules/ops/binary.py +9 -8
- torchzero/modules/{momentum/ema.py → ops/higher_level.py} +10 -33
- torchzero/modules/ops/multi.py +15 -15
- torchzero/modules/ops/reduce.py +1 -1
- torchzero/modules/ops/utility.py +12 -8
- torchzero/modules/projections/projection.py +4 -4
- torchzero/modules/quasi_newton/__init__.py +1 -16
- torchzero/modules/quasi_newton/damping.py +105 -0
- torchzero/modules/quasi_newton/diagonal_quasi_newton.py +167 -163
- torchzero/modules/quasi_newton/lbfgs.py +256 -200
- torchzero/modules/quasi_newton/lsr1.py +167 -132
- torchzero/modules/quasi_newton/quasi_newton.py +346 -446
- torchzero/modules/restarts/__init__.py +7 -0
- torchzero/modules/restarts/restars.py +253 -0
- torchzero/modules/second_order/__init__.py +2 -1
- torchzero/modules/second_order/multipoint.py +238 -0
- torchzero/modules/second_order/newton.py +133 -88
- torchzero/modules/second_order/newton_cg.py +207 -170
- torchzero/modules/smoothing/__init__.py +1 -1
- torchzero/modules/smoothing/sampling.py +300 -0
- torchzero/modules/step_size/__init__.py +1 -1
- torchzero/modules/step_size/adaptive.py +312 -47
- torchzero/modules/termination/__init__.py +14 -0
- torchzero/modules/termination/termination.py +207 -0
- torchzero/modules/trust_region/__init__.py +5 -0
- torchzero/modules/trust_region/cubic_regularization.py +170 -0
- torchzero/modules/trust_region/dogleg.py +92 -0
- torchzero/modules/trust_region/levenberg_marquardt.py +128 -0
- torchzero/modules/trust_region/trust_cg.py +99 -0
- torchzero/modules/trust_region/trust_region.py +350 -0
- torchzero/modules/variance_reduction/__init__.py +1 -0
- torchzero/modules/variance_reduction/svrg.py +208 -0
- torchzero/modules/weight_decay/weight_decay.py +65 -64
- torchzero/modules/zeroth_order/__init__.py +1 -0
- torchzero/modules/zeroth_order/cd.py +122 -0
- torchzero/optim/root.py +65 -0
- torchzero/optim/utility/split.py +8 -8
- torchzero/optim/wrappers/directsearch.py +0 -1
- torchzero/optim/wrappers/fcmaes.py +3 -2
- torchzero/optim/wrappers/nlopt.py +0 -2
- torchzero/optim/wrappers/optuna.py +2 -2
- torchzero/optim/wrappers/scipy.py +81 -22
- torchzero/utils/__init__.py +40 -4
- torchzero/utils/compile.py +1 -1
- torchzero/utils/derivatives.py +123 -111
- torchzero/utils/linalg/__init__.py +9 -2
- torchzero/utils/linalg/linear_operator.py +329 -0
- torchzero/utils/linalg/matrix_funcs.py +2 -2
- torchzero/utils/linalg/orthogonalize.py +2 -1
- torchzero/utils/linalg/qr.py +2 -2
- torchzero/utils/linalg/solve.py +226 -154
- torchzero/utils/metrics.py +83 -0
- torchzero/utils/optimizer.py +2 -2
- torchzero/utils/python_tools.py +7 -0
- torchzero/utils/tensorlist.py +105 -34
- torchzero/utils/torch_tools.py +9 -4
- torchzero-0.3.14.dist-info/METADATA +14 -0
- torchzero-0.3.14.dist-info/RECORD +167 -0
- {torchzero-0.3.11.dist-info → torchzero-0.3.14.dist-info}/top_level.txt +0 -1
- docs/source/conf.py +0 -59
- docs/source/docstring template.py +0 -46
- torchzero/modules/experimental/absoap.py +0 -253
- torchzero/modules/experimental/adadam.py +0 -118
- torchzero/modules/experimental/adamY.py +0 -131
- torchzero/modules/experimental/adam_lambertw.py +0 -149
- torchzero/modules/experimental/adaptive_step_size.py +0 -90
- torchzero/modules/experimental/adasoap.py +0 -177
- torchzero/modules/experimental/cosine.py +0 -214
- torchzero/modules/experimental/cubic_adam.py +0 -97
- torchzero/modules/experimental/eigendescent.py +0 -120
- torchzero/modules/experimental/etf.py +0 -195
- torchzero/modules/experimental/exp_adam.py +0 -113
- torchzero/modules/experimental/expanded_lbfgs.py +0 -141
- torchzero/modules/experimental/hnewton.py +0 -85
- torchzero/modules/experimental/modular_lbfgs.py +0 -265
- torchzero/modules/experimental/parabolic_search.py +0 -220
- torchzero/modules/experimental/subspace_preconditioners.py +0 -145
- torchzero/modules/experimental/tensor_adagrad.py +0 -42
- torchzero/modules/line_search/polynomial.py +0 -233
- torchzero/modules/momentum/matrix_momentum.py +0 -193
- torchzero/modules/optimizers/adagrad.py +0 -165
- torchzero/modules/quasi_newton/trust_region.py +0 -397
- torchzero/modules/smoothing/gaussian.py +0 -198
- torchzero-0.3.11.dist-info/METADATA +0 -404
- torchzero-0.3.11.dist-info/RECORD +0 -159
- torchzero-0.3.11.dist-info/licenses/LICENSE +0 -21
- /torchzero/modules/{optimizers → adaptive}/lion.py +0 -0
- /torchzero/modules/{optimizers → adaptive}/orthograd.py +0 -0
- /torchzero/modules/{optimizers → adaptive}/rmsprop.py +0 -0
- {torchzero-0.3.11.dist-info → torchzero-0.3.14.dist-info}/WHEEL +0 -0
torchzero/utils/__init__.py
CHANGED

@@ -1,5 +1,11 @@
 from . import tensorlist as tl
-from .compile import …
+from .compile import (
+    _optional_compiler,
+    benchmark_compile_cpu,
+    benchmark_compile_cuda,
+    enable_compilation,
+    set_compilation,
+)
 from .numberlist import NumberList
 from .optimizer import (
     Init,
@@ -18,6 +24,36 @@ from .params import (
     _copy_param_groups,
     _make_param_groups,
 )
-from .python_tools import …
-…
-…
+from .python_tools import (
+    flatten,
+    generic_eq,
+    generic_ne,
+    reduce_dim,
+    safe_dict_update_,
+    unpack_dicts,
+)
+from .tensorlist import (
+    Distributions,
+    Metrics,
+    TensorList,
+    as_tensorlist,
+    generic_clamp,
+    generic_finfo,
+    generic_finfo_eps,
+    generic_finfo_tiny,
+    generic_max,
+    generic_numel,
+    generic_randn_like,
+    generic_sum,
+    generic_vector_norm,
+    generic_zeros_like,
+)
+from .torch_tools import (
+    set_storage_,
+    tofloat,
+    tolist,
+    tonumpy,
+    totensor,
+    vec_to_tensors,
+    vec_to_tensors_,
+)
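The reorganized `torchzero/utils/__init__.py` makes a number of submodule names available as package-level re-exports. A minimal sketch of the resulting import surface, assuming torchzero 0.3.14 is installed (all names taken from the import blocks in the diff above):

```python
# These names are re-exported at the torchzero.utils level in 0.3.14.
from torchzero.utils import (
    TensorList,       # container type from .tensorlist
    as_tensorlist,    # coercion helper from .tensorlist
    set_compilation,  # compile toggle from .compile
    vec_to_tensors,   # vector/tensor-list conversion from .torch_tools
)

print(TensorList, as_tensorlist, set_compilation, vec_to_tensors)
```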
torchzero/utils/compile.py
CHANGED

@@ -38,7 +38,7 @@ class _MaybeCompiledFunc:
 _optional_compiler = _OptionalCompiler()
 """this holds .enable attribute, set to True to enable compiling for a few functions that benefit from it."""
 
-def set_compilation(enable: bool):
+def set_compilation(enable: bool=True):
     """`enable` is False by default. When True, certain functions will be compiled, which may not work on some systems like Windows, but it usually improves performance."""
     _optional_compiler.enable = enable
 
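The only change here is the default argument: `set_compilation` can now be called with no arguments to opt in. Note the docstring still says `enable` is False by default, which no longer matches the new `enable: bool=True` signature. A minimal usage sketch, assuming torchzero 0.3.14:

```python
from torchzero.utils.compile import set_compilation

set_compilation()       # new in 0.3.14: a bare call enables compilation
set_compilation(False)  # explicit opt-out, e.g. on systems where compilation fails
```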
torchzero/utils/derivatives.py
CHANGED

@@ -2,7 +2,6 @@ from collections.abc import Iterable, Sequence
 
 import torch
 import torch.autograd.forward_ad as fwAD
-from typing import Literal
 
 from .torch_tools import swap_tensors_no_use_count_check, vec_to_tensors
 
@@ -35,10 +34,27 @@ def _jacobian_batched(output: Sequence[torch.Tensor], wrt: Sequence[torch.Tensor
         is_grads_batched=True,
     )
 
+def flatten_jacobian(jacs: Sequence[torch.Tensor]) -> torch.Tensor:
+    """Converts the output of jacobian_wrt (a list of tensors) into a single 2D matrix.
+
+    Args:
+        jacs (Sequence[torch.Tensor]):
+            output from jacobian_wrt where each tensor has the shape `(*output.shape, *wrt[i].shape)`.
+
+    Returns:
+        torch.Tensor: has the shape `(output.ndim, wrt.ndim)`.
+    """
+    if not jacs:
+        return torch.empty(0, 0)
+
+    n_out = jacs[0].shape[0]
+    return torch.cat([j.reshape(n_out, -1) for j in jacs], dim=1)
+
+
 def jacobian_wrt(output: Sequence[torch.Tensor], wrt: Sequence[torch.Tensor], create_graph=False, batched=True) -> Sequence[torch.Tensor]:
     """Calculate jacobian of a sequence of tensors w.r.t another sequence of tensors.
     Returns a sequence of tensors with the length as `wrt`.
-    Each tensor will have the shape `(*…
+    Each tensor will have the shape `(*output.shape, *wrt[i].shape)`.
 
     Args:
         input (Sequence[torch.Tensor]): input sequence of tensors.
@@ -75,10 +91,10 @@ def jacobian_and_hessian_wrt(output: Sequence[torch.Tensor], wrt: Sequence[torch
     return jac, jacobian_wrt(jac, wrt, batched = batched, create_graph=create_graph)
 
 
-def hessian_list_to_mat(hessians: Sequence[torch.Tensor]):
-…
-…
-…
+# def hessian_list_to_mat(hessians: Sequence[torch.Tensor]):
+#     """takes output of `hessian` and returns the 2D hessian matrix.
+#     Note - I only tested this for cases where input is a scalar."""
+#     return torch.cat([h.reshape(h.size(0), h[1].numel()) for h in hessians], 1)
 
 def jacobian_and_hessian_mat_wrt(output: Sequence[torch.Tensor], wrt: Sequence[torch.Tensor], create_graph=False, batched=True):
     """Calculate jacobian and hessian of a sequence of tensors w.r.t another sequence of tensors.
@@ -98,7 +114,7 @@ def jacobian_and_hessian_mat_wrt(output: Sequence[torch.Tensor], wrt: Sequence[t
     """
     jac = jacobian_wrt(output, wrt, create_graph=True, batched = batched)
     H_list = jacobian_wrt(jac, wrt, batched = batched, create_graph=create_graph)
-    return …
+    return flatten_jacobian(jac), flatten_jacobian(H_list)
 
 def hessian(
     fn,
@@ -115,19 +131,18 @@ def hessian(
     `vectorize` and `outer_jacobian_strategy` are only for `method = "torch.autograd"`, refer to its documentation.
 
     Example:
-…
-…
-…
-…
-        y = torch.randn(10, 2)
+        ```python
+        model = nn.Linear(4, 2) # (2, 4) weight and (2, ) bias
+        X = torch.randn(10, 4)
+        y = torch.randn(10, 2)
 
-…
-…
-…
-…
-…
-        hessian_mat(fn, model.parameters()) # list of two lists of two lists of 3D and 4D tensors
+        def fn():
+            y_hat = model(X)
+            loss = F.mse_loss(y_hat, y)
+            return loss
 
+        hessian_mat(fn, model.parameters()) # list of two lists of two lists of 3D and 4D tensors
+        ```
 
     """
     params = list(params)
@@ -165,19 +180,18 @@ def hessian_mat(
     `vectorize` and `outer_jacobian_strategy` are only for `method = "torch.autograd"`, refer to its documentation.
 
     Example:
-…
-…
-…
-…
-        y = torch.randn(10, 2)
+        ```python
+        model = nn.Linear(4, 2) # 10 parameters in total
+        X = torch.randn(10, 4)
+        y = torch.randn(10, 2)
 
-…
-…
-…
-…
-…
-        hessian_mat(fn, model.parameters()) # 10x10 tensor
+        def fn():
+            y_hat = model(X)
+            loss = F.mse_loss(y_hat, y)
+            return loss
 
+        hessian_mat(fn, model.parameters()) # 10x10 tensor
+        ```
 
     """
     params = list(params)
@@ -206,21 +220,20 @@ def jvp(fn, params: Iterable[torch.Tensor], tangent: Iterable[torch.Tensor]) ->
     """Jacobian vector product.
 
     Example:
-…
-…
-…
-…
-        y = torch.randn(10, 2)
-…
-        tangent = [torch.randn_like(p) for p in model.parameters()]
+        ```python
+        model = nn.Linear(4, 2)
+        X = torch.randn(10, 4)
+        y = torch.randn(10, 2)
 
-…
-        y_hat = model(X)
-        loss = F.mse_loss(y_hat, y)
-        return loss
+        tangent = [torch.randn_like(p) for p in model.parameters()]
 
-…
+        def fn():
+            y_hat = model(X)
+            loss = F.mse_loss(y_hat, y)
+            return loss
 
+        jvp(fn, model.parameters(), tangent) # scalar
+        ```
     """
     params = list(params)
     tangent = list(tangent)
@@ -253,21 +266,20 @@ def jvp_fd_central(
     """Jacobian vector product using central finite difference formula.
 
     Example:
-…
-…
-…
-…
-        y = torch.randn(10, 2)
-…
-        tangent = [torch.randn_like(p) for p in model.parameters()]
+        ```python
+        model = nn.Linear(4, 2)
+        X = torch.randn(10, 4)
+        y = torch.randn(10, 2)
 
-…
-        y_hat = model(X)
-        loss = F.mse_loss(y_hat, y)
-        return loss
+        tangent = [torch.randn_like(p) for p in model.parameters()]
 
-…
+        def fn():
+            y_hat = model(X)
+            loss = F.mse_loss(y_hat, y)
+            return loss
 
+        jvp_fd_central(fn, model.parameters(), tangent) # scalar
+        ```
     """
     params = list(params)
     tangent = list(tangent)
@@ -304,24 +316,24 @@ def jvp_fd_forward(
     Loss at initial point can be specified in the `v_0` argument.
 
     Example:
-…
+        ```python
+        model = nn.Linear(4, 2)
+        X = torch.randn(10, 4)
+        y = torch.randn(10, 2)
 
-…
-…
-        y = torch.randn(10, 2)
+        tangent1 = [torch.randn_like(p) for p in model.parameters()]
+        tangent2 = [torch.randn_like(p) for p in model.parameters()]
 
-…
-…
-…
-…
-        y_hat = model(X)
-        loss = F.mse_loss(y_hat, y)
-        return loss
+        def fn():
+            y_hat = model(X)
+            loss = F.mse_loss(y_hat, y)
+            return loss
 
-…
+        v_0 = fn() # pre-calculate loss at initial point
 
-…
-…
+        jvp1 = jvp_fd_forward(fn, model.parameters(), tangent1, v_0=v_0) # scalar
+        jvp2 = jvp_fd_forward(fn, model.parameters(), tangent2, v_0=v_0) # scalar
+        ```
 
     """
     params = list(params)
@@ -356,21 +368,21 @@ def hvp(
     """Hessian-vector product
 
    Example:
-…
-…
-…
-…
-        y = torch.randn(10, 2)
+        ```python
+        model = nn.Linear(4, 2)
+        X = torch.randn(10, 4)
+        y = torch.randn(10, 2)
 
-…
-…
-…
+        y_hat = model(X)
+        loss = F.mse_loss(y_hat, y)
+        loss.backward(create_graph=True)
 
-…
-…
+        grads = [p.grad for p in model.parameters()]
+        vec = [torch.randn_like(p) for p in model.parameters()]
 
-…
-…
+        # list of tensors, same layout as model.parameters()
+        hvp(model.parameters(), grads, vec=vec)
+        ```
     """
     params = list(params)
     g = list(grads)
@@ -393,23 +405,23 @@ def hvp_fd_central(
     Please note that this will clear :code:`grad` attributes in params.
 
     Example:
-…
-…
-…
-…
-        y = torch.randn(10, 2)
+        ```python
+        model = nn.Linear(4, 2)
+        X = torch.randn(10, 4)
+        y = torch.randn(10, 2)
 
-…
-…
-…
-…
-…
-…
+        def closure():
+            y_hat = model(X)
+            loss = F.mse_loss(y_hat, y)
+            model.zero_grad()
+            loss.backward()
+            return loss
 
-…
+        vec = [torch.randn_like(p) for p in model.parameters()]
 
-…
-…
+        # list of tensors, same layout as model.parameters()
+        hvp_fd_central(closure, model.parameters(), vec=vec)
+        ```
     """
     params = list(params)
     vec = list(vec)
@@ -456,27 +468,27 @@ def hvp_fd_forward(
     Please note that this will clear :code:`grad` attributes in params.
 
     Example:
-…
+        ```python
+        model = nn.Linear(4, 2)
+        X = torch.randn(10, 4)
+        y = torch.randn(10, 2)
 
-…
-…
-…
-…
-…
-…
-        loss = F.mse_loss(y_hat, y)
-        model.zero_grad()
-        loss.backward()
-        return loss
+        def closure():
+            y_hat = model(X)
+            loss = F.mse_loss(y_hat, y)
+            model.zero_grad()
+            loss.backward()
+            return loss
 
-…
+        vec = [torch.randn_like(p) for p in model.parameters()]
 
-…
-…
-…
+        # pre-compute gradient at initial point
+        closure()
+        g_0 = [p.grad for p in model.parameters()]
 
-…
-…
+        # list of tensors, same layout as model.parameters()
+        hvp_fd_forward(closure, model.parameters(), vec=vec, g_0=g_0)
+        ```
 
     """
 
     params = list(params)
@@ -485,7 +497,7 @@ def hvp_fd_forward(
 
     vec_norm = None
     if normalize:
-        vec_norm = torch.linalg.vector_norm(torch.cat([t.…
+        vec_norm = torch.linalg.vector_norm(torch.cat([t.ravel() for t in vec])) # pylint:disable=not-callable
         if vec_norm == 0: return None, [torch.zeros_like(p) for p in params]
         vec = torch._foreach_div(vec, vec_norm)

torchzero/utils/linalg/__init__.py
CHANGED

@@ -1,5 +1,12 @@
-from .…
+from . import linear_operator
+from .matrix_funcs import (
+    eigvals_func,
+    inv_sqrt_2x2,
+    matrix_power_eigh,
+    singular_vals_func,
+    x_inv,
+)
 from .orthogonalize import gram_schmidt
 from .qr import qr_householder
+from .solve import cg, nystrom_approximation, nystrom_sketch_and_solve
 from .svd import randomized_svd
-from .solve import cg, nystrom_approximation, nystrom_sketch_and_solve, steihaug_toint_cg
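Among the additions in `torchzero/utils/derivatives.py` above, `flatten_jacobian` has a docstring but no usage example. A minimal sketch under the conventions documented in the diff (sequence of output tensors in, one jacobian block per `wrt` tensor out), assuming torchzero 0.3.14; the toy tensors are illustrative only:

```python
import torch
from torchzero.utils.derivatives import flatten_jacobian, jacobian_wrt

a = torch.randn(3, requires_grad=True)
b = torch.randn(2, requires_grad=True)
out = torch.cat([2 * a, b.flip(0)])  # 1D output with 5 elements

# One block per wrt tensor: shapes (5, 3) and (5, 2) per the docstring
jacs = jacobian_wrt([out], [a, b])

# Concatenated into a single 2D matrix of shape (5, 3 + 2)
J = flatten_jacobian(jacs)
print(J.shape)  # torch.Size([5, 5])
```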