torchzero 0.3.15__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- tests/test_identical.py +22 -22
- tests/test_module_autograd.py +586 -0
- tests/test_objective.py +188 -0
- tests/test_opts.py +225 -214
- tests/test_tensorlist.py +0 -8
- tests/test_utils_optimizer.py +0 -1
- torchzero/__init__.py +2 -2
- torchzero/core/__init__.py +7 -4
- torchzero/core/chain.py +20 -23
- torchzero/core/functional.py +90 -24
- torchzero/core/modular.py +53 -57
- torchzero/core/module.py +132 -52
- torchzero/core/objective.py +948 -0
- torchzero/core/reformulation.py +55 -24
- torchzero/core/transform.py +261 -367
- torchzero/linalg/__init__.py +11 -0
- torchzero/linalg/eigh.py +253 -0
- torchzero/linalg/linalg_utils.py +14 -0
- torchzero/{utils/linalg → linalg}/linear_operator.py +99 -49
- torchzero/linalg/matrix_power.py +28 -0
- torchzero/linalg/orthogonalize.py +93 -0
- torchzero/{utils/linalg → linalg}/qr.py +16 -2
- torchzero/{utils/linalg → linalg}/solve.py +74 -88
- torchzero/linalg/svd.py +47 -0
- torchzero/linalg/torch_linalg.py +168 -0
- torchzero/modules/__init__.py +4 -3
- torchzero/modules/adaptive/__init__.py +11 -3
- torchzero/modules/adaptive/adagrad.py +167 -217
- torchzero/modules/adaptive/adahessian.py +76 -105
- torchzero/modules/adaptive/adam.py +53 -76
- torchzero/modules/adaptive/adan.py +50 -31
- torchzero/modules/adaptive/adaptive_heavyball.py +12 -7
- torchzero/modules/adaptive/aegd.py +12 -12
- torchzero/modules/adaptive/esgd.py +98 -119
- torchzero/modules/adaptive/ggt.py +186 -0
- torchzero/modules/adaptive/lion.py +7 -11
- torchzero/modules/adaptive/lre_optimizers.py +299 -0
- torchzero/modules/adaptive/mars.py +7 -7
- torchzero/modules/adaptive/matrix_momentum.py +48 -52
- torchzero/modules/adaptive/msam.py +71 -53
- torchzero/modules/adaptive/muon.py +67 -129
- torchzero/modules/adaptive/natural_gradient.py +63 -41
- torchzero/modules/adaptive/orthograd.py +11 -15
- torchzero/modules/adaptive/psgd/__init__.py +5 -0
- torchzero/modules/adaptive/psgd/_psgd_utils.py +37 -0
- torchzero/modules/adaptive/psgd/psgd.py +1390 -0
- torchzero/modules/adaptive/psgd/psgd_dense_newton.py +174 -0
- torchzero/modules/adaptive/psgd/psgd_kron_newton.py +203 -0
- torchzero/modules/adaptive/psgd/psgd_kron_whiten.py +185 -0
- torchzero/modules/adaptive/psgd/psgd_lra_newton.py +118 -0
- torchzero/modules/adaptive/psgd/psgd_lra_whiten.py +116 -0
- torchzero/modules/adaptive/rmsprop.py +83 -75
- torchzero/modules/adaptive/rprop.py +48 -47
- torchzero/modules/adaptive/sam.py +55 -45
- torchzero/modules/adaptive/shampoo.py +149 -130
- torchzero/modules/adaptive/soap.py +207 -143
- torchzero/modules/adaptive/sophia_h.py +106 -130
- torchzero/modules/clipping/clipping.py +22 -25
- torchzero/modules/clipping/ema_clipping.py +31 -25
- torchzero/modules/clipping/growth_clipping.py +14 -17
- torchzero/modules/conjugate_gradient/cg.py +27 -38
- torchzero/modules/experimental/__init__.py +7 -6
- torchzero/modules/experimental/adanystrom.py +258 -0
- torchzero/modules/experimental/common_directions_whiten.py +142 -0
- torchzero/modules/experimental/coordinate_momentum.py +36 -0
- torchzero/modules/experimental/cubic_adam.py +160 -0
- torchzero/modules/experimental/curveball.py +25 -41
- torchzero/modules/experimental/eigen_sr1.py +182 -0
- torchzero/modules/experimental/eigengrad.py +207 -0
- torchzero/modules/experimental/gradmin.py +2 -2
- torchzero/modules/experimental/higher_order_newton.py +14 -40
- torchzero/modules/experimental/l_infinity.py +1 -1
- torchzero/modules/experimental/matrix_nag.py +122 -0
- torchzero/modules/experimental/newton_solver.py +23 -54
- torchzero/modules/experimental/newtonnewton.py +45 -48
- torchzero/modules/experimental/reduce_outward_lr.py +7 -7
- torchzero/modules/experimental/scipy_newton_cg.py +21 -24
- torchzero/modules/experimental/spsa1.py +3 -3
- torchzero/modules/experimental/structural_projections.py +1 -4
- torchzero/modules/grad_approximation/fdm.py +2 -2
- torchzero/modules/grad_approximation/forward_gradient.py +7 -7
- torchzero/modules/grad_approximation/grad_approximator.py +23 -16
- torchzero/modules/grad_approximation/rfdm.py +24 -21
- torchzero/modules/least_squares/gn.py +121 -50
- torchzero/modules/line_search/backtracking.py +4 -4
- torchzero/modules/line_search/line_search.py +33 -33
- torchzero/modules/line_search/strong_wolfe.py +4 -4
- torchzero/modules/misc/debug.py +12 -12
- torchzero/modules/misc/escape.py +10 -10
- torchzero/modules/misc/gradient_accumulation.py +11 -79
- torchzero/modules/misc/homotopy.py +16 -8
- torchzero/modules/misc/misc.py +121 -123
- torchzero/modules/misc/multistep.py +52 -53
- torchzero/modules/misc/regularization.py +49 -44
- torchzero/modules/misc/split.py +31 -29
- torchzero/modules/misc/switch.py +37 -32
- torchzero/modules/momentum/averaging.py +14 -14
- torchzero/modules/momentum/cautious.py +37 -31
- torchzero/modules/momentum/momentum.py +12 -12
- torchzero/modules/ops/__init__.py +4 -4
- torchzero/modules/ops/accumulate.py +21 -21
- torchzero/modules/ops/binary.py +67 -66
- torchzero/modules/ops/higher_level.py +20 -20
- torchzero/modules/ops/multi.py +44 -41
- torchzero/modules/ops/reduce.py +26 -23
- torchzero/modules/ops/unary.py +53 -53
- torchzero/modules/ops/utility.py +47 -46
- torchzero/modules/{functional.py → opt_utils.py} +1 -1
- torchzero/modules/projections/galore.py +1 -1
- torchzero/modules/projections/projection.py +46 -43
- torchzero/modules/quasi_newton/__init__.py +1 -1
- torchzero/modules/quasi_newton/damping.py +2 -2
- torchzero/modules/quasi_newton/diagonal_quasi_newton.py +1 -1
- torchzero/modules/quasi_newton/lbfgs.py +10 -10
- torchzero/modules/quasi_newton/lsr1.py +10 -10
- torchzero/modules/quasi_newton/quasi_newton.py +54 -39
- torchzero/modules/quasi_newton/sg2.py +69 -205
- torchzero/modules/restarts/restars.py +39 -37
- torchzero/modules/second_order/__init__.py +2 -2
- torchzero/modules/second_order/ifn.py +31 -62
- torchzero/modules/second_order/inm.py +57 -53
- torchzero/modules/second_order/multipoint.py +40 -80
- torchzero/modules/second_order/newton.py +165 -196
- torchzero/modules/second_order/newton_cg.py +105 -157
- torchzero/modules/second_order/nystrom.py +216 -185
- torchzero/modules/second_order/rsn.py +132 -125
- torchzero/modules/smoothing/laplacian.py +13 -12
- torchzero/modules/smoothing/sampling.py +10 -10
- torchzero/modules/step_size/adaptive.py +24 -24
- torchzero/modules/step_size/lr.py +17 -17
- torchzero/modules/termination/termination.py +32 -30
- torchzero/modules/trust_region/cubic_regularization.py +3 -3
- torchzero/modules/trust_region/levenberg_marquardt.py +25 -28
- torchzero/modules/trust_region/trust_cg.py +2 -2
- torchzero/modules/trust_region/trust_region.py +27 -22
- torchzero/modules/variance_reduction/svrg.py +23 -21
- torchzero/modules/weight_decay/__init__.py +2 -1
- torchzero/modules/weight_decay/reinit.py +83 -0
- torchzero/modules/weight_decay/weight_decay.py +17 -18
- torchzero/modules/wrappers/optim_wrapper.py +14 -14
- torchzero/modules/zeroth_order/cd.py +10 -7
- torchzero/optim/mbs.py +291 -0
- torchzero/optim/root.py +3 -3
- torchzero/optim/utility/split.py +2 -1
- torchzero/optim/wrappers/directsearch.py +27 -63
- torchzero/optim/wrappers/fcmaes.py +14 -35
- torchzero/optim/wrappers/mads.py +11 -31
- torchzero/optim/wrappers/moors.py +66 -0
- torchzero/optim/wrappers/nevergrad.py +4 -13
- torchzero/optim/wrappers/nlopt.py +31 -25
- torchzero/optim/wrappers/optuna.py +8 -13
- torchzero/optim/wrappers/pybobyqa.py +124 -0
- torchzero/optim/wrappers/scipy/__init__.py +7 -0
- torchzero/optim/wrappers/scipy/basin_hopping.py +117 -0
- torchzero/optim/wrappers/scipy/brute.py +48 -0
- torchzero/optim/wrappers/scipy/differential_evolution.py +80 -0
- torchzero/optim/wrappers/scipy/direct.py +69 -0
- torchzero/optim/wrappers/scipy/dual_annealing.py +115 -0
- torchzero/optim/wrappers/scipy/experimental.py +141 -0
- torchzero/optim/wrappers/scipy/minimize.py +151 -0
- torchzero/optim/wrappers/scipy/sgho.py +111 -0
- torchzero/optim/wrappers/wrapper.py +121 -0
- torchzero/utils/__init__.py +7 -25
- torchzero/utils/benchmarks/__init__.py +0 -0
- torchzero/utils/benchmarks/logistic.py +122 -0
- torchzero/utils/compile.py +2 -2
- torchzero/utils/derivatives.py +97 -73
- torchzero/utils/optimizer.py +4 -77
- torchzero/utils/python_tools.py +31 -0
- torchzero/utils/tensorlist.py +11 -5
- torchzero/utils/thoad_tools.py +68 -0
- {torchzero-0.3.15.dist-info → torchzero-0.4.1.dist-info}/METADATA +1 -1
- torchzero-0.4.1.dist-info/RECORD +209 -0
- tests/test_vars.py +0 -185
- torchzero/core/var.py +0 -376
- torchzero/modules/adaptive/lmadagrad.py +0 -186
- torchzero/modules/experimental/momentum.py +0 -160
- torchzero/optim/wrappers/scipy.py +0 -572
- torchzero/utils/linalg/__init__.py +0 -12
- torchzero/utils/linalg/matrix_funcs.py +0 -87
- torchzero/utils/linalg/orthogonalize.py +0 -12
- torchzero/utils/linalg/svd.py +0 -20
- torchzero/utils/ops.py +0 -10
- torchzero-0.3.15.dist-info/RECORD +0 -175
- /torchzero/{utils/linalg → linalg}/benchmark.py +0 -0
- {torchzero-0.3.15.dist-info → torchzero-0.4.1.dist-info}/WHEEL +0 -0
- {torchzero-0.3.15.dist-info → torchzero-0.4.1.dist-info}/top_level.txt +0 -0
tests/test_tensorlist.py
CHANGED
@@ -1567,13 +1567,6 @@ def test_where(simple_tl: TensorList):
     assert_tl_allclose(result_module, expected_tl)
 
 
-    # Test inplace where_ (needs TensorList other)
-    tl_copy = simple_tl.clone()
-    result_inplace = tl_copy.where_(condition_tl, other_tl)
-    assert result_inplace is tl_copy
-    assert_tl_allclose(tl_copy, expected_tl)
-
-
 def test_masked_fill(simple_tl: TensorList):
     mask_tl = simple_tl.lt(0)
     fill_value_scalar = 99.0
@@ -1600,7 +1593,6 @@ def test_select_set_(simple_tl: TensorList):
     mask_tl = simple_tl.gt(0.5)
     value_scalar = -1.0
     value_list_scalar = [-1.0, -2.0, -3.0]
-    value_tl = simple_tl.clone().mul_(0.1)
 
     # Set with scalar value
     tl_copy_scalar = simple_tl.clone()
tests/test_utils_optimizer.py
CHANGED
torchzero/__init__.py
CHANGED
torchzero/core/__init__.py
CHANGED
@@ -1,5 +1,8 @@
-from .
-from .modular import Modular
+from .transform import TensorTransform, Transform
 from .module import Chainable, Module
-from .
-
+from .objective import DerivativesMethod, HessianMethod, HVPMethod, Objective
+
+# order is important to avoid circular imports
+from .modular import Optimizer
+from .functional import apply, step, step_tensors, update
+from .chain import Chain, maybe_chain
torchzero/core/chain.py
CHANGED
@@ -2,36 +2,33 @@ from collections.abc import Iterable
 
 from ..utils.python_tools import flatten
 from .module import Module, Chainable
-
+from .functional import _chain_step
 
 class Chain(Module):
-    """Chain
+    """Chain modules, mostly used internally"""
     def __init__(self, *modules: Module | Iterable[Module]):
         super().__init__()
         flat_modules: list[Module] = flatten(modules)
         for i, module in enumerate(flat_modules):
             self.set_child(f'module_{i}', module)
 
-    def update(self,
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            var = self.children[f'module_{i}'].step(var)
-            if var.stop: break
-        return var
+    def update(self, objective):
+        if len(self.children) > 1:
+            raise RuntimeError("can't call `update` on Chain with more than one child, as `update` and `apply` have to be called sequentially. Use the `step` method instead of update-apply.")
+
+        if len(self.children) == 0: return
+        return self.children['module_0'].update(objective)
+
+    def apply(self, objective):
+        if len(self.children) > 1:
+            raise RuntimeError("can't call `update` on Chain with more than one child, as `update` and `apply` have to be called sequentially. Use the `step` method instead of update-apply.")
+
+        if len(self.children) == 0: return objective
+        return self.children['module_0'].apply(objective)
+
+    def step(self, objective):
+        children = [self.children[f'module_{i}'] for i in range(len(self.children))]
+        return _chain_step(objective, children)
 
     def __repr__(self):
         s = self.__class__.__name__
@@ -41,7 +38,7 @@ class Chain(Module):
         return s
 
 def maybe_chain(*modules: Chainable) -> Module:
-    """Returns a single module directly if only one is provided, otherwise wraps them in a
+    """Returns a single module directly if only one is provided, otherwise wraps them in a ``Chain``."""
    flat_modules: list[Module] = flatten(modules)
    if len(flat_modules) == 1:
        return flat_modules[0]
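
A small composition sketch based only on the behaviour visible in this hunk: maybe_chain returns a lone module unchanged and otherwise wraps the modules in a Chain, whose step walks its children through _chain_step, while update/apply tolerate at most one child. The `modules` argument stands in for torchzero Module instances, none of which are named here:

    from torchzero.core import Chain, maybe_chain

    def chain_or_unwrap(modules):
        # `modules` is a list of torchzero Module instances (placeholders)
        combined = maybe_chain(*modules)
        # one module comes back unchanged; several are wrapped in a Chain whose
        # step() runs module_0, module_1, ... sequentially via _chain_step,
        # while update()/apply() raise if there is more than one child
        assert isinstance(combined, Chain) or len(modules) == 1
        return combined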
torchzero/core/functional.py
CHANGED
@@ -1,37 +1,103 @@
-from collections.abc import Sequence
-from typing import TYPE_CHECKING
+from collections.abc import Mapping, Sequence, Iterable, Callable
+from typing import TYPE_CHECKING, Any
+
+import torch
+
+from .objective import Objective
 
 if TYPE_CHECKING:
     from .module import Module
-    from .
+    from .transform import Transform
+
+
 
+def update(
+    objective: "Objective",
+    module: "Transform",
+    states: list[dict[str, Any]] | None = None,
+    settings: Sequence[Mapping[str, Any]] | None = None,
+) -> None:
+    if states is None:
+        assert settings is None
+        module.update(objective)
 
-
-
+    else:
+        assert settings is not None
+        module.update_states(objective, states, settings)
 
-
-
-
+def apply(
+    objective: "Objective",
+    module: "Transform",
+    states: list[dict[str, Any]] | None = None,
+    settings: Sequence[Mapping[str, Any]] | None = None,
+) -> "Objective":
+    if states is None:
+        assert settings is None
+        return module.apply(objective)
 
-
-
-
-    # n_modules = len(modules)
-    # if n_modules == 0: return var.clone(clone_update=False)
-    # last_module = modules[-1]
-    # last_lr = last_module.defaults.get('lr', None)
+    else:
+        assert settings is not None
+        return module.apply_states(objective, states, settings)
 
+def _chain_step(objective: "Objective", modules: "Sequence[Module]"):
+    """steps with ``modules`` and returns updated objective, this is used within ``step`` and within ``Chain.step``"""
     # step
     for i, module in enumerate(modules):
-        if i!=0:
+        if i!=0: objective = objective.clone(clone_updates=False)
+
+        objective = module.step(objective)
+        if objective.stop: break
+
+    return objective
+
+def step(objective: "Objective", modules: "Module | Sequence[Module]"):
+    """doesn't apply hooks!"""
+    if not isinstance(modules, Sequence):
+        modules = (modules, )
+
+    if len(modules) == 0:
+        raise RuntimeError("`modules` is an empty sequence")
+
+    # if closure is None, assume backward has been called and gather grads
+    if objective.closure is None:
+        objective.grads = [p.grad if p.grad is not None else torch.zeros_like(p) for p in objective.params]
+
+    # step and return
+    return _chain_step(objective, modules)
+
+
+def step_tensors(
+    modules: "Module | Sequence[Module]",
+    tensors: Sequence[torch.Tensor],
+    params: Iterable[torch.Tensor] | None = None,
+    grads: Sequence[torch.Tensor] | None = None,
+    loss: torch.Tensor | None = None,
+    closure: Callable | None = None,
+    objective: "Objective | None" = None
+) -> list[torch.Tensor]:
+    if objective is not None:
+        if any(i is not None for i in (params, grads, loss, closure)):
+            raise RuntimeError("Specify either `objective` or `(params, grads, loss, closure)`")
+
+    if not isinstance(modules, Sequence):
+        modules = (modules, )
+
+    # make fake params if they are only used for shapes
+    if params is None:
+        params = [t.view_as(t).requires_grad_() for t in tensors]
+
+    # create objective
+    if objective is None:
+        objective = Objective(params=params, loss=loss, closure=closure)
+
+    if grads is not None:
+        objective.grads = list(grads)
 
-
-        # if (i == n_modules - 1) or ((i == n_modules - 2) and (last_lr is not None)):
-        #     if len(module.children) != 0 or is_nested: var.nested_is_last = True
-        #     else: var.is_last = True
-        # if last_lr is not None: var.last_module_lrs = [last_module.settings[p]['lr'] for p in var.params]
+    objective.updates = list(tensors)
 
-
-
+    # step with modules
+    # this won't update parameters in-place because objective.Optimizer is None
+    objective = _chain_step(objective, modules)
 
-    return
+    # return updates
+    return objective.get_updates()
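
A hedged usage sketch of the new functional entry point, inferred only from the step_tensors signature and body above; the `module` argument is a placeholder for any torchzero Module, since this hunk names none:

    import torch
    from torchzero.core import step_tensors

    def transform_grads(module, grads: list[torch.Tensor]) -> list[torch.Tensor]:
        # step_tensors builds fake params from the tensor shapes, runs the
        # module chain on the tensors, and returns the resulting update
        # tensors; parameters are never modified in-place on this path
        return step_tensors(module, grads)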
torchzero/core/modular.py
CHANGED
@@ -1,38 +1,27 @@
 
 import warnings
-from
-from collections import
-from
-from operator import itemgetter
-from typing import TYPE_CHECKING, Any, Literal, cast, final, overload
+from collections import ChainMap
+from collections.abc import MutableMapping
+from typing import Any
 
 import torch
 
-from ..utils import
-    Init,
-    ListLike,
-    Params,
-    _make_param_groups,
-    get_state_vals,
-    vec_to_tensors,
-)
-from ..utils.derivatives import flatten_jacobian, hvp, hvp_fd_central, hvp_fd_forward
-from ..utils.linalg.linear_operator import LinearOperator
-from ..utils.python_tools import flatten
-from .module import Chainable, Module
-from .var import Var
+from ..utils.params import Params, _make_param_groups
 from .functional import step
+from .module import Chainable, Module
+from .objective import Objective
+
 
 class _EvalCounterClosure:
     """keeps track of how many times closure has been evaluated, and sets closure return"""
     __slots__ = ("modular", "closure")
-    def __init__(self, modular: "
+    def __init__(self, modular: "Optimizer", closure):
         self.modular = modular
         self.closure = closure
 
     def __call__(self, *args, **kwargs):
         if self.closure is None:
-            raise RuntimeError("
+            raise RuntimeError("closure is None in _EvalCounterClosure, and this can't happen")
 
         v = self.closure(*args, **kwargs)
 
@@ -44,22 +33,22 @@ class _EvalCounterClosure:
         return v
 
 
-def
-
+def flatten_modules(*modules: Chainable) -> list[Module]:
+    flat = []
 
     for m in modules:
         if isinstance(m, Module):
-
-
+            flat.append(m)
+            flat.extend(flatten_modules(list(m.children.values())))
         else:
-
+            flat.extend(flatten_modules(*m))
 
-    return
+    return flat
 
 
-# have to inherit from
+# have to inherit from Optimizer to support lr schedulers
 # although Accelerate doesn't work due to converting param_groups to a dict
-class
+class Optimizer(torch.optim.Optimizer):
     """Chains multiple modules into an optimizer.
 
     Args:
@@ -73,7 +62,7 @@ class Modular(torch.optim.Optimizer):
     param_groups: list[ChainMap[str, Any]] # pyright:ignore[reportIncompatibleVariableOverride]
 
     def __init__(self, params: Params | torch.nn.Module, *modules: Module):
-        if len(modules) == 0: raise RuntimeError("Empty list of modules passed to `
+        if len(modules) == 0: raise RuntimeError("Empty list of modules passed to `Optimizer`")
         self.model: torch.nn.Module | None = None
         """The model whose parameters are being optimized, if a model instance was passed to `__init__`."""
         if isinstance(params, torch.nn.Module):
@@ -83,7 +72,7 @@ class Modular(torch.optim.Optimizer):
         self.modules = modules
         """Top-level modules providedduring initialization."""
 
-        self.
+        self.flat_modules = flatten_modules(self.modules)
         """A flattened list of all modules including all children."""
 
         param_groups = _make_param_groups(params, differentiable=False)
@@ -92,7 +81,7 @@ class Modular(torch.optim.Optimizer):
         Each element in the list is ChainDict's 2nd map of a module."""
 
         # make sure there is no more than a single learning rate module
-        lr_modules = [m for m in self.
+        lr_modules = [m for m in self.flat_modules if 'lr' in m.defaults]
         if len(lr_modules) > 1:
             warnings.warn(f'multiple learning rate modules detected: {lr_modules}. This may lead to componding of learning rate multiplication with per-parameter learning rates and schedulers.')
 
@@ -100,13 +89,13 @@ class Modular(torch.optim.Optimizer):
         for group in param_groups:
             for k in group:
                 if k in ('params', 'lr'): continue
-                modules_with_k = [m for m in self.
+                modules_with_k = [m for m in self.flat_modules if k in m.defaults and k not in m._overridden_keys]
                 if len(modules_with_k) > 1:
                     warnings.warn(f'`params` has a `{k}` key, and multiple modules have that key: {modules_with_k}. If you intended to only set `{k}` to one of them, use `module.set_param_groups(params)`')
 
         # defaults for schedulers
         defaults = {}
-        for m in self.
+        for m in self.flat_modules: defaults.update(m.defaults)
         super().__init__(param_groups, defaults=defaults)
 
         # note - this is what super().__init__(param_groups, defaults=defaults) does:
@@ -146,7 +135,7 @@ class Modular(torch.optim.Optimizer):
 
         for p in proc_param_group['params']:
             # updates global per-parameter setting overrides (medium priority)
-            self._per_parameter_global_settings[p] = [m.settings[p].maps[1] for m in self.
+            self._per_parameter_global_settings[p] = [m.settings[p].maps[1] for m in self.flat_modules]
 
     def state_dict(self):
         all_params = [p for g in self.param_groups for p in g['params']]
@@ -163,7 +152,7 @@ class Modular(torch.optim.Optimizer):
             "params": all_params,
             "groups": groups,
             "defaults": self.defaults,
-            "modules": {i: m.state_dict() for i, m in enumerate(self.
+            "modules": {i: m.state_dict() for i, m in enumerate(self.flat_modules)}
         }
         return state_dict
 
@@ -183,7 +172,7 @@ class Modular(torch.optim.Optimizer):
             self.add_param_group(group)
 
         id_to_tensor = {state_dict['idx_to_id'][i]: p for i,p in enumerate(state_dict['params'])}
-        for m, sd in zip(self.
+        for m, sd in zip(self.flat_modules, state_dict['modules'].values()):
             m._load_state_dict(sd, id_to_tensor)
 
 
@@ -201,37 +190,44 @@ class Modular(torch.optim.Optimizer):
             if not p.requires_grad: continue
             for map in self._per_parameter_global_settings[p]: map.update(settings)
 
-        # create
+        # create Objective
         params = [p for g in self.param_groups for p in g['params'] if p.requires_grad]
-        var = Var(params=params, closure=_EvalCounterClosure(self, closure), model=self.model, current_step=self.current_step, modular=self, loss=loss, storage=kwargs)
 
-
-        if closure is None:
-
-            self.num_evaluations += 1
+        counter_closure = None
+        if closure is not None:
+            counter_closure = _EvalCounterClosure(self, closure)
 
-
+        objective = Objective(
+            params=params, closure=counter_closure, model=self.model,
+            current_step=self.current_step, modular=self, loss=loss, storage=kwargs
+        )
 
-        # step
-
+        # step with all modules
+        objective = step(objective, self.modules)
 
-        # apply update
-
-
-
+        # apply update to parameters unless `objective.skip_update = True`
+        # this does:
+        # if not objective.skip_update:
+        #     torch._foreach_sub_(objective.params, objective.get_updates())
+        objective.update_parameters()
 
         # update attributes
-        self.attrs.update(
-        if
-
-        # hooks
-        for hook in var.post_step_hooks:
-            hook(self, var)
+        self.attrs.update(objective.attrs)
+        if objective.should_terminate is not None:
+            self.should_terminate = objective.should_terminate
 
         self.current_step += 1
-
+
+        # apply hooks
+        # this does:
+        # for hook in objective.post_step_hooks:
+        #     hook(objective, modules)
+        objective.apply_post_step_hooks(self.modules)
+
+        # return the first closure evaluation return
+        # could return loss if it was passed but that's pointless
         return self._closure_return
 
     def __repr__(self):
-        return f'
+        return f'Optimizer({", ".join(str(m) for m in self.modules)})'
 
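
A hedged sketch of how the renamed entry point is meant to be used, based only on what is visible above: Optimizer (formerly Modular) subclasses torch.optim.Optimizer, takes a model or params plus torchzero modules, and when stepped without a closure gathers the existing .grad tensors via functional.step. The concrete modules are placeholders and the full step signature is not shown in this diff:

    import torch
    from torchzero.core import Optimizer

    def build_and_step(modules, data: torch.Tensor):
        # `modules` is a sequence of torchzero Module instances (placeholders)
        model = torch.nn.Linear(10, 1)
        opt = Optimizer(model, *modules)   # Optimizer replaces the old Modular class

        loss = model(data).pow(2).mean()
        loss.backward()
        # with no closure, the step gathers the existing .grad tensors
        # (see the `if objective.closure is None` branch in functional.step)
        opt.step()
        return loss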