torchzero 0.3.15__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/test_identical.py +2 -2
- tests/test_module_autograd.py +586 -0
- tests/test_objective.py +188 -0
- tests/test_opts.py +43 -33
- tests/test_tensorlist.py +0 -8
- tests/test_utils_optimizer.py +0 -1
- torchzero/__init__.py +1 -1
- torchzero/core/__init__.py +7 -4
- torchzero/core/chain.py +20 -23
- torchzero/core/functional.py +90 -24
- torchzero/core/modular.py +48 -52
- torchzero/core/module.py +130 -50
- torchzero/core/objective.py +948 -0
- torchzero/core/reformulation.py +55 -24
- torchzero/core/transform.py +261 -367
- torchzero/linalg/__init__.py +10 -0
- torchzero/linalg/eigh.py +34 -0
- torchzero/linalg/linalg_utils.py +14 -0
- torchzero/{utils/linalg → linalg}/linear_operator.py +99 -49
- torchzero/linalg/matrix_power.py +28 -0
- torchzero/linalg/orthogonalize.py +95 -0
- torchzero/{utils/linalg → linalg}/qr.py +4 -2
- torchzero/{utils/linalg → linalg}/solve.py +76 -88
- torchzero/linalg/svd.py +20 -0
- torchzero/linalg/torch_linalg.py +168 -0
- torchzero/modules/adaptive/__init__.py +1 -1
- torchzero/modules/adaptive/adagrad.py +163 -213
- torchzero/modules/adaptive/adahessian.py +74 -103
- torchzero/modules/adaptive/adam.py +53 -76
- torchzero/modules/adaptive/adan.py +49 -30
- torchzero/modules/adaptive/adaptive_heavyball.py +11 -6
- torchzero/modules/adaptive/aegd.py +12 -12
- torchzero/modules/adaptive/esgd.py +98 -119
- torchzero/modules/adaptive/lion.py +5 -10
- torchzero/modules/adaptive/lmadagrad.py +87 -32
- torchzero/modules/adaptive/mars.py +5 -5
- torchzero/modules/adaptive/matrix_momentum.py +47 -51
- torchzero/modules/adaptive/msam.py +70 -52
- torchzero/modules/adaptive/muon.py +59 -124
- torchzero/modules/adaptive/natural_gradient.py +33 -28
- torchzero/modules/adaptive/orthograd.py +11 -15
- torchzero/modules/adaptive/rmsprop.py +83 -75
- torchzero/modules/adaptive/rprop.py +48 -47
- torchzero/modules/adaptive/sam.py +55 -45
- torchzero/modules/adaptive/shampoo.py +123 -129
- torchzero/modules/adaptive/soap.py +207 -143
- torchzero/modules/adaptive/sophia_h.py +106 -130
- torchzero/modules/clipping/clipping.py +15 -18
- torchzero/modules/clipping/ema_clipping.py +31 -25
- torchzero/modules/clipping/growth_clipping.py +14 -17
- torchzero/modules/conjugate_gradient/cg.py +26 -37
- torchzero/modules/experimental/__init__.py +2 -6
- torchzero/modules/experimental/coordinate_momentum.py +36 -0
- torchzero/modules/experimental/curveball.py +25 -41
- torchzero/modules/experimental/gradmin.py +2 -2
- torchzero/modules/experimental/higher_order_newton.py +14 -40
- torchzero/modules/experimental/newton_solver.py +22 -53
- torchzero/modules/experimental/newtonnewton.py +15 -12
- torchzero/modules/experimental/reduce_outward_lr.py +7 -7
- torchzero/modules/experimental/scipy_newton_cg.py +21 -24
- torchzero/modules/experimental/spsa1.py +3 -3
- torchzero/modules/experimental/structural_projections.py +1 -4
- torchzero/modules/functional.py +1 -1
- torchzero/modules/grad_approximation/forward_gradient.py +7 -7
- torchzero/modules/grad_approximation/grad_approximator.py +23 -16
- torchzero/modules/grad_approximation/rfdm.py +20 -17
- torchzero/modules/least_squares/gn.py +90 -42
- torchzero/modules/line_search/backtracking.py +2 -2
- torchzero/modules/line_search/line_search.py +32 -32
- torchzero/modules/line_search/strong_wolfe.py +2 -2
- torchzero/modules/misc/debug.py +12 -12
- torchzero/modules/misc/escape.py +10 -10
- torchzero/modules/misc/gradient_accumulation.py +10 -78
- torchzero/modules/misc/homotopy.py +16 -8
- torchzero/modules/misc/misc.py +120 -122
- torchzero/modules/misc/multistep.py +50 -48
- torchzero/modules/misc/regularization.py +49 -44
- torchzero/modules/misc/split.py +30 -28
- torchzero/modules/misc/switch.py +37 -32
- torchzero/modules/momentum/averaging.py +14 -14
- torchzero/modules/momentum/cautious.py +34 -28
- torchzero/modules/momentum/momentum.py +11 -11
- torchzero/modules/ops/__init__.py +4 -4
- torchzero/modules/ops/accumulate.py +21 -21
- torchzero/modules/ops/binary.py +67 -66
- torchzero/modules/ops/higher_level.py +19 -19
- torchzero/modules/ops/multi.py +44 -41
- torchzero/modules/ops/reduce.py +26 -23
- torchzero/modules/ops/unary.py +53 -53
- torchzero/modules/ops/utility.py +47 -46
- torchzero/modules/projections/galore.py +1 -1
- torchzero/modules/projections/projection.py +43 -43
- torchzero/modules/quasi_newton/damping.py +1 -1
- torchzero/modules/quasi_newton/lbfgs.py +7 -7
- torchzero/modules/quasi_newton/lsr1.py +7 -7
- torchzero/modules/quasi_newton/quasi_newton.py +10 -10
- torchzero/modules/quasi_newton/sg2.py +19 -19
- torchzero/modules/restarts/restars.py +26 -24
- torchzero/modules/second_order/__init__.py +2 -2
- torchzero/modules/second_order/ifn.py +31 -62
- torchzero/modules/second_order/inm.py +49 -53
- torchzero/modules/second_order/multipoint.py +40 -80
- torchzero/modules/second_order/newton.py +57 -90
- torchzero/modules/second_order/newton_cg.py +102 -154
- torchzero/modules/second_order/nystrom.py +157 -177
- torchzero/modules/second_order/rsn.py +106 -96
- torchzero/modules/smoothing/laplacian.py +13 -12
- torchzero/modules/smoothing/sampling.py +11 -10
- torchzero/modules/step_size/adaptive.py +23 -23
- torchzero/modules/step_size/lr.py +15 -15
- torchzero/modules/termination/termination.py +32 -30
- torchzero/modules/trust_region/cubic_regularization.py +2 -2
- torchzero/modules/trust_region/levenberg_marquardt.py +25 -28
- torchzero/modules/trust_region/trust_cg.py +1 -1
- torchzero/modules/trust_region/trust_region.py +27 -22
- torchzero/modules/variance_reduction/svrg.py +21 -18
- torchzero/modules/weight_decay/__init__.py +2 -1
- torchzero/modules/weight_decay/reinit.py +83 -0
- torchzero/modules/weight_decay/weight_decay.py +12 -13
- torchzero/modules/wrappers/optim_wrapper.py +10 -10
- torchzero/modules/zeroth_order/cd.py +9 -6
- torchzero/optim/root.py +3 -3
- torchzero/optim/utility/split.py +2 -1
- torchzero/optim/wrappers/directsearch.py +27 -63
- torchzero/optim/wrappers/fcmaes.py +14 -35
- torchzero/optim/wrappers/mads.py +11 -31
- torchzero/optim/wrappers/moors.py +66 -0
- torchzero/optim/wrappers/nevergrad.py +4 -4
- torchzero/optim/wrappers/nlopt.py +31 -25
- torchzero/optim/wrappers/optuna.py +6 -13
- torchzero/optim/wrappers/pybobyqa.py +124 -0
- torchzero/optim/wrappers/scipy/__init__.py +7 -0
- torchzero/optim/wrappers/scipy/basin_hopping.py +117 -0
- torchzero/optim/wrappers/scipy/brute.py +48 -0
- torchzero/optim/wrappers/scipy/differential_evolution.py +80 -0
- torchzero/optim/wrappers/scipy/direct.py +69 -0
- torchzero/optim/wrappers/scipy/dual_annealing.py +115 -0
- torchzero/optim/wrappers/scipy/experimental.py +141 -0
- torchzero/optim/wrappers/scipy/minimize.py +151 -0
- torchzero/optim/wrappers/scipy/sgho.py +111 -0
- torchzero/optim/wrappers/wrapper.py +121 -0
- torchzero/utils/__init__.py +7 -25
- torchzero/utils/compile.py +2 -2
- torchzero/utils/derivatives.py +93 -69
- torchzero/utils/optimizer.py +4 -77
- torchzero/utils/python_tools.py +31 -0
- torchzero/utils/tensorlist.py +11 -5
- torchzero/utils/thoad_tools.py +68 -0
- {torchzero-0.3.15.dist-info → torchzero-0.4.0.dist-info}/METADATA +1 -1
- torchzero-0.4.0.dist-info/RECORD +191 -0
- tests/test_vars.py +0 -185
- torchzero/core/var.py +0 -376
- torchzero/modules/experimental/momentum.py +0 -160
- torchzero/optim/wrappers/scipy.py +0 -572
- torchzero/utils/linalg/__init__.py +0 -12
- torchzero/utils/linalg/matrix_funcs.py +0 -87
- torchzero/utils/linalg/orthogonalize.py +0 -12
- torchzero/utils/linalg/svd.py +0 -20
- torchzero/utils/ops.py +0 -10
- torchzero-0.3.15.dist-info/RECORD +0 -175
- /torchzero/{utils/linalg → linalg}/benchmark.py +0 -0
- {torchzero-0.3.15.dist-info → torchzero-0.4.0.dist-info}/WHEEL +0 -0
- {torchzero-0.3.15.dist-info → torchzero-0.4.0.dist-info}/top_level.txt +0 -0
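The file moves above fold torchzero/utils/linalg/ into a new top-level torchzero/linalg/ package and replace torchzero/core/var.py with torchzero/core/objective.py. For downstream code that imported from the old paths, the adjustment looks roughly like the following sketch (based only on the renames listed above; check what 0.4.0 actually re-exports before relying on it):

    # torchzero 0.3.15
    from torchzero.utils.linalg.linear_operator import LinearOperator

    # torchzero 0.4.0
    from torchzero.linalg.linear_operator import LinearOperator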
torchzero/core/modular.py
CHANGED
@@ -1,27 +1,16 @@
 
 import warnings
-from
-from collections import
-from
-from operator import itemgetter
-from typing import TYPE_CHECKING, Any, Literal, cast, final, overload
+from collections import ChainMap
+from collections.abc import MutableMapping
+from typing import Any
 
 import torch
 
-from ..utils import
-    Init,
-    ListLike,
-    Params,
-    _make_param_groups,
-    get_state_vals,
-    vec_to_tensors,
-)
-from ..utils.derivatives import flatten_jacobian, hvp, hvp_fd_central, hvp_fd_forward
-from ..utils.linalg.linear_operator import LinearOperator
-from ..utils.python_tools import flatten
-from .module import Chainable, Module
-from .var import Var
+from ..utils.params import Params, _make_param_groups
 from .functional import step
+from .module import Chainable, Module
+from .objective import Objective
+
 
 class _EvalCounterClosure:
     """keeps track of how many times closure has been evaluated, and sets closure return"""
@@ -32,7 +21,7 @@ class _EvalCounterClosure:
 
     def __call__(self, *args, **kwargs):
         if self.closure is None:
-            raise RuntimeError("
+            raise RuntimeError("closure is None in _EvalCounterClosure, and this can't happen")
 
         v = self.closure(*args, **kwargs)
 
@@ -44,17 +33,17 @@ class _EvalCounterClosure:
         return v
 
 
-def
-
+def flatten_modules(*modules: Chainable) -> list[Module]:
+    flat = []
 
     for m in modules:
         if isinstance(m, Module):
-
-
+            flat.append(m)
+            flat.extend(flatten_modules(list(m.children.values())))
         else:
-
+            flat.extend(flatten_modules(*m))
 
-    return
+    return flat
 
 
 # have to inherit from Modular to support lr schedulers
@@ -83,7 +72,7 @@ class Modular(torch.optim.Optimizer):
         self.modules = modules
         """Top-level modules providedduring initialization."""
 
-        self.
+        self.flat_modules = flatten_modules(self.modules)
         """A flattened list of all modules including all children."""
 
         param_groups = _make_param_groups(params, differentiable=False)
@@ -92,7 +81,7 @@ class Modular(torch.optim.Optimizer):
         Each element in the list is ChainDict's 2nd map of a module."""
 
         # make sure there is no more than a single learning rate module
-        lr_modules = [m for m in self.
+        lr_modules = [m for m in self.flat_modules if 'lr' in m.defaults]
         if len(lr_modules) > 1:
             warnings.warn(f'multiple learning rate modules detected: {lr_modules}. This may lead to componding of learning rate multiplication with per-parameter learning rates and schedulers.')
 
@@ -100,13 +89,13 @@ class Modular(torch.optim.Optimizer):
         for group in param_groups:
             for k in group:
                 if k in ('params', 'lr'): continue
-                modules_with_k = [m for m in self.
+                modules_with_k = [m for m in self.flat_modules if k in m.defaults and k not in m._overridden_keys]
                 if len(modules_with_k) > 1:
                     warnings.warn(f'`params` has a `{k}` key, and multiple modules have that key: {modules_with_k}. If you intended to only set `{k}` to one of them, use `module.set_param_groups(params)`')
 
         # defaults for schedulers
         defaults = {}
-        for m in self.
+        for m in self.flat_modules: defaults.update(m.defaults)
         super().__init__(param_groups, defaults=defaults)
 
         # note - this is what super().__init__(param_groups, defaults=defaults) does:
@@ -146,7 +135,7 @@ class Modular(torch.optim.Optimizer):
 
         for p in proc_param_group['params']:
             # updates global per-parameter setting overrides (medium priority)
-            self._per_parameter_global_settings[p] = [m.settings[p].maps[1] for m in self.
+            self._per_parameter_global_settings[p] = [m.settings[p].maps[1] for m in self.flat_modules]
 
     def state_dict(self):
         all_params = [p for g in self.param_groups for p in g['params']]
@@ -163,7 +152,7 @@ class Modular(torch.optim.Optimizer):
             "params": all_params,
             "groups": groups,
             "defaults": self.defaults,
-            "modules": {i: m.state_dict() for i, m in enumerate(self.
+            "modules": {i: m.state_dict() for i, m in enumerate(self.flat_modules)}
         }
         return state_dict
 
@@ -183,7 +172,7 @@ class Modular(torch.optim.Optimizer):
             self.add_param_group(group)
 
         id_to_tensor = {state_dict['idx_to_id'][i]: p for i,p in enumerate(state_dict['params'])}
-        for m, sd in zip(self.
+        for m, sd in zip(self.flat_modules, state_dict['modules'].values()):
             m._load_state_dict(sd, id_to_tensor)
 
 
@@ -201,35 +190,42 @@ class Modular(torch.optim.Optimizer):
            if not p.requires_grad: continue
            for map in self._per_parameter_global_settings[p]: map.update(settings)
 
-        # create
+        # create Objective
         params = [p for g in self.param_groups for p in g['params'] if p.requires_grad]
-        var = Var(params=params, closure=_EvalCounterClosure(self, closure), model=self.model, current_step=self.current_step, modular=self, loss=loss, storage=kwargs)
 
-
-        if closure is None:
-
-            self.num_evaluations += 1
+        counter_closure = None
+        if closure is not None:
+            counter_closure = _EvalCounterClosure(self, closure)
 
-
+        objective = Objective(
+            params=params, closure=counter_closure, model=self.model,
+            current_step=self.current_step, modular=self, loss=loss, storage=kwargs
+        )
 
-        # step
-
+        # step with all modules
+        objective = step(objective, self.modules)
 
-        # apply update
-
-
-
+        # apply update to parameters unless `objective.skip_update = True`
+        # this does:
+        # if not objective.skip_update:
+        #     torch._foreach_sub_(objective.params, objective.get_updates())
+        objective.update_parameters()
 
         # update attributes
-        self.attrs.update(
-        if
-
-        # hooks
-        for hook in var.post_step_hooks:
-            hook(self, var)
+        self.attrs.update(objective.attrs)
+        if objective.should_terminate is not None:
+            self.should_terminate = objective.should_terminate
 
         self.current_step += 1
-
+
+        # apply hooks
+        # this does:
+        # for hook in objective.post_step_hooks:
+        #     hook(objective, modules)
+        objective.apply_post_step_hooks(self.modules)
+
+        # return the first closure evaluation return
+        # could return loss if it was passed but that's pointless
         return self._closure_return
 
     def __repr__(self):
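The rewritten Modular.step builds an Objective from params, the evaluation-counting closure, model, current_step and any extra storage kwargs, runs it through the module chain via step(objective, self.modules), applies the result with objective.update_parameters(), and finally returns the first closure evaluation's return value. A minimal usage sketch of driving the optimizer from user code follows; the tz.m.Adam / tz.m.LR module names and the closure(backward=True) convention are assumptions inferred from the module files listed above, not taken from this diff:

    import torch
    import torchzero as tz

    model = torch.nn.Linear(10, 1)
    X, y = torch.randn(64, 10), torch.randn(64, 1)

    # compose an optimizer from modules; each step builds an Objective,
    # passes it through the chain, then calls objective.update_parameters()
    opt = tz.Modular(model.parameters(), tz.m.Adam(), tz.m.LR(1e-2))

    def closure(backward=True):
        loss = torch.nn.functional.mse_loss(model(X), y)
        if backward:
            opt.zero_grad()
            loss.backward()
        return loss

    for _ in range(100):
        loss = opt.step(closure)  # returns the first closure evaluation's return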
torchzero/core/module.py
CHANGED
@@ -1,24 +1,18 @@
 import warnings
 from abc import ABC, abstractmethod
 from collections import ChainMap, defaultdict
-from collections.abc import Callable, Iterable,
-from
-from typing import Any, Literal, cast, final, overload
+from collections.abc import Callable, Iterable, Sequence
+from typing import Any, overload, TYPE_CHECKING
 
 import torch
 
-from ..
-
-
-
-
-
-
-)
-from ..utils.derivatives import flatten_jacobian, hvp, hvp_fd_central, hvp_fd_forward
-from ..utils.linalg.linear_operator import LinearOperator
-from ..utils.python_tools import flatten
-from .var import Var
+from ..linalg.linear_operator import LinearOperator
+from ..utils.optimizer import Init, ListLike, get_state_vals
+from ..utils.params import Params, _make_param_groups
+from .functional import step_tensors
+
+if TYPE_CHECKING:
+    from .objective import Objective
 
 
 class Module(ABC):
@@ -36,6 +30,7 @@ class Module(ABC):
     """
    def __init__(self, defaults: dict[str, Any] | None = None):
        if defaults is None: defaults = {}
+        if any(isinstance(v, Module) for v in defaults.values()): raise RuntimeError("Passed a module to defaults")
        self.defaults: dict[str, Any] = defaults
 
        # settings are stored like state in per-tensor defaultdict, with per-parameter overrides possible
@@ -55,7 +50,7 @@ class Module(ABC):
        """A dictionary of child modules."""
 
        self._overridden_keys = set()
-        """tracks keys overridden with
+        """tracks keys overridden with ``set_param_groups``, only used to not give a warning"""
 
 
    def set_param_groups(self, param_groups: Params):
@@ -71,7 +66,12 @@ class Module(ABC):
            self.settings[param].maps[0].update(settings) # set module-specific per-parameter settings
        return self
 
-    def set_child(self, key: str, module: "Module | Sequence[Module]"):
+    def set_child(self, key: str, module: "Module | Sequence[Module] | None"):
+        if key in self.children:
+            warnings.warn(f"set_child overwriting child `{key}`")
+
+        if module is None: return
+
        from .chain import maybe_chain
        self.children[key] = maybe_chain(module)
 
@@ -85,6 +85,62 @@ class Module(ABC):
    def get_children_sequence(self, prefix = 'module_'):
        return [self.children[f'{prefix}{i}'] for i in range(len(self.children)) if f'{prefix}{i}' in self.children]
 
+    def inner_step(
+        self,
+        key: str,
+        objective: "Objective",
+        must_exist: bool = True,
+    ) -> "Objective":
+        """Passes ``objective`` to child and returns it."""
+        child = self.children.get(key, None)
+
+        if child is None:
+            if must_exist: raise KeyError(f"child `{key}` doesn't exist")
+            return objective
+
+        return child.step(objective)
+
+
+    def inner_step_tensors(
+        self,
+        key: str,
+        tensors: list[torch.Tensor],
+        clone: bool,
+        params: Iterable[torch.Tensor] | None = None,
+        grads: Sequence[torch.Tensor] | None = None,
+        loss: torch.Tensor | None = None,
+        closure: Callable | None = None,
+        objective: "Objective | None" = None,
+        must_exist: bool = True
+    ) -> list[torch.Tensor]:
+        """Steps with child module. Can be used to apply transforms to any internal buffers.
+
+        If ``objective`` is specified, other attributes shouldn't to be specified.
+
+        Args:
+            key (str): Child module key.
+            tensors (Sequence[torch.Tensor]): tensors to pass to child module.
+            clone (bool):
+                If ``key`` exists, whether to clone ``tensors`` to avoid modifying buffers in-place.
+                If ``key`` doesn't exist, ``tensors`` are always returned without cloning
+            params (Iterable[torch.Tensor] | None, optional): pass None if ``tensors`` have different shape. Defaults to None.
+            grads (Sequence[torch.Tensor] | None, optional): grads. Defaults to None.
+            loss (torch.Tensor | None, optional): loss. Defaults to None.
+            closure (Callable | None, optional): closure. Defaults to None.
+            must_exist (bool, optional): if True, if ``key`` doesn't exist, raises ``KeyError``. Defaults to True.
+        """
+
+        child = self.children.get(key, None)
+
+        if child is None:
+            if must_exist: raise KeyError(f"child `{key}` doesn't exist")
+            return tensors
+
+        if clone: tensors = [t.clone() for t in tensors]
+        return step_tensors(modules=child, tensors=tensors, params=params, grads=grads,
+                            loss=loss, closure=closure, objective=objective)
+
+
    def __repr__(self):
        s = self.__class__.__name__
        if self.children:
@@ -106,7 +162,6 @@ class Module(ABC):
 
    def get_settings(self, params: Sequence[torch.Tensor], key: str | list[str] | tuple[str,...], key2: str | None = None,
                     *keys: str, cls: type[ListLike] = list) -> ListLike | list[ListLike]:
-        # if isinstance(params, Vars): params = params.params
        return get_state_vals(self.settings, params, key, key2, *keys, must_exist=True, cls=cls) # pyright:ignore[reportArgumentType]
 
 
@@ -176,13 +231,8 @@ class Module(ABC):
            - if state_keys has multiple keys and keys has a single key, return cls.
            - if state_keys has multiple keys and keys has multiple keys, return list of cls.
        """
-        # if isinstance(params, Vars): params = params.params
        return get_state_vals(self.state, params, key, key2, *keys, must_exist=must_exist, init=init, cls=cls) # pyright:ignore[reportArgumentType]
 
-    # def first_setting(self, *keys:str, params:Sequence[torch.Tensor]):
-    #     # if isinstance(params, Vars): params = params.params
-    #     return itemgetter(*keys)(self.settings[params[0]])
-
    def clear_state_keys(self, *keys:str):
        for s in self.state.values():
            for k in keys:
@@ -248,36 +298,73 @@ class Module(ABC):
        # extra info
        self._extra_unpack(state_dict['extra'])
 
-
-
-
-
-
+    def get_generator(self, device: torch.types.Device, seed: int | None):
+        """If ``seed=None``, returns ``None``.
+
+        Otherwise, if generator on this device and with this seed hasn't been created,
+        creates it and stores in global state.
+
+        Returns ``torch.Generator``."""
+        if seed is None: return None
 
-
-
+        if device is None: device_obj = torch.get_default_device()
+        else: device_obj = torch.device(device)
+        key = f"__generator-{seed}-{device_obj.type}:{device_obj.index}"
+
+        if key not in self.global_state:
+            self.global_state[key] = torch.Generator(device).manual_seed(seed)
+
+        return self.global_state[key]
+
+    def increment_counter(self, key: str, start: int):
+        """first value is ``start``"""
+        value = self.global_state.get(key, start - 1) + 1
+        self.global_state[key] = value
+        return value
+
+    # ---------------------------- OVERRIDABLE METHODS --------------------------- #
+    def update(self, objective:"Objective") -> None:
+        """Updates internal state of this module. This should not modify ``objective.update``.
 
        Specifying ``update`` and ``apply`` methods is optional and allows certain meta-modules to be used,
-        such as ``tz.m.Online`` or trust regions. Alternatively,
+        such as ``tz.m.Online`` or trust regions. Alternatively, define all logic within the ``apply`` method.
+
+        ``update`` is guaranteed to be called at least once before ``apply``.
+
+        Args:
+            objective (Objective): ``Objective`` object
        """
 
-
-
-
+    @abstractmethod
+    def apply(self, objective: "Objective") -> "Objective":
+        """Updates ``objective`` using the internal state of this module.
+
+        If ``update`` method is defined, ``apply`` shouldn't modify the internal state of this module if possible.
 
        Specifying ``update`` and ``apply`` methods is optional and allows certain meta-modules to be used,
-        such as ``tz.m.Online`` or trust regions. Alternatively,
+        such as ``tz.m.Online`` or trust regions. Alternatively, define all logic within the ``apply`` method.
+
+        ``update`` is guaranteed to be called at least once before ``apply``.
+
+        Args:
+            objective (Objective): ``Objective`` object
        """
-
+        # if apply is empty, it should be defined explicitly.
+        raise NotImplementedError(f"{self.__class__.__name__} doesn't implement `apply`.")
+
+    def step(self, objective: "Objective") -> "Objective":
+        """Perform a step with this module. Calls ``update``, then ``apply``."""
+        self.update(objective)
+        return self.apply(objective)
 
-    def get_H(self,
+    def get_H(self, objective: "Objective") -> LinearOperator | None:
        """returns a ``LinearOperator`` corresponding to hessian or hessian approximation.
        The hessian approximation is assumed to be for all parameters concatenated to a vector."""
        # if this method is not defined it searches in children
        # this should be overwritten to return None if child params are different from this modules params
        H = None
        for k,v in self.children.items():
-            H_v = v.get_H(
+            H_v = v.get_H(objective)
 
            if (H is not None) and (H_v is not None):
                raise RuntimeError(f"Two children of {self} have a hessian, second one is {k}={v}")
@@ -307,21 +394,14 @@ class Module(ABC):
        """
        for c in self.children.values(): c.reset_for_online()
 
-    def _extra_pack(self):
-        """extra information to store in state_dict of this optimizer.
-        Will be passed to ``_extra_unpack`` when loading the state_dict
+    def _extra_pack(self) -> dict:
+        """extra information to store in ``state_dict`` of this optimizer.
+        Will be passed to ``_extra_unpack`` when loading the ``state_dict``."""
        return {}
 
-    def _extra_unpack(self,
-        """``_extra_pack`` return will be passed to this method when loading state_dict
+    def _extra_unpack(self, d: dict):
+        """``_extra_pack`` return will be passed to this method when loading ``state_dict``.
        This method is called after loading the rest of the state dict"""
 
-    def get_generator(self, device: torch.types.Device, seed: int | None):
-        if seed is None: return None
-
-        if 'generator' not in self.global_state:
-            self.global_state['generator'] = torch.Generator(device).manual_seed(seed)
-
-        return self.global_state['generator']
 
 Chainable = Module | Sequence[Module]
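With the new update/apply split, a custom module only needs to implement apply (and optionally update, so that meta-modules such as tz.m.Online can reuse it); Module.step calls update then apply. A minimal sketch of a stateless transform under that contract: the objective.params and objective.get_updates() accessors are taken from the Modular.step comments above, while the module itself is illustrative and not part of the package:

    import torch
    from torchzero.core.module import Module

    class ClipUpdateValue(Module):
        """Hypothetical module: clamps every update tensor to [-value, value]."""
        def __init__(self, value: float = 1.0):
            super().__init__(defaults=dict(value=value))

        def apply(self, objective):
            # resolve the per-parameter 'value' setting (defaults + overrides)
            values = self.get_settings(objective.params, 'value')
            for u, v in zip(objective.get_updates(), values):
                u.clamp_(-v, v)  # modify the update in place
            return objective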
|