PyPI - torchzero - Versions diffs - 0.3.11__tar.gz → 0.3.14__tar.gz - Mend

torchzero 0.3.11tar.gz → 0.3.14tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (217) hide show

torchzero-0.3.14/PKG-INFO ADDED Viewed

@@ -0,0 +1,14 @@
+Metadata-Version: 2.4
+Name: torchzero
+Version: 0.3.14
+Summary: Modular optimization library for PyTorch.
+Author-email: Ivan Nikishev <nkshv2@gmail.com>
+Project-URL: Homepage, https://github.com/inikishev/torchzero
+Project-URL: Repository, https://github.com/inikishev/torchzero
+Project-URL: Issues, https://github.com/inikishev/torchzero/isses
+Keywords: optimization,optimizers,torch,neural networks,zeroth order,second order
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+Requires-Dist: torch
+Requires-Dist: numpy
+Requires-Dist: typing_extensions

{torchzero-0.3.11 → torchzero-0.3.14}/pyproject.toml RENAMED Viewed

@@ -13,7 +13,7 @@ build-backend = "setuptools.build_meta"
 name = "torchzero"
 description = "Modular optimization library for PyTorch."
-version = "0.3.11"
+version = "0.3.14"
 dependencies = [
   "torch",
   "numpy",

{torchzero-0.3.11 → torchzero-0.3.14}/tests/test_opts.py RENAMED Viewed

@@ -56,14 +56,17 @@ def _run_objective(opt: tz.Modular, objective: Callable, use_closure: bool, step
         if use_closure:
             def closure(backward=True):
                 loss = objective()
+                losses.append(loss.detach())
                 if backward:
                     opt.zero_grad()
                     loss.backward()
                 return loss
-            loss = opt.step(closure)
-            assert loss is not None
-            assert torch.isfinite(loss), f"{opt}: Inifinite loss - {[l.item() for l in losses]}"
-            losses.append(loss)
+            ret = opt.step(closure)
+            assert ret is not None # the return should be the loss
+            with torch.no_grad():
+                loss = objective() # in case f(x_0) is not evaluated
+                assert torch.isfinite(loss), f"{opt}: Inifinite loss - {[l.item() for l in losses]}"
+                losses.append(loss.detach())
         else:
             loss = objective()
@@ -71,7 +74,7 @@ def _run_objective(opt: tz.Modular, objective: Callable, use_closure: bool, step
             loss.backward()
             opt.step()
             assert torch.isfinite(loss), f"{opt}: Inifinite loss - {[l.item() for l in losses]}"
-            losses.append(loss)
+            losses.append(loss.detach())
     losses.append(objective())
     return torch.stack(losses).nan_to_num(0,10000,10000).min()
@@ -374,6 +377,21 @@ RandomizedFDM_central4 = Run(
     func='booth', steps=50, loss=10, merge_invariant=True,
     sphere_steps=100, sphere_loss=450,
 )
+RandomizedFDM_forward4 = Run(
+    func_opt=lambda p: tz.Modular(p, tz.m.RandomizedFDM(formula='forward4', seed=0), tz.m.LR(0.01)),
+    sphere_opt=lambda p: tz.Modular(p, tz.m.RandomizedFDM(formula='forward4', seed=0), tz.m.LR(0.001)),
+    needs_closure=True,
+    func='booth', steps=50, loss=10, merge_invariant=True,
+    sphere_steps=100, sphere_loss=450,
+)
+RandomizedFDM_forward5 = Run(
+    func_opt=lambda p: tz.Modular(p, tz.m.RandomizedFDM(formula='forward5', seed=0), tz.m.LR(0.01)),
+    sphere_opt=lambda p: tz.Modular(p, tz.m.RandomizedFDM(formula='forward5', seed=0), tz.m.LR(0.001)),
+    needs_closure=True,
+    func='booth', steps=50, loss=10, merge_invariant=True,
+    sphere_steps=100, sphere_loss=450,
+)
 RandomizedFDM_4samples = Run(
     func_opt=lambda p: tz.Modular(p, tz.m.RandomizedFDM(n_samples=4, seed=0), tz.m.LR(0.1)),
@@ -382,13 +400,6 @@ RandomizedFDM_4samples = Run(
     func='booth', steps=50, loss=1e-5, merge_invariant=True,
     sphere_steps=100, sphere_loss=400,
 )
-RandomizedFDM_4samples_lerp = Run(
-    func_opt=lambda p: tz.Modular(p, tz.m.RandomizedFDM(n_samples=4, beta=0.99, seed=0), tz.m.LR(0.1)),
-    sphere_opt=lambda p: tz.Modular(p, tz.m.RandomizedFDM(n_samples=4, beta=0.9, seed=0), tz.m.LR(0.001)),
-    needs_closure=True,
-    func='booth', steps=50, loss=1e-5, merge_invariant=True,
-    sphere_steps=100, sphere_loss=505,
-)
 RandomizedFDM_4samples_no_pre_generate = Run(
     func_opt=lambda p: tz.Modular(p, tz.m.RandomizedFDM(n_samples=4, pre_generate=False, seed=0), tz.m.LR(0.1)),
     sphere_opt=lambda p: tz.Modular(p, tz.m.RandomizedFDM(n_samples=4, pre_generate=False, seed=0), tz.m.LR(0.001)),
@@ -455,25 +466,11 @@ Backtracking = Run(
     func='booth', steps=50, loss=0, merge_invariant=True,
     sphere_steps=2, sphere_loss=0,
 )
-Backtracking_try_negative = Run(
-    func_opt=lambda p: tz.Modular(p, tz.m.Mul(-1), tz.m.Backtracking(try_negative=True)),
-    sphere_opt=lambda p: tz.Modular(p, tz.m.Mul(-1), tz.m.Backtracking(try_negative=True)),
-    needs_closure=True,
-    func='booth', steps=50, loss=1e-9, merge_invariant=True,
-    sphere_steps=2, sphere_loss=1e-10,
-)
 AdaptiveBacktracking = Run(
     func_opt=lambda p: tz.Modular(p, tz.m.AdaptiveBacktracking()),
     sphere_opt=lambda p: tz.Modular(p, tz.m.AdaptiveBacktracking()),
     needs_closure=True,
-    func='booth', steps=50, loss=1e-12, merge_invariant=True,
-    sphere_steps=2, sphere_loss=1e-10,
-)
-AdaptiveBacktracking_try_negative = Run(
-    func_opt=lambda p: tz.Modular(p, tz.m.Mul(-1), tz.m.AdaptiveBacktracking(try_negative=True)),
-    sphere_opt=lambda p: tz.Modular(p, tz.m.Mul(-1), tz.m.AdaptiveBacktracking(try_negative=True)),
-    needs_closure=True,
-    func='booth', steps=50, loss=1e-8, merge_invariant=True,
+    func='booth', steps=50, loss=1e-11, merge_invariant=True,
     sphere_steps=2, sphere_loss=1e-10,
 )
 # ----------------------------- line_search/scipy ---------------------------- #
@@ -578,8 +575,8 @@ UpdateGradientSignConsistency = Run(
     sphere_steps=10, sphere_loss=2,
 )
 IntermoduleCautious = Run(
-    func_opt=lambda p: tz.Modular(p, tz.m.IntermoduleCautious(tz.m.NAG(), tz.m.BFGS(ptol_reset=True)), tz.m.LR(0.01)),
-    sphere_opt=lambda p: tz.Modular(p, tz.m.IntermoduleCautious(tz.m.NAG(), tz.m.BFGS(ptol_reset=True)), tz.m.LR(0.1)),
+    func_opt=lambda p: tz.Modular(p, tz.m.IntermoduleCautious(tz.m.NAG(), tz.m.BFGS(ptol_restart=True)), tz.m.LR(0.01)),
+    sphere_opt=lambda p: tz.Modular(p, tz.m.IntermoduleCautious(tz.m.NAG(), tz.m.BFGS(ptol_restart=True)), tz.m.LR(0.1)),
     needs_closure=False,
     func='booth', steps=50, loss=1e-4, merge_invariant=True,
     sphere_steps=10, sphere_loss=0.1,
@@ -592,8 +589,8 @@ ScaleByGradCosineSimilarity = Run(
     sphere_steps=10, sphere_loss=0.1,
 )
 ScaleModulesByCosineSimilarity = Run(
-    func_opt=lambda p: tz.Modular(p, tz.m.ScaleModulesByCosineSimilarity(tz.m.HeavyBall(0.9), tz.m.BFGS(ptol_reset=True)),tz.m.LR(0.05)),
-    sphere_opt=lambda p: tz.Modular(p, tz.m.ScaleModulesByCosineSimilarity(tz.m.HeavyBall(0.9), tz.m.BFGS(ptol_reset=True)),tz.m.LR(0.1)),
+    func_opt=lambda p: tz.Modular(p, tz.m.ScaleModulesByCosineSimilarity(tz.m.HeavyBall(0.9), tz.m.BFGS(ptol_restart=True)),tz.m.LR(0.05)),
+    sphere_opt=lambda p: tz.Modular(p, tz.m.ScaleModulesByCosineSimilarity(tz.m.HeavyBall(0.9), tz.m.BFGS(ptol_restart=True)),tz.m.LR(0.1)),
     needs_closure=False,
     func='booth', steps=50, loss=0.005, merge_invariant=True,
     sphere_steps=10, sphere_loss=0.1,
@@ -601,47 +598,69 @@ ScaleModulesByCosineSimilarity = Run(
 # ------------------------- momentum/matrix_momentum ------------------------- #
 MatrixMomentum_forward = Run(
-    func_opt=lambda p: tz.Modular(p, tz.m.MatrixMomentum(hvp_method='forward'), tz.m.LR(0.01)),
-    sphere_opt=lambda p: tz.Modular(p, tz.m.MatrixMomentum(hvp_method='forward'), tz.m.LR(0.5)),
+    func_opt=lambda p: tz.Modular(p, tz.m.MatrixMomentum(0.01, hvp_method='forward'),),
+    sphere_opt=lambda p: tz.Modular(p, tz.m.MatrixMomentum(0.5, hvp_method='forward')),
     needs_closure=True,
     func='booth', steps=50, loss=0.05, merge_invariant=True,
-    sphere_steps=10, sphere_loss=0,
+    sphere_steps=10, sphere_loss=0.01,
 )
 MatrixMomentum_forward = Run(
-    func_opt=lambda p: tz.Modular(p, tz.m.MatrixMomentum(hvp_method='central'), tz.m.LR(0.01)),
-    sphere_opt=lambda p: tz.Modular(p, tz.m.MatrixMomentum(hvp_method='central'), tz.m.LR(0.5)),
+    func_opt=lambda p: tz.Modular(p, tz.m.MatrixMomentum(0.01, hvp_method='central')),
+    sphere_opt=lambda p: tz.Modular(p, tz.m.MatrixMomentum(0.5, hvp_method='central')),
     needs_closure=True,
     func='booth', steps=50, loss=0.05, merge_invariant=True,
-    sphere_steps=10, sphere_loss=0,
+    sphere_steps=10, sphere_loss=0.01,
 )
 MatrixMomentum_forward = Run(
-    func_opt=lambda p: tz.Modular(p, tz.m.MatrixMomentum(hvp_method='autograd'), tz.m.LR(0.01)),
-    sphere_opt=lambda p: tz.Modular(p, tz.m.MatrixMomentum(hvp_method='autograd'), tz.m.LR(0.5)),
+    func_opt=lambda p: tz.Modular(p, tz.m.MatrixMomentum(0.01, hvp_method='autograd')),
+    sphere_opt=lambda p: tz.Modular(p, tz.m.MatrixMomentum(0.5, hvp_method='autograd')),
     needs_closure=True,
     func='booth', steps=50, loss=0.05, merge_invariant=True,
-    sphere_steps=10, sphere_loss=0,
+    sphere_steps=10, sphere_loss=0.01,
 )
 AdaptiveMatrixMomentum_forward = Run(
-    func_opt=lambda p: tz.Modular(p, tz.m.AdaptiveMatrixMomentum(hvp_method='forward'), tz.m.LR(0.05)),
-    sphere_opt=lambda p: tz.Modular(p, tz.m.AdaptiveMatrixMomentum(hvp_method='forward'), tz.m.LR(0.5)),
+    func_opt=lambda p: tz.Modular(p, tz.m.MatrixMomentum(0.05, hvp_method='forward', adaptive=True)),
+    sphere_opt=lambda p: tz.Modular(p, tz.m.MatrixMomentum(0.5, hvp_method='forward', adaptive=True)),
     needs_closure=True,
-    func='booth', steps=50, loss=0.002, merge_invariant=True,
-    sphere_steps=10, sphere_loss=0,
+    func='booth', steps=50, loss=0.05, merge_invariant=True,
+    sphere_steps=10, sphere_loss=0.05,
 )
 AdaptiveMatrixMomentum_central = Run(
-    func_opt=lambda p: tz.Modular(p, tz.m.AdaptiveMatrixMomentum(hvp_method='central'), tz.m.LR(0.05)),
-    sphere_opt=lambda p: tz.Modular(p, tz.m.AdaptiveMatrixMomentum(hvp_method='central'), tz.m.LR(0.5)),
+    func_opt=lambda p: tz.Modular(p, tz.m.MatrixMomentum(0.05, hvp_method='central', adaptive=True)),
+    sphere_opt=lambda p: tz.Modular(p, tz.m.MatrixMomentum(0.5, hvp_method='central', adaptive=True)),
     needs_closure=True,
-    func='booth', steps=50, loss=0.002, merge_invariant=True,
-    sphere_steps=10, sphere_loss=0,
+    func='booth', steps=50, loss=0.05, merge_invariant=True,
+    sphere_steps=10, sphere_loss=0.05,
 )
 AdaptiveMatrixMomentum_autograd = Run(
-    func_opt=lambda p: tz.Modular(p, tz.m.AdaptiveMatrixMomentum(hvp_method='autograd'), tz.m.LR(0.05)),
-    sphere_opt=lambda p: tz.Modular(p, tz.m.AdaptiveMatrixMomentum(hvp_method='autograd'), tz.m.LR(0.5)),
+    func_opt=lambda p: tz.Modular(p, tz.m.MatrixMomentum(0.05, hvp_method='autograd', adaptive=True)),
+    sphere_opt=lambda p: tz.Modular(p, tz.m.MatrixMomentum(0.5, hvp_method='autograd', adaptive=True)),
     needs_closure=True,
-    func='booth', steps=50, loss=0.002, merge_invariant=True,
-    sphere_steps=10, sphere_loss=0,
+    func='booth', steps=50, loss=0.05, merge_invariant=True,
+    sphere_steps=10, sphere_loss=0.05,
+)
+StochasticAdaptiveMatrixMomentum_forward = Run(
+    func_opt=lambda p: tz.Modular(p, tz.m.MatrixMomentum(0.05, hvp_method='forward', adaptive=True, adapt_freq=1)),
+    sphere_opt=lambda p: tz.Modular(p, tz.m.MatrixMomentum(0.5, hvp_method='forward', adaptive=True, adapt_freq=1)),
+    needs_closure=True,
+    func='booth', steps=50, loss=0.05, merge_invariant=True,
+    sphere_steps=10, sphere_loss=0.05,
+)
+StochasticAdaptiveMatrixMomentum_central = Run(
+    func_opt=lambda p: tz.Modular(p, tz.m.MatrixMomentum(0.05, hvp_method='central', adaptive=True, adapt_freq=1)),
+    sphere_opt=lambda p: tz.Modular(p, tz.m.MatrixMomentum(0.5, hvp_method='central', adaptive=True, adapt_freq=1)),
+    needs_closure=True,
+    func='booth', steps=50, loss=0.05, merge_invariant=True,
+    sphere_steps=10, sphere_loss=0.05,
+)
+StochasticAdaptiveMatrixMomentum_autograd = Run(
+    func_opt=lambda p: tz.Modular(p, tz.m.MatrixMomentum(0.05, hvp_method='autograd', adaptive=True, adapt_freq=1)),
+    sphere_opt=lambda p: tz.Modular(p, tz.m.MatrixMomentum(0.5, hvp_method='autograd', adaptive=True, adapt_freq=1)),
+    needs_closure=True,
+    func='booth', steps=50, loss=0.05, merge_invariant=True,
+    sphere_steps=10, sphere_loss=0.05,
 )
 # EMA, momentum are covered by test_identical
@@ -668,8 +687,8 @@ UpdateSign = Run(
     sphere_steps=10, sphere_loss=0,
 )
 GradAccumulation = Run(
-    func_opt=lambda p: tz.Modular(p, tz.m.GradientAccumulation(tz.m.LR(0.05), 10), ),
-    sphere_opt=lambda p: tz.Modular(p, tz.m.GradientAccumulation(tz.m.LR(0.5), 10), ),
+    func_opt=lambda p: tz.Modular(p, tz.m.GradientAccumulation(n=10), tz.m.LR(0.05)),
+    sphere_opt=lambda p: tz.Modular(p, tz.m.GradientAccumulation(n=10), tz.m.LR(0.5)),
     needs_closure=False,
     func='booth', steps=50, loss=25, merge_invariant=True,
     sphere_steps=20, sphere_loss=1e-11,
@@ -725,24 +744,24 @@ Shampoo = Run(
 # ------------------------- quasi_newton/quasi_newton ------------------------ #
 BFGS = Run(
-    func_opt=lambda p: tz.Modular(p, tz.m.BFGS(ptol_reset=True), tz.m.StrongWolfe()),
-    sphere_opt=lambda p: tz.Modular(p, tz.m.BFGS(ptol_reset=True), tz.m.StrongWolfe()),
+    func_opt=lambda p: tz.Modular(p, tz.m.BFGS(ptol_restart=True), tz.m.StrongWolfe()),
+    sphere_opt=lambda p: tz.Modular(p, tz.m.BFGS(ptol_restart=True), tz.m.StrongWolfe()),
     needs_closure=True,
     func='rosen', steps=50, loss=1e-10, merge_invariant=True,
     sphere_steps=10, sphere_loss=1e-10,
 )
 SR1 = Run(
-    func_opt=lambda p: tz.Modular(p, tz.m.SR1(ptol_reset=True), tz.m.StrongWolfe()),
-    sphere_opt=lambda p: tz.Modular(p, tz.m.SR1(ptol_reset=True), tz.m.StrongWolfe()),
+    func_opt=lambda p: tz.Modular(p, tz.m.SR1(ptol_restart=True, scale_first=True), tz.m.StrongWolfe(fallback=False)),
+    sphere_opt=lambda p: tz.Modular(p, tz.m.SR1(scale_first=True), tz.m.StrongWolfe(fallback=False)),
     needs_closure=True,
     func='rosen', steps=50, loss=1e-12, merge_invariant=True,
     sphere_steps=10, sphere_loss=0,
 )
 SSVM = Run(
-    func_opt=lambda p: tz.Modular(p, tz.m.SSVM(1, ptol_reset=True), tz.m.StrongWolfe()),
-    sphere_opt=lambda p: tz.Modular(p, tz.m.SSVM(1, ptol_reset=True), tz.m.StrongWolfe()),
+    func_opt=lambda p: tz.Modular(p, tz.m.SSVM(1, ptol_restart=True), tz.m.StrongWolfe(fallback=True)),
+    sphere_opt=lambda p: tz.Modular(p, tz.m.SSVM(1, ptol_restart=True), tz.m.StrongWolfe(fallback=True)),
     needs_closure=True,
-    func='rosen', steps=50, loss=0.5, merge_invariant=True,
+    func='rosen', steps=50, loss=0.2, merge_invariant=True,
     sphere_steps=10, sphere_loss=0,
 )
@@ -757,8 +776,8 @@ LBFGS = Run(
 # ----------------------------- quasi_newton/lsr1 ---------------------------- #
 LSR1 = Run(
-    func_opt=lambda p: tz.Modular(p, tz.m.LSR1(scale_second=True), tz.m.StrongWolfe()),
-    sphere_opt=lambda p: tz.Modular(p, tz.m.LSR1(scale_second=True), tz.m.StrongWolfe()),
+    func_opt=lambda p: tz.Modular(p, tz.m.LSR1(), tz.m.StrongWolfe(c2=0.1, fallback=True)),
+    sphere_opt=lambda p: tz.Modular(p, tz.m.LSR1(), tz.m.StrongWolfe(c2=0.1, fallback=True)),
     needs_closure=True,
     func='rosen', steps=50, loss=0, merge_invariant=True,
     sphere_steps=10, sphere_loss=0,
@@ -775,8 +794,8 @@ LSR1 = Run(
 # ---------------------------- second_order/newton --------------------------- #
 Newton = Run(
-    func_opt=lambda p: tz.Modular(p, tz.m.Newton(), tz.m.StrongWolfe()),
-    sphere_opt=lambda p: tz.Modular(p, tz.m.Newton(), tz.m.StrongWolfe()),
+    func_opt=lambda p: tz.Modular(p, tz.m.Newton(), tz.m.StrongWolfe(fallback=True)),
+    sphere_opt=lambda p: tz.Modular(p, tz.m.Newton(), tz.m.StrongWolfe(fallback=True)),
     needs_closure=True,
     func='rosen', steps=20, loss=1e-7, merge_invariant=True,
     sphere_steps=2, sphere_loss=1e-9,
@@ -784,8 +803,8 @@ Newton = Run(
 # --------------------------- second_order/newton_cg -------------------------- #
 NewtonCG = Run(
-    func_opt=lambda p: tz.Modular(p, tz.m.NewtonCG(), tz.m.StrongWolfe()),
-    sphere_opt=lambda p: tz.Modular(p, tz.m.NewtonCG(), tz.m.StrongWolfe()),
+    func_opt=lambda p: tz.Modular(p, tz.m.NewtonCG(), tz.m.StrongWolfe(fallback=True)),
+    sphere_opt=lambda p: tz.Modular(p, tz.m.NewtonCG(), tz.m.StrongWolfe(fallback=True)),
     needs_closure=True,
     func='rosen', steps=20, loss=1e-7, merge_invariant=True,
     sphere_steps=2, sphere_loss=3e-4,
@@ -793,11 +812,11 @@ NewtonCG = Run(
 # ---------------------------- smoothing/gaussian ---------------------------- #
 GaussianHomotopy = Run(
-    func_opt=lambda p: tz.Modular(p, tz.m.GaussianHomotopy(10, 1, tol=1e-1, seed=0), tz.m.BFGS(ptol_reset=True), tz.m.StrongWolfe()),
-    sphere_opt=lambda p: tz.Modular(p, tz.m.GaussianHomotopy(10, 1, tol=1e-1, seed=0), tz.m.BFGS(ptol_reset=True), tz.m.StrongWolfe()),
+    func_opt=lambda p: tz.Modular(p, tz.m.GradientSampling([tz.m.BFGS(), tz.m.Backtracking()], 1, 10, termination=tz.m.TerminateByUpdateNorm(1e-1), seed=0)),
+    sphere_opt=lambda p: tz.Modular(p, tz.m.GradientSampling([tz.m.BFGS(), tz.m.Backtracking()], 1e-1, 10, termination=tz.m.TerminateByUpdateNorm(1e-1), seed=0)),
     needs_closure=True,
-    func='booth', steps=20, loss=0.1, merge_invariant=True,
-    sphere_steps=10, sphere_loss=200,
+    func='booth', steps=20, loss=0.01, merge_invariant=True,
+    sphere_steps=10, sphere_loss=1,
 )
 # ---------------------------- smoothing/laplacian --------------------------- #
@@ -879,14 +898,14 @@ Adan = Run(
 )
 # ------------------------------------ CGs ----------------------------------- #
-for CG in (tz.m.PolakRibiere, tz.m.FletcherReeves, tz.m.HestenesStiefel, tz.m.DaiYuan, tz.m.LiuStorey, tz.m.ConjugateDescent, tz.m.HagerZhang, tz.m.HybridHS_DY, tz.m.ProjectedGradientMethod):
+for CG in (tz.m.PolakRibiere, tz.m.FletcherReeves, tz.m.HestenesStiefel, tz.m.DaiYuan, tz.m.LiuStorey, tz.m.ConjugateDescent, tz.m.HagerZhang, tz.m.DYHS, tz.m.ProjectedGradientMethod):
     for func_steps,sphere_steps_ in ([3,2], [10,10]): # CG should converge on 2D quadratic after 2nd step
         # but also test 10 to make sure it doesn't explode after converging
         Run(
             func_opt=lambda p: tz.Modular(p, CG(), tz.m.StrongWolfe(c2=0.1)),
             sphere_opt=lambda p: tz.Modular(p, CG(), tz.m.StrongWolfe(c2=0.1)),
             needs_closure=True,
-            func='lstsq', steps=func_steps, loss=1e-10, merge_invariant=False, # strong wolfe adds float imprecision
+            func='lstsq', steps=func_steps, loss=1e-10, merge_invariant=True,
             sphere_steps=sphere_steps_, sphere_loss=0,
         )
@@ -917,10 +936,10 @@ for QN in (
     tz.m.SSVM,
 ):
     Run(
-        func_opt=lambda p: tz.Modular(p, QN(scale_first=False, ptol_reset=True), tz.m.StrongWolfe()),
-        sphere_opt=lambda p: tz.Modular(p, QN(scale_first=False, ptol_reset=True), tz.m.StrongWolfe()),
+        func_opt=lambda p: tz.Modular(p, QN(scale_first=False, ptol_restart=True), tz.m.StrongWolfe()),
+        sphere_opt=lambda p: tz.Modular(p, QN(scale_first=False, ptol_restart=True), tz.m.StrongWolfe()),
         needs_closure=True,
-        func='lstsq', steps=50, loss=1e-10, merge_invariant=False,
+        func='lstsq', steps=50, loss=1e-10, merge_invariant=True,
         sphere_steps=10, sphere_loss=1e-20,
     )

{torchzero-0.3.11 → torchzero-0.3.14}/tests/test_tensorlist.py RENAMED Viewed

@@ -977,22 +977,23 @@ def test_rademacher_like(big_tl: TensorList):
 @pytest.mark.parametrize("dist", ['normal', 'uniform', 'sphere', 'rademacher'])
 def test_sample_like(simple_tl: TensorList, dist):
-    eps_scalar = 2.0
-    result_tl_scalar = simple_tl.sample_like(eps_scalar, distribution=dist)
+    eps_scalar = 1
+    result_tl_scalar = simple_tl.sample_like(distribution=dist)
     assert isinstance(result_tl_scalar, TensorList)
     assert result_tl_scalar.shape == simple_tl.shape
-    eps_list = [0.5, 1.0, 1.5]
-    result_tl_list = simple_tl.sample_like(eps_list, distribution=dist)
+    eps_list = [1.0,]
+    result_tl_list = simple_tl.sample_like(distribution=dist)
     assert isinstance(result_tl_list, TensorList)
     assert result_tl_list.shape == simple_tl.shape
     # Basic checks based on distribution
     if dist == 'uniform':
-        assert all(torch.all((t >= -eps_scalar/2) & (t <= eps_scalar/2)) for t in result_tl_scalar)
-        assert all(torch.all((t >= -e/2) & (t <= e/2)) for t, e in zip(result_tl_list, eps_list))
+        assert all(torch.all((t >= -eps_scalar) & (t <= eps_scalar)) for t in result_tl_scalar)
+        assert all(torch.all((t >= -e) & (t <= e)) for t, e in zip(result_tl_list, eps_list))
     elif dist == 'sphere':
-        assert torch.allclose(result_tl_scalar.global_vector_norm(), torch.tensor(eps_scalar))
+        # assert torch.allclose(result_tl_scalar.global_vector_norm(), torch.tensor(eps_scalar))
+        pass
         # Cannot check list version easily
     elif dist == 'rademacher':
          assert all(torch.all((t == -eps_scalar) | (t == eps_scalar)) for t in result_tl_scalar)

torchzero-0.3.14/torchzero/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+from . import core, optim, utils
+from .core import Modular
+from .utils import set_compilation
+from . import modules as m

torchzero-0.3.14/torchzero/core/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ from .module import Chain, Chainable, Modular, Module, Var, maybe_chain
2	+ from .transform import Target, TensorwiseTransform, Transform, apply_transform

torchzero 0.3.11__tar.gz → 0.3.14__tar.gz

torchzero 0.3.11tar.gz → 0.3.14tar.gz