titans-pytorch 0.4.1__tar.gz → 0.4.3__tar.gz

This diff shows the changes between publicly released versions of the package, as published to their respective public registries, and is provided for informational purposes only.
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: titans-pytorch
-Version: 0.4.1
+Version: 0.4.3
 Summary: Titans
 Project-URL: Homepage, https://pypi.org/project/titans-pytorch/
 Project-URL: Repository, https://github.com/lucidrains/titans-pytorch

pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "titans-pytorch"
-version = "0.4.1"
+version = "0.4.3"
 description = "Titans"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }

tests/test_titans.py

@@ -85,9 +85,9 @@ def test_return_surprises():
 
     seq = torch.randn(4, 64, 384)
 
-    _, _, surprises = mem(seq, return_surprises = True)
+    _, _, (surprises, adaptive_lr) = mem(seq, return_surprises = True)
 
-    assert surprises.shape == (4, 4, 64)
+    assert all([t.shape == (4, 4, 64) for t in (surprises, adaptive_lr)])
 
 @pytest.mark.parametrize('learned_momentum_combine', (False, True))
 @pytest.mark.parametrize('learned_combine_include_zeroth', (False, True))

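The test change above pins down the new return contract: with return_surprises = True, the third value returned by NeuralMemory is now a (surprises, adaptive_lr) pair rather than a lone surprises tensor, and both tensors share the (batch, heads, seq_len) layout asserted as (4, 4, 64). A minimal usage sketch, assuming a NeuralMemory configured roughly as in the test suite (the constructor arguments below are illustrative assumptions, not shown in this diff):

    import torch
    from titans_pytorch import NeuralMemory

    # constructor arguments are assumed for illustration only
    mem = NeuralMemory(dim = 384, chunk_size = 64, heads = 4)

    seq = torch.randn(4, 64, 384)

    # pre-0.4.3:  _, _, surprises = mem(seq, return_surprises = True)
    # from 0.4.3: the third element is a (surprises, adaptive_lr) pair
    # (names for the first two returned values are illustrative)
    retrieved, state, (surprises, adaptive_lr) = mem(seq, return_surprises = True)

    # both carry one value per head per token: (batch, heads, seq_len),
    # i.e. (4, 4, 64) per the updated test
    print(surprises.shape, adaptive_lr.shape)
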
titans_pytorch/neural_memory.py

@@ -652,6 +652,7 @@ class NeuralMemory(Module):
 
         # surprises
 
+        adaptive_lr = rearrange(adaptive_lr, '(b h n) c -> b h (n c)', b = batch, h = heads)
         unweighted_mem_model_loss = rearrange(unweighted_mem_model_loss, '(b h n) c -> b h (n c)', b = batch, h = heads)
 
         # maybe softclamp grad norm

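The added line mirrors, for adaptive_lr, the reshape already applied to unweighted_mem_model_loss: rows flattened over (batch * heads * chunks) are regrouped, and the chunk and intra-chunk axes are merged into one sequence axis. A standalone einops sketch of that exact pattern, with toy sizes (the dimensions are assumptions for illustration):

    import torch
    from einops import rearrange

    b, h, n, c = 2, 4, 8, 16  # batch, heads, num chunks, chunk size (toy values)

    # per-chunk learning rates arrive flattened over (batch * heads * chunks) rows
    adaptive_lr = torch.randn(b * h * n, c)

    # split rows back into batch and heads, merge chunks into the sequence axis
    adaptive_lr = rearrange(adaptive_lr, '(b h n) c -> b h (n c)', b = b, h = h)

    print(adaptive_lr.shape)  # torch.Size([2, 4, 128])
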
@@ -695,7 +696,7 @@ class NeuralMemory(Module):
         if not return_surprises:
             return output
 
-        return (*output, unweighted_mem_model_loss)
+        return (*output, (unweighted_mem_model_loss, adaptive_lr))
 
     # momentum + weight decay - momentum is the new contribution, as most linear RNNs have learned forgetting gates
 
@@ -755,7 +756,7 @@ class NeuralMemory(Module):
         if not return_surprises:
             return updates, next_store_state
 
-        return updates, next_store_state, unweighted_mem_model_loss
+        return updates, next_store_state, (unweighted_mem_model_loss, adaptive_lr)
 
     def retrieve_memories(
         self,
@@ -939,7 +940,7 @@ class NeuralMemory(Module):
 
         # whether to allow network to slowly adjust from initial weight throughout (residual path) to fully updating weights every batch
 
-        surprises = None
+        surprises = (None, None)
         gate = None
 
         if exists(self.transition_gate):
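Seeding surprises with (None, None) instead of None lets the per-chunk loop below accumulate each element of the pair independently: on the first chunk there is nothing to concatenate yet, and safe_cat is expected to pass a lone tensor through. A minimal sketch of the assumed safe_cat semantics (the library's own helper may differ in detail):

    import torch

    def safe_cat(tensors, dim = -1):
        # assumed behavior: ignore Nones, so a fresh accumulator is a no-op
        tensors = [t for t in tensors if t is not None]

        if len(tensors) == 0:
            return None

        if len(tensors) == 1:
            return tensors[0]

        return torch.cat(tensors, dim = dim)
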
@@ -966,7 +967,7 @@ class NeuralMemory(Module):
 
             updates = accum_updates(updates, next_updates)
 
-            surprises = safe_cat((surprises, chunk_surprises), dim = -1)
+            surprises = tuple(safe_cat(args, dim = -1) for args in zip(surprises, chunk_surprises))
 
             if is_last and not update_after_final_store:
                 continue
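With surprises now a pair, the single safe_cat call becomes an element-wise one: zip lines up the running loss buffer with each chunk's loss, and the running lr buffer with each chunk's lr. A toy run of the same accumulation pattern, under the safe_cat semantics sketched above (shapes are assumptions):

    import torch

    def safe_cat(tensors, dim = -1):
        # same assumed helper as above: drop Nones before concatenating
        tensors = [t for t in tensors if t is not None]
        if not tensors:
            return None
        return tensors[0] if len(tensors) == 1 else torch.cat(tensors, dim = dim)

    surprises = (None, None)  # (unweighted_mem_model_loss, adaptive_lr)

    for _ in range(3):  # stand-in for the per-chunk store loop
        chunk_surprises = (torch.randn(2, 4, 8), torch.randn(2, 4, 8))

        # concatenate loss with loss and lr with lr along the sequence axis;
        # on the first pass the chunk tensors simply pass through
        surprises = tuple(safe_cat(args, dim = -1) for args in zip(surprises, chunk_surprises))

    print([t.shape for t in surprises])  # two tensors of shape (2, 4, 24)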