PyPI - titans-pytorch - Versions diffs - 0.4.2__tar.gz → 0.4.4__tar.gz - Mend

titans-pytorch 0.4.2 (tar.gz) → 0.4.4 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

{titans_pytorch-0.4.2 → titans_pytorch-0.4.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: titans-pytorch
-Version: 0.4.2
+Version: 0.4.4
 Summary: Titans
 Project-URL: Homepage, https://pypi.org/project/titans-pytorch/
 Project-URL: Repository, https://github.com/lucidrains/titans-pytorch
@@ -56,7 +56,7 @@ Description-Content-Type: text/markdown
 <img src="./fig1.png" width="400px"></img>
-## Titans - Pytorch (wip)
+## Titans - Pytorch
 Unofficial implementation of [Titans](https://arxiv.org/abs/2501.00663) in Pytorch. Will also contain some explorations into architectures beyond their simple 1-4 layer MLP for the neural memory module, if it works well to any degree.

{titans_pytorch-0.4.2 → titans_pytorch-0.4.4}/README.md RENAMED Viewed

@@ -2,7 +2,7 @@
 <img src="./fig1.png" width="400px"></img>
-## Titans - Pytorch (wip)
+## Titans - Pytorch
 Unofficial implementation of [Titans](https://arxiv.org/abs/2501.00663) in Pytorch. Will also contain some explorations into architectures beyond their simple 1-4 layer MLP for the neural memory module, if it works well to any degree.

{titans_pytorch-0.4.2 → titans_pytorch-0.4.4}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "titans-pytorch"
-version = "0.4.2"
+version = "0.4.4"
 description = "Titans"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }

{titans_pytorch-0.4.2 → titans_pytorch-0.4.4}/tests/test_titans.py RENAMED Viewed

@@ -405,3 +405,23 @@ def test_assoc_scan(
     assert second_half.shape == inputs2.shape
     assert torch.allclose(output[:, -1], second_half[:, -1], atol = 1e-5)
+def test_mem_state_detach():
+    from titans_pytorch.neural_memory import mem_state_detach
+    mem = NeuralMemory(
+        dim = 384,
+        chunk_size = 2,
+        qk_rmsnorm = True,
+        dim_head = 64,
+        heads = 4,
+    )
+    seq = torch.randn(4, 64, 384)
+    state = None
+    for _ in range(2):
+        parallel_retrieved, state = mem(seq, state = state)
+        state = mem_state_detach(state)
+        parallel_retrieved.sum().backward()

{titans_pytorch-0.4.2 → titans_pytorch-0.4.4}/titans_pytorch/__init__.py RENAMED Viewed

@@ -1,5 +1,7 @@
 from titans_pytorch.neural_memory import (
     NeuralMemory,
+    NeuralMemState,
+    mem_state_detach
 )
 from titans_pytorch.memory_models import (

{titans_pytorch-0.4.2 → titans_pytorch-0.4.4}/titans_pytorch/neural_memory.py RENAMED Viewed

@@ -7,10 +7,11 @@ from itertools import zip_longest
 from collections import namedtuple
 import torch
-from torch import nn, stack, cat, tensor, Tensor
+from torch import nn, stack, cat, is_tensor, tensor, Tensor
 import torch.nn.functional as F
 from torch.nn import Linear, Module, Parameter, ParameterList, ParameterDict
 from torch.func import functional_call, vmap, grad
+from torch.utils._pytree import tree_map, tree_flatten, tree_unflatten
 from tensordict import TensorDict
@@ -40,6 +41,8 @@ u - key / value updates - allowing a token to emit multiple key / values
 LinearNoBias = partial(Linear, bias = False)
+# neural mem state related
 NeuralMemState = namedtuple('NeuralMemState', [
     'seq_index',
     'weights',
@@ -48,6 +51,13 @@ NeuralMemState = namedtuple('NeuralMemState', [
     'updates',
 ])
+def mem_state_detach(
+    state: NeuralMemState
+):
+    assert isinstance(state, NeuralMemState)
+    state = tree_map(lambda t: t.detach() if is_tensor(t) else t, tuple(state))
+    return NeuralMemState(*state)
 # functions
 def exists(v):
@@ -940,7 +950,7 @@ class NeuralMemory(Module):
         # whether to allow network to slowly adjust from initial weight throughout (residual path) to fully updating weights every batch
-        surprises = None
+        surprises = (None, None)
         gate = None
         if exists(self.transition_gate):
@@ -967,7 +977,7 @@ class NeuralMemory(Module):
             updates = accum_updates(updates, next_updates)
-            surprises = safe_cat((surprises, chunk_surprises), dim = -1)
+            surprises = tuple(safe_cat(args, dim = -1) for args in zip(surprises, chunk_surprises))
             if is_last and not update_after_final_store:
                 continue