titans-pytorch 0.1.22__py3-none-any.whl → 0.1.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- titans_pytorch/titans.py +67 -9
- {titans_pytorch-0.1.22.dist-info → titans_pytorch-0.1.23.dist-info}/METADATA +2 -2
- titans_pytorch-0.1.23.dist-info/RECORD +8 -0
- titans_pytorch-0.1.22.dist-info/RECORD +0 -8
- {titans_pytorch-0.1.22.dist-info → titans_pytorch-0.1.23.dist-info}/WHEEL +0 -0
- {titans_pytorch-0.1.22.dist-info → titans_pytorch-0.1.23.dist-info}/licenses/LICENSE +0 -0
titans_pytorch/titans.py
CHANGED
@@ -44,6 +44,16 @@ def default(v, d):
 def xnor(x, y):
     return not (x ^ y)

+def safe_cat(inputs, dim = -2):
+    inputs = tuple(filter(exists, inputs))
+
+    if len(inputs) == 0:
+        return None
+    elif len(inputs) == 1:
+        return inputs[0]
+
+    return cat(inputs, dim = dim)
+
 def identity(t):
     return t

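The new `safe_cat` helper lets the inference path below treat an absent cache and a present cache uniformly. A standalone sketch of its contract (re-declaring `exists` and `cat` locally, since the package imports them from its own namespace):

import torch
from torch import cat

def exists(v):
    return v is not None

def safe_cat(inputs, dim = -2):
    inputs = tuple(filter(exists, inputs))  # drop any Nones

    if len(inputs) == 0:
        return None
    elif len(inputs) == 1:
        return inputs[0]

    return cat(inputs, dim = dim)

cache = None
token = torch.randn(2, 1, 384)

cache = safe_cat((cache, token))  # cache was None -> just the token, (2, 1, 384)
cache = safe_cat((cache, token))  # both present  -> concatenated, (2, 2, 384)
assert cache.shape == (2, 2, 384)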
@@ -314,7 +324,11 @@ class AssocScan(Module):
         super().__init__()
         self.use_accelerated = use_accelerated

-    def forward(self, gates, inputs):
+    def forward(self, gates, inputs, prev = None):
+
+        if exists(prev):
+            inputs, _ = pack([prev, inputs], 'b * d')
+            gates = pad_at_dim(gates, (1, 0), value = 1., dim = -2)

         if not self.use_accelerated:
             _, outputs = associative_scan(binary_operator, (gates, inputs))
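Threading `prev` through works because a gated scan can be seeded by prepending the previous state as an extra element whose gate is 1: the padded first step reproduces `prev` exactly, and every later step continues the recurrence. A reference check of that identity in plain torch, assuming the scan computes h_t = g_t * h_{t-1} + x_t:

import torch

# reference recurrence: h_t = gates_t * h_{t-1} + inputs_t
def scan_ref(gates, inputs, prev = None):
    h = prev if prev is not None else torch.zeros_like(inputs[:, 0])
    outs = []
    for t in range(inputs.shape[1]):
        h = gates[:, t] * h + inputs[:, t]
        outs.append(h)
    return torch.stack(outs, dim = 1)

b, n, d = 2, 4, 8
gates, inputs, prev = torch.rand(b, n, d), torch.randn(b, n, d), torch.randn(b, d)

# prepend prev as an input whose gate is 1 (the plain-torch analogue of the
# pack + pad_at_dim above), then drop the extra first output
padded_gates = torch.cat((torch.ones(b, 1, d), gates), dim = 1)
padded_inputs = torch.cat((prev[:, None, :], inputs), dim = 1)
seeded = scan_ref(padded_gates, padded_inputs)[:, 1:]

assert torch.allclose(seeded, scan_ref(gates, inputs, prev = prev), atol = 1e-6)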
@@ -678,7 +692,7 @@ class NeuralMemory(Module):
     def retrieve_memories(
         self,
         seq,
-        past_weights: dict[str, Tensor]
+        past_weights: dict[str, Tensor],
         chunk_size = None
     ):
         chunk_size = default(chunk_size, self.retrieve_chunk_size)
@@ -700,13 +714,7 @@ class NeuralMemory(Module):
         # the parameters of the memory model stores the memories of the key / values
         # when the MLP has only 1 weight matrix, it is equivalent to `kv` fast weight memories from linear attention literature (recall fetching of memories is q @ (kv)) / schmidhuber's paper

-        curr_weights = TensorDict(
-
-        if exists(past_weights):
-            past_weights = TensorDict(past_weights)
-            assert past_weights.keys() == curr_weights.keys()
-
-            curr_weights = curr_weights + past_weights
+        curr_weights = TensorDict(past_weights)

         # sequence Float['b n d'] to queries

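The comment in this hunk is the single-matrix case in miniature: storing writes outer products of keys and values into one matrix, and retrieval is a query-matrix product. A few lines of plain torch to make the `q @ (kv)` remark concrete (illustrative only, not the package's actual memory model):

import torch

d = 8
k = torch.randn(16, d)   # keys of 16 stored items
v = torch.randn(16, d)   # values of 16 stored items
q = torch.randn(1, d)    # a retrieval query

kv = k.t() @ v           # fast weight memory: sum of outer products k_t^T v_t
out = q @ kv             # fetching memories is q @ (kv), per the comment above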
@@ -753,6 +761,56 @@ class NeuralMemory(Module):

         return values[:, :seq_len]

+    def forward_inference(
+        self,
+        token: Tensor,
+        seq_index = None, # the index of the token in the sequence, starts at 0
+        mem_model_state = None,
+        cache_store_seq = None
+    ):
+        seq_index = default(seq_index, 0)
+        curr_seq_len = seq_index + 1
+        batch = token.shape[0]
+
+        if token.ndim == 2:
+            token = rearrange(token, 'b d -> b 1 d')
+
+        # init memory model if needed
+
+        if not exists(mem_model_state):
+            mem_model_state = self.init_weights_and_momentum()
+
+        # increment the sequence cache which is at most the chunk size
+
+        cache_store_seq = safe_cat((cache_store_seq, token), dim = -2)
+
+        # early return empty memory, when no memories are stored for steps < first chunk size
+
+        if curr_seq_len < self.chunk_size:
+            empty_mem = self.init_empty_memory_embed(batch, 1)
+
+            return empty_mem, cache_store_seq, mem_model_state
+
+        # store if storage sequence cache hits the chunk size
+
+        store_seq_cache_len = cache_store_seq.shape[-2]
+
+        if store_seq_cache_len == self.chunk_size:
+            updates, _ = self.store_memories(cache_store_seq, mem_model_state)
+
+            past_weights, past_momentum = mem_model_state
+            mem_model_state = (past_weights + updates, past_momentum)
+
+            cache_store_seq = None
+
+        # retrieve
+
+        past_weights, _ = mem_model_state
+
+        retrieved = self.retrieve_memories(token, past_weights, chunk_size = 1)
+
+        return retrieved, cache_store_seq, mem_model_state
+
     def forward(
         self,
         seq,
{titans_pytorch-0.1.22.dist-info → titans_pytorch-0.1.23.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: titans-pytorch
-Version: 0.1.22
+Version: 0.1.23
 Summary: Titans
 Project-URL: Homepage, https://pypi.org/project/titans-pytorch/
 Project-URL: Repository, https://github.com/lucidrains/titans-pytorch
@@ -78,7 +78,7 @@ from titans_pytorch import NeuralMemory

 mem = NeuralMemory(
     dim = 384,
-    chunk_size = 64
+    chunk_size = 64 # set to smaller chunk size for better perf on smaller sequence lengths (but more memory usage)
 ).cuda()

 seq = torch.randn(2, 1024, 384).cuda()
titans_pytorch-0.1.23.dist-info/RECORD
ADDED
@@ -0,0 +1,8 @@
+titans_pytorch/__init__.py,sha256=u0tta_KqhOdfzCEDWT9P4_jejJEK2q1XxhsEzB5MnQU,223
+titans_pytorch/associative_scan.py,sha256=Y-iYqmFuG-NoCKu6kgql1mhowXTeJfyawi3eUIXamp0,2650
+titans_pytorch/mac_transformer.py,sha256=Ejq1r3GQQnlT1Fo4McaOOie19t1HjwVlYbD90GLQCYI,22859
+titans_pytorch/titans.py,sha256=WbagKMYDs-3NoW2j_pAyHEnvR9QzH3A9WntHuV_FKOo,25109
+titans_pytorch-0.1.23.dist-info/METADATA,sha256=H7QbLscawNObHGeoTbnKbf-NOqkMqWCu4yWeZJ0yKMA,6814
+titans_pytorch-0.1.23.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+titans_pytorch-0.1.23.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+titans_pytorch-0.1.23.dist-info/RECORD,,
titans_pytorch-0.1.22.dist-info/RECORD
DELETED
@@ -1,8 +0,0 @@
-titans_pytorch/__init__.py,sha256=u0tta_KqhOdfzCEDWT9P4_jejJEK2q1XxhsEzB5MnQU,223
-titans_pytorch/associative_scan.py,sha256=Y-iYqmFuG-NoCKu6kgql1mhowXTeJfyawi3eUIXamp0,2650
-titans_pytorch/mac_transformer.py,sha256=Ejq1r3GQQnlT1Fo4McaOOie19t1HjwVlYbD90GLQCYI,22859
-titans_pytorch/titans.py,sha256=7PGnZxdKq6T6e51RL7-QV43wp-46YmrytTZLt0McMco,23407
-titans_pytorch-0.1.22.dist-info/METADATA,sha256=HCOAqLK605c8R2mvgQ80kwE9jayZ2CwJqHLsJtFx7Vs,6718
-titans_pytorch-0.1.22.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-titans_pytorch-0.1.22.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
-titans_pytorch-0.1.22.dist-info/RECORD,,
{titans_pytorch-0.1.22.dist-info → titans_pytorch-0.1.23.dist-info}/WHEEL
File without changes
{titans_pytorch-0.1.22.dist-info → titans_pytorch-0.1.23.dist-info}/licenses/LICENSE
File without changes