titans-pytorch 0.0.4__py3-none-any.whl → 0.0.6__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
- titans_pytorch/titans.py +28 -6
- {titans_pytorch-0.0.4.dist-info → titans_pytorch-0.0.6.dist-info}/METADATA +6 -2
- titans_pytorch-0.0.6.dist-info/RECORD +7 -0
- titans_pytorch-0.0.4.dist-info/RECORD +0 -7
- {titans_pytorch-0.0.4.dist-info → titans_pytorch-0.0.6.dist-info}/WHEEL +0 -0
- {titans_pytorch-0.0.4.dist-info → titans_pytorch-0.0.6.dist-info}/licenses/LICENSE +0 -0
titans_pytorch/titans.py
CHANGED

@@ -1,4 +1,5 @@
 from __future__ import annotations
+import math
 from functools import partial

 import torch
@@ -11,7 +12,8 @@ from tensordict import TensorDict

 from titans_pytorch.associative_scan import (
     associative_scan,
-    binary_operator
+    binary_operator,
+    pad_at_dim
 )

 import einx
@@ -41,6 +43,9 @@ def default(v, d):
 def round_down_multiple(seq, mult):
     return seq // mult * mult

+def round_up_multiple(seq, mult):
+    return math.ceil(seq / mult) * mult
+
 def pack_one_with_inverse(t, pattern):
     packed, packed_shape = pack([t], pattern)

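The new round_up_multiple mirrors the existing round_down_multiple: the store path truncates the sequence to whole chunks, while the new retrieval path pads it up to whole chunks. A small worked example of the arithmetic (helpers copied from the hunk above; the sample numbers are illustrative only):

import math

def round_down_multiple(seq, mult):
    # largest multiple of `mult` not exceeding seq
    return seq // mult * mult

def round_up_multiple(seq, mult):
    # smallest multiple of `mult` not below seq
    return math.ceil(seq / mult) * mult

# e.g. a 10-token sequence with chunk_size = 4
assert round_down_multiple(10, 4) == 8    # store_memories keeps only complete chunks
assert round_up_multiple(10, 4) == 12     # retrieve_memories pads up to complete chunks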
@@ -159,7 +164,7 @@ class NeuralMemory(Module):
         # curtail sequence by multiple of the chunk size
         # only a complete chunk of the sequence provides the memory for the next chunk

-        seq_len = seq.shape[-2]
+        seq_len, chunk_size = seq.shape[-2], self.chunk_size
         round_down_seq_len = round_down_multiple(seq_len, self.chunk_size)

         seq = seq[:, :round_down_seq_len]
@@ -229,14 +234,26 @@ class NeuralMemory(Module):

         next_state = (curr_weights + last_update, next_momentum)

-        return updates, next_state, aux_store_loss.mean()
+        return updates, next_state, aux_store_loss.mean() / chunk_size

     def retrieve_memories(
         self,
         seq,
         past_weights: dict[str, Tensor] | None = None,
     ):
-
+        chunk_size = self.chunk_size
+        batch, seq_len = seq.shape[:2]
+
+        assert seq_len >= chunk_size
+
+        seq = seq[:, (chunk_size - 1):]
+        curtailed_seq_len = seq.shape[-2]
+
+        next_seq_len = round_up_multiple(curtailed_seq_len, chunk_size)
+
+        padding = next_seq_len - curtailed_seq_len
+
+        seq = pad_at_dim(seq, (0, padding), dim = 1)

         # the parameters of the memory model stores the memories of the key / values
         # when the MLP has only 1 weight matrix, it is equivalent to `kv` fast weight memories from linear attention literature (recall fetching of memories is q @ (kv)) / schmidhuber's paper
@@ -256,7 +273,7 @@ class NeuralMemory(Module):
         # fetch values from memory model

         curr_weights = curr_weights.apply(lambda t: rearrange(t, 'b n ... -> (b n) ...'))
-        queries = rearrange(queries, 'b n d -> (b n)
+        queries = rearrange(queries, 'b (n c) d -> (b n) c d', c = chunk_size)

         # forward functional call

@@ -264,7 +281,12 @@ class NeuralMemory(Module):

         # reconstitute batch dimension

-        values = rearrange(values, '(b n)
+        values = rearrange(values, '(b n) c d -> b (n c) d', b = batch)
+
+        # restore
+
+        values = pad_at_dim(values, (chunk_size - 1, 0), dim = 1, value = 0.) # todo, used a learned null memory embedding instead of 0s for retrieving from empty neural memory
+        values = values[:, :-padding]

         return values

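Taken together, the retrieval changes offset the sequence by chunk_size - 1, right-pad it to a whole number of chunks, group queries chunk-wise for the functional call, and then left-pad and trim the retrieved values back to the original length. Below is a minimal, shape-only sketch of that bookkeeping, assuming an input of 2 sequences of 10 tokens with chunk_size = 4; it uses torch.nn.functional.pad in place of the package's pad_at_dim and plain reshapes in place of einops, purely for illustration:

import math
import torch
import torch.nn.functional as F

def round_up_multiple(seq, mult):
    return math.ceil(seq / mult) * mult

batch, seq_len, dim, chunk_size = 2, 10, 32, 4
seq = torch.randn(batch, seq_len, dim)

# positions are retrieved with weights from the last complete chunk before them,
# so the first (chunk_size - 1) tokens are dropped up front
seq = seq[:, (chunk_size - 1):]                                               # (2, 7, 32)

# right-pad so the remainder splits evenly into chunks
curtailed_seq_len = seq.shape[1]
padding = round_up_multiple(curtailed_seq_len, chunk_size) - curtailed_seq_len  # 1
seq = F.pad(seq, (0, 0, 0, padding))                                          # (2, 8, 32)

# queries are grouped per chunk before the memory model is applied
queries = seq.reshape(batch * seq.shape[1] // chunk_size, chunk_size, dim)    # (4, 4, 32)

# retrieved values are flattened back, left-padded with zeros for the first
# (chunk_size - 1) positions that had no stored memory yet, and trimmed to length
values = queries.reshape(batch, -1, dim)                                      # (2, 8, 32)
values = F.pad(values, (0, 0, chunk_size - 1, 0), value = 0.)                 # (2, 11, 32)
values = values[:, :-padding]                                                 # (2, 10, 32)

assert values.shape == (batch, seq_len, dim)   # output length matches the input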
{titans_pytorch-0.0.4.dist-info → titans_pytorch-0.0.6.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: titans-pytorch
-Version: 0.0.4
+Version: 0.0.6
 Summary: Titans
 Project-URL: Homepage, https://pypi.org/project/titans-pytorch/
 Project-URL: Repository, https://github.com/lucidrains/titans-pytorch
@@ -39,6 +39,7 @@ Requires-Dist: einx>=0.3.0
 Requires-Dist: tensordict>=0.6.2
 Requires-Dist: torch>=2.3
 Provides-Extra: examples
+Requires-Dist: local-attention>=1.9.15; extra == 'examples'
 Provides-Extra: test
 Requires-Dist: pytest; extra == 'test'
 Description-Content-Type: text/markdown
@@ -65,7 +66,10 @@ from titans_pytorch import NeuralMemory

 x = torch.randn(2, 64, 32)

-mem = NeuralMemory(
+mem = NeuralMemory(
+    dim = 32,
+    chunk_size = 2
+)

 out = mem(x)

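The README snippet in METADATA now passes dim and chunk_size explicitly, which is consistent with the new assert in retrieve_memories that the sequence must be at least one chunk long. A self-contained version of that snippet; the comment on the output shape is an assumption, not something stated in the README:

import torch
from titans_pytorch import NeuralMemory

# 2 sequences of 64 tokens, feature dimension 32; 64 is at least chunk_size and a
# multiple of it, so both the store and retrieve paths see complete chunks
x = torch.randn(2, 64, 32)

mem = NeuralMemory(
    dim = 32,
    chunk_size = 2
)

out = mem(x)
print(out.shape)  # presumably torch.Size([2, 64, 32]), mirroring the input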
titans_pytorch-0.0.6.dist-info/RECORD
ADDED

@@ -0,0 +1,7 @@
+titans_pytorch/__init__.py,sha256=QKuJPCOJCdgtaPeKoHEkYkiQe65_LV9_8-cIMbBPU30,55
+titans_pytorch/associative_scan.py,sha256=Y-iYqmFuG-NoCKu6kgql1mhowXTeJfyawi3eUIXamp0,2650
+titans_pytorch/titans.py,sha256=S8J8B9o7Rlnj2hU3FZgpn28GTmis3ZbenLqjB_uny54,9470
+titans_pytorch-0.0.6.dist-info/METADATA,sha256=t4HXD6sZT7_pgcwD8TBY6ojYHUHiZ05J6t19wRKtHNc,3092
+titans_pytorch-0.0.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+titans_pytorch-0.0.6.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+titans_pytorch-0.0.6.dist-info/RECORD,,
titans_pytorch-0.0.4.dist-info/RECORD
DELETED

@@ -1,7 +0,0 @@
-titans_pytorch/__init__.py,sha256=QKuJPCOJCdgtaPeKoHEkYkiQe65_LV9_8-cIMbBPU30,55
-titans_pytorch/associative_scan.py,sha256=Y-iYqmFuG-NoCKu6kgql1mhowXTeJfyawi3eUIXamp0,2650
-titans_pytorch/titans.py,sha256=Cue4Q3OCRPh-lUF99x-1LXmjIecbWOj8bDz8-xP-Rt0,8719
-titans_pytorch-0.0.4.dist-info/METADATA,sha256=IDXO4RWPda9jJak-7_Y0lEW2ADEao79XQqxcYVaLWxI,3000
-titans_pytorch-0.0.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-titans_pytorch-0.0.4.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
-titans_pytorch-0.0.4.dist-info/RECORD,,
{titans_pytorch-0.0.4.dist-info → titans_pytorch-0.0.6.dist-info}/WHEEL
File without changes

{titans_pytorch-0.0.4.dist-info → titans_pytorch-0.0.6.dist-info}/licenses/LICENSE
File without changes