titans-pytorch 0.1.29__py3-none-any.whl → 0.1.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- titans_pytorch/mac_transformer.py +20 -13
- titans_pytorch/titans.py +3 -2
- {titans_pytorch-0.1.29.dist-info → titans_pytorch-0.1.31.dist-info}/METADATA +1 -1
- titans_pytorch-0.1.31.dist-info/RECORD +8 -0
- titans_pytorch-0.1.29.dist-info/RECORD +0 -8
- {titans_pytorch-0.1.29.dist-info → titans_pytorch-0.1.31.dist-info}/WHEEL +0 -0
- {titans_pytorch-0.1.29.dist-info → titans_pytorch-0.1.31.dist-info}/licenses/LICENSE +0 -0
titans_pytorch/mac_transformer.py CHANGED

@@ -582,13 +582,8 @@ class MemoryAsContextTransformer(Module):
         self,
         seq_index
     ):
-        total_segment_len = self.attn_window_size
-
-        seq = seq_index + 1
-        seq -= int((seq % total_segment_len) == 0)
-        last_segment_len = round_down_multiple(seq, total_segment_len)
-        segment_seq = seq - last_segment_len
-        return (segment_seq - self.segment_len) > 0
+        total_segment_len, segment_len = self.attn_window_size, self.segment_len
+        return ((seq_index % total_segment_len + 1) - segment_len) > 0

     def seq_len_with_longterm_mem(
         self,
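For reference, a minimal sketch (not part of the package) of what the rewritten predicate selects, assuming attn_window_size = segment_len + num_longterm_mem_tokens as elsewhere in the file: it flags the trailing memory slots of each attention window.

# hedged sketch; segment_len and num_mem values are made up for illustration
segment_len, num_mem = 4, 2
total_segment_len = segment_len + num_mem  # assumed: attn_window_size = segment_len + num_mem

def seq_index_is_longterm(seq_index):
    return ((seq_index % total_segment_len + 1) - segment_len) > 0

flagged = [i for i in range(12) if seq_index_is_longterm(i)]
assert flagged == [4, 5, 10, 11]  # the trailing num_mem slots of each window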
@@ -597,7 +592,7 @@ class MemoryAsContextTransformer(Module):
         assert seq_len > 0

         segment_len, num_mem = self.segment_len, self.num_longterm_mem_tokens
-        return
+        return ((seq_len - 1) // segment_len) * num_mem + seq_len

     @torch.no_grad()
     def sample(
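A quick check of the restored return expression (values illustrative): every completed segment of segment_len tokens contributes num_longterm_mem_tokens extra positions.

# hedged sketch of the new formula
segment_len, num_mem = 4, 2

def seq_len_with_longterm_mem(seq_len):
    assert seq_len > 0
    return ((seq_len - 1) // segment_len) * num_mem + seq_len

assert seq_len_with_longterm_mem(4) == 4    # no segment boundary crossed yet
assert seq_len_with_longterm_mem(5) == 7    # one completed segment adds num_mem positions
assert seq_len_with_longterm_mem(9) == 13   # two completed segments add 2 * num_mem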
@@ -723,9 +718,9 @@ class MemoryAsContextTransformer(Module):
         is_inferencing = exists(cache)

         if not exists(cache):
-            cache = (None, None)
+            cache = (seq_len_with_mem - 1, None, None)

-        kv_caches, neural_mem_caches = cache
+        inference_seq_index, kv_caches, neural_mem_caches = cache

         kv_caches = iter(default(kv_caches, []))
         neural_mem_caches = iter(default(neural_mem_caches, []))
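The cache grows from a 2-tuple to a 3-tuple so the running position survives across decoding steps. A sketch of the new layout (seq_len_with_mem is the value computed earlier in forward; the number here is illustrative):

seq_len_with_mem = 13
cache = (seq_len_with_mem - 1, None, None)   # fresh cache: start at the prompt's last position
inference_seq_index, kv_caches, neural_mem_caches = cache
assert inference_seq_index == 12             # advanced by 1 on every subsequent step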
@@ -744,7 +739,8 @@ class MemoryAsContextTransformer(Module):
         # when inferencing, only do one token at a time

         if is_inferencing:
-
+            ind = inference_seq_index
+            x = x[:, ind:(ind + 1)]

         # expand and reduce streams for hyper connections

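With the index in hand, inference slices out exactly the tracked position rather than assuming the last token. A toy illustration of the shape logic (not the package's actual activations):

import torch

x = torch.randn(2, 13, 512)        # (batch, seq_len_with_mem, dim)
ind = 6                            # inference_seq_index carried in the cache
x_step = x[:, ind:(ind + 1)]       # one position per forward pass
assert x_step.shape == (2, 1, 512)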
@@ -817,6 +813,17 @@ class MemoryAsContextTransformer(Module):
         if not self.sliding_window_attn and divisible_by(seq_len_with_mem, attn_window_size):
             next_kv_caches = next_kv_caches[..., 0:0, :]

+        next_cache = (
+            inference_seq_index + 1,
+            next_kv_caches,
+            next_neural_mem_caches
+        )
+
+        is_longterm_mem = self.seq_index_is_longterm(inference_seq_index)
+
+        if is_inferencing and is_longterm_mem:
+            return None, next_cache
+
         # hyper connection reducing of streams

         x = self.reduce_streams(x)
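Because forward can now land on an interleaved long-term memory position, it returns None logits there and hands back the advanced cache. A hedged decode-loop sketch; model, prompt, and the keyword names are assumptions read off this diff, not a documented API:

import torch

# `model` is a trained MemoryAsContextTransformer, `prompt` is (batch, n) token ids; both assumed
cache = None
out = prompt
for _ in range(64):
    logits, cache = model(out, cache = cache, return_cache = True)

    if logits is None:                         # memory-token step: cache advanced, nothing to sample
        continue

    next_token = logits[:, -1].argmax(dim = -1, keepdim = True)
    out = torch.cat((out, next_token), dim = -1)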
@@ -829,7 +836,7 @@ class MemoryAsContextTransformer(Module):

         x, _ = inverse_pack_mems(x)

-        x = inverse_segment(x)
+        x = inverse_segment(x, remove_pad = False)

         x = x[:, :seq_len]

@@ -843,7 +850,7 @@ class MemoryAsContextTransformer(Module):
         if not return_cache:
             return logits

-        return logits,
+        return logits, next_cache

         ar_loss = F.cross_entropy(rearrange(logits, 'b n l -> b l n'), labels)

titans_pytorch/titans.py CHANGED
@@ -409,6 +409,7 @@ class NeuralMemory(Module):
     ):
         super().__init__()
         dim_head = default(dim_head, dim)
+        assert not (heads == 1 and dim_head != dim)

         self.retrieve_chunk_size, self.store_chunk_size = pair(chunk_size)

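The new assert rejects a previously silent configuration mismatch: with a single head, dim_head must equal dim. For instance:

dim, heads, dim_head = 384, 1, 384
assert not (heads == 1 and dim_head != dim)    # passes

dim, heads, dim_head = 384, 1, 64
# assert not (heads == 1 and dim_head != dim)  # would now raise at construction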
@@ -566,7 +567,7 @@ class NeuralMemory(Module):
     ):
         assert xnor(exists(value_residual), exists(self.learned_value_residual))

-        seq_len, chunk_size = seq.shape[-2], default(chunk_size, self.store_chunk_size)
+        seq_len, heads, chunk_size = seq.shape[-2], self.heads, default(chunk_size, self.store_chunk_size)

         # handle edge case

@@ -645,7 +646,7 @@ class NeuralMemory(Module):

         # restore batch and sequence dimension

-        grads = grads.apply(lambda t: rearrange(t, '(b n) ... -> b n ...', b = batch))
+        grads = grads.apply(lambda t: rearrange(t, '(b n) ... -> b n ...', b = batch * heads))

         # maybe per layer modulation

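This is the companion fix to the heads change above: judging by the diff, the per-sample gradients arrive with heads folded into the batch dimension, so restoring the batch axis must unflatten by batch * heads, not batch. A standalone sketch of the shape logic (tensor values are illustrative; grads.apply in the package maps this over a tree of parameters):

import torch
from einops import rearrange

batch, heads, n, d = 2, 4, 3, 8
t = torch.randn(batch * heads * n, d)      # heads already merged into batch upstream

# 0.1.29 used b = batch here, which mis-grouped the leading dimension
t = rearrange(t, '(b n) ... -> b n ...', b = batch * heads)
assert t.shape == (batch * heads, n, d)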
titans_pytorch-0.1.31.dist-info/RECORD ADDED

@@ -0,0 +1,8 @@
+titans_pytorch/__init__.py,sha256=u0tta_KqhOdfzCEDWT9P4_jejJEK2q1XxhsEzB5MnQU,223
+titans_pytorch/associative_scan.py,sha256=Y-iYqmFuG-NoCKu6kgql1mhowXTeJfyawi3eUIXamp0,2650
+titans_pytorch/mac_transformer.py,sha256=pKFRL_ISoHEKUyfssKwfBfwFO2eQN9objJmxLrNsYrU,24838
+titans_pytorch/titans.py,sha256=6B8ioP26RTja5kVFMsorAnM9CcxIUySJS9RZBlDPI2s,25825
+titans_pytorch-0.1.31.dist-info/METADATA,sha256=9ejOFuH2B2-yCRFK4x_C1DONPxecW8VcjEUeRh9OzXg,6815
+titans_pytorch-0.1.31.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+titans_pytorch-0.1.31.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+titans_pytorch-0.1.31.dist-info/RECORD,,
titans_pytorch-0.1.29.dist-info/RECORD DELETED

@@ -1,8 +0,0 @@
-titans_pytorch/__init__.py,sha256=u0tta_KqhOdfzCEDWT9P4_jejJEK2q1XxhsEzB5MnQU,223
-titans_pytorch/associative_scan.py,sha256=Y-iYqmFuG-NoCKu6kgql1mhowXTeJfyawi3eUIXamp0,2650
-titans_pytorch/mac_transformer.py,sha256=RRLdVa8z-2IWbhhmRGfoNBycwaL32aMbpqutzmSQqpc,24575
-titans_pytorch/titans.py,sha256=gjoDcTsvw5X2d1I2xq4cM45YJIBqtLFuws8_jVylW_4,25746
-titans_pytorch-0.1.29.dist-info/METADATA,sha256=9Na2UlBJ4mECXXY5GIyuokgN0oxs38rps24TIM6CNFY,6815
-titans_pytorch-0.1.29.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-titans_pytorch-0.1.29.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
-titans_pytorch-0.1.29.dist-info/RECORD,,
{titans_pytorch-0.1.29.dist-info → titans_pytorch-0.1.31.dist-info}/WHEEL: file without changes
{titans_pytorch-0.1.29.dist-info → titans_pytorch-0.1.31.dist-info}/licenses/LICENSE: file without changes