titans-pytorch 0.1.38__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {titans_pytorch-0.1.38 → titans_pytorch-0.2.0}/PKG-INFO +2 -2
- {titans_pytorch-0.1.38 → titans_pytorch-0.2.0}/README.md +1 -1
- {titans_pytorch-0.1.38 → titans_pytorch-0.2.0}/pyproject.toml +1 -1
- {titans_pytorch-0.1.38 → titans_pytorch-0.2.0}/tests/test_titans.py +3 -2
- {titans_pytorch-0.1.38 → titans_pytorch-0.2.0}/titans_pytorch/mac_transformer.py +7 -7
- {titans_pytorch-0.1.38 → titans_pytorch-0.2.0}/train_mac.py +2 -1
- {titans_pytorch-0.1.38 → titans_pytorch-0.2.0}/.github/workflows/python-publish.yml +0 -0
- {titans_pytorch-0.1.38 → titans_pytorch-0.2.0}/.github/workflows/test.yaml +0 -0
- {titans_pytorch-0.1.38 → titans_pytorch-0.2.0}/.gitignore +0 -0
- {titans_pytorch-0.1.38 → titans_pytorch-0.2.0}/LICENSE +0 -0
- {titans_pytorch-0.1.38 → titans_pytorch-0.2.0}/data/README.md +0 -0
- {titans_pytorch-0.1.38 → titans_pytorch-0.2.0}/data/enwik8.gz +0 -0
- {titans_pytorch-0.1.38 → titans_pytorch-0.2.0}/fig1.png +0 -0
- {titans_pytorch-0.1.38 → titans_pytorch-0.2.0}/fig2.png +0 -0
- {titans_pytorch-0.1.38 → titans_pytorch-0.2.0}/titans_pytorch/__init__.py +0 -0
- {titans_pytorch-0.1.38 → titans_pytorch-0.2.0}/titans_pytorch/associative_scan.py +0 -0
- {titans_pytorch-0.1.38 → titans_pytorch-0.2.0}/titans_pytorch/memory_models.py +0 -0
- {titans_pytorch-0.1.38 → titans_pytorch-0.2.0}/titans_pytorch/neural_memory.py +0 -0
--- titans_pytorch-0.1.38/PKG-INFO
+++ titans_pytorch-0.2.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: titans-pytorch
-Version: 0.1.38
+Version: 0.2.0
 Summary: Titans
 Project-URL: Homepage, https://pypi.org/project/titans-pytorch/
 Project-URL: Repository, https://github.com/lucidrains/titans-pytorch
@@ -56,7 +56,7 @@ Description-Content-Type: text/markdown
 
 <img src="./fig1.png" width="400px"></img>
 
-## Titans - Pytorch
+## Titans - Pytorch
 
 Unofficial implementation of [Titans](https://arxiv.org/abs/2501.00663) in Pytorch. Will also contain some explorations into architectures beyond their simple 1-4 layer MLP for the neural memory module, if it works well to any degree.
 
--- titans_pytorch-0.1.38/README.md
+++ titans_pytorch-0.2.0/README.md
@@ -2,7 +2,7 @@
 
 <img src="./fig1.png" width="400px"></img>
 
-## Titans - Pytorch
+## Titans - Pytorch
 
 Unofficial implementation of [Titans](https://arxiv.org/abs/2501.00663) in Pytorch. Will also contain some explorations into architectures beyond their simple 1-4 layer MLP for the neural memory module, if it works well to any degree.
 
--- titans_pytorch-0.1.38/tests/test_titans.py
+++ titans_pytorch-0.2.0/tests/test_titans.py
@@ -184,9 +184,10 @@ def test_mac(
     assert logits.shape == (1, seq_len, 256)
 
 @pytest.mark.parametrize('sliding', (False, True))
-@pytest.mark.parametrize('mem_layers', (()))
+@pytest.mark.parametrize('mem_layers', ((), None))
 @pytest.mark.parametrize('longterm_mems', (0, 4, 16))
-@pytest.mark.parametrize('prompt_len', (
+@pytest.mark.parametrize('prompt_len', (4, 16))
+@torch_default_dtype(torch.float64)
 def test_mac_sampling(
     sliding,
     mem_layers,
--- titans_pytorch-0.1.38/titans_pytorch/mac_transformer.py
+++ titans_pytorch-0.2.0/titans_pytorch/mac_transformer.py
@@ -111,6 +111,7 @@ def pad_and_segment_with_inverse(
     seq,
     segment_len,
     fold_into_batch = True,
+    inverse_remove_pad = True
 ):
     batch, seq_len = seq.shape[:2]
     next_seq_len_mult = round_up_multiple(seq_len, segment_len)
@@ -124,15 +125,12 @@ def pad_and_segment_with_inverse(
     if fold_into_batch:
         seq = rearrange(seq, 'b (w n) d -> (b w) n d', n = segment_len)
 
-    shape = seq.shape
-
     def inverse(out):
-        unchanged_shape = out.shape == shape
 
         if fold_into_batch:
             out = rearrange(out, '(b w) ... n d -> b ... (w n) d', b = batch)
 
-        if needs_pad and unchanged_shape:
+        if needs_pad and inverse_remove_pad:
             out = out[..., :-padding, :]
 
         return out
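Taken together, the two hunks above simplify `pad_and_segment_with_inverse`: the old `unchanged_shape` bookkeeping is dropped in favor of an explicit `inverse_remove_pad` flag, so the caller decides whether the returned `inverse` strips the right-padding. A self-contained sketch of the helper as it stands after this change (the padding lines are reconstructed assumptions; only the lines in the hunks are from the source):

```python
import torch.nn.functional as F
from einops import rearrange

def round_up_multiple(n, mult):
    # smallest multiple of `mult` that is >= n
    return ((n + mult - 1) // mult) * mult

def pad_and_segment_with_inverse(
    seq,
    segment_len,
    fold_into_batch = True,
    inverse_remove_pad = True
):
    batch, seq_len = seq.shape[:2]
    next_seq_len_mult = round_up_multiple(seq_len, segment_len)

    padding = next_seq_len_mult - seq_len
    needs_pad = padding > 0

    if needs_pad:
        seq = F.pad(seq, (0, 0, 0, padding))  # right-pad the sequence dim

    if fold_into_batch:
        seq = rearrange(seq, 'b (w n) d -> (b w) n d', n = segment_len)

    def inverse(out):
        if fold_into_batch:
            out = rearrange(out, '(b w) ... n d -> b ... (w n) d', b = batch)

        # with inverse_remove_pad = False the padding survives the round trip,
        # keeping the length a multiple of segment_len for later windowed ops
        if needs_pad and inverse_remove_pad:
            out = out[..., :-padding, :]

        return out

    return seq, inverse
```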
@@ -714,7 +712,7 @@ class MemoryAsContextTransformer(Module):
 
         # intersperse longterm memory
 
-        x, inverse_segment = pad_and_segment_with_inverse(x, segment_len)
+        x, inverse_segment = pad_and_segment_with_inverse(x, segment_len, inverse_remove_pad = False)
 
         mems = repeat(self.longterm_mems, 'n d -> b n d', b = x.shape[0])
         x, inverse_pack_mems = pack_with_inverse((x, mems), 'b * d')
@@ -856,7 +854,9 @@ class MemoryAsContextTransformer(Module):
 
         next_kv_caches = next_kv_caches[..., -attn_window_size:, :]
 
-        if not self.sliding_window_attn and divisible_by(next_kv_caches.shape[-2], attn_window_size):
+        kv_cache_length = next_kv_caches.shape[-2]
+
+        if not self.sliding_window_attn and divisible_by(kv_cache_length, attn_window_size):
             next_kv_caches = next_kv_caches[..., 0:0, :]
 
         next_cache = (
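The cache-trimming change names the post-truncation length before testing it. The intent: with fixed block-local attention (not sliding-window), a cache holding an exact multiple of `attn_window_size` means the next token opens a fresh window and can see none of the cached keys and values, so the cache is emptied rather than carried forward. A toy illustration of that boundary condition (numbers are hypothetical):

```python
# hypothetical illustration of the reset condition for block-local attention
attn_window_size = 4

for kv_cache_length in range(1, 9):
    # at an exact multiple of the window, the next token starts a new block
    # and none of the cached keys/values are visible to it
    starts_new_block = kv_cache_length % attn_window_size == 0
    print(kv_cache_length, 'reset cache' if starts_new_block else 'keep cache')
```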
@@ -878,7 +878,7 @@ class MemoryAsContextTransformer(Module):
 
         if not is_inferencing:
 
-            x, inverse_segment = pad_and_segment_with_inverse(x, attn_window_size)
+            x, inverse_segment = pad_and_segment_with_inverse(x, attn_window_size, inverse_remove_pad = False)
 
             x, _ = inverse_pack_mems(x)
 
--- titans_pytorch-0.1.38/train_mac.py
+++ titans_pytorch-0.2.0/train_mac.py
@@ -53,6 +53,7 @@ WANDB_ONLINE = False # turn this on to pipe experiment to cloud
 
 USE_ACCELERATED_SCAN = True
 USE_FLEX_ATTN = True
+USE_FAST_INFERENCE = False
 
 # wandb experiment tracker
 
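The new `USE_FAST_INFERENCE` toggle sits alongside the script's other feature flags; this diff never shows it being read (the sampling hunk below hardcodes `use_cache = True`), but one plausible wiring, offered purely as an assumption, would be:

```python
# assumed wiring, not shown in this diff: let the flag pick the decoding path
sample = model.sample(inp[None, ...], GENERATE_LENGTH, use_cache = USE_FAST_INFERENCE)
```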
@@ -163,6 +164,6 @@ for i in tqdm.tqdm(range(NUM_BATCHES), mininterval = 10., desc = 'training'):
     prime = decode_tokens(inp)
     print(f'%s \n\n %s', (prime, '*' * 100))
 
-    sample = model.sample(inp[None, ...], GENERATE_LENGTH)
+    sample = model.sample(inp[None, ...], GENERATE_LENGTH, use_cache = True)
     output_str = decode_tokens(sample[0])
     print(output_str)