titans-pytorch 0.1.8__tar.gz → 0.1.9__tar.gz
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {titans_pytorch-0.1.8 → titans_pytorch-0.1.9}/PKG-INFO +1 -1
- {titans_pytorch-0.1.8 → titans_pytorch-0.1.9}/pyproject.toml +1 -1
- {titans_pytorch-0.1.8 → titans_pytorch-0.1.9}/titans_pytorch/mac_transformer.py +10 -2
- {titans_pytorch-0.1.8 → titans_pytorch-0.1.9}/titans_pytorch/titans.py +2 -0
- {titans_pytorch-0.1.8 → titans_pytorch-0.1.9}/.github/workflows/python-publish.yml +0 -0
- {titans_pytorch-0.1.8 → titans_pytorch-0.1.9}/.github/workflows/test.yaml +0 -0
- {titans_pytorch-0.1.8 → titans_pytorch-0.1.9}/.gitignore +0 -0
- {titans_pytorch-0.1.8 → titans_pytorch-0.1.9}/LICENSE +0 -0
- {titans_pytorch-0.1.8 → titans_pytorch-0.1.9}/README.md +0 -0
- {titans_pytorch-0.1.8 → titans_pytorch-0.1.9}/data/README.md +0 -0
- {titans_pytorch-0.1.8 → titans_pytorch-0.1.9}/data/enwik8.gz +0 -0
- {titans_pytorch-0.1.8 → titans_pytorch-0.1.9}/fig1.png +0 -0
- {titans_pytorch-0.1.8 → titans_pytorch-0.1.9}/fig2.png +0 -0
- {titans_pytorch-0.1.8 → titans_pytorch-0.1.9}/tests/test_titans.py +0 -0
- {titans_pytorch-0.1.8 → titans_pytorch-0.1.9}/titans_pytorch/__init__.py +0 -0
- {titans_pytorch-0.1.8 → titans_pytorch-0.1.9}/titans_pytorch/associative_scan.py +0 -0
- {titans_pytorch-0.1.8 → titans_pytorch-0.1.9}/train_mac.py +0 -0
|
@@ -83,6 +83,14 @@ def identity(t):
|
|
|
83
83
|
def round_up_multiple(seq, mult):
|
|
84
84
|
return ceil(seq / mult) * mult
|
|
85
85
|
|
|
86
|
+
def pack_with_inverse(t, pattern):
|
|
87
|
+
packed, packed_shape = pack(t, pattern)
|
|
88
|
+
|
|
89
|
+
def inverse(out, inv_pattern = None):
|
|
90
|
+
return unpack(out, packed_shape, default(inv_pattern, pattern))
|
|
91
|
+
|
|
92
|
+
return packed, inverse
|
|
93
|
+
|
|
86
94
|
def pad_at_dim(t, pad, dim = -1, value = 0.):
|
|
87
95
|
dims_from_right = (- dim - 1) if dim < 0 else (t.ndim - dim - 1)
|
|
88
96
|
zeros = ((0, 0) * dims_from_right)
|
|
@@ -576,7 +584,7 @@ class MemoryAsContextTransformer(Module):
|
|
|
576
584
|
x, inverse_segment = pad_and_segment_with_inverse(x, segment_len)
|
|
577
585
|
|
|
578
586
|
mems = repeat(self.longterm_mems, 'n d -> b n d', b = x.shape[0])
|
|
579
|
-
x,
|
|
587
|
+
x, inverse_pack_mems = pack_with_inverse((x, mems), 'b * d')
|
|
580
588
|
|
|
581
589
|
x = inverse_segment(x)
|
|
582
590
|
|
|
@@ -634,7 +642,7 @@ class MemoryAsContextTransformer(Module):
|
|
|
634
642
|
|
|
635
643
|
x, inverse_segment = pad_and_segment_with_inverse(x, segment_len + num_longterm_mem_tokens)
|
|
636
644
|
|
|
637
|
-
x, _ =
|
|
645
|
+
x, _ = inverse_pack_mems(x)
|
|
638
646
|
|
|
639
647
|
x = inverse_segment(x)
|
|
640
648
|
|
|
@@ -391,6 +391,8 @@ class NeuralMemory(Module):
|
|
|
391
391
|
|
|
392
392
|
# whether to use averaging of chunks, or attention pooling
|
|
393
393
|
|
|
394
|
+
assert not (attn_pool_chunks and chunk_size == 1), '`attn_pool_chunks` cannot be set to True if `chunk_size` is set to 1'
|
|
395
|
+
|
|
394
396
|
if not attn_pool_chunks:
|
|
395
397
|
chunk_reduce_module = Reduce('b (n c) ... -> b n ...', 'mean', c = chunk_size)
|
|
396
398
|
else:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|