PyPI - titans-pytorch - Versions diffs - 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl - Mend

titans-pytorch 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

titans_pytorch/memory_models.py CHANGED Viewed

@@ -36,10 +36,14 @@ class MemoryMLP(Module):
     def __init__(
         self,
         dim,
-        depth
+        depth,
+        expansion_factor = 2.
     ):
         super().__init__()
-        self.weights = ParameterList([Parameter(torch.randn(dim, dim)) for _ in range(depth)])
+        dim_hidden = int(dim * expansion_factor)
+        dims = (dim, *((dim_hidden,) * (depth - 1)), dim)
+        self.weights = ParameterList([Parameter(torch.randn(dim_in, dim_out)) for dim_in, dim_out in zip(dims[:-1], dims[1:])])
         self.ln = LayerNorm(dim)

titans_pytorch/neural_memory.py CHANGED Viewed

@@ -299,7 +299,8 @@ class NeuralMemory(Module):
         accept_weight_residual = False,
         gated_transition = False,
         default_model_kwargs: dict = dict(
-            depth = 2
+            depth = 2,
+            expansion_factor = 4.
         )
     ):
         super().__init__()
@@ -689,16 +690,27 @@ class NeuralMemory(Module):
     def retrieve_memories(
         self,
         seq,
-        past_weights: dict[str, Tensor],
-        chunk_size = None,
-        need_pad = True
+        weights: dict[str, Tensor],
     ):
-        chunk_size = default(chunk_size, self.retrieve_chunk_size)
+        chunk_size = self.retrieve_chunk_size
+        weights_have_expanded_shape = dict_get_shape(weights) != self.init_weight_shape
         batch, seq_len = seq.shape[:2]
-        seq = self.retrieve_norm(seq)
+        # auto infer single token decoding, if there are only 1 set of weights and 1 token
+        is_one_token = seq_len == 1
+        is_one_weight = (not weights_have_expanded_shape) or next(iter(weights.values())).shape[1] == 1
+        is_single_token_decode = is_one_token and is_one_weight
+        if is_single_token_decode:
+            chunk_size = 1
+        # padding related, for chunked processing
-        need_pad = need_pad or chunk_size > 1
+        need_pad = chunk_size > 1 or not is_one_weight
         if need_pad:
             seq = pad_at_dim(seq, (1, 0), dim = 1)
@@ -713,7 +725,11 @@ class NeuralMemory(Module):
         # the parameters of the memory model stores the memories of the key / values
         # when the MLP has only 1 weight matrix, it is equivalent to `kv` fast weight memories from linear attention literature (recall fetching of memories is q @ (kv)) / schmidhuber's paper
-        curr_weights = TensorDict(past_weights)
+        weights = TensorDict(weights)
+        # pre norm
+        seq = self.retrieve_norm(seq)
         # sequence Float['b n d'] to queries
@@ -729,14 +745,14 @@ class NeuralMemory(Module):
         # fetch values from memory model
-        if dict_get_shape(curr_weights) != self.init_weight_shape:
-            curr_weights = rearrange_dict_values(curr_weights, 'b n ... -> (b n) ...')
+        if weights_have_expanded_shape:
+            weights = rearrange_dict_values(weights, 'b n ... -> (b n) ...')
         queries = rearrange(queries, 'b h (n c) d -> (b h n) c d', c = chunk_size)
         # forward functional call
-        values = functional_call(self.memory_model, dict(curr_weights), queries)
+        values = functional_call(self.memory_model, dict(weights), queries)
         # reconstitute batch dimension
@@ -884,22 +900,13 @@ class NeuralMemory(Module):
         # retrieve
-        need_pad = True
-        retrieve_chunk_size = None
         if is_single_token:
-            retrieve_chunk_size = 1
-            need_pad = False
             last_update, _ = next_neural_mem_state.states
             updates = rearrange_dict_values(last_update, 'b ... -> b 1 ...')
         retrieved = self.retrieve_memories(
             seq,
-            updates,
-            chunk_size = retrieve_chunk_size,
-            need_pad = need_pad,
+            updates
         )
         return retrieved, next_neural_mem_state

{titans_pytorch-0.3.1.dist-info → titans_pytorch-0.3.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: titans-pytorch
-Version: 0.3.1
+Version: 0.3.3
 Summary: Titans
 Project-URL: Homepage, https://pypi.org/project/titans-pytorch/
 Project-URL: Repository, https://github.com/lucidrains/titans-pytorch

titans_pytorch-0.3.3.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,9 @@
+titans_pytorch/__init__.py,sha256=Y3m_ZlpEqYwp-Md1ARhNGJxq8bQp8ty1o039nZOOJo0,276
+titans_pytorch/associative_scan.py,sha256=Y-iYqmFuG-NoCKu6kgql1mhowXTeJfyawi3eUIXamp0,2650
+titans_pytorch/mac_transformer.py,sha256=5rO4GQxSyFWWEc3pc3xNyG0sK5EXE7MmxKI-_kEMl2M,24941
+titans_pytorch/memory_models.py,sha256=0KLHZN-y_7lwrhWSnFRaYJ3GiUV3tzVjxS9CxIx_eI8,4843
+titans_pytorch/neural_memory.py,sha256=Ff-IBv-CCQAP7IYIpokPDoGtsvpzotAJsHB1d_-xd98,27934
+titans_pytorch-0.3.3.dist-info/METADATA,sha256=CutjohW8xSNycd5W-uyXC4827ubmIpAJCs9xoMbfZzo,6815
+titans_pytorch-0.3.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+titans_pytorch-0.3.3.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+titans_pytorch-0.3.3.dist-info/RECORD,,

titans_pytorch-0.3.1.dist-info/RECORD DELETED Viewed

@@ -1,9 +0,0 @@
-titans_pytorch/__init__.py,sha256=Y3m_ZlpEqYwp-Md1ARhNGJxq8bQp8ty1o039nZOOJo0,276
-titans_pytorch/associative_scan.py,sha256=Y-iYqmFuG-NoCKu6kgql1mhowXTeJfyawi3eUIXamp0,2650
-titans_pytorch/mac_transformer.py,sha256=5rO4GQxSyFWWEc3pc3xNyG0sK5EXE7MmxKI-_kEMl2M,24941
-titans_pytorch/memory_models.py,sha256=Q9SAIyAbStF5Tz0EhvRbn3yAdE3nk3xKc1ndieIe714,4671
-titans_pytorch/neural_memory.py,sha256=K1z7wtv366Y6-eEyXMFZ_j7D2frPl5RxfSgxzFYoFMc,27704
-titans_pytorch-0.3.1.dist-info/METADATA,sha256=ZAxucKq2DZBtW-BI_O2sUQ5RXy11a7eu48yPpwnanpw,6815
-titans_pytorch-0.3.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-titans_pytorch-0.3.1.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
-titans_pytorch-0.3.1.dist-info/RECORD,,

{titans_pytorch-0.3.1.dist-info → titans_pytorch-0.3.3.dist-info}/WHEEL RENAMED Viewed

File without changes

{titans_pytorch-0.3.1.dist-info → titans_pytorch-0.3.3.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

titans-pytorch 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

titans-pytorch 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl