titans-pytorch 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff compares publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- titans_pytorch/titans.py +27 -16
- {titans_pytorch-0.1.6.dist-info → titans_pytorch-0.1.8.dist-info}/METADATA +1 -1
- titans_pytorch-0.1.8.dist-info/RECORD +8 -0
- titans_pytorch-0.1.6.dist-info/RECORD +0 -8
- {titans_pytorch-0.1.6.dist-info → titans_pytorch-0.1.8.dist-info}/WHEEL +0 -0
- {titans_pytorch-0.1.6.dist-info → titans_pytorch-0.1.8.dist-info}/licenses/LICENSE +0 -0
titans_pytorch/titans.py
CHANGED
@@ -44,6 +44,9 @@ def default(v, d):
 def identity(t):
     return t
 
+def pair(v):
+    return (v, v) if not isinstance(v, tuple) else v
+
 def round_down_multiple(seq, mult):
     return seq // mult * mult
 
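The new `pair` helper normalizes a scalar-or-tuple argument: a scalar is duplicated into a 2-tuple, a tuple passes through unchanged. A minimal sketch of the behavior (values are illustrative only):

def pair(v):
    # duplicate a scalar into a 2-tuple; leave tuples untouched
    return (v, v) if not isinstance(v, tuple) else v

assert pair(64) == (64, 64)      # scalar duplicated
assert pair((1, 64)) == (1, 64)  # tuple passes through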
@@ -161,14 +164,16 @@ class GatedResidualMemoryMLP(Module):
     def __init__(
         self,
         dim,
-        depth
+        depth,
+        expansion_factor = 2.
     ):
         super().__init__()
-
+        dim_hidden = int(dim * expansion_factor)
 
         self.weights = ParameterList([
             ParameterList([
-                Parameter(torch.randn(dim, dim)),
+                Parameter(torch.randn(dim, dim_hidden)),
+                Parameter(torch.randn(dim_hidden, dim)),
                 Parameter(torch.randn(dim * 2, dim)),
             ]) for _ in range(depth)
         ])
@@ -182,16 +187,17 @@ class GatedResidualMemoryMLP(Module):
         self,
         x
     ):
-        for weight, to_gates in self.weights:
+        for weight1, weight2, to_gates in self.weights:
             res = x
 
-            x = x @ weight
-            x = F.silu(x)
+            hidden = x @ weight1
+            hidden = F.silu(hidden)
+            branch_out = hidden @ weight2
 
             # gated residual
 
-            gates = cat((x, res), dim = -1) @ to_gates
-            x = res.lerp(x, gates.sigmoid())
+            gates = cat((branch_out, res), dim = -1) @ to_gates
+            x = res.lerp(branch_out, gates.sigmoid())
 
         return x @ self.final_proj
 
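Net effect of the two hunks above: each layer's single square weight becomes a SiLU-activated two-matrix expansion (dim → dim_hidden → dim) feeding the same gated residual. A standalone sketch of one layer in plain torch, with hypothetical sizes:

import torch
import torch.nn.functional as F
from torch import cat

dim = 64
dim_hidden = int(dim * 2.)               # expansion_factor = 2., the new default

weight1 = torch.randn(dim, dim_hidden)   # new: project up to the expanded hidden
weight2 = torch.randn(dim_hidden, dim)   # new: project back down
to_gates = torch.randn(dim * 2, dim)     # unchanged: gate over [branch, residual]

x = torch.randn(2, 16, dim)
res = x

hidden = F.silu(x @ weight1)
branch_out = hidden @ weight2

gates = cat((branch_out, res), dim = -1) @ to_gates
x = res.lerp(branch_out, gates.sigmoid())  # gated residual interpolation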
@@ -287,7 +293,7 @@ class NeuralMemory(Module):
     def __init__(
         self,
         dim,
-        chunk_size = 1,
+        chunk_size: int | tuple[int, int] = 1,
         dim_head = None,
         heads = 1,
         model: Module | None = None,
@@ -310,6 +316,8 @@ class NeuralMemory(Module):
         super().__init__()
         dim_head = default(dim_head, dim)
 
+        self.retrieve_chunk_size, self.store_chunk_size = pair(chunk_size)
+
         # norms
 
         self.retrieve_norm = nn.RMSNorm(dim) if pre_rmsnorm else nn.Identity()
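Taken together with the new `pair` helper, `chunk_size` may now be a single int (same chunk size for retrieval and storage, as before) or a 2-tuple `(retrieve, store)`. A hypothetical instantiation, assuming the remaining constructor defaults suffice to build the memory model:

from titans_pytorch.titans import NeuralMemory

mem = NeuralMemory(dim = 384, chunk_size = 64)       # one size for both, as in 0.1.6
mem = NeuralMemory(dim = 384, chunk_size = (1, 64))  # new: retrieve per token, store per 64

assert mem.retrieve_chunk_size == 1
assert mem.store_chunk_size == 64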
@@ -377,6 +385,10 @@ class NeuralMemory(Module):
         self.empty_memory_embed = nn.Parameter(torch.zeros(dim))
         nn.init.normal_(self.empty_memory_embed, std = 0.02)
 
+        # `chunk_size` refers to chunk size used for storing to memory model weights
+
+        chunk_size = self.store_chunk_size
+
         # whether to use averaging of chunks, or attention pooling
 
         if not attn_pool_chunks:
@@ -448,11 +460,11 @@ class NeuralMemory(Module):
         past_state: tuple[dict[str, Tensor], dict[str, Tensor]],
         return_aux_kv_loss = False
     ):
-        seq_len = seq.shape[-2]
+        seq_len, chunk_size = seq.shape[-2], self.store_chunk_size
 
         # handle edge case
 
-        if seq_len < self.chunk_size:
+        if seq_len < chunk_size:
             past_weight, _ = past_state
             return TensorDict(past_weight).clone().zero_(), self.zero
 
@@ -461,8 +473,7 @@ class NeuralMemory(Module):
         # curtail sequence by multiple of the chunk size
         # only a complete chunk of the sequence provides the memory for the next chunk
 
-
-        round_down_seq_len = round_down_multiple(seq_len, self.chunk_size)
+        round_down_seq_len = round_down_multiple(seq_len, chunk_size)
 
         seq = seq[:, :round_down_seq_len]
 
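For intuition, `round_down_multiple` curtails the stored sequence to whole chunks; with a hypothetical store chunk size of 64:

def round_down_multiple(seq, mult):
    return seq // mult * mult

assert round_down_multiple(1021, 64) == 960  # 61 trailing tokens are dropped
assert round_down_multiple(128, 64) == 128   # complete chunks are kept whole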
@@ -594,12 +605,12 @@ class NeuralMemory(Module):
         seq,
         past_weights: dict[str, Tensor] | None = None,
     ):
-        chunk_size = self.chunk_size
+        chunk_size = self.retrieve_chunk_size
         batch, seq_len = seq.shape[:2]
 
         seq = self.retrieve_norm(seq)
 
-        if seq_len < self.chunk_size:
+        if seq_len < chunk_size:
             return self.init_empty_memory_embed(batch, seq_len)
 
         seq = seq[:, (chunk_size - 1):]
@@ -671,7 +682,7 @@ class NeuralMemory(Module):
     ):
         batch, seq_len = seq.shape[:2]
 
-        if seq_len < self.chunk_size:
+        if seq_len < self.retrieve_chunk_size:
             out = self.init_empty_memory_embed(batch, seq_len)
 
             if not return_aux_kv_loss:
titans_pytorch-0.1.8.dist-info/RECORD
ADDED
@@ -0,0 +1,8 @@
+titans_pytorch/__init__.py,sha256=u0tta_KqhOdfzCEDWT9P4_jejJEK2q1XxhsEzB5MnQU,223
+titans_pytorch/associative_scan.py,sha256=Y-iYqmFuG-NoCKu6kgql1mhowXTeJfyawi3eUIXamp0,2650
+titans_pytorch/mac_transformer.py,sha256=YowJzQC2p3fWgzFCe9jXrw1b3wWPKN-dcLs5sX-3r8Y,19123
+titans_pytorch/titans.py,sha256=qRUw-Lad_dkMqV7ASMNoGLgxYwGD-maAadetAd_qmc8,21031
+titans_pytorch-0.1.8.dist-info/METADATA,sha256=0-m6h7GERineU8N9_2cW6nCuXs96twFEwVYkHVuuuLM,4747
+titans_pytorch-0.1.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+titans_pytorch-0.1.8.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+titans_pytorch-0.1.8.dist-info/RECORD,,
titans_pytorch-0.1.6.dist-info/RECORD
DELETED
@@ -1,8 +0,0 @@
-titans_pytorch/__init__.py,sha256=u0tta_KqhOdfzCEDWT9P4_jejJEK2q1XxhsEzB5MnQU,223
-titans_pytorch/associative_scan.py,sha256=Y-iYqmFuG-NoCKu6kgql1mhowXTeJfyawi3eUIXamp0,2650
-titans_pytorch/mac_transformer.py,sha256=YowJzQC2p3fWgzFCe9jXrw1b3wWPKN-dcLs5sX-3r8Y,19123
-titans_pytorch/titans.py,sha256=VMcPcKsoR3G13Um62Aa1HbdwrrV60ljPhP-yF40x90I,20555
-titans_pytorch-0.1.6.dist-info/METADATA,sha256=LJW26WfT9WB-0NfokLLHhcRpWnt76jwkXMt_FSTI3SM,4747
-titans_pytorch-0.1.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-titans_pytorch-0.1.6.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
-titans_pytorch-0.1.6.dist-info/RECORD,,
{titans_pytorch-0.1.6.dist-info → titans_pytorch-0.1.8.dist-info}/WHEEL
File without changes

{titans_pytorch-0.1.6.dist-info → titans_pytorch-0.1.8.dist-info}/licenses/LICENSE
File without changes