x-transformers 1.42.19.tar.gz → 1.42.21.tar.gz
- {x_transformers-1.42.19/x_transformers.egg-info → x_transformers-1.42.21}/PKG-INFO +1 -1
- {x_transformers-1.42.19 → x_transformers-1.42.21}/setup.py +1 -1
- {x_transformers-1.42.19 → x_transformers-1.42.21}/tests/test_x_transformers.py +28 -3
- {x_transformers-1.42.19 → x_transformers-1.42.21}/x_transformers/attend.py +1 -1
- {x_transformers-1.42.19 → x_transformers-1.42.21}/x_transformers/x_transformers.py +6 -3
- {x_transformers-1.42.19 → x_transformers-1.42.21/x_transformers.egg-info}/PKG-INFO +1 -1
- {x_transformers-1.42.19 → x_transformers-1.42.21}/LICENSE +0 -0
- {x_transformers-1.42.19 → x_transformers-1.42.21}/README.md +0 -0
- {x_transformers-1.42.19 → x_transformers-1.42.21}/setup.cfg +0 -0
- {x_transformers-1.42.19 → x_transformers-1.42.21}/x_transformers/__init__.py +0 -0
- {x_transformers-1.42.19 → x_transformers-1.42.21}/x_transformers/autoregressive_wrapper.py +0 -0
- {x_transformers-1.42.19 → x_transformers-1.42.21}/x_transformers/continuous.py +0 -0
- {x_transformers-1.42.19 → x_transformers-1.42.21}/x_transformers/dpo.py +0 -0
- {x_transformers-1.42.19 → x_transformers-1.42.21}/x_transformers/multi_input.py +0 -0
- {x_transformers-1.42.19 → x_transformers-1.42.21}/x_transformers/neo_mlp.py +0 -0
- {x_transformers-1.42.19 → x_transformers-1.42.21}/x_transformers/nonautoregressive_wrapper.py +0 -0
- {x_transformers-1.42.19 → x_transformers-1.42.21}/x_transformers/xl_autoregressive_wrapper.py +0 -0
- {x_transformers-1.42.19 → x_transformers-1.42.21}/x_transformers/xval.py +0 -0
- {x_transformers-1.42.19 → x_transformers-1.42.21}/x_transformers.egg-info/SOURCES.txt +0 -0
- {x_transformers-1.42.19 → x_transformers-1.42.21}/x_transformers.egg-info/dependency_links.txt +0 -0
- {x_transformers-1.42.19 → x_transformers-1.42.21}/x_transformers.egg-info/requires.txt +0 -0
- {x_transformers-1.42.19 → x_transformers-1.42.21}/x_transformers.egg-info/top_level.txt +0 -0
{x_transformers-1.42.19 → x_transformers-1.42.21}/tests/test_x_transformers.py

@@ -388,7 +388,8 @@ def test_neo_mlp():
     out = mlp(x)
     assert out.shape == (3, 7)
 
-def test_custom_alibi():
+@pytest.mark.parametrize('flash', (True, False))
+def test_custom_alibi(flash: bool):
 
     model = TransformerWrapper(
         num_tokens = 20_000,
@@ -397,7 +398,8 @@ def test_custom_alibi():
             dim = 512,
             depth = 2,
             heads = 8,
-            alibi_pos_bias = True
+            alibi_pos_bias = True,
+            attn_flash = flash
         )
     )
 
@@ -407,8 +409,30 @@ def test_custom_alibi():
 
     logits = model(x, pos = pos)
 
-def test_custom_alibi_across_heads():
+def test_custom_rotary_pos_emb():
+    from einops import repeat
 
+    model = TransformerWrapper(
+        num_tokens = 20_000,
+        max_seq_len = 1024,
+        attn_layers = Decoder(
+            dim = 512,
+            depth = 2,
+            heads = 8,
+            rotary_pos_emb = True
+        )
+    )
+
+    x = torch.randint(0, 20000, (4, 4))
+
+    pos = repeat(torch.arange(0, 4), "n -> b n", b=4)
+
+    logits1 = model(x, pos = pos)
+    logits2 = model(x)
+    assert torch.allclose(logits1, logits2)
+
+@pytest.mark.parametrize('flash', (True, False))
+def test_custom_alibi_across_heads(flash: bool):
     model = Decoder(
         dim = 512,
         depth = 2,
@@ -417,6 +441,7 @@ def test_custom_alibi_across_heads():
         rel_pos_kwargs = dict(
            slopes = [1, 1]
        ),
+        attn_flash = flash
     )
 
     x = torch.randn(2, 4, 512)
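For orientation, the new parametrized tests above exercise the public TransformerWrapper / Decoder API with flash attention enabled alongside explicitly supplied positions. A minimal usage sketch in that spirit follows; the per-batch shape of pos here is illustrative, since the alibi test's own pos construction falls outside the hunk shown.

import torch
from x_transformers import TransformerWrapper, Decoder

# custom alibi positional bias together with flash attention,
# mirroring the test_custom_alibi(flash = True) case above
model = TransformerWrapper(
    num_tokens = 20_000,
    max_seq_len = 1024,
    attn_layers = Decoder(
        dim = 512,
        depth = 2,
        heads = 8,
        alibi_pos_bias = True,
        attn_flash = True
    )
)

x = torch.randint(0, 20_000, (2, 8))
pos = torch.arange(8).expand(2, -1)   # explicit per-batch positions (illustrative shape)

logits = model(x, pos = pos)
assert logits.shape == (2, 8, 20_000)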
{x_transformers-1.42.19 → x_transformers-1.42.21}/x_transformers/attend.py

@@ -370,7 +370,7 @@ class Attend(Module):
         # convert from bool to float
 
         if exists(attn_bias):
-            attn_bias =
+            attn_bias = attn_bias.expand(batch, heads, -1, -1)
 
         # if mask given, the mask would already contain the causal mask from above logic
         # otherwise, if no mask given but still causal, mask out alibi positional bias to a large negative number
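The one-line change above broadcasts the positional bias to the full (batch, heads, q_len, k_len) shape before it enters the flash attention path. As a standalone illustration of the Tensor.expand semantics relied on here (the (heads, i, j) input shape is an assumption for the sketch; the real shape reaching Attend depends on the relative position module in use):

import torch

batch, heads, i, j = 2, 8, 4, 4

# hypothetical per-head bias, e.g. an alibi-style bias of shape (heads, i, j)
attn_bias = torch.randn(heads, i, j)

# expand prepends the missing batch dimension and broadcasts it without copying;
# -1 keeps the existing sizes of the trailing (i, j) dimensions
expanded = attn_bias.expand(batch, heads, -1, -1)

assert expanded.shape == (batch, heads, i, j)
assert expanded.data_ptr() == attn_bias.data_ptr()   # a view, no new memory allocated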
{x_transformers-1.42.19 → x_transformers-1.42.21}/x_transformers/x_transformers.py

@@ -655,7 +655,10 @@ class RotaryEmbedding(Module):
     def forward(self, t):
         max_pos = t.max() + 1
 
-
+        if t.ndim == 1:
+            t = rearrange(t, 'n -> 1 n')
+
+        freqs = torch.einsum('b i , j -> b i j', t.type_as(self.inv_freq), self.inv_freq) / self.interpolation_factor
         freqs = torch.stack((freqs, freqs), dim = -1)
         freqs = rearrange(freqs, '... d r -> ... (d r)')
 
@@ -679,8 +682,8 @@ def rotate_half(x):
 def apply_rotary_pos_emb(t, freqs, scale = 1):
     rot_dim, seq_len, orig_dtype = freqs.shape[-1], t.shape[-2], t.dtype
 
-    freqs = freqs[-seq_len:, :]
-    scale = scale[-seq_len:, :] if isinstance(scale, torch.Tensor) else scale
+    freqs = freqs[:, -seq_len:, :]
+    scale = scale[:, -seq_len:, :] if isinstance(scale, torch.Tensor) else scale
 
     if t.ndim == 4 and freqs.ndim == 3:
         freqs = rearrange(freqs, 'b n d -> b 1 n d')
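Both hunks above follow from the same change: rotary frequencies now carry an explicit batch dimension, so 1D position tensors are lifted to shape (1, n) and the sequence axis is sliced at dim 1. A small sketch of the batched einsum; dim and theta are illustrative values, and inv_freq follows the standard rotary formulation:

import torch
from einops import rearrange

dim, theta = 64, 10_000                       # illustrative rotary dimension and base

# standard rotary inverse frequencies
inv_freq = 1. / (theta ** (torch.arange(0, dim, 2).float() / dim))

# batched positions, e.g. the custom pos used in test_custom_rotary_pos_emb
pos = torch.arange(4).expand(3, -1)           # shape (b, n) = (3, 4)

if pos.ndim == 1:                             # 1d positions become a batch of one
    pos = rearrange(pos, 'n -> 1 n')

freqs = torch.einsum('b i , j -> b i j', pos.type_as(inv_freq), inv_freq)
assert freqs.shape == (3, 4, dim // 2)        # one frequency vector per (batch, position)

# with the batch dimension in front, the last seq_len positions are sliced along dim 1,
# matching the updated freqs[:, -seq_len:, :] in apply_rotary_pos_emb
seq_len = 4
assert freqs[:, -seq_len:, :].shape == (3, 4, dim // 2)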