x-transformers 1.16.6__py3-none-any.whl → 1.16.7__py3-none-any.whl
This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
- x_transformers/attend.py +4 -6
- x_transformers/x_transformers.py +4 -2
- {x_transformers-1.16.6.dist-info → x_transformers-1.16.7.dist-info}/METADATA +1 -1
- {x_transformers-1.16.6.dist-info → x_transformers-1.16.7.dist-info}/RECORD +7 -7
- {x_transformers-1.16.6.dist-info → x_transformers-1.16.7.dist-info}/LICENSE +0 -0
- {x_transformers-1.16.6.dist-info → x_transformers-1.16.7.dist-info}/WHEEL +0 -0
- {x_transformers-1.16.6.dist-info → x_transformers-1.16.7.dist-info}/top_level.txt +0 -0
x_transformers/attend.py
CHANGED
@@ -253,9 +253,9 @@ class Attend(nn.Module):
 
 # cascading heads logic
 
-def to_single_heads(t):
-    heads = t.unbind(dim = 1)
-    return tuple(head.unsqueeze(1) for head in heads)
+def to_single_heads(t, dim = 1):
+    heads = t.unbind(dim = dim)
+    return tuple(head.unsqueeze(dim) for head in heads)
 
 class CascadingHeads(nn.Module):
     def __init__(self, attend: Attend):
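
A minimal sketch of the generalized helper as it appears in the hunk above; the tensor and its shape are illustrative, not taken from the package:

    import torch

    # split a tensor into per-head views along `dim`, keeping that dimension with size 1
    def to_single_heads(t, dim = 1):
        heads = t.unbind(dim = dim)
        return tuple(head.unsqueeze(dim) for head in heads)

    q = torch.randn(2, 8, 1024, 64)   # (batch, heads, seq_len, dim_head)
    per_head = to_single_heads(q)     # 8 tensors, each of shape (2, 1, 1024, 64)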
@@ -281,9 +281,7 @@ class CascadingHeads(nn.Module):
 
         mask = (mask,) * heads
 
-        attn_bias = attn_bias
-        attn_bias = map(lambda t: rearrange(t, '... -> 1 ...'), attn_bias)
-
+        attn_bias = to_single_heads(attn_bias, dim = 0) if exists(attn_bias) else ((None,) * heads)
         prev_attn = to_single_heads(prev_attn) if exists(prev_attn) else ((None,) * heads)
 
         # now loop through each head, without output of previous head summed with the next head
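
The hunk above is why the helper gained its dim argument: an attention bias such as ALiBi typically has shape (heads, i, j) with no batch dimension, so its heads sit at dim 0 rather than dim 1. Continuing the sketch above (shapes again illustrative):

    bias = torch.randn(8, 1024, 1024)               # (heads, i, j): no batch dimension
    per_head_bias = to_single_heads(bias, dim = 0)  # 8 tensors, each of shape (1, 1024, 1024)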
x_transformers/x_transformers.py
CHANGED
@@ -358,7 +358,7 @@ class AlibiPositionalBias(nn.Module):
     def forward(self, i, j):
         h, device = self.total_heads, self.device
 
-        if exists(self.bias) and self.bias.shape[-1] >= j:
+        if exists(self.bias) and self.bias.shape[-1] >= j and self.bias.shape[-2] >= i:
             return self.bias[..., :i, :j]
 
         bias = self.get_bias(i, j, device)
@@ -382,7 +382,7 @@ class LearnedAlibiPositionalBias(AlibiPositionalBias):
         def get_slopes(param):
             return pad_at_dim(param.exp(), (0, h - param.shape[0]), dim = -2)
 
-        if exists(self.bias) and self.bias.shape[-1] >= j:
+        if exists(self.bias) and self.bias.shape[-1] >= j and self.bias.shape[-2] >= i:
             bias = self.bias[..., :i, :j]
         else:
             bias = self.get_bias(i, j, device)
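
Both hunks above tighten the same cache check: the buffered ALiBi bias is reused only if it covers both the current query length i and key length j, not just j. A standalone sketch of the case the extra condition guards against (names and sizes are hypothetical):

    import torch

    cached_bias = torch.randn(8, 4, 16)   # (heads, i_cached, j_cached) left over from an earlier call

    i, j = 6, 8                           # new call needs more query rows but fewer key columns

    old_check = cached_bias.shape[-1] >= j                                  # True: the stale cache would be reused
    sliced = cached_bias[..., :i, :j]                                       # shape (8, 4, 8): only 4 of the 6 rows needed
    new_check = cached_bias.shape[-1] >= j and cached_bias.shape[-2] >= i   # False: the bias is recomputed instead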
@@ -971,6 +971,8 @@ class AttentionLayers(nn.Module):
 
         self.residual_attn = residual_attn
         self.cross_residual_attn = cross_residual_attn
+        assert not (flash_attn and (residual_attn or cross_residual_attn)), 'flash attention is not compatible with residual attention'
+
         self.cross_attend = cross_attend
 
         norm_class = ScaleNorm if use_scalenorm else nn.LayerNorm
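
A sketch of the reasoning behind the new assertion, assuming residual attention here means the RealFormer-style scheme in which each layer adds the previous layer's pre-softmax attention scores to its own, and assuming PyTorch 2.x for the fused kernel:

    import torch
    import torch.nn.functional as F

    q, k, v = (torch.randn(1, 8, 128, 64) for _ in range(3))

    # residual attention needs the previous layer's pre-softmax score matrix ...
    sim = torch.einsum('b h i d, b h j d -> b h i j', q, k) * (64 ** -0.5)
    prev_attn = sim   # would be added to the next layer's scores before softmax

    # ... but a fused flash kernel returns only the attention output; the (i, j)
    # score matrix is never materialized, so there is nothing to carry forward
    out = F.scaled_dot_product_attention(q, k, v)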
{x_transformers-1.16.6.dist-info → x_transformers-1.16.7.dist-info}/RECORD
CHANGED
@@ -1,12 +1,12 @@
 x_transformers/__init__.py,sha256=FDb654rUx8FpXRd76B8q0diH8I7q-ZjTWEtEJ4UM21Y,701
-x_transformers/attend.py,sha256=
+x_transformers/attend.py,sha256=ivhVpP_5vd6798HNq92DY0XZWjAJGmpE4qOdKW5yRaI,10379
 x_transformers/autoregressive_wrapper.py,sha256=u2celA8KeHm_Gd83Q7qaiLbJnwaDGdsbUck-JiokpKg,4446
 x_transformers/continuous_autoregressive_wrapper.py,sha256=pTiDqu6JRUlnQJQp_xHATYHy0lgSd6ERLqyiFO3pC-4,1575
 x_transformers/nonautoregressive_wrapper.py,sha256=AQLE4rA_Kh8VNoe9OzpwyeWson34sRkhks4dn4seNjI,10414
-x_transformers/x_transformers.py,sha256=
+x_transformers/x_transformers.py,sha256=xc4b05Y9vlGBXayJvpK775r4Dr7NlVusIVdqS3I09-4,54199
 x_transformers/xl_autoregressive_wrapper.py,sha256=-CAYjTtqrks8ZTxjYm2stOelZpU4MbZIvLjUxWO0P9Y,3988
-x_transformers-1.16.
-x_transformers-1.16.
-x_transformers-1.16.
-x_transformers-1.16.
-x_transformers-1.16.
+x_transformers-1.16.7.dist-info/LICENSE,sha256=As9u198X-U-vph5noInuUfqsAG2zX_oXPHDmdjwlPPY,1066
+x_transformers-1.16.7.dist-info/METADATA,sha256=wpuosM4b40fjCe0WHAilFQDVzLDx7_yqIyImTp-2380,665
+x_transformers-1.16.7.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
+x_transformers-1.16.7.dist-info/top_level.txt,sha256=hO6KGpFuGucRNEtRfme4A_rGcM53AKwGP7RVlRIxS5Q,15
+x_transformers-1.16.7.dist-info/RECORD,,
{x_transformers-1.16.6.dist-info → x_transformers-1.16.7.dist-info}/LICENSE
File without changes
{x_transformers-1.16.6.dist-info → x_transformers-1.16.7.dist-info}/WHEEL
File without changes
{x_transformers-1.16.6.dist-info → x_transformers-1.16.7.dist-info}/top_level.txt
File without changes