x-transformers 1.40.9__py3-none-any.whl → 1.40.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- x_transformers/x_transformers.py +7 -4
- {x_transformers-1.40.9.dist-info → x_transformers-1.40.11.dist-info}/METADATA +1 -1
- {x_transformers-1.40.9.dist-info → x_transformers-1.40.11.dist-info}/RECORD +6 -6
- {x_transformers-1.40.9.dist-info → x_transformers-1.40.11.dist-info}/LICENSE +0 -0
- {x_transformers-1.40.9.dist-info → x_transformers-1.40.11.dist-info}/WHEEL +0 -0
- {x_transformers-1.40.9.dist-info → x_transformers-1.40.11.dist-info}/top_level.txt +0 -0
x_transformers/x_transformers.py
CHANGED
@@ -1019,7 +1019,7 @@ class Attention(Module):
        self.qk_norm_q_scale = self.qk_norm_k_scale = 1
        if qk_norm and qk_norm_dim_scale:
            self.qk_norm_q_scale = nn.Parameter(torch.ones(heads, 1, dim_head))
-           self.qk_norm_k_scale = nn.Parameter(torch.ones(heads, 1, dim_head))
+           self.qk_norm_k_scale = nn.Parameter(torch.ones(kv_heads, 1, dim_head))

        assert (not qk_norm) or divisible_by(dim_head, qk_norm_groups), 'dimension per attention head must be divisible by the qk norm groups'
        assert not (qk_norm and (dim_head // qk_norm_groups) <= 2), 'the group dimension may be too small (2 was too small in my tests, but 4 still works, surprisingly)'
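For context: the hunk above fixes the shape of the learned key scale when grouped-query attention (kv_heads < heads) is combined with qk_norm and qk_norm_dim_scale, so the key scale now matches the number of key/value heads rather than the number of query heads. A minimal sketch of exercising that code path; the attn_-prefixed kwargs and their routing from Decoder down to Attention are taken from the library's existing conventions, not from this diff:

import torch
from x_transformers import TransformerWrapper, Decoder

# grouped-query attention (fewer kv heads than query heads) together with
# qk rmsnorm and learned per-dimension scales -- the code path touched above.
# the attn_-prefixed kwargs are assumed to route to Attention per the
# library's usual kwarg convention (not shown in this diff).
model = TransformerWrapper(
    num_tokens = 256,
    max_seq_len = 1024,
    attn_layers = Decoder(
        dim = 512,
        depth = 2,
        heads = 8,
        attn_kv_heads = 2,              # grouped-query attention
        attn_qk_norm = True,            # qk rmsnorm
        attn_qk_norm_dim_scale = True   # learned per-dim q/k scales (the fixed parameter)
    )
)

logits = model(torch.randint(0, 256, (1, 128)))   # expected shape (1, 128, 256)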
@@ -2104,6 +2104,7 @@ class TransformerWrapper(Module):
        attn_z_loss_weight = 1e-4,
        average_pool_embed = False,
        use_cls_token = False,
+       num_cls_tokens = 1,
        squeeze_out_last_dim = False,
        token_emb: TokenEmbedding | None = None,
        mixture_of_softmax = False,
@@ -2116,6 +2117,7 @@ class TransformerWrapper(Module):
        emb_dim = default(emb_dim, dim)
        self.emb_dim = emb_dim
        self.num_tokens = num_tokens
+       self.num_cls_tokens = num_cls_tokens

        self.max_seq_len = max_seq_len
        self.max_mem_len = max_mem_len
@@ -2172,7 +2174,7 @@ class TransformerWrapper(Module):
        self.cls_token = None

        if use_cls_token:
-           self.cls_token = nn.Parameter(torch.zeros(dim))
+           self.cls_token = nn.Parameter(torch.zeros(num_cls_tokens, dim))
            nn.init.normal_(self.cls_token, std = 0.02)

        # whether to average pool the embed (`global average pool`)
@@ -2329,11 +2331,11 @@ class TransformerWrapper(Module):
        # maybe cls token

        if exists(self.cls_token):
-           cls_tokens = repeat(self.cls_token, '
+           cls_tokens = repeat(self.cls_token, '... -> b ...', b = b)
            x, cls_packed_shape = pack([cls_tokens, x], 'b * d')

            if exists(mask):
-               mask = F.pad(mask, (1, 0), value = True)
+               mask = F.pad(mask, (self.num_cls_tokens, 0), value = True)

        # maybe memory / register tokens

@@ -2415,6 +2417,7 @@ class TransformerWrapper(Module):

        if exists(self.cls_token):
            x, _ = unpack(x, cls_packed_shape, 'b * d')
+           x = x.squeeze(1) # Remove sequence dimension if num_cls_tokens=1 to keep previous behavior

        # handle expansion to mixture if needed (for mixture of softmax)
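The TransformerWrapper hunks above add a num_cls_tokens option so that use_cls_token can prepend several learned CLS tokens instead of one: the attention mask is padded by self.num_cls_tokens, and the singleton dimension is squeezed away after unpacking so num_cls_tokens = 1 keeps the previous output shape. A rough usage sketch under those assumptions; Encoder and the return_embeddings flag come from the library's existing API rather than from this diff:

import torch
from x_transformers import TransformerWrapper, Encoder

# Encoder / return_embeddings are assumed from the existing x-transformers API;
# only use_cls_token / num_cls_tokens behavior is taken from the hunks above.
model = TransformerWrapper(
    num_tokens = 256,
    max_seq_len = 1024,
    use_cls_token = True,
    num_cls_tokens = 2,          # new in this release; 1 keeps the old single-CLS behavior
    attn_layers = Encoder(
        dim = 512,
        depth = 2,
        heads = 8
    )
)

x = torch.randint(0, 256, (1, 128))
mask = torch.ones(1, 128, dtype = torch.bool)
embed = model(x, mask = mask, return_embeddings = True)
# embed should hold the CLS embeddings: roughly (1, 2, 512) here,
# and (1, 512) when num_cls_tokens = 1, since the singleton dim is squeezed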
{x_transformers-1.40.9.dist-info → x_transformers-1.40.11.dist-info}/RECORD
CHANGED
@@ -5,11 +5,11 @@ x_transformers/continuous.py,sha256=cIVEdhfei258__ziV7kQBrJMxCel54bExBTDrO9rfCI,
x_transformers/dpo.py,sha256=LjvWgCkqTl-UuehrzQ8nkX5guLr4whYwsmm7SKSwdls,3450
x_transformers/multi_input.py,sha256=tCh-fTJDj2ib4SMGtsa-AM8MxKzJAQSwqAXOu3HU2mg,9252
x_transformers/nonautoregressive_wrapper.py,sha256=2NU58hYMgn-4Jzg3mie-mXb0XH_dCN7fjlzd3K1rLUY,10510
-x_transformers/x_transformers.py,sha256=
+x_transformers/x_transformers.py,sha256=RfpihlGygZz4ICq4IGOgGNOipInXUiYWYNs1tej2Orw,88290
x_transformers/xl_autoregressive_wrapper.py,sha256=CvZMJ6A6PA-Y_bQAhnORwjJBSl6Vjq2IdW5KTdk8NI8,4195
x_transformers/xval.py,sha256=7S00kCuab4tWQa-vf-z-XfzADjVj48MoFIr7VSIvttg,8575
-x_transformers-1.40.
-x_transformers-1.40.
-x_transformers-1.40.
-x_transformers-1.40.
-x_transformers-1.40.
+x_transformers-1.40.11.dist-info/LICENSE,sha256=As9u198X-U-vph5noInuUfqsAG2zX_oXPHDmdjwlPPY,1066
+x_transformers-1.40.11.dist-info/METADATA,sha256=D97orsPC5EYEtJN6EN75bLOfOY-FBmodr2eaFIovwu8,662
+x_transformers-1.40.11.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
+x_transformers-1.40.11.dist-info/top_level.txt,sha256=hO6KGpFuGucRNEtRfme4A_rGcM53AKwGP7RVlRIxS5Q,15
+x_transformers-1.40.11.dist-info/RECORD,,
{x_transformers-1.40.9.dist-info → x_transformers-1.40.11.dist-info}/LICENSE
File without changes
{x_transformers-1.40.9.dist-info → x_transformers-1.40.11.dist-info}/WHEEL
File without changes
{x_transformers-1.40.9.dist-info → x_transformers-1.40.11.dist-info}/top_level.txt
File without changes