x-transformers 2.11.18__py3-none-any.whl → 2.11.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -161,6 +161,21 @@ def or_reduce(masks):
         head = head | rest
     return head
 
+def orthog_project(x, y):
+    x, packed_shape = pack([x], 'b *')
+    y, _ = pack([y], 'b *')
+
+    dtype = x.dtype
+    x, y = x.double(), y.double()
+    unit = F.normalize(y, dim = -1)
+
+    parallel = (x * unit).sum(dim = -1, keepdim = True) * unit
+    orthog = x - parallel
+
+    orthog, = unpack(orthog, packed_shape, 'b *')
+
+    return orthog.to(dtype)
+
 # cache helpers
 
 def get_cached_kvs(
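For orientation, the new `orthog_project` helper flattens everything except the batch dimension, removes from `x` its component parallel to `y` (computed in float64), and returns only the orthogonal part cast back to the input dtype. A minimal check of that behavior, assuming the helper is importable as a module-level function of `x_transformers.x_transformers`:

```python
import torch
from einops import pack
from x_transformers.x_transformers import orthog_project  # assumed import path

x = torch.randn(2, 8, 64)
y = torch.randn(2, 8, 64)

out = orthog_project(x, y)

# per sample, the flattened result should have a near-zero inner product with y
flat_out, _ = pack([out], 'b *')
flat_y, _ = pack([y], 'b *')
print((flat_out * flat_y).sum(dim = -1))  # values close to zero
```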
@@ -276,8 +291,13 @@ class ReluSquared(Module):
         return F.relu(x) ** 2
 
 class SoLU(Module):
+    def __init__(self, dim):
+        super().__init__()
+        self.norm = LayerNorm(dim)
+
     def forward(self, x):
-        return x.softmax(dim = -1) * x
+        activated = x.softmax(dim = -1) * x
+        return self.norm(activated)
 
 # embedding
 
@@ -1262,7 +1282,7 @@ class FeedForward(Module):
         elif relu_squared:
             activation = ReluSquared()
         elif solu:
-            activation = SoLU()
+            activation = SoLU(inner_dim)
         elif swish:
             activation = nn.SiLU()
         else:
@@ -1376,7 +1396,8 @@ class Attention(Module):
         softclamp_logits = False,
         logit_softclamp_value = 50.,
         learned_value_residual_mix = False,
-        laser = False, # https://arxiv.org/abs/2411.03493v1
+        orthog_projected_values = False, # https://openreview.net/forum?id=Ard2QzPAUK
+        laser = False, # https://arxiv.org/abs/2411.03493v1
         laser_softclamp_value = 15.,
         qkv_receive_diff_residuals = False,
         use_latent_q = False,
@@ -1602,6 +1623,11 @@ class Attention(Module):
 
         self.attn_on_attn = on_attn
 
+        # return orthogonal projected weighted values on original values
+        # "belief attention" - iclr 2026
+
+        self.orthog_projected_values = orthog_projected_values
+
         # hybrid module, in same vein as hymba https://www.arxiv.org/abs/2411.13676
 
         hybrid_mix = None
@@ -2043,6 +2069,12 @@ class Attention(Module):
             gates = self.to_v_gate(x)
             out = out * self.to_v_gate_activation(gates)
 
+        # maybe return orthogonal projected - "belief" attention
+
+        if self.orthog_projected_values:
+            merged_v = self.merge_heads(orig_values)
+            out = orthog_project(out, merged_v)
+
         # combine the heads
 
         out = self.to_out(out)
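In the forward pass above, the attention output is projected orthogonally to the merged original values before the output projection. Downstream, the new flag can presumably be reached through the usual `attn_`-prefixed kwarg routing that x-transformers uses for `Attention` options; a sketch in the README's style, with that routing convention as the only assumption:

```python
import torch
from x_transformers import TransformerWrapper, Decoder

model = TransformerWrapper(
    num_tokens = 256,
    max_seq_len = 1024,
    attn_layers = Decoder(
        dim = 512,
        depth = 6,
        heads = 8,
        attn_orthog_projected_values = True  # routed to Attention(orthog_projected_values = True)
    )
)

x = torch.randint(0, 256, (1, 1024))
logits = model(x)  # (1, 1024, 256)
```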
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: x-transformers
-Version: 2.11.18
+Version: 2.11.20
 Summary: X-Transformers
 Project-URL: Homepage, https://pypi.org/project/x-transformers/
 Project-URL: Repository, https://github.com/lucidrains/x-transformers
@@ -2608,12 +2608,13 @@ ids_out, num_out, is_number_mask = model.generate(start_ids, start_nums, 17)
 ```
 
 ```bibtex
-@article{elhage2022solu,
-    title   = {Softmax Linear Units},
-    author  = {Elhage, Nelson and Hume, Tristan and Olsson, Catherine and Nanda, Neel and Henighan, Tom and Johnston, Scott and ElShowk, Sheer and Joseph, Nicholas and DasSarma, Nova and Mann, Ben and Hernandez, Danny and Askell, Amanda and Ndousse, Kamal and Jones, Andy and Drain, Dawn and Chen, Anna and Bai, Yuntao and Ganguli, Deep and Lovitt, Liane and Hatfield-Dodds, Zac and Kernion, Jackson and Conerly, Tom and Kravec, Shauna and Fort, Stanislav and Kadavath, Saurav and Jacobson, Josh and Tran-Johnson, Eli and Kaplan, Jared and Clark, Jack and Brown, Tom and McCandlish, Sam and Amodei, Dario and Olah, Christopher},
-    year    = {2022},
-    journal = {Transformer Circuits Thread},
-    note    = {https://transformer-circuits.pub/2022/solu/index.html}
+@inproceedings{anonymous2025beliefformer,
+    title     = {BeliefFormer: Belief Attention in Transformer},
+    author    = {Anonymous},
+    booktitle = {Submitted to The Fourteenth International Conference on Learning Representations},
+    year      = {2025},
+    url       = {https://openreview.net/forum?id=Ard2QzPAUK},
+    note      = {under review}
 }
 ```
 
@@ -11,10 +11,10 @@ x_transformers/multi_input.py,sha256=tCh-fTJDj2ib4SMGtsa-AM8MxKzJAQSwqAXOu3HU2mg
 x_transformers/neo_mlp.py,sha256=XCNnnop9WLarcxap1kGuYc1x8GHvwkZiDRnXOxSl3Po,3452
 x_transformers/nonautoregressive_wrapper.py,sha256=hMQqNimGtchNIe13cR5LZule1V7I1qM5LmY8VQfVdnA,11698
 x_transformers/up_wrapper.py,sha256=YC2LN14_7Xx9Wtiek2rtEJ_qHqdfSmKlh3d7Cgxwd80,7073
-x_transformers/x_transformers.py,sha256=pIUxQmj_wLHIMOxyqjy4hKww6NdYtzxtRMWROovHoDA,127212
+x_transformers/x_transformers.py,sha256=igiWHF93dDCRGaiLL-YVtAYcyhxNQr0Yw_Ievvs1cD4,128188
 x_transformers/xl_autoregressive_wrapper.py,sha256=CvZMJ6A6PA-Y_bQAhnORwjJBSl6Vjq2IdW5KTdk8NI8,4195
 x_transformers/xval.py,sha256=AwwYUm8yDAtKQyKJDIhYMsiLTJ_skh3scUFMjp5sda8,8597
-x_transformers-2.11.18.dist-info/METADATA,sha256=9VPaNWK5WVVltDqRqkb_4OtPEmJFzfkYkln3aJpKdfQ,96858
-x_transformers-2.11.18.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-x_transformers-2.11.18.dist-info/licenses/LICENSE,sha256=As9u198X-U-vph5noInuUfqsAG2zX_oXPHDmdjwlPPY,1066
-x_transformers-2.11.18.dist-info/RECORD,,
+x_transformers-2.11.20.dist-info/METADATA,sha256=Ii1ODqNuQz8ge5SrePZ33Sahr4izJEcpZH2SrAltNcA,96372
+x_transformers-2.11.20.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+x_transformers-2.11.20.dist-info/licenses/LICENSE,sha256=As9u198X-U-vph5noInuUfqsAG2zX_oXPHDmdjwlPPY,1066
+x_transformers-2.11.20.dist-info/RECORD,,