x-transformers 2.11.20__py3-none-any.whl → 2.11.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of x-transformers has been flagged as potentially problematic; see the registry's advisory page for details.
- x_transformers/x_transformers.py +21 -4
- {x_transformers-2.11.20.dist-info → x_transformers-2.11.22.dist-info}/METADATA +1 -1
- {x_transformers-2.11.20.dist-info → x_transformers-2.11.22.dist-info}/RECORD +5 -5
- {x_transformers-2.11.20.dist-info → x_transformers-2.11.22.dist-info}/WHEEL +0 -0
- {x_transformers-2.11.20.dist-info → x_transformers-2.11.22.dist-info}/licenses/LICENSE +0 -0
x_transformers/x_transformers.py
CHANGED
|
@@ -1397,6 +1397,7 @@ class Attention(Module):
|
|
|
1397
1397
|
logit_softclamp_value = 50.,
|
|
1398
1398
|
learned_value_residual_mix = False,
|
|
1399
1399
|
orthog_projected_values = False, # https://openreview.net/forum?id=Ard2QzPAUK
|
|
1400
|
+
orthog_projected_values_per_head = False,
|
|
1400
1401
|
laser = False, # https://arxiv.org/abs/2411.03493v1
|
|
1401
1402
|
laser_softclamp_value = 15.,
|
|
1402
1403
|
qkv_receive_diff_residuals = False,
|
|
@@ -1627,6 +1628,9 @@ class Attention(Module):
|
|
|
1627
1628
|
# "belief attention" - iclr 2026
|
|
1628
1629
|
|
|
1629
1630
|
self.orthog_projected_values = orthog_projected_values
|
|
1631
|
+
self.orthog_projected_values_per_head = orthog_projected_values_per_head
|
|
1632
|
+
|
|
1633
|
+
out_dim *= max(1, int(orthog_projected_values) + int(orthog_projected_values_per_head))
|
|
1630
1634
|
|
|
1631
1635
|
# hybrid module, in same vein as hymba https://www.arxiv.org/abs/2411.13676
|
|
1632
1636
|
|
|
@@ -2069,11 +2073,24 @@ class Attention(Module):
|
|
|
2069
2073
|
gates = self.to_v_gate(x)
|
|
2070
2074
|
out = out * self.to_v_gate_activation(gates)
|
|
2071
2075
|
|
|
2072
|
-
# maybe
|
|
2076
|
+
# maybe orthogonal projected weighted values - "belief" attention
|
|
2077
|
+
|
|
2078
|
+
if self.orthog_projected_values or self.orthog_projected_values_per_head:
|
|
2079
|
+
orthog_projected = []
|
|
2080
|
+
v_for_proj = self.merge_heads(orig_values)
|
|
2081
|
+
|
|
2082
|
+
if self.orthog_projected_values:
|
|
2083
|
+
projected = orthog_project(out, v_for_proj)
|
|
2084
|
+
orthog_projected.append(projected)
|
|
2085
|
+
|
|
2086
|
+
if self.orthog_projected_values_per_head:
|
|
2087
|
+
v_for_proj = rearrange(v_for_proj, 'b n (h d) -> b n h d', h = h)
|
|
2088
|
+
out = rearrange(out, 'b n (h d) -> b n h d', h = h)
|
|
2089
|
+
projected = orthog_project(out, v_for_proj)
|
|
2090
|
+
projected = rearrange(projected, 'b n h d -> b n (h d)')
|
|
2091
|
+
orthog_projected.append(projected)
|
|
2073
2092
|
|
|
2074
|
-
|
|
2075
|
-
merged_v = self.merge_heads(orig_values)
|
|
2076
|
-
out = orthog_project(out, merged_v)
|
|
2093
|
+
out = cat(orthog_projected, dim = -1)
|
|
2077
2094
|
|
|
2078
2095
|
# combine the heads
|
|
2079
2096
|
|
|
@@ -11,10 +11,10 @@ x_transformers/multi_input.py,sha256=tCh-fTJDj2ib4SMGtsa-AM8MxKzJAQSwqAXOu3HU2mg
|
|
|
11
11
|
x_transformers/neo_mlp.py,sha256=XCNnnop9WLarcxap1kGuYc1x8GHvwkZiDRnXOxSl3Po,3452
|
|
12
12
|
x_transformers/nonautoregressive_wrapper.py,sha256=hMQqNimGtchNIe13cR5LZule1V7I1qM5LmY8VQfVdnA,11698
|
|
13
13
|
x_transformers/up_wrapper.py,sha256=YC2LN14_7Xx9Wtiek2rtEJ_qHqdfSmKlh3d7Cgxwd80,7073
|
|
14
|
-
x_transformers/x_transformers.py,sha256=
|
|
14
|
+
x_transformers/x_transformers.py,sha256=G3_RQ-Z8szUS4WzKcwMCO2lnrFWDRsrLJ0hJL1rv-WM,129051
|
|
15
15
|
x_transformers/xl_autoregressive_wrapper.py,sha256=CvZMJ6A6PA-Y_bQAhnORwjJBSl6Vjq2IdW5KTdk8NI8,4195
|
|
16
16
|
x_transformers/xval.py,sha256=AwwYUm8yDAtKQyKJDIhYMsiLTJ_skh3scUFMjp5sda8,8597
|
|
17
|
-
x_transformers-2.11.
|
|
18
|
-
x_transformers-2.11.
|
|
19
|
-
x_transformers-2.11.
|
|
20
|
-
x_transformers-2.11.
|
|
17
|
+
x_transformers-2.11.22.dist-info/METADATA,sha256=vtm9uUCTPJ8K8azq1m6YfyFwEXmRPX1IATTfkv6V_Sc,96372
|
|
18
|
+
x_transformers-2.11.22.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
19
|
+
x_transformers-2.11.22.dist-info/licenses/LICENSE,sha256=As9u198X-U-vph5noInuUfqsAG2zX_oXPHDmdjwlPPY,1066
|
|
20
|
+
x_transformers-2.11.22.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|