x-transformers 2.1.0__py3-none-any.whl → 2.1.1__py3-none-any.whl
This diff shows the changes between two package versions as they were published to a supported public registry. It is provided for informational purposes only.
- x_transformers/x_transformers.py +4 -3
- {x_transformers-2.1.0.dist-info → x_transformers-2.1.1.dist-info}/METADATA +1 -1
- {x_transformers-2.1.0.dist-info → x_transformers-2.1.1.dist-info}/RECORD +5 -5
- {x_transformers-2.1.0.dist-info → x_transformers-2.1.1.dist-info}/WHEEL +0 -0
- {x_transformers-2.1.0.dist-info → x_transformers-2.1.1.dist-info}/licenses/LICENSE +0 -0
x_transformers/x_transformers.py
CHANGED
@@ -2200,11 +2200,12 @@ class AttentionLayers(Module):
 
             # attention, cross attention, feedforward
 
+            layer_qkv_receives_diff_view = layer_type == 'a' and qkv_receive_diff_residuals and not (is_first_self_attn and integrate_layers)
+
             if layer_type == 'a':
                 self_attn_learned_value_residual = learned_value_residual_mix and not is_first_self_attn
-                qkv_receives_diff_view = qkv_receive_diff_residuals and not (is_first_self_attn and integrate_layers)
 
-                layer = Attention(dim, heads = heads, causal = causal, qkv_receive_diff_residuals =
+                layer = Attention(dim, heads = heads, causal = causal, qkv_receive_diff_residuals = layer_qkv_receives_diff_view, learned_value_residual_mix = self_attn_learned_value_residual, rotate_num_heads = rotate_num_heads, **attn_kwargs)
                 is_first_self_attn = False
 
             elif layer_type == 'c':
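Read together with the hunk below, this change appears to rename qkv_receives_diff_view to layer_qkv_receives_diff_view and hoist it out of the `if layer_type == 'a':` branch, so later code in the same loop iteration can read the flag regardless of layer type. A minimal sketch of that scoping pattern, assuming a simplified layer loop; build_flags is a made-up helper, not part of x-transformers:

# Hedged sketch, not the library's code: in 2.1.0 the flag was assigned only inside the
# attention branch, so a later block that runs for every layer type could see a stale or
# undefined name. 2.1.1 computes it up front and folds the layer-type check into the
# condition itself, as the hunk above shows.

def build_flags(layer_types, qkv_receive_diff_residuals, integrate_layers):
    flags = []
    is_first_self_attn = True
    for layer_type in layer_types:
        # defined on every iteration, whatever the layer type
        layer_qkv_receives_diff_view = (
            layer_type == 'a'
            and qkv_receive_diff_residuals
            and not (is_first_self_attn and integrate_layers)
        )
        if layer_type == 'a':
            is_first_self_attn = False
        flags.append(layer_qkv_receives_diff_view)
    return flags

# build_flags(('a', 'c', 'f', 'a', 'c', 'f'), True, True)
# -> [False, False, False, True, False, False]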
@@ -2230,7 +2231,7 @@ class AttentionLayers(Module):
 
             if integrate_layers:
                 num_layer_hiddens = ind + 1
-                layer_integrate_num_view = 3 if
+                layer_integrate_num_view = 3 if layer_qkv_receives_diff_view else 1
 
                 layer_integrate = DynamicLIMe(dim, num_layer_hiddens, num_views = layer_integrate_num_view, use_softmax = layer_integrate_use_softmax)
 
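This hunk gates DynamicLIMe's num_views on the new flag: when the attention layer receives distinct residual streams for q, k and v, the layer integrator presumably has to emit three mixed views of the stored layer hiddens instead of one. A toy illustration of that idea under this assumption; mix_layer_hiddens is hypothetical and is not the library's DynamicLIMe:

import torch

def mix_layer_hiddens(layer_hiddens, weights):
    # layer_hiddens: list of (batch, seq, dim) tensors, one per earlier layer
    # weights: (num_views, num_hiddens) mixing coefficients
    hiddens = torch.stack(layer_hiddens)                              # (h, b, n, d)
    views = torch.einsum('v h, h b n d -> v b n d', weights, hiddens)
    return views.unbind(dim = 0)                                      # one mixed view per output

hiddens = [torch.randn(2, 8, 16) for _ in range(4)]                   # hiddens from 4 earlier layers
weights = torch.softmax(torch.randn(3, len(hiddens)), dim = -1)       # 3 views: one each for q, k, v
q_in, k_in, v_in = mix_layer_hiddens(hiddens, weights)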
{x_transformers-2.1.0.dist-info → x_transformers-2.1.1.dist-info}/RECORD
CHANGED
@@ -6,10 +6,10 @@ x_transformers/dpo.py,sha256=xt4OuOWhU8pN3OKN2LZAaC2NC8iiEnchqqcrPWVqf0o,3521
 x_transformers/multi_input.py,sha256=tCh-fTJDj2ib4SMGtsa-AM8MxKzJAQSwqAXOu3HU2mg,9252
 x_transformers/neo_mlp.py,sha256=XCNnnop9WLarcxap1kGuYc1x8GHvwkZiDRnXOxSl3Po,3452
 x_transformers/nonautoregressive_wrapper.py,sha256=2NU58hYMgn-4Jzg3mie-mXb0XH_dCN7fjlzd3K1rLUY,10510
-x_transformers/x_transformers.py,sha256=
+x_transformers/x_transformers.py,sha256=bIlP-NHj0SB2joklpxicoaD1HVpRMGIulMF8WYEsOAQ,110076
 x_transformers/xl_autoregressive_wrapper.py,sha256=CvZMJ6A6PA-Y_bQAhnORwjJBSl6Vjq2IdW5KTdk8NI8,4195
 x_transformers/xval.py,sha256=7S00kCuab4tWQa-vf-z-XfzADjVj48MoFIr7VSIvttg,8575
-x_transformers-2.1.
-x_transformers-2.1.
-x_transformers-2.1.
-x_transformers-2.1.
+x_transformers-2.1.1.dist-info/METADATA,sha256=BBGKnocyDvj_ynWM5dtrbyX1iodI4eWEnn9TWrw38kc,87275
+x_transformers-2.1.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+x_transformers-2.1.1.dist-info/licenses/LICENSE,sha256=As9u198X-U-vph5noInuUfqsAG2zX_oXPHDmdjwlPPY,1066
+x_transformers-2.1.1.dist-info/RECORD,,
{x_transformers-2.1.0.dist-info → x_transformers-2.1.1.dist-info}/WHEEL
File without changes
{x_transformers-2.1.0.dist-info → x_transformers-2.1.1.dist-info}/licenses/LICENSE
File without changes