x-transformers 2.0.3__tar.gz → 2.0.4__tar.gz
- {x_transformers-2.0.3 → x_transformers-2.0.4}/PKG-INFO +1 -1
- {x_transformers-2.0.3 → x_transformers-2.0.4}/pyproject.toml +1 -1
- {x_transformers-2.0.3 → x_transformers-2.0.4}/x_transformers/x_transformers.py +1 -1
- {x_transformers-2.0.3 → x_transformers-2.0.4}/.github/FUNDING.yml +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/.github/workflows/python-publish.yml +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/.github/workflows/python-test.yaml +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/.gitignore +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/LICENSE +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/README.md +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/images/all-attention.png +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/images/attention-on-attention.png +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/images/cosine-sim-attention.png +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/images/deepnorm.png +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/images/dynamic-pos-bias-linear.png +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/images/dynamic-pos-bias-log.png +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/images/dynamic-pos-bias-sinusoidal.png +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/images/dynamic-pos-bias.png +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/images/enhanced-recurrence.png +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/images/fcm.png +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/images/ffglu.png +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/images/flash-attention.png +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/images/gate_values.png +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/images/gating.png +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/images/length-extrapolation-scale.png +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/images/macaron-1.png +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/images/macaron-2.png +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/images/memory-transformer.png +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/images/normformer.png +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/images/pia.png +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/images/qknorm-analysis.png +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/images/resi_dual.png +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/images/residual_attn.png +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/images/rezero.png +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/images/rotary.png +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/images/sandwich-2.png +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/images/sandwich.png +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/images/sandwich_norm.png +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/images/scalenorm.png +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/images/talking-heads.png +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/images/topk-attention.png +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/images/xval.png +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/tests/test_x_transformers.py +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/train_copy.py +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/train_enwik8.py +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/train_parity.py +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/x_transformers/__init__.py +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/x_transformers/attend.py +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/x_transformers/autoregressive_wrapper.py +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/x_transformers/continuous.py +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/x_transformers/dpo.py +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/x_transformers/multi_input.py +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/x_transformers/neo_mlp.py +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/x_transformers/nonautoregressive_wrapper.py +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/x_transformers/xl_autoregressive_wrapper.py +0 -0
- {x_transformers-2.0.3 → x_transformers-2.0.4}/x_transformers/xval.py +0 -0
@@ -1282,7 +1282,7 @@ class Attention(Module):
         dim_kv_input = dim_latent_kv
 
         if exists(latent_rope_subheads):
-            assert not exists(rotate_num_heads)
+            assert not exists(rotate_num_heads), '`rotate_num_heads` cannot be set when multi-latent attention is being used'
            rotate_num_heads = latent_rope_subheads
 
            k_dim = dim_head * (kv_heads - latent_rope_subheads)