x-transformers 2.3.26__tar.gz → 2.3.27__tar.gz
This diff compares publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their public registry.
- {x_transformers-2.3.26 → x_transformers-2.3.27}/PKG-INFO +1 -1
- {x_transformers-2.3.26 → x_transformers-2.3.27}/pyproject.toml +1 -1
- {x_transformers-2.3.26 → x_transformers-2.3.27}/tests/test_x_transformers.py +4 -1
- {x_transformers-2.3.26 → x_transformers-2.3.27}/x_transformers/x_transformers.py +2 -2
- {x_transformers-2.3.26 → x_transformers-2.3.27}/.github/FUNDING.yml +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/.github/workflows/python-publish.yml +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/.github/workflows/python-test.yaml +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/.gitignore +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/LICENSE +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/README.md +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/data/README.md +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/data/enwik8.gz +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/images/all-attention.png +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/images/attention-on-attention.png +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/images/cosine-sim-attention.png +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/images/deepnorm.png +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/images/dynamic-pos-bias-linear.png +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/images/dynamic-pos-bias-log.png +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/images/dynamic-pos-bias-sinusoidal.png +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/images/dynamic-pos-bias.png +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/images/enhanced-recurrence.png +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/images/fcm.png +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/images/ffglu.png +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/images/flash-attention.png +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/images/gate_values.png +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/images/gating.png +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/images/length-extrapolation-scale.png +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/images/macaron-1.png +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/images/macaron-2.png +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/images/memory-transformer.png +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/images/normformer.png +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/images/pia.png +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/images/qknorm-analysis.png +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/images/resi_dual.png +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/images/residual_attn.png +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/images/rezero.png +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/images/rotary.png +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/images/sandwich-2.png +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/images/sandwich.png +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/images/sandwich_norm.png +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/images/scalenorm.png +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/images/talking-heads.png +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/images/topk-attention.png +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/images/xval.png +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/train_belief_state.py +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/train_copy.py +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/train_entropy_tokenizer.py +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/train_enwik8.py +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/train_length_extrapolate.py +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/train_parity.py +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/x_transformers/__init__.py +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/x_transformers/attend.py +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/x_transformers/autoregressive_wrapper.py +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/x_transformers/belief_state_wrapper.py +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/x_transformers/continuous.py +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/x_transformers/dpo.py +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/x_transformers/entropy_based_tokenizer.py +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/x_transformers/multi_input.py +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/x_transformers/neo_mlp.py +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/x_transformers/nonautoregressive_wrapper.py +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/x_transformers/xl_autoregressive_wrapper.py +0 -0
- {x_transformers-2.3.26 → x_transformers-2.3.27}/x_transformers/xval.py +0 -0
tests/test_x_transformers.py

@@ -1055,8 +1055,9 @@ def test_prepend_embed():
 
     x = torch.randint(0, 256, (2, 10))
     prepend_embeds = torch.randn(2, 3, 512)
+    prepend_mask = torch.randint(0, 2, (2, 3)).bool()
 
-    loss = model(x, prepend_embeds = prepend_embeds)
+    loss = model(x, prepend_mask = prepend_mask, prepend_embeds = prepend_embeds)
     loss.backward()
 
     sample = model.generate(
@@ -1064,6 +1065,7 @@ def test_prepend_embed():
         seq_len = 100,
         temperature = 0.,
         prepend_embeds = prepend_embeds,
+        prepend_mask = prepend_mask,
         cache_kv = True,
     )
 
@@ -1072,6 +1074,7 @@ def test_prepend_embed():
         seq_len = 100,
         temperature = 0.,
         prepend_embeds = prepend_embeds,
+        prepend_mask = prepend_mask,
         cache_kv = False,
     )
 
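What this buys at the API level: `prepend_mask` can now be passed alongside `prepend_embeds`, both for the loss-computing forward pass and for `generate`, with or without kv caching. A minimal usage sketch, assuming a model built like the test fixture (the wrapper construction, the `num_tokens`/`max_seq_len`/`depth`/`heads` values, and the prompt slice are illustrative assumptions; only the tensor shapes and keyword arguments come from the diff):

```python
import torch
from x_transformers import TransformerWrapper, Decoder, AutoregressiveWrapper

# hypothetical model setup; the actual test fixture is not shown in this diff.
# dim = 512 matches the prepend_embeds width used in the test.
model = AutoregressiveWrapper(
    TransformerWrapper(
        num_tokens = 256,
        max_seq_len = 1024,
        attn_layers = Decoder(dim = 512, depth = 2, heads = 4)
    )
)

x = torch.randint(0, 256, (2, 10))
prepend_embeds = torch.randn(2, 3, 512)
prepend_mask = torch.randint(0, 2, (2, 3)).bool()

# training-style forward now accepts a mask over the prepended embeddings
loss = model(x, prepend_mask = prepend_mask, prepend_embeds = prepend_embeds)
loss.backward()

# cached generation accepts prepend_mask as well (cache_kv = False also works)
sample = model.generate(
    x[:, :1],                      # assumed prompt; not part of the shown hunks
    seq_len = 100,
    temperature = 0.,
    prepend_embeds = prepend_embeds,
    prepend_mask = prepend_mask,
    cache_kv = True,
)
```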
x_transformers/x_transformers.py

@@ -1926,7 +1926,7 @@ class Attention(Module):
 
         out = maybe(self.sublayer_dropout)(out)
 
-        if exists(mask):
+        if exists(mask) and not exists(cache):
             out = einx.where('b n, b n d, -> b n d', mask, out, 0.)
 
         if not return_intermediates:
@@ -2484,7 +2484,7 @@ class AttentionLayers(Module):
         attn_cache = []
 
         if exists(cache):
-            assert self.causal and not
+            assert self.causal and not exists(attn_mask)
 
             prev_cache_length = cache.cache_length
 
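The library-side change is twofold: `Attention` no longer zeroes its output against `mask` when a kv cache is present, and the cached-decoding guard in `AttentionLayers` now rejects only an explicit `attn_mask`, which is what lets the prepend mask above flow through `generate(..., cache_kv = True)`. For orientation, the guarded `einx.where('b n, b n d, -> b n d', mask, out, 0.)` call broadcasts a `(b, n)` boolean mask over the feature dimension and zeroes masked positions; a rough torch-only sketch of the same pattern (illustrative only, not code from the package):

```python
import torch

# shapes mirroring the einx pattern 'b n, b n d, -> b n d'
b, n, d = 2, 5, 8
mask = torch.randint(0, 2, (b, n)).bool()   # True where a position is kept
out = torch.randn(b, n, d)

# zero out features at masked positions, as the guarded einx.where line does
out_masked = torch.where(mask.unsqueeze(-1), out, torch.zeros_like(out))
```

During cached, token-by-token decoding the freshly computed `out` covers only the newly decoded positions while the mask spans the full prepended sequence, so skipping the zeroing once a cache exists (the new `and not exists(cache)` condition) presumably avoids that mismatch.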