x-transformers 2.10.0__tar.gz → 2.10.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of x-transformers might be problematic.
- {x_transformers-2.10.0 → x_transformers-2.10.1}/PKG-INFO +8 -7
- {x_transformers-2.10.0 → x_transformers-2.10.1}/README.md +7 -6
- {x_transformers-2.10.0 → x_transformers-2.10.1}/pyproject.toml +1 -1
- {x_transformers-2.10.0 → x_transformers-2.10.1}/x_transformers/attend.py +1 -3
- {x_transformers-2.10.0 → x_transformers-2.10.1}/.github/FUNDING.yml +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/.github/workflows/python-publish.yml +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/.github/workflows/python-test.yaml +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/.gitignore +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/LICENSE +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/data/README.md +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/data/enwik8.gz +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/images/all-attention.png +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/images/attention-on-attention.png +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/images/cosine-sim-attention.png +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/images/deepnorm.png +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/images/dynamic-pos-bias-linear.png +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/images/dynamic-pos-bias-log.png +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/images/dynamic-pos-bias-sinusoidal.png +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/images/dynamic-pos-bias.png +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/images/enhanced-recurrence.png +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/images/fcm.png +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/images/ffglu.png +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/images/flash-attention.png +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/images/gate_values.png +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/images/gating.png +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/images/length-extrapolation-scale.png +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/images/macaron-1.png +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/images/macaron-2.png +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/images/memory-transformer.png +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/images/normformer.png +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/images/pia.png +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/images/qknorm-analysis.png +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/images/resi_dual.png +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/images/residual_attn.png +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/images/rezero.png +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/images/rotary.png +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/images/sandwich-2.png +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/images/sandwich.png +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/images/sandwich_norm.png +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/images/scalenorm.png +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/images/talking-heads.png +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/images/topk-attention.png +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/images/xval.png +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/tests/test_x_transformers.py +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/train_belief_state.py +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/train_copy.py +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/train_entropy_tokenizer.py +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/train_enwik8.py +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/train_gpt_vae.py +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/train_length_extrapolate.py +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/train_parity.py +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/train_with_muon.py +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/x_transformers/__init__.py +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/x_transformers/autoregressive_wrapper.py +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/x_transformers/belief_state_wrapper.py +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/x_transformers/continuous.py +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/x_transformers/dpo.py +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/x_transformers/entropy_based_tokenizer.py +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/x_transformers/gpt_vae.py +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/x_transformers/multi_input.py +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/x_transformers/neo_mlp.py +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/x_transformers/nonautoregressive_wrapper.py +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/x_transformers/up_wrapper.py +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/x_transformers/x_transformers.py +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/x_transformers/xl_autoregressive_wrapper.py +0 -0
- {x_transformers-2.10.0 → x_transformers-2.10.1}/x_transformers/xval.py +0 -0
{x_transformers-2.10.0 → x_transformers-2.10.1}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: x-transformers
-Version: 2.10.0
+Version: 2.10.1
 Summary: X-Transformers
 Project-URL: Homepage, https://pypi.org/project/x-transformers/
 Project-URL: Repository, https://github.com/lucidrains/x-transformers
@@ -2587,13 +2587,14 @@ ids_out, num_out, is_number_mask = model.generate(start_ids, start_nums, 17)
 ```

 ```bibtex
-@
-    title = {More Expressive Attention with Negative Weights},
-    author = {
-    booktitle = {Submitted to The Fourteenth International Conference on Learning Representations},
+@misc{lv2025expressiveattentionnegativeweights,
+    title = {More Expressive Attention with Negative Weights},
+    author = {Ang Lv and Ruobing Xie and Shuaipeng Li and Jiayi Liao and Xingwu Sun and Zhanhui Kang and Di Wang and Rui Yan},
     year = {2025},
-
-
+    eprint = {2411.07176},
+    archivePrefix = {arXiv},
+    primaryClass = {cs.CL},
+    url = {https://arxiv.org/abs/2411.07176},
 }
 ```

{x_transformers-2.10.0 → x_transformers-2.10.1}/README.md

@@ -2538,13 +2538,14 @@ ids_out, num_out, is_number_mask = model.generate(start_ids, start_nums, 17)
 ```

 ```bibtex
-@
-    title = {More Expressive Attention with Negative Weights},
-    author = {
-    booktitle = {Submitted to The Fourteenth International Conference on Learning Representations},
+@misc{lv2025expressiveattentionnegativeweights,
+    title = {More Expressive Attention with Negative Weights},
+    author = {Ang Lv and Ruobing Xie and Shuaipeng Li and Jiayi Liao and Xingwu Sun and Zhanhui Kang and Di Wang and Rui Yan},
     year = {2025},
-
-
+    eprint = {2411.07176},
+    archivePrefix = {arXiv},
+    primaryClass = {cs.CL},
+    url = {https://arxiv.org/abs/2411.07176},
 }
 ```

{x_transformers-2.10.0 → x_transformers-2.10.1}/x_transformers/attend.py

@@ -520,6 +520,7 @@ class Attend(Module):

         if self.cog_signed:
             sim_sign = sim.sign()
+            sim = sim.abs()

         # masking

@@ -552,9 +553,6 @@ class Attend(Module):

         pre_softmax_attn = sim

-        if self.cog_signed:
-            sim = sim.abs()
-
         attn = self.attn_fn(sim)

         attn = attn.type(dtype)
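The attend.py hunks above relocate the `sim = sim.abs()` call in the signed ("cog") attention path from "More Expressive Attention with Negative Weights" (arXiv:2411.07176): the sign of the pre-softmax similarities is recorded and the absolute value is taken right away, before masking, rather than just before the softmax. The sketch below illustrates the general idea of sign-preserving attention weights; it is a simplified standalone sketch, not the library's implementation — `signed_attention_weights` and the tensor names are hypothetical, a plain softmax stands in for the module's `attn_fn`, and where the recorded sign is multiplied back in x-transformers lies outside the hunks shown here.

```python
import torch

# Minimal sketch of sign-preserving ("cog"-style) attention weights.
# `sim` stands in for pre-softmax q·k similarities of shape
# (batch, heads, query_len, key_len); names here are illustrative only.
def signed_attention_weights(sim, mask = None):
    sim_sign = sim.sign()                    # record the sign of each similarity
    sim = sim.abs()                          # work with magnitudes, as in the reordered code above

    if mask is not None:                     # masking applied after abs(), mirroring the diff
        sim = sim.masked_fill(~mask, -torch.finfo(sim.dtype).max)

    attn = sim.softmax(dim = -1)             # non-negative weights over magnitudes
    return attn * sim_sign                   # restore signs, so weights can be negative

# usage on random queries / keys
q = torch.randn(1, 8, 16, 64)
k = torch.randn(1, 8, 16, 64)
sim = torch.einsum('b h i d, b h j d -> b h i j', q, k) * (64 ** -0.5)
attn = signed_attention_weights(sim)         # shape (1, 8, 16, 16)
```

Taking the absolute value before masking keeps the large-negative mask fill value meaningful for the magnitude softmax, which is presumably the motivation for reordering the two steps.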