x-transformers 2.11.17.tar.gz → 2.11.18.tar.gz
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of x-transformers has been flagged as potentially problematic by the registry's automated checks.
- {x_transformers-2.11.17 → x_transformers-2.11.18}/PKG-INFO +11 -1
- {x_transformers-2.11.17 → x_transformers-2.11.18}/README.md +10 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/pyproject.toml +1 -1
- {x_transformers-2.11.17 → x_transformers-2.11.18}/tests/test_x_transformers.py +11 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/x_transformers/x_transformers.py +9 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/.github/FUNDING.yml +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/.github/workflows/python-publish.yml +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/.github/workflows/python-test.yaml +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/.gitignore +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/LICENSE +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/data/README.md +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/data/enwik8.gz +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/images/all-attention.png +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/images/attention-on-attention.png +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/images/cosine-sim-attention.png +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/images/deepnorm.png +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/images/dynamic-pos-bias-linear.png +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/images/dynamic-pos-bias-log.png +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/images/dynamic-pos-bias-sinusoidal.png +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/images/dynamic-pos-bias.png +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/images/enhanced-recurrence.png +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/images/fcm.png +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/images/ffglu.png +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/images/flash-attention.png +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/images/gate_values.png +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/images/gating.png +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/images/length-extrapolation-scale.png +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/images/macaron-1.png +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/images/macaron-2.png +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/images/memory-transformer.png +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/images/normformer.png +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/images/pia.png +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/images/qknorm-analysis.png +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/images/resi_dual.png +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/images/residual_attn.png +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/images/rezero.png +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/images/rotary.png +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/images/sandwich-2.png +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/images/sandwich.png +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/images/sandwich_norm.png +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/images/scalenorm.png +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/images/talking-heads.png +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/images/topk-attention.png +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/images/xval.png +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/train_belief_state.py +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/train_copy.py +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/train_entropy_tokenizer.py +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/train_enwik8.py +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/train_free.py +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/train_gpt_vae.py +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/train_length_extrapolate.py +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/train_parity.py +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/train_with_muon.py +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/x_transformers/__init__.py +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/x_transformers/attend.py +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/x_transformers/autoregressive_wrapper.py +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/x_transformers/belief_state_wrapper.py +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/x_transformers/continuous.py +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/x_transformers/dpo.py +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/x_transformers/entropy_based_tokenizer.py +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/x_transformers/free_transformer.py +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/x_transformers/gpt_vae.py +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/x_transformers/multi_input.py +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/x_transformers/neo_mlp.py +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/x_transformers/nonautoregressive_wrapper.py +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/x_transformers/up_wrapper.py +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/x_transformers/xl_autoregressive_wrapper.py +0 -0
- {x_transformers-2.11.17 → x_transformers-2.11.18}/x_transformers/xval.py +0 -0
{x_transformers-2.11.17 → x_transformers-2.11.18}/PKG-INFO +11 -1

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: x-transformers
-Version: 2.11.17
+Version: 2.11.18
 Summary: X-Transformers
 Project-URL: Homepage, https://pypi.org/project/x-transformers/
 Project-URL: Repository, https://github.com/lucidrains/x-transformers

@@ -2607,4 +2607,14 @@ ids_out, num_out, is_number_mask = model.generate(start_ids, start_nums, 17)
 }
 ```
 
+```bibtex
+@article{elhage2022solu,
+    title   = {Softmax Linear Units},
+    author  = {Elhage, Nelson and Hume, Tristan and Olsson, Catherine and Nanda, Neel and Henighan, Tom and Johnston, Scott and ElShowk, Sheer and Joseph, Nicholas and DasSarma, Nova and Mann, Ben and Hernandez, Danny and Askell, Amanda and Ndousse, Kamal and Jones, Andy and Drain, Dawn and Chen, Anna and Bai, Yuntao and Ganguli, Deep and Lovitt, Liane and Hatfield-Dodds, Zac and Kernion, Jackson and Conerly, Tom and Kravec, Shauna and Fort, Stanislav and Kadavath, Saurav and Jacobson, Josh and Tran-Johnson, Eli and Kaplan, Jared and Clark, Jack and Brown, Tom and McCandlish, Sam and Amodei, Dario and Olah, Christopher},
+    year    = {2022},
+    journal = {Transformer Circuits Thread},
+    note    = {https://transformer-circuits.pub/2022/solu/index.html}
+}
+```
+
 *solve intelligence... then use that to solve everything else.* - Demis Hassabis
{x_transformers-2.11.17 → x_transformers-2.11.18}/README.md +10 -0

@@ -2558,4 +2558,14 @@ ids_out, num_out, is_number_mask = model.generate(start_ids, start_nums, 17)
 }
 ```
 
+```bibtex
+@article{elhage2022solu,
+    title   = {Softmax Linear Units},
+    author  = {Elhage, Nelson and Hume, Tristan and Olsson, Catherine and Nanda, Neel and Henighan, Tom and Johnston, Scott and ElShowk, Sheer and Joseph, Nicholas and DasSarma, Nova and Mann, Ben and Hernandez, Danny and Askell, Amanda and Ndousse, Kamal and Jones, Andy and Drain, Dawn and Chen, Anna and Bai, Yuntao and Ganguli, Deep and Lovitt, Liane and Hatfield-Dodds, Zac and Kernion, Jackson and Conerly, Tom and Kravec, Shauna and Fort, Stanislav and Kadavath, Saurav and Jacobson, Josh and Tran-Johnson, Eli and Kaplan, Jared and Clark, Jack and Brown, Tom and McCandlish, Sam and Amodei, Dario and Olah, Christopher},
+    year    = {2022},
+    journal = {Transformer Circuits Thread},
+    note    = {https://transformer-circuits.pub/2022/solu/index.html}
+}
+```
+
 *solve intelligence... then use that to solve everything else.* - Demis Hassabis
{x_transformers-2.11.17 → x_transformers-2.11.18}/tests/test_x_transformers.py +11 -0

@@ -1462,3 +1462,14 @@ def test_kv_input_residual():
     out = attn(tokens, context = context, cross_attn_kv_residuals = condition)
 
     assert tokens.shape == out.shape
+
+def test_solu():
+    attn = Decoder(
+        dim = 256,
+        depth = 2,
+        heads = 4,
+        ff_solu = True
+    )
+
+    tokens = torch.randn(3, 32, 256)
+    attn(tokens)
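For anyone wanting to try the new option end to end, the test above exercises it directly on a `Decoder`. Below is a hedged usage sketch, assuming x-transformers 2.11.18 is installed; `TransformerWrapper`, `num_tokens`, and `max_seq_len` are the library's usual entry points and are not part of this diff, while `ff_solu = True` mirrors the new `test_solu` test.

```python
import torch
from x_transformers import TransformerWrapper, Decoder

# Sketch only: the ff_solu flag is taken from the new test in this release;
# the surrounding TransformerWrapper setup is the library's standard usage.
model = TransformerWrapper(
    num_tokens = 256,
    max_seq_len = 1024,
    attn_layers = Decoder(
        dim = 256,
        depth = 2,
        heads = 4,
        ff_solu = True  # select the SoLU feedforward activation added in 2.11.18
    )
)

tokens = torch.randint(0, 256, (1, 128))
logits = model(tokens)  # (1, 128, 256)
```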
{x_transformers-2.11.17 → x_transformers-2.11.18}/x_transformers/x_transformers.py +9 -0

@@ -275,6 +275,10 @@ class ReluSquared(Module):
     def forward(self, x):
         return F.relu(x) ** 2
 
+class SoLU(Module):
+    def forward(self, x):
+        return x.softmax(dim = -1) * x
+
 # embedding
 
 class TokenEmbedding(Module):

@@ -1239,6 +1243,7 @@ class FeedForward(Module):
         glu_mult_bias = False,
         swish = False,
         relu_squared = False,
+        solu = False,
         custom_activation = None,
         post_act_ln = False,
         dropout = 0.,

@@ -1250,10 +1255,14 @@ class FeedForward(Module):
         inner_dim = int(dim * mult)
         dim_out = default(dim_out, dim)
 
+        assert at_most_one_of(relu_squared, solu)
+
         if exists(custom_activation):
             activation = deepcopy(custom_activation)
         elif relu_squared:
             activation = ReluSquared()
+        elif solu:
+            activation = SoLU()
         elif swish:
             activation = nn.SiLU()
         else:
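The activation itself is small: SoLU (Softmax Linear Unit, per the citation added to the README) gates the input elementwise by its own softmax over the feature dimension. A minimal standalone sketch of that behavior, assuming only PyTorch and mirroring the `SoLU` module added above:

```python
import torch
from torch import nn

class SoLU(nn.Module):
    # Softmax Linear Unit: x * softmax(x), with the softmax taken over the
    # last (feature) dimension, following the Transformer Circuits article.
    def forward(self, x):
        return x.softmax(dim = -1) * x

x = torch.randn(2, 16, 256)
out = SoLU()(x)
assert out.shape == x.shape  # elementwise gating preserves the shape
```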