x-transformers 2.11.17__py3-none-any.whl → 2.11.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- x_transformers/x_transformers.py +9 -0
- {x_transformers-2.11.17.dist-info → x_transformers-2.11.18.dist-info}/METADATA +11 -1
- {x_transformers-2.11.17.dist-info → x_transformers-2.11.18.dist-info}/RECORD +5 -5
- {x_transformers-2.11.17.dist-info → x_transformers-2.11.18.dist-info}/WHEEL +0 -0
- {x_transformers-2.11.17.dist-info → x_transformers-2.11.18.dist-info}/licenses/LICENSE +0 -0
x_transformers/x_transformers.py
CHANGED
@@ -275,6 +275,10 @@ class ReluSquared(Module):
     def forward(self, x):
         return F.relu(x) ** 2
 
+class SoLU(Module):
+    def forward(self, x):
+        return x.softmax(dim = -1) * x
+
 # embedding
 
 class TokenEmbedding(Module):
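The new `SoLU` (Softmax Linear Unit) multiplies the input elementwise by its own softmax over the last dimension. A minimal standalone sketch in plain PyTorch to sanity-check that behavior; the `solu` helper name here is just for illustration, not part of the package API:

```python
import torch

def solu(x):
    # as in the diff above: softmax over the last dim, multiplied back in
    return x.softmax(dim = -1) * x

x = torch.randn(2, 4, 8)
out = solu(x)
assert out.shape == x.shape  # elementwise gating, shape preserved
```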
@@ -1239,6 +1243,7 @@ class FeedForward(Module):
         glu_mult_bias = False,
         swish = False,
         relu_squared = False,
+        solu = False,
         custom_activation = None,
         post_act_ln = False,
         dropout = 0.,
@@ -1250,10 +1255,14 @@ class FeedForward(Module):
         inner_dim = int(dim * mult)
         dim_out = default(dim_out, dim)
 
+        assert at_most_one_of(relu_squared, solu)
+
         if exists(custom_activation):
             activation = deepcopy(custom_activation)
         elif relu_squared:
             activation = ReluSquared()
+        elif solu:
+            activation = SoLU()
         elif swish:
             activation = nn.SiLU()
         else:
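Based on the constructor changes above, the new flag should work like the existing `relu_squared` flag; a minimal usage sketch, assuming x-transformers 2.11.18 is installed and `FeedForward` is imported from `x_transformers.x_transformers` (note the new assert means `solu` and `relu_squared` cannot both be enabled):

```python
import torch
from x_transformers.x_transformers import FeedForward

ff = FeedForward(512, solu = True)  # SoLU in place of the default activation
x = torch.randn(2, 16, 512)
out = ff(x)                         # shape preserved: (2, 16, 512)
```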
{x_transformers-2.11.17.dist-info → x_transformers-2.11.18.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: x-transformers
-Version: 2.11.17
+Version: 2.11.18
 Summary: X-Transformers
 Project-URL: Homepage, https://pypi.org/project/x-transformers/
 Project-URL: Repository, https://github.com/lucidrains/x-transformers
@@ -2607,4 +2607,14 @@ ids_out, num_out, is_number_mask = model.generate(start_ids, start_nums, 17)
 }
 ```
 
+```bibtex
+@article{elhage2022solu,
+    title = {Softmax Linear Units},
+    author = {Elhage, Nelson and Hume, Tristan and Olsson, Catherine and Nanda, Neel and Henighan, Tom and Johnston, Scott and ElShowk, Sheer and Joseph, Nicholas and DasSarma, Nova and Mann, Ben and Hernandez, Danny and Askell, Amanda and Ndousse, Kamal and Jones, Andy and Drain, Dawn and Chen, Anna and Bai, Yuntao and Ganguli, Deep and Lovitt, Liane and Hatfield-Dodds, Zac and Kernion, Jackson and Conerly, Tom and Kravec, Shauna and Fort, Stanislav and Kadavath, Saurav and Jacobson, Josh and Tran-Johnson, Eli and Kaplan, Jared and Clark, Jack and Brown, Tom and McCandlish, Sam and Amodei, Dario and Olah, Christopher},
+    year = {2022},
+    journal = {Transformer Circuits Thread},
+    note = {https://transformer-circuits.pub/2022/solu/index.html}
+}
+```
+
 *solve intelligence... then use that to solve everything else.* - Demis Hassabis
{x_transformers-2.11.17.dist-info → x_transformers-2.11.18.dist-info}/RECORD
CHANGED

@@ -11,10 +11,10 @@ x_transformers/multi_input.py,sha256=tCh-fTJDj2ib4SMGtsa-AM8MxKzJAQSwqAXOu3HU2mg
 x_transformers/neo_mlp.py,sha256=XCNnnop9WLarcxap1kGuYc1x8GHvwkZiDRnXOxSl3Po,3452
 x_transformers/nonautoregressive_wrapper.py,sha256=hMQqNimGtchNIe13cR5LZule1V7I1qM5LmY8VQfVdnA,11698
 x_transformers/up_wrapper.py,sha256=YC2LN14_7Xx9Wtiek2rtEJ_qHqdfSmKlh3d7Cgxwd80,7073
-x_transformers/x_transformers.py,sha256=
+x_transformers/x_transformers.py,sha256=pIUxQmj_wLHIMOxyqjy4hKww6NdYtzxtRMWROovHoDA,127212
 x_transformers/xl_autoregressive_wrapper.py,sha256=CvZMJ6A6PA-Y_bQAhnORwjJBSl6Vjq2IdW5KTdk8NI8,4195
 x_transformers/xval.py,sha256=AwwYUm8yDAtKQyKJDIhYMsiLTJ_skh3scUFMjp5sda8,8597
-x_transformers-2.11.17.dist-info/METADATA,sha256=
-x_transformers-2.11.17.dist-info/WHEEL,sha256=
-x_transformers-2.11.17.dist-info/licenses/LICENSE,sha256=
-x_transformers-2.11.17.dist-info/RECORD,,
+x_transformers-2.11.18.dist-info/METADATA,sha256=9VPaNWK5WVVltDqRqkb_4OtPEmJFzfkYkln3aJpKdfQ,96858
+x_transformers-2.11.18.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+x_transformers-2.11.18.dist-info/licenses/LICENSE,sha256=As9u198X-U-vph5noInuUfqsAG2zX_oXPHDmdjwlPPY,1066
+x_transformers-2.11.18.dist-info/RECORD,,
{x_transformers-2.11.17.dist-info → x_transformers-2.11.18.dist-info}/WHEEL
File without changes

{x_transformers-2.11.17.dist-info → x_transformers-2.11.18.dist-info}/licenses/LICENSE
File without changes