x_transformers-2.11.16-py3-none-any.whl → x_transformers-2.11.18-py3-none-any.whl

This diff compares the contents of two publicly released versions of the x-transformers package as published to PyPI. It is provided for informational purposes only and reflects the packages exactly as they appear in the public registry.

x_transformers/free_transformer.py

@@ -149,6 +149,7 @@ class FreeTransformer(Module):
         enc_kwargs: dict = dict(),
         dec_kwargs: dict = dict(),
         kl_loss_weight = 1.,
+        latent_dropout_prob = 0.,
         pad_id = -1,
         **kwargs
     ):

@@ -187,6 +188,8 @@ class FreeTransformer(Module):
 
         self.from_latent_to_condition = nn.Linear(self.binary_mapper.num_codes, dim, bias = False)
 
+        self.latent_dropout = nn.Dropout(latent_dropout_prob)
+
         self.decoder_head = Decoder(
             dim = dim,
             depth = dec_head_depth,

@@ -380,6 +383,8 @@ class FreeTransformer(Module):
 
         latents, kl_loss = self.encode_to_latents(tokens_for_latents, mask = encoder_mask, per_token_latents = per_token_latents, return_kl_loss = True)
 
+        latents = self.latent_dropout(latents)
+
         condition = self.from_latent_to_condition(latents)
 
         # decoder tail

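Taken together, the three free_transformer.py hunks add one knob: the latents produced by the encoder are passed through `nn.Dropout(latent_dropout_prob)` before being projected into the decoder's conditioning vector. Below is a minimal, self-contained sketch of that pattern; the sizes (`num_codes = 32`, `dim = 512`) and the dropout probability are illustrative assumptions, not the library's defaults.

```python
import torch
from torch import nn

# illustrative sizes only, not the library's defaults
num_codes = 32   # width of the latent code
dim = 512        # model dimension

latent_dropout = nn.Dropout(p = 0.1)  # corresponds to latent_dropout_prob = 0.1
from_latent_to_condition = nn.Linear(num_codes, dim, bias = False)

latents = torch.randn(2, num_codes)            # stand-in for the encode_to_latents(...) output
latents = latent_dropout(latents)              # randomly zero latent units during training
condition = from_latent_to_condition(latents)  # conditioning vector fed to the decoder head

print(condition.shape)  # torch.Size([2, 512])
```
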
x_transformers/x_transformers.py

@@ -275,6 +275,10 @@ class ReluSquared(Module):
     def forward(self, x):
         return F.relu(x) ** 2
 
+class SoLU(Module):
+    def forward(self, x):
+        return x.softmax(dim = -1) * x
+
 # embedding
 
 class TokenEmbedding(Module):

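The new activation is the Softmax Linear Unit cited in the README addition at the end of this diff: the input multiplied elementwise by its own softmax over the feature dimension. A standalone restatement (using `nn.Module` directly in place of the library's `Module` alias) with a quick numerical check:

```python
import torch
from torch import nn
import torch.nn.functional as F

class SoLU(nn.Module):
    """Softmax Linear Unit (Elhage et al., 2022): x * softmax(x) over the last dimension."""
    def forward(self, x):
        return x.softmax(dim = -1) * x

x = torch.randn(2, 8)
out = SoLU()(x)

# the same computation written out explicitly
assert torch.allclose(out, F.softmax(x, dim = -1) * x)
print(out.shape)  # torch.Size([2, 8])
```
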
@@ -1239,6 +1243,7 @@ class FeedForward(Module):
         glu_mult_bias = False,
         swish = False,
         relu_squared = False,
+        solu = False,
         custom_activation = None,
         post_act_ln = False,
         dropout = 0.,

@@ -1250,10 +1255,14 @@ class FeedForward(Module):
         inner_dim = int(dim * mult)
         dim_out = default(dim_out, dim)
 
+        assert at_most_one_of(relu_squared, solu)
+
         if exists(custom_activation):
             activation = deepcopy(custom_activation)
         elif relu_squared:
             activation = ReluSquared()
+        elif solu:
+            activation = SoLU()
         elif swish:
             activation = nn.SiLU()
         else:

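FeedForward gains a `solu = False` flag that is mutually exclusive with `relu_squared` and selects `SoLU()` in place of the default GELU. Assuming the usual `ff_`-prefixed keyword routing into the feedforward blocks (as with the existing `ff_relu_squared` and `ff_glu` flags), enabling it from a full model would presumably look like the sketch below; this is inferred from the diff, not verified against the released wheel.

```python
import torch
from x_transformers import TransformerWrapper, Decoder

model = TransformerWrapper(
    num_tokens = 20000,
    max_seq_len = 1024,
    attn_layers = Decoder(
        dim = 512,
        depth = 6,
        heads = 8,
        ff_solu = True  # assumed ff_-prefixed routing; uses SoLU() instead of GELU in each feedforward
    )
)

x = torch.randint(0, 20000, (1, 256))
logits = model(x)  # (1, 256, 20000)
```
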
x_transformers-2.11.16.dist-info/METADATA → x_transformers-2.11.18.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: x-transformers
-Version: 2.11.16
+Version: 2.11.18
 Summary: X-Transformers
 Project-URL: Homepage, https://pypi.org/project/x-transformers/
 Project-URL: Repository, https://github.com/lucidrains/x-transformers

@@ -2607,4 +2607,14 @@ ids_out, num_out, is_number_mask = model.generate(start_ids, start_nums, 17)
 }
 ```
 
+```bibtex
+@article{elhage2022solu,
+    title   = {Softmax Linear Units},
+    author  = {Elhage, Nelson and Hume, Tristan and Olsson, Catherine and Nanda, Neel and Henighan, Tom and Johnston, Scott and ElShowk, Sheer and Joseph, Nicholas and DasSarma, Nova and Mann, Ben and Hernandez, Danny and Askell, Amanda and Ndousse, Kamal and Jones, Andy and Drain, Dawn and Chen, Anna and Bai, Yuntao and Ganguli, Deep and Lovitt, Liane and Hatfield-Dodds, Zac and Kernion, Jackson and Conerly, Tom and Kravec, Shauna and Fort, Stanislav and Kadavath, Saurav and Jacobson, Josh and Tran-Johnson, Eli and Kaplan, Jared and Clark, Jack and Brown, Tom and McCandlish, Sam and Amodei, Dario and Olah, Christopher},
+    year    = {2022},
+    journal = {Transformer Circuits Thread},
+    note    = {https://transformer-circuits.pub/2022/solu/index.html}
+}
+```
+
 *solve intelligence... then use that to solve everything else.* - Demis Hassabis

x_transformers-2.11.16.dist-info/RECORD → x_transformers-2.11.18.dist-info/RECORD

@@ -5,16 +5,16 @@ x_transformers/belief_state_wrapper.py,sha256=YLUMk6t2MhFBEw5lHDDHJHcoCxTIkHvxTN
 x_transformers/continuous.py,sha256=WwpQCjyVY4PtuEAOFY68zqgklbF9I7AL5w6874YlDe8,13249
 x_transformers/dpo.py,sha256=xt4OuOWhU8pN3OKN2LZAaC2NC8iiEnchqqcrPWVqf0o,3521
 x_transformers/entropy_based_tokenizer.py,sha256=F2lO8-v3aLIcVDVNhu7RR-UtRdlmaaYJzBK9m7OnLE8,5018
-x_transformers/free_transformer.py,sha256=_hYYkaro3xei3MC3rwtuCWi9gSnciXyAT91_7SrA0nw,11396
+x_transformers/free_transformer.py,sha256=F0H_rfb_8_nO4oRbaVDLdfOa8EP4YcUNCOaI2rhkLV0,11541
 x_transformers/gpt_vae.py,sha256=1zyjwgfZr6CRDsh5VMCPSdoCPg-sdX5mXmZ_mn4VyYQ,6082
 x_transformers/multi_input.py,sha256=tCh-fTJDj2ib4SMGtsa-AM8MxKzJAQSwqAXOu3HU2mg,9252
 x_transformers/neo_mlp.py,sha256=XCNnnop9WLarcxap1kGuYc1x8GHvwkZiDRnXOxSl3Po,3452
 x_transformers/nonautoregressive_wrapper.py,sha256=hMQqNimGtchNIe13cR5LZule1V7I1qM5LmY8VQfVdnA,11698
 x_transformers/up_wrapper.py,sha256=YC2LN14_7Xx9Wtiek2rtEJ_qHqdfSmKlh3d7Cgxwd80,7073
-x_transformers/x_transformers.py,sha256=5ctPu8tvlbUMrtW360e_LPnoGv6xcgQFsyWdbvLo6Tk,127002
+x_transformers/x_transformers.py,sha256=pIUxQmj_wLHIMOxyqjy4hKww6NdYtzxtRMWROovHoDA,127212
 x_transformers/xl_autoregressive_wrapper.py,sha256=CvZMJ6A6PA-Y_bQAhnORwjJBSl6Vjq2IdW5KTdk8NI8,4195
 x_transformers/xval.py,sha256=AwwYUm8yDAtKQyKJDIhYMsiLTJ_skh3scUFMjp5sda8,8597
-x_transformers-2.11.16.dist-info/METADATA,sha256=cvhm5LnIRCdqLuv25iSU4vj0a6Np9j2lv2O9W-V48-k,96012
-x_transformers-2.11.16.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-x_transformers-2.11.16.dist-info/licenses/LICENSE,sha256=As9u198X-U-vph5noInuUfqsAG2zX_oXPHDmdjwlPPY,1066
-x_transformers-2.11.16.dist-info/RECORD,,
+x_transformers-2.11.18.dist-info/METADATA,sha256=9VPaNWK5WVVltDqRqkb_4OtPEmJFzfkYkln3aJpKdfQ,96858
+x_transformers-2.11.18.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+x_transformers-2.11.18.dist-info/licenses/LICENSE,sha256=As9u198X-U-vph5noInuUfqsAG2zX_oXPHDmdjwlPPY,1066
+x_transformers-2.11.18.dist-info/RECORD,,