x-transformers 2.0.1__tar.gz → 2.0.2__tar.gz

Files changed (55)
  1. {x_transformers-2.0.1 → x_transformers-2.0.2}/PKG-INFO +2 -1
  2. {x_transformers-2.0.1 → x_transformers-2.0.2}/pyproject.toml +6 -2
  3. {x_transformers-2.0.1 → x_transformers-2.0.2}/train_parity.py +16 -8
  4. {x_transformers-2.0.1 → x_transformers-2.0.2}/x_transformers/x_transformers.py +8 -2
  5. {x_transformers-2.0.1 → x_transformers-2.0.2}/.github/FUNDING.yml +0 -0
  6. {x_transformers-2.0.1 → x_transformers-2.0.2}/.github/workflows/python-publish.yml +0 -0
  7. {x_transformers-2.0.1 → x_transformers-2.0.2}/.github/workflows/python-test.yaml +0 -0
  8. {x_transformers-2.0.1 → x_transformers-2.0.2}/.gitignore +0 -0
  9. {x_transformers-2.0.1 → x_transformers-2.0.2}/LICENSE +0 -0
  10. {x_transformers-2.0.1 → x_transformers-2.0.2}/README.md +0 -0
  11. {x_transformers-2.0.1 → x_transformers-2.0.2}/images/all-attention.png +0 -0
  12. {x_transformers-2.0.1 → x_transformers-2.0.2}/images/attention-on-attention.png +0 -0
  13. {x_transformers-2.0.1 → x_transformers-2.0.2}/images/cosine-sim-attention.png +0 -0
  14. {x_transformers-2.0.1 → x_transformers-2.0.2}/images/deepnorm.png +0 -0
  15. {x_transformers-2.0.1 → x_transformers-2.0.2}/images/dynamic-pos-bias-linear.png +0 -0
  16. {x_transformers-2.0.1 → x_transformers-2.0.2}/images/dynamic-pos-bias-log.png +0 -0
  17. {x_transformers-2.0.1 → x_transformers-2.0.2}/images/dynamic-pos-bias-sinusoidal.png +0 -0
  18. {x_transformers-2.0.1 → x_transformers-2.0.2}/images/dynamic-pos-bias.png +0 -0
  19. {x_transformers-2.0.1 → x_transformers-2.0.2}/images/enhanced-recurrence.png +0 -0
  20. {x_transformers-2.0.1 → x_transformers-2.0.2}/images/fcm.png +0 -0
  21. {x_transformers-2.0.1 → x_transformers-2.0.2}/images/ffglu.png +0 -0
  22. {x_transformers-2.0.1 → x_transformers-2.0.2}/images/flash-attention.png +0 -0
  23. {x_transformers-2.0.1 → x_transformers-2.0.2}/images/gate_values.png +0 -0
  24. {x_transformers-2.0.1 → x_transformers-2.0.2}/images/gating.png +0 -0
  25. {x_transformers-2.0.1 → x_transformers-2.0.2}/images/length-extrapolation-scale.png +0 -0
  26. {x_transformers-2.0.1 → x_transformers-2.0.2}/images/macaron-1.png +0 -0
  27. {x_transformers-2.0.1 → x_transformers-2.0.2}/images/macaron-2.png +0 -0
  28. {x_transformers-2.0.1 → x_transformers-2.0.2}/images/memory-transformer.png +0 -0
  29. {x_transformers-2.0.1 → x_transformers-2.0.2}/images/normformer.png +0 -0
  30. {x_transformers-2.0.1 → x_transformers-2.0.2}/images/pia.png +0 -0
  31. {x_transformers-2.0.1 → x_transformers-2.0.2}/images/qknorm-analysis.png +0 -0
  32. {x_transformers-2.0.1 → x_transformers-2.0.2}/images/resi_dual.png +0 -0
  33. {x_transformers-2.0.1 → x_transformers-2.0.2}/images/residual_attn.png +0 -0
  34. {x_transformers-2.0.1 → x_transformers-2.0.2}/images/rezero.png +0 -0
  35. {x_transformers-2.0.1 → x_transformers-2.0.2}/images/rotary.png +0 -0
  36. {x_transformers-2.0.1 → x_transformers-2.0.2}/images/sandwich-2.png +0 -0
  37. {x_transformers-2.0.1 → x_transformers-2.0.2}/images/sandwich.png +0 -0
  38. {x_transformers-2.0.1 → x_transformers-2.0.2}/images/sandwich_norm.png +0 -0
  39. {x_transformers-2.0.1 → x_transformers-2.0.2}/images/scalenorm.png +0 -0
  40. {x_transformers-2.0.1 → x_transformers-2.0.2}/images/talking-heads.png +0 -0
  41. {x_transformers-2.0.1 → x_transformers-2.0.2}/images/topk-attention.png +0 -0
  42. {x_transformers-2.0.1 → x_transformers-2.0.2}/images/xval.png +0 -0
  43. {x_transformers-2.0.1 → x_transformers-2.0.2}/tests/test_x_transformers.py +0 -0
  44. {x_transformers-2.0.1 → x_transformers-2.0.2}/train_copy.py +0 -0
  45. {x_transformers-2.0.1 → x_transformers-2.0.2}/train_enwik8.py +0 -0
  46. {x_transformers-2.0.1 → x_transformers-2.0.2}/x_transformers/__init__.py +0 -0
  47. {x_transformers-2.0.1 → x_transformers-2.0.2}/x_transformers/attend.py +0 -0
  48. {x_transformers-2.0.1 → x_transformers-2.0.2}/x_transformers/autoregressive_wrapper.py +0 -0
  49. {x_transformers-2.0.1 → x_transformers-2.0.2}/x_transformers/continuous.py +0 -0
  50. {x_transformers-2.0.1 → x_transformers-2.0.2}/x_transformers/dpo.py +0 -0
  51. {x_transformers-2.0.1 → x_transformers-2.0.2}/x_transformers/multi_input.py +0 -0
  52. {x_transformers-2.0.1 → x_transformers-2.0.2}/x_transformers/neo_mlp.py +0 -0
  53. {x_transformers-2.0.1 → x_transformers-2.0.2}/x_transformers/nonautoregressive_wrapper.py +0 -0
  54. {x_transformers-2.0.1 → x_transformers-2.0.2}/x_transformers/xl_autoregressive_wrapper.py +0 -0
  55. {x_transformers-2.0.1 → x_transformers-2.0.2}/x_transformers/xval.py +0 -0
{x_transformers-2.0.1 → x_transformers-2.0.2}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: x-transformers
-Version: 2.0.1
+Version: 2.0.2
 Summary: X-Transformers
 Project-URL: Homepage, https://pypi.org/project/x-transformers/
 Project-URL: Repository, https://github.com/lucidrains/x-transformers
@@ -40,6 +40,7 @@ Requires-Dist: loguru
 Requires-Dist: packaging>=21.0
 Requires-Dist: torch>=2.0
 Provides-Extra: examples
+Requires-Dist: lion-pytorch; extra == 'examples'
 Requires-Dist: torchvision; extra == 'examples'
 Requires-Dist: tqdm; extra == 'examples'
 Provides-Extra: test
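
The examples extra now declares lion-pytorch alongside torchvision and tqdm. A quick, illustrative way to confirm the declaration from an installed distribution (a sketch that assumes x-transformers 2.0.2 is installed, e.g. via pip install "x-transformers[examples]"; the filter string is just a convenience):

from importlib.metadata import requires

# requirements gated behind the 'examples' extra, as declared in PKG-INFO
example_reqs = [r for r in (requires('x-transformers') or []) if 'examples' in r]
print(example_reqs)  # expected to include lion-pytorch, torchvision and tqdm entries
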
{x_transformers-2.0.1 → x_transformers-2.0.2}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "x-transformers"
-version = "2.0.1"
+version = "2.0.2"
 description = "X-Transformers"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
@@ -34,7 +34,11 @@ Homepage = "https://pypi.org/project/x-transformers/"
 Repository = "https://github.com/lucidrains/x-transformers"
 
 [project.optional-dependencies]
-examples = ["tqdm", "torchvision"]
+examples = [
+    "lion-pytorch",
+    "tqdm",
+    "torchvision"
+]
 
 test = [
     "pytest",

{x_transformers-2.0.1 → x_transformers-2.0.2}/train_parity.py

@@ -7,12 +7,16 @@ from x_transformers import TransformerWrapper, Decoder
 
 # constants
 
-NUM_BATCHES = 100000
 BATCH_SIZE = 256
 LEARNING_RATE = 3e-4
 EVAL_EVERY = 500
-TRAIN_MAX_LENGTH = 64
+
 EVAL_LENGTHS = (16, 32, 64, 128, 256, 512)
+TRAIN_MAX_LENGTH = EVAL_LENGTHS[-2]
+
+LOSS_THRES_INCREASE_LEN = 1e-3
+MEET_CRITERIA_THRES_INCREASE_LEN = 10
+
 HYBRIDIZE_WITH_RNN = True
 
 # rnn for fully resolving state tracking by hybridization
@@ -28,6 +32,7 @@ if HYBRIDIZE_WITH_RNN:
 
     decoder_kwargs = dict(
         attn_hybrid_fold_axial_dim = 4, # even if recurrence is every 4 tokens, can generalize for parity
+        attn_hybrid_learned_mix = True,
         attn_hybrid_module = GRU(dim, dim_head * heads, batch_first = True)
     )
 
@@ -48,7 +53,9 @@ model = TransformerWrapper(
 
 # optimizer
 
-adam = optim.Adam(model.parameters(), lr = LEARNING_RATE)
+from lion_pytorch.cautious_lion import Lion
+
+optimizer = Lion(model.parameters(), lr = LEARNING_RATE, cautious_factor = 0.1)
 
 # data generator
 
@@ -73,7 +80,8 @@ meet_criteria = 0
 train_seq_len = 1
 stop_length = EVAL_LENGTHS[-2]
 
-with tqdm.tqdm(range(NUM_BATCHES), mininterval = 10., desc = 'training') as pbar:
+with tqdm.tqdm(mininterval = 10., desc = 'training') as pbar:
+
     while train_seq_len < stop_length:
         model.train()
 
@@ -90,12 +98,12 @@ with tqdm.tqdm(range(NUM_BATCHES), mininterval = 10., desc = 'training') as pbar
         last_loss = loss[:, -1].mean()
         loss.mean().backward()
 
-        if last_loss.item() < 0.001:
+        if last_loss.item() < LOSS_THRES_INCREASE_LEN:
             meet_criteria += 1
         else:
             meet_criteria = 0
 
-        if meet_criteria >= 10:
+        if meet_criteria >= MEET_CRITERIA_THRES_INCREASE_LEN:
             meet_criteria = 0
             train_seq_len += 1
             print(f'criteria met, incrementing to {train_seq_len}')
@@ -103,8 +111,8 @@ with tqdm.tqdm(range(NUM_BATCHES), mininterval = 10., desc = 'training') as pbar
         print(f'({train_seq_len})| {i}: {last_loss.item()}')
         torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
 
-        adam.step()
-        adam.zero_grad()
+        optimizer.step()
+        optimizer.zero_grad()
 
         last_step = train_seq_len == stop_length
 
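train_parity.py now trains with the cautious variant of Lion instead of Adam (hence the new lion-pytorch entry in the examples extra), and the fixed NUM_BATCHES budget gives way to a length curriculum driven by LOSS_THRES_INCREASE_LEN and MEET_CRITERIA_THRES_INCREASE_LEN. Below is a minimal standalone sketch of the optimizer swap only; the nn.Linear stand-in model, tensor sizes, and the single step are illustrative assumptions, while the import path and the cautious_factor keyword are taken from the diff above:

import torch
from torch import nn
from lion_pytorch.cautious_lion import Lion  # provided by the new lion-pytorch dependency

model = nn.Linear(16, 2)  # stand-in for the TransformerWrapper used in train_parity.py

optimizer = Lion(model.parameters(), lr = 3e-4, cautious_factor = 0.1)

loss = model(torch.randn(4, 16)).sum()  # dummy loss for one illustrative step
loss.backward()

torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
optimizer.step()
optimizer.zero_grad()
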
{x_transformers-2.0.1 → x_transformers-2.0.2}/x_transformers/x_transformers.py

@@ -1204,6 +1204,7 @@ class Attention(Module):
         hybrid_module: Module | None = None,
         hybrid_mask_kwarg: str | None = None,
         hybrid_fold_axial_dim: int | None = None,
+        hybrid_learned_mix = False,
         one_kv_head = False,
         kv_heads = None,
         value_dim_head = None,
@@ -1446,7 +1447,7 @@ class Attention(Module):
 
         if exists(hybrid_module) and exists(hybrid_fold_axial_dim):
             hybrid_module = FoldAxially(axial_dim = hybrid_fold_axial_dim, fn = hybrid_module)
-        hybrid_mix = LinearNoBias(dim, heads)
+        hybrid_mix = LinearNoBias(dim, heads) if hybrid_learned_mix else None
 
         hybrid_norms = ModuleList([
             MultiheadRMSNorm(dim_head, heads = heads),
@@ -1779,7 +1780,12 @@ class Attention(Module):
             out = out_norm(out)
             hybrid_out = hybrid_out_norm(hybrid_out)
 
-            out = 0.5 * (out + hybrid_out)
+            if exists(self.hybrid_mix):
+                mix = self.hybrid_mix(x)
+                mix = rearrange(mix, 'b n h -> b h n 1')
+                out = out.lerp(hybrid_out, mix.sigmoid())
+            else:
+                out = 0.5 * (out + hybrid_out)
 
         # merge heads
 
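The library-side change behind the new attn_hybrid_learned_mix flag: when enabled, a bias-free linear projection of the layer input produces one gate per head and position, and the attention and hybrid (e.g. GRU) branches are blended with a sigmoid-weighted lerp instead of the fixed 0.5 average. The sketch below reproduces just that blending step with standalone tensors; the toy shapes and the plain nn.Linear standing in for LinearNoBias are illustrative assumptions, not the library's internals:

import torch
from einops import rearrange

b, h, n, d = 2, 8, 16, 64             # batch, heads, sequence length, head dim (assumed toy sizes)
dim = 512                             # model dimension fed to the gate projection (assumed)

x = torch.randn(b, n, dim)            # layer input
out = torch.randn(b, h, n, d)         # normalized attention branch output
hybrid_out = torch.randn(b, h, n, d)  # normalized hybrid (e.g. GRU) branch output

hybrid_mix = torch.nn.Linear(dim, h, bias = False)  # plays the role of LinearNoBias(dim, heads)

mix = hybrid_mix(x)                        # (b, n, h): one gate logit per position and head
mix = rearrange(mix, 'b n h -> b h n 1')   # broadcast over the head feature dim
out = out.lerp(hybrid_out, mix.sigmoid())  # gate 0 -> pure attention, 1 -> pure hybrid branch

print(out.shape)  # torch.Size([2, 8, 16, 64])
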