x-transformers 2.2.0.tar.gz → 2.2.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. {x_transformers-2.2.0 → x_transformers-2.2.2}/PKG-INFO +1 -1
  2. {x_transformers-2.2.0 → x_transformers-2.2.2}/pyproject.toml +1 -1
  3. {x_transformers-2.2.0 → x_transformers-2.2.2}/tests/test_x_transformers.py +9 -2
  4. {x_transformers-2.2.0 → x_transformers-2.2.2}/x_transformers/entropy_based_tokenizer.py +43 -7
  5. {x_transformers-2.2.0 → x_transformers-2.2.2}/x_transformers/x_transformers.py +1 -1
  6. {x_transformers-2.2.0 → x_transformers-2.2.2}/.github/FUNDING.yml +0 -0
  7. {x_transformers-2.2.0 → x_transformers-2.2.2}/.github/workflows/python-publish.yml +0 -0
  8. {x_transformers-2.2.0 → x_transformers-2.2.2}/.github/workflows/python-test.yaml +0 -0
  9. {x_transformers-2.2.0 → x_transformers-2.2.2}/.gitignore +0 -0
  10. {x_transformers-2.2.0 → x_transformers-2.2.2}/LICENSE +0 -0
  11. {x_transformers-2.2.0 → x_transformers-2.2.2}/README.md +0 -0
  12. {x_transformers-2.2.0 → x_transformers-2.2.2}/data/README.md +0 -0
  13. {x_transformers-2.2.0 → x_transformers-2.2.2}/data/enwik8.gz +0 -0
  14. {x_transformers-2.2.0 → x_transformers-2.2.2}/images/all-attention.png +0 -0
  15. {x_transformers-2.2.0 → x_transformers-2.2.2}/images/attention-on-attention.png +0 -0
  16. {x_transformers-2.2.0 → x_transformers-2.2.2}/images/cosine-sim-attention.png +0 -0
  17. {x_transformers-2.2.0 → x_transformers-2.2.2}/images/deepnorm.png +0 -0
  18. {x_transformers-2.2.0 → x_transformers-2.2.2}/images/dynamic-pos-bias-linear.png +0 -0
  19. {x_transformers-2.2.0 → x_transformers-2.2.2}/images/dynamic-pos-bias-log.png +0 -0
  20. {x_transformers-2.2.0 → x_transformers-2.2.2}/images/dynamic-pos-bias-sinusoidal.png +0 -0
  21. {x_transformers-2.2.0 → x_transformers-2.2.2}/images/dynamic-pos-bias.png +0 -0
  22. {x_transformers-2.2.0 → x_transformers-2.2.2}/images/enhanced-recurrence.png +0 -0
  23. {x_transformers-2.2.0 → x_transformers-2.2.2}/images/fcm.png +0 -0
  24. {x_transformers-2.2.0 → x_transformers-2.2.2}/images/ffglu.png +0 -0
  25. {x_transformers-2.2.0 → x_transformers-2.2.2}/images/flash-attention.png +0 -0
  26. {x_transformers-2.2.0 → x_transformers-2.2.2}/images/gate_values.png +0 -0
  27. {x_transformers-2.2.0 → x_transformers-2.2.2}/images/gating.png +0 -0
  28. {x_transformers-2.2.0 → x_transformers-2.2.2}/images/length-extrapolation-scale.png +0 -0
  29. {x_transformers-2.2.0 → x_transformers-2.2.2}/images/macaron-1.png +0 -0
  30. {x_transformers-2.2.0 → x_transformers-2.2.2}/images/macaron-2.png +0 -0
  31. {x_transformers-2.2.0 → x_transformers-2.2.2}/images/memory-transformer.png +0 -0
  32. {x_transformers-2.2.0 → x_transformers-2.2.2}/images/normformer.png +0 -0
  33. {x_transformers-2.2.0 → x_transformers-2.2.2}/images/pia.png +0 -0
  34. {x_transformers-2.2.0 → x_transformers-2.2.2}/images/qknorm-analysis.png +0 -0
  35. {x_transformers-2.2.0 → x_transformers-2.2.2}/images/resi_dual.png +0 -0
  36. {x_transformers-2.2.0 → x_transformers-2.2.2}/images/residual_attn.png +0 -0
  37. {x_transformers-2.2.0 → x_transformers-2.2.2}/images/rezero.png +0 -0
  38. {x_transformers-2.2.0 → x_transformers-2.2.2}/images/rotary.png +0 -0
  39. {x_transformers-2.2.0 → x_transformers-2.2.2}/images/sandwich-2.png +0 -0
  40. {x_transformers-2.2.0 → x_transformers-2.2.2}/images/sandwich.png +0 -0
  41. {x_transformers-2.2.0 → x_transformers-2.2.2}/images/sandwich_norm.png +0 -0
  42. {x_transformers-2.2.0 → x_transformers-2.2.2}/images/scalenorm.png +0 -0
  43. {x_transformers-2.2.0 → x_transformers-2.2.2}/images/talking-heads.png +0 -0
  44. {x_transformers-2.2.0 → x_transformers-2.2.2}/images/topk-attention.png +0 -0
  45. {x_transformers-2.2.0 → x_transformers-2.2.2}/images/xval.png +0 -0
  46. {x_transformers-2.2.0 → x_transformers-2.2.2}/train_belief_state.py +0 -0
  47. {x_transformers-2.2.0 → x_transformers-2.2.2}/train_copy.py +0 -0
  48. {x_transformers-2.2.0 → x_transformers-2.2.2}/train_enwik8.py +0 -0
  49. {x_transformers-2.2.0 → x_transformers-2.2.2}/train_length_extrapolate.py +0 -0
  50. {x_transformers-2.2.0 → x_transformers-2.2.2}/train_parity.py +0 -0
  51. {x_transformers-2.2.0 → x_transformers-2.2.2}/x_transformers/__init__.py +0 -0
  52. {x_transformers-2.2.0 → x_transformers-2.2.2}/x_transformers/attend.py +0 -0
  53. {x_transformers-2.2.0 → x_transformers-2.2.2}/x_transformers/autoregressive_wrapper.py +0 -0
  54. {x_transformers-2.2.0 → x_transformers-2.2.2}/x_transformers/belief_state_wrapper.py +0 -0
  55. {x_transformers-2.2.0 → x_transformers-2.2.2}/x_transformers/continuous.py +0 -0
  56. {x_transformers-2.2.0 → x_transformers-2.2.2}/x_transformers/dpo.py +0 -0
  57. {x_transformers-2.2.0 → x_transformers-2.2.2}/x_transformers/multi_input.py +0 -0
  58. {x_transformers-2.2.0 → x_transformers-2.2.2}/x_transformers/neo_mlp.py +0 -0
  59. {x_transformers-2.2.0 → x_transformers-2.2.2}/x_transformers/nonautoregressive_wrapper.py +0 -0
  60. {x_transformers-2.2.0 → x_transformers-2.2.2}/x_transformers/xl_autoregressive_wrapper.py +0 -0
  61. {x_transformers-2.2.0 → x_transformers-2.2.2}/x_transformers/xval.py +0 -0
--- x_transformers-2.2.0/PKG-INFO
+++ x_transformers-2.2.2/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: x-transformers
-Version: 2.2.0
+Version: 2.2.2
 Summary: X-Transformers
 Project-URL: Homepage, https://pypi.org/project/x-transformers/
 Project-URL: Repository, https://github.com/lucidrains/x-transformers
--- x_transformers-2.2.0/pyproject.toml
+++ x_transformers-2.2.2/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "x-transformers"
-version = "2.2.0"
+version = "2.2.2"
 description = "X-Transformers"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
--- x_transformers-2.2.0/tests/test_x_transformers.py
+++ x_transformers-2.2.2/tests/test_x_transformers.py
@@ -769,7 +769,10 @@ def test_dynamic_tanh():
 
     model(x)
 
-def test_entropy_based_tokenizer():
+@pytest.mark.parametrize('var_length', (False, True))
+def test_entropy_based_tokenizer(
+    var_length
+):
     from x_transformers.entropy_based_tokenizer import EntropyBasedTokenizer
 
     model = TransformerWrapper(
@@ -787,6 +790,10 @@ def test_entropy_based_tokenizer():
 
     seq = torch.randint(0, 20000, (2, 1024))
 
-    segmented_seq = tokenizer(seq, return_segmented_seq = True)
+    lens = None
+    if var_length:
+        lens = torch.randint(512, 768, (2,))
+
+    segmented_seq = tokenizer(seq, lens, return_segmented_seq = True)
 
     assert len(segmented_seq) == seq.shape[0]
--- x_transformers-2.2.0/x_transformers/entropy_based_tokenizer.py
+++ x_transformers-2.2.2/x_transformers/entropy_based_tokenizer.py
@@ -1,10 +1,14 @@
+from itertools import zip_longest
+
 import torch
+from torch import tensor
 import torch.nn.functional as F
 from torch.nn import Module
 from torch.nn.utils.rnn import pad_sequence
 
 from x_transformers.x_transformers import Decoder, TransformerWrapper
 
+import einx
 from einops import repeat, rearrange
 
 # helper functions
@@ -24,7 +28,7 @@ class EntropyBasedTokenizer(Module):
     def __init__(
         self,
         decoder: TransformerWrapper,
-        entropy_threshold = 1.5
+        entropy_threshold: float
     ):
         super().__init__()
         assert isinstance(decoder.attn_layers, Decoder)
@@ -36,29 +40,56 @@ class EntropyBasedTokenizer(Module):
     def forward(
         self,
         seq,
+        lens = None, # Int['b']
        return_segmented_seq = False
     ):
         self.decoder.eval()
 
+        is_var_length = exists(lens)
         batch, seq_len, device = *seq.shape, seq.device
 
+        arange = torch.arange(seq_len, device = device)
+
+        # forward through a small trained decoder and get the entropies of the logits
+
         _, intermediates = self.decoder(seq, return_logit_entropies = True)
 
         entropies = intermediates.logit_entropies
 
-        over_thres_mask = entropies >= self.entropy_threshold
+        # get length mask for boundaries
+
+        mask = tensor(True, device = device)
+
+        if is_var_length:
+            mask = einx.less('n, b -> b n', arange, lens)
+
+        # the mask for tokens that were of a sufficient surprise level
 
-        arange = torch.arange(seq_len, device = device) + 1
-        arange = repeat(arange, 'n -> b n', b = batch)
+        over_thres_mask = (entropies >= self.entropy_threshold) & mask
+
+        # needed for selecting out indices at entropy threshold mask
+
+        arange_plus_one = arange + 1
+        arange_plus_one = repeat(arange_plus_one, 'n -> b n', b = batch)
 
         # get a tensor of Int['b num_tokens'] with the token lengths, zero padded
 
         boundaries = over_thres_mask.clone()
-        boundaries[..., -1] = True # last token is always a boundary
+
+        # set the boundary of the last token
+
+        # if `lens` not given, assume always last token
+        # but if `lens` were given, then properly set the index
+
+        if not is_var_length:
+            boundaries[..., -1] = True
+        else:
+            scatter_indices = rearrange(lens - 1, 'b -> b 1')
+            boundaries.scatter_(-1, scatter_indices, True)
 
         num_tokens = boundaries.sum(dim = -1) # number of tokens
 
-        boundaries = arange[boundaries].split(num_tokens.tolist())
+        boundaries = arange_plus_one[boundaries].split(num_tokens.tolist())
 
         # get the token lengths
 
@@ -79,12 +110,17 @@ class EntropyBasedTokenizer(Module):
 
         # segment the sequence based on the token lengths
 
+        lens = default(lens, (None,))
         segmented_seq = []
 
-        for one_seq, one_token_length in zip(seq, token_lengths):
+        for one_seq, one_len, one_token_length in zip_longest(seq, lens, token_lengths):
+
+            if exists(one_len):
+                one_seq = one_seq[:one_len]
 
             one_token_length = one_token_length[one_token_length > 0]
 
+            print(one_seq.shape, one_token_length)
             splitted_seq = one_seq.split(one_token_length.tolist())
             segmented_seq.append(splitted_seq)
 
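Taken together, the tokenizer changes amount to a small API shift: entropy_threshold no longer has a default and must be supplied at construction, and forward accepts an optional lens tensor for variable-length sequences. Below is a minimal usage sketch of the new call pattern, mirroring the updated test; the model hyperparameters and the 1.5 threshold are illustrative choices, not values fixed by this diff.

import torch
from x_transformers import TransformerWrapper, Decoder
from x_transformers.entropy_based_tokenizer import EntropyBasedTokenizer

# a small decoder supplies the logit entropies (sizes here are illustrative)
model = TransformerWrapper(
    num_tokens = 20000,
    max_seq_len = 1024,
    attn_layers = Decoder(
        dim = 128,
        depth = 2,
        heads = 4
    )
)

# entropy_threshold is now required (the previous default was 1.5)
tokenizer = EntropyBasedTokenizer(model, entropy_threshold = 1.5)

seq = torch.randint(0, 20000, (2, 1024))

# optional per-sequence lengths exercise the new variable-length path
lens = torch.randint(512, 768, (2,))

segmented_seq = tokenizer(seq, lens, return_segmented_seq = True)
assert len(segmented_seq) == seq.shape[0]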
--- x_transformers-2.2.0/x_transformers/x_transformers.py
+++ x_transformers-2.2.2/x_transformers/x_transformers.py
@@ -2006,7 +2006,7 @@ class AttentionLayers(Module):
         assert not (rotary_xpos and not causal), 'rotary xpos is not compatible with bidirectional attention'
         self.rotary_pos_emb = RotaryEmbedding(rotary_emb_dim, use_xpos = rotary_xpos, scale_base = rotary_xpos_scale_base, interpolation_factor = rotary_interpolation_factor, base_rescale_factor = rotary_base_rescale_factor) if rotary_pos_emb else None
 
-        assert at_most_one_of(alibi_pos_bias, rel_pos_bias, data_dependent_alibi), 'you can only choose one of Alibi positional bias, data dependent Alibi (forgetting transformers), or T5 relative positional bias'
+        assert at_most_one_of(alibi_pos_bias, rel_pos_bias, data_dependent_alibi), 'you can only choose one of Alibi positional bias, data dependent Alibi (forgetting transformers), dynamic tanh, or T5 relative positional bias'
         assert rel_pos_num_buckets <= rel_pos_max_distance, 'number of relative position buckets must be less than the relative position max distance'
 
         # relative positional bias
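The only change to x_transformers.py is the wording of the assertion message, but the constraint it guards is unchanged: at most one of alibi_pos_bias, rel_pos_bias, and data_dependent_alibi may be enabled on the attention layers. A minimal sketch of a configuration that passes the assertion (dimensions are illustrative):

from x_transformers import TransformerWrapper, Decoder

# enable at most one of: alibi_pos_bias, rel_pos_bias, data_dependent_alibi
model = TransformerWrapper(
    num_tokens = 20000,
    max_seq_len = 1024,
    attn_layers = Decoder(
        dim = 512,
        depth = 6,
        heads = 8,
        alibi_pos_bias = True
    )
)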