x-transformers 2.2.2__tar.gz → 2.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. {x_transformers-2.2.2 → x_transformers-2.2.4}/PKG-INFO +1 -1
  2. {x_transformers-2.2.2 → x_transformers-2.2.4}/pyproject.toml +1 -1
  3. {x_transformers-2.2.2 → x_transformers-2.2.4}/tests/test_x_transformers.py +2 -0
  4. {x_transformers-2.2.2 → x_transformers-2.2.4}/x_transformers/entropy_based_tokenizer.py +15 -8
  5. {x_transformers-2.2.2 → x_transformers-2.2.4}/.github/FUNDING.yml +0 -0
  6. {x_transformers-2.2.2 → x_transformers-2.2.4}/.github/workflows/python-publish.yml +0 -0
  7. {x_transformers-2.2.2 → x_transformers-2.2.4}/.github/workflows/python-test.yaml +0 -0
  8. {x_transformers-2.2.2 → x_transformers-2.2.4}/.gitignore +0 -0
  9. {x_transformers-2.2.2 → x_transformers-2.2.4}/LICENSE +0 -0
  10. {x_transformers-2.2.2 → x_transformers-2.2.4}/README.md +0 -0
  11. {x_transformers-2.2.2 → x_transformers-2.2.4}/data/README.md +0 -0
  12. {x_transformers-2.2.2 → x_transformers-2.2.4}/data/enwik8.gz +0 -0
  13. {x_transformers-2.2.2 → x_transformers-2.2.4}/images/all-attention.png +0 -0
  14. {x_transformers-2.2.2 → x_transformers-2.2.4}/images/attention-on-attention.png +0 -0
  15. {x_transformers-2.2.2 → x_transformers-2.2.4}/images/cosine-sim-attention.png +0 -0
  16. {x_transformers-2.2.2 → x_transformers-2.2.4}/images/deepnorm.png +0 -0
  17. {x_transformers-2.2.2 → x_transformers-2.2.4}/images/dynamic-pos-bias-linear.png +0 -0
  18. {x_transformers-2.2.2 → x_transformers-2.2.4}/images/dynamic-pos-bias-log.png +0 -0
  19. {x_transformers-2.2.2 → x_transformers-2.2.4}/images/dynamic-pos-bias-sinusoidal.png +0 -0
  20. {x_transformers-2.2.2 → x_transformers-2.2.4}/images/dynamic-pos-bias.png +0 -0
  21. {x_transformers-2.2.2 → x_transformers-2.2.4}/images/enhanced-recurrence.png +0 -0
  22. {x_transformers-2.2.2 → x_transformers-2.2.4}/images/fcm.png +0 -0
  23. {x_transformers-2.2.2 → x_transformers-2.2.4}/images/ffglu.png +0 -0
  24. {x_transformers-2.2.2 → x_transformers-2.2.4}/images/flash-attention.png +0 -0
  25. {x_transformers-2.2.2 → x_transformers-2.2.4}/images/gate_values.png +0 -0
  26. {x_transformers-2.2.2 → x_transformers-2.2.4}/images/gating.png +0 -0
  27. {x_transformers-2.2.2 → x_transformers-2.2.4}/images/length-extrapolation-scale.png +0 -0
  28. {x_transformers-2.2.2 → x_transformers-2.2.4}/images/macaron-1.png +0 -0
  29. {x_transformers-2.2.2 → x_transformers-2.2.4}/images/macaron-2.png +0 -0
  30. {x_transformers-2.2.2 → x_transformers-2.2.4}/images/memory-transformer.png +0 -0
  31. {x_transformers-2.2.2 → x_transformers-2.2.4}/images/normformer.png +0 -0
  32. {x_transformers-2.2.2 → x_transformers-2.2.4}/images/pia.png +0 -0
  33. {x_transformers-2.2.2 → x_transformers-2.2.4}/images/qknorm-analysis.png +0 -0
  34. {x_transformers-2.2.2 → x_transformers-2.2.4}/images/resi_dual.png +0 -0
  35. {x_transformers-2.2.2 → x_transformers-2.2.4}/images/residual_attn.png +0 -0
  36. {x_transformers-2.2.2 → x_transformers-2.2.4}/images/rezero.png +0 -0
  37. {x_transformers-2.2.2 → x_transformers-2.2.4}/images/rotary.png +0 -0
  38. {x_transformers-2.2.2 → x_transformers-2.2.4}/images/sandwich-2.png +0 -0
  39. {x_transformers-2.2.2 → x_transformers-2.2.4}/images/sandwich.png +0 -0
  40. {x_transformers-2.2.2 → x_transformers-2.2.4}/images/sandwich_norm.png +0 -0
  41. {x_transformers-2.2.2 → x_transformers-2.2.4}/images/scalenorm.png +0 -0
  42. {x_transformers-2.2.2 → x_transformers-2.2.4}/images/talking-heads.png +0 -0
  43. {x_transformers-2.2.2 → x_transformers-2.2.4}/images/topk-attention.png +0 -0
  44. {x_transformers-2.2.2 → x_transformers-2.2.4}/images/xval.png +0 -0
  45. {x_transformers-2.2.2 → x_transformers-2.2.4}/train_belief_state.py +0 -0
  46. {x_transformers-2.2.2 → x_transformers-2.2.4}/train_copy.py +0 -0
  47. {x_transformers-2.2.2 → x_transformers-2.2.4}/train_enwik8.py +0 -0
  48. {x_transformers-2.2.2 → x_transformers-2.2.4}/train_length_extrapolate.py +0 -0
  49. {x_transformers-2.2.2 → x_transformers-2.2.4}/train_parity.py +0 -0
  50. {x_transformers-2.2.2 → x_transformers-2.2.4}/x_transformers/__init__.py +0 -0
  51. {x_transformers-2.2.2 → x_transformers-2.2.4}/x_transformers/attend.py +0 -0
  52. {x_transformers-2.2.2 → x_transformers-2.2.4}/x_transformers/autoregressive_wrapper.py +0 -0
  53. {x_transformers-2.2.2 → x_transformers-2.2.4}/x_transformers/belief_state_wrapper.py +0 -0
  54. {x_transformers-2.2.2 → x_transformers-2.2.4}/x_transformers/continuous.py +0 -0
  55. {x_transformers-2.2.2 → x_transformers-2.2.4}/x_transformers/dpo.py +0 -0
  56. {x_transformers-2.2.2 → x_transformers-2.2.4}/x_transformers/multi_input.py +0 -0
  57. {x_transformers-2.2.2 → x_transformers-2.2.4}/x_transformers/neo_mlp.py +0 -0
  58. {x_transformers-2.2.2 → x_transformers-2.2.4}/x_transformers/nonautoregressive_wrapper.py +0 -0
  59. {x_transformers-2.2.2 → x_transformers-2.2.4}/x_transformers/x_transformers.py +0 -0
  60. {x_transformers-2.2.2 → x_transformers-2.2.4}/x_transformers/xl_autoregressive_wrapper.py +0 -0
  61. {x_transformers-2.2.2 → x_transformers-2.2.4}/x_transformers/xval.py +0 -0
--- x_transformers-2.2.2/PKG-INFO
+++ x_transformers-2.2.4/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: x-transformers
-Version: 2.2.2
+Version: 2.2.4
 Summary: X-Transformers
 Project-URL: Homepage, https://pypi.org/project/x-transformers/
 Project-URL: Repository, https://github.com/lucidrains/x-transformers
--- x_transformers-2.2.2/pyproject.toml
+++ x_transformers-2.2.4/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "x-transformers"
-version = "2.2.2"
+version = "2.2.4"
 description = "X-Transformers"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
--- x_transformers-2.2.2/tests/test_x_transformers.py
+++ x_transformers-2.2.4/tests/test_x_transformers.py
@@ -797,3 +797,5 @@ def test_entropy_based_tokenizer(
     segmented_seq = tokenizer(seq, lens, return_segmented_seq = True)
 
     assert len(segmented_seq) == seq.shape[0]
+
+    tokenizer(seq[0]) # able to handle without batch dim
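The added test exercises the new un-batched path. Below is a minimal usage sketch of that capability; the constructor arguments (in particular entropy_threshold) are assumptions about the class API and are not part of this diff.

import torch
from x_transformers import TransformerWrapper, Decoder
from x_transformers.entropy_based_tokenizer import EntropyBasedTokenizer

# small character-level decoder acting as the entropy model
model = TransformerWrapper(
    num_tokens = 256,
    max_seq_len = 1024,
    attn_layers = Decoder(dim = 128, depth = 2, heads = 4)
)

# 'entropy_threshold' is an assumed keyword - check the class signature
tokenizer = EntropyBasedTokenizer(model, entropy_threshold = 2.5)

seq = torch.randint(0, 256, (2, 512))

token_lengths = tokenizer(seq)           # batched input, per-sample token lengths
token_lengths_single = tokenizer(seq[0]) # as of 2.2.4, a bare 1-D sequence also works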
--- x_transformers-2.2.2/x_transformers/entropy_based_tokenizer.py
+++ x_transformers-2.2.4/x_transformers/entropy_based_tokenizer.py
@@ -9,7 +9,7 @@ from torch.nn.utils.rnn import pad_sequence
 from x_transformers.x_transformers import Decoder, TransformerWrapper
 
 import einx
-from einops import repeat, rearrange
+from einops import repeat, rearrange, pack, unpack
 
 # helper functions
 
@@ -39,10 +39,13 @@ class EntropyBasedTokenizer(Module):
     @torch.no_grad()
     def forward(
         self,
-        seq,
-        lens = None, # Int['b']
+        seq,         # Float['b n'] | Float['n']
+        lens = None, # Int['b']
         return_segmented_seq = False
     ):
+        no_batch_dim = seq.ndim == 1
+        seq, maybe_batch_ps = pack((seq,), '* n')
+
         self.decoder.eval()
 
         is_var_length = exists(lens)
@@ -89,15 +92,15 @@ class EntropyBasedTokenizer(Module):
 
         num_tokens = boundaries.sum(dim = -1) # number of tokens
 
-        boundaries = arange_plus_one[boundaries].split(num_tokens.tolist())
+        indices = arange_plus_one[boundaries].split(num_tokens.tolist())
 
         # get the token lengths
 
         token_lengths = []
 
-        for one_boundary in boundaries:
-            padded_boundary = F.pad(one_boundary, (1, 0), value = 0.)
-            one_token_lengths = padded_boundary[1:] - padded_boundary[:-1]
+        for one_indices in indices:
+            padded_indices = F.pad(one_indices, (1, 0), value = 0.)
+            one_token_lengths = padded_indices[1:] - padded_indices[:-1]
 
             token_lengths.append(one_token_lengths)
 
@@ -106,6 +109,8 @@ class EntropyBasedTokenizer(Module):
         # early return
 
         if not return_segmented_seq:
+            token_lengths, = unpack(token_lengths, maybe_batch_ps, '* num_tokens')
+
             return token_lengths
 
         # segment the sequence based on the token lengths
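The pack/unpack calls introduced in this release follow the usual einops idiom for tolerating an optional leading batch dimension: pack adds the axis if it is missing and records the original shape, unpack restores it on the way out. A standalone sketch of the pattern (maybe_batched and fn are illustrative names, not part of the library):

import torch
from einops import pack, unpack

def maybe_batched(fn, seq):
    # add a leading batch axis if seq is 1-D; ps remembers the original shape
    seq, ps = pack((seq,), '* n')

    out = fn(seq)                   # fn always sees a (batch, n) tensor

    out, = unpack(out, ps, '* n')   # undo the packing: (n,) in -> (n,) out
    return out

x = torch.randn(10)                     # no batch dimension
y = maybe_batched(lambda t: t * 2, x)   # y.shape == (10,)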
@@ -120,8 +125,10 @@ class EntropyBasedTokenizer(Module):
 
             one_token_length = one_token_length[one_token_length > 0]
 
-            print(one_seq.shape, one_token_length)
             splitted_seq = one_seq.split(one_token_length.tolist())
             segmented_seq.append(splitted_seq)
 
+        if no_batch_dim:
+            segmented_seq = segmented_seq[0]
+
         return segmented_seq
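With return_segmented_seq = True, the final hunk makes the un-batched call return the splits of that single sequence directly instead of a one-element list. Reusing tokenizer and seq from the sketch after the test diff above:

segments = tokenizer(seq, return_segmented_seq = True)            # one tuple of variable-length tensors per sample
segments_single = tokenizer(seq[0], return_segmented_seq = True)  # splits for the single sequence, no wrapping list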