x-transformers 2.2.8__tar.gz → 2.2.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. {x_transformers-2.2.8 → x_transformers-2.2.9}/PKG-INFO +1 -1
  2. {x_transformers-2.2.8 → x_transformers-2.2.9}/pyproject.toml +1 -1
  3. {x_transformers-2.2.8 → x_transformers-2.2.9}/tests/test_x_transformers.py +0 -1
  4. {x_transformers-2.2.8 → x_transformers-2.2.9}/x_transformers/entropy_based_tokenizer.py +28 -3
  5. {x_transformers-2.2.8 → x_transformers-2.2.9}/.github/FUNDING.yml +0 -0
  6. {x_transformers-2.2.8 → x_transformers-2.2.9}/.github/workflows/python-publish.yml +0 -0
  7. {x_transformers-2.2.8 → x_transformers-2.2.9}/.github/workflows/python-test.yaml +0 -0
  8. {x_transformers-2.2.8 → x_transformers-2.2.9}/.gitignore +0 -0
  9. {x_transformers-2.2.8 → x_transformers-2.2.9}/LICENSE +0 -0
  10. {x_transformers-2.2.8 → x_transformers-2.2.9}/README.md +0 -0
  11. {x_transformers-2.2.8 → x_transformers-2.2.9}/data/README.md +0 -0
  12. {x_transformers-2.2.8 → x_transformers-2.2.9}/data/enwik8.gz +0 -0
  13. {x_transformers-2.2.8 → x_transformers-2.2.9}/images/all-attention.png +0 -0
  14. {x_transformers-2.2.8 → x_transformers-2.2.9}/images/attention-on-attention.png +0 -0
  15. {x_transformers-2.2.8 → x_transformers-2.2.9}/images/cosine-sim-attention.png +0 -0
  16. {x_transformers-2.2.8 → x_transformers-2.2.9}/images/deepnorm.png +0 -0
  17. {x_transformers-2.2.8 → x_transformers-2.2.9}/images/dynamic-pos-bias-linear.png +0 -0
  18. {x_transformers-2.2.8 → x_transformers-2.2.9}/images/dynamic-pos-bias-log.png +0 -0
  19. {x_transformers-2.2.8 → x_transformers-2.2.9}/images/dynamic-pos-bias-sinusoidal.png +0 -0
  20. {x_transformers-2.2.8 → x_transformers-2.2.9}/images/dynamic-pos-bias.png +0 -0
  21. {x_transformers-2.2.8 → x_transformers-2.2.9}/images/enhanced-recurrence.png +0 -0
  22. {x_transformers-2.2.8 → x_transformers-2.2.9}/images/fcm.png +0 -0
  23. {x_transformers-2.2.8 → x_transformers-2.2.9}/images/ffglu.png +0 -0
  24. {x_transformers-2.2.8 → x_transformers-2.2.9}/images/flash-attention.png +0 -0
  25. {x_transformers-2.2.8 → x_transformers-2.2.9}/images/gate_values.png +0 -0
  26. {x_transformers-2.2.8 → x_transformers-2.2.9}/images/gating.png +0 -0
  27. {x_transformers-2.2.8 → x_transformers-2.2.9}/images/length-extrapolation-scale.png +0 -0
  28. {x_transformers-2.2.8 → x_transformers-2.2.9}/images/macaron-1.png +0 -0
  29. {x_transformers-2.2.8 → x_transformers-2.2.9}/images/macaron-2.png +0 -0
  30. {x_transformers-2.2.8 → x_transformers-2.2.9}/images/memory-transformer.png +0 -0
  31. {x_transformers-2.2.8 → x_transformers-2.2.9}/images/normformer.png +0 -0
  32. {x_transformers-2.2.8 → x_transformers-2.2.9}/images/pia.png +0 -0
  33. {x_transformers-2.2.8 → x_transformers-2.2.9}/images/qknorm-analysis.png +0 -0
  34. {x_transformers-2.2.8 → x_transformers-2.2.9}/images/resi_dual.png +0 -0
  35. {x_transformers-2.2.8 → x_transformers-2.2.9}/images/residual_attn.png +0 -0
  36. {x_transformers-2.2.8 → x_transformers-2.2.9}/images/rezero.png +0 -0
  37. {x_transformers-2.2.8 → x_transformers-2.2.9}/images/rotary.png +0 -0
  38. {x_transformers-2.2.8 → x_transformers-2.2.9}/images/sandwich-2.png +0 -0
  39. {x_transformers-2.2.8 → x_transformers-2.2.9}/images/sandwich.png +0 -0
  40. {x_transformers-2.2.8 → x_transformers-2.2.9}/images/sandwich_norm.png +0 -0
  41. {x_transformers-2.2.8 → x_transformers-2.2.9}/images/scalenorm.png +0 -0
  42. {x_transformers-2.2.8 → x_transformers-2.2.9}/images/talking-heads.png +0 -0
  43. {x_transformers-2.2.8 → x_transformers-2.2.9}/images/topk-attention.png +0 -0
  44. {x_transformers-2.2.8 → x_transformers-2.2.9}/images/xval.png +0 -0
  45. {x_transformers-2.2.8 → x_transformers-2.2.9}/train_belief_state.py +0 -0
  46. {x_transformers-2.2.8 → x_transformers-2.2.9}/train_copy.py +0 -0
  47. {x_transformers-2.2.8 → x_transformers-2.2.9}/train_entropy_tokenizer.py +0 -0
  48. {x_transformers-2.2.8 → x_transformers-2.2.9}/train_enwik8.py +0 -0
  49. {x_transformers-2.2.8 → x_transformers-2.2.9}/train_length_extrapolate.py +0 -0
  50. {x_transformers-2.2.8 → x_transformers-2.2.9}/train_parity.py +0 -0
  51. {x_transformers-2.2.8 → x_transformers-2.2.9}/x_transformers/__init__.py +0 -0
  52. {x_transformers-2.2.8 → x_transformers-2.2.9}/x_transformers/attend.py +0 -0
  53. {x_transformers-2.2.8 → x_transformers-2.2.9}/x_transformers/autoregressive_wrapper.py +0 -0
  54. {x_transformers-2.2.8 → x_transformers-2.2.9}/x_transformers/belief_state_wrapper.py +0 -0
  55. {x_transformers-2.2.8 → x_transformers-2.2.9}/x_transformers/continuous.py +0 -0
  56. {x_transformers-2.2.8 → x_transformers-2.2.9}/x_transformers/dpo.py +0 -0
  57. {x_transformers-2.2.8 → x_transformers-2.2.9}/x_transformers/multi_input.py +0 -0
  58. {x_transformers-2.2.8 → x_transformers-2.2.9}/x_transformers/neo_mlp.py +0 -0
  59. {x_transformers-2.2.8 → x_transformers-2.2.9}/x_transformers/nonautoregressive_wrapper.py +0 -0
  60. {x_transformers-2.2.8 → x_transformers-2.2.9}/x_transformers/x_transformers.py +0 -0
  61. {x_transformers-2.2.8 → x_transformers-2.2.9}/x_transformers/xl_autoregressive_wrapper.py +0 -0
  62. {x_transformers-2.2.8 → x_transformers-2.2.9}/x_transformers/xval.py +0 -0
{x_transformers-2.2.8 → x_transformers-2.2.9}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: x-transformers
-Version: 2.2.8
+Version: 2.2.9
 Summary: X-Transformers
 Project-URL: Homepage, https://pypi.org/project/x-transformers/
 Project-URL: Repository, https://github.com/lucidrains/x-transformers
{x_transformers-2.2.8 → x_transformers-2.2.9}/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "x-transformers"
-version = "2.2.8"
+version = "2.2.9"
 description = "X-Transformers"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
{x_transformers-2.2.8 → x_transformers-2.2.9}/tests/test_x_transformers.py
@@ -819,4 +819,3 @@ def test_custom_ff_activation():
     logits = model(seq)

     assert logits.shape == (2, 1024, 20000)
-
{x_transformers-2.2.8 → x_transformers-2.2.9}/x_transformers/entropy_based_tokenizer.py
@@ -33,12 +33,15 @@ class EntropyBasedTokenizer(Module):
     def __init__(
         self,
         decoder: Module,
-        entropy_threshold: float
+        entropy_threshold: float,
+        max_token_size: int | None = None
     ):
         super().__init__()
         self.decoder = decoder
         self.entropy_threshold = entropy_threshold

+        self.max_token_size = max_token_size
+
     @torch.no_grad()
     def forward(
         self,
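For reference, here is a minimal usage sketch of the new constructor argument (not from the package itself). It assumes the standard TransformerWrapper/Decoder API from x-transformers as the byte-level decoder; the model dimensions, the entropy_threshold of 2.5 and max_token_size of 4 are arbitrary illustrative values, and the exact return value of forward is not shown in this diff.

import torch
from x_transformers import TransformerWrapper, Decoder
from x_transformers.entropy_based_tokenizer import EntropyBasedTokenizer

# small byte-level decoder that supplies the next-token entropies
decoder = TransformerWrapper(
    num_tokens = 256,
    max_seq_len = 1024,
    attn_layers = Decoder(dim = 128, depth = 2, heads = 4)
)

tokenizer = EntropyBasedTokenizer(
    decoder,
    entropy_threshold = 2.5,   # boundary wherever predicted entropy crosses this (illustrative value)
    max_token_size = 4         # new in 2.2.9: no segment may exceed 4 positions
)

seq = torch.randint(0, 256, (2, 1024))
out = tokenizer(seq)           # segments the sequences; see forward for the exact return format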
@@ -53,7 +56,7 @@ class EntropyBasedTokenizer(Module):
         self.decoder.eval()

         is_var_length = exists(lens)
-        batch, seq_len, device = *seq.shape, seq.device
+        batch, seq_len, device, max_token_size = *seq.shape, seq.device, self.max_token_size

         arange = torch.arange(seq_len, device = device)

@@ -94,7 +97,29 @@ class EntropyBasedTokenizer(Module):
         scatter_indices = rearrange(lens - 1, 'b -> b 1')
         boundaries.scatter_(-1, scatter_indices, True)

-        num_tokens = boundaries.sum(dim = -1) # number of tokens
+        # handle max token size - technique has the flaw that repeating subsequences are grouped into one large token
+
+        if exists(max_token_size):
+            token_ids = boundaries.cumsum(dim = -1)
+            token_ids = F.pad(token_ids, (1, -1), value = 0)
+
+            max_num_tokens = boundaries.sum(dim = -1).amax().item()
+            token_ids_seq = torch.arange(max_num_tokens, device = device)
+
+            token_mask = einx.equal('j, b i -> b j i', token_ids_seq, token_ids)
+
+            token_sub_seq_arange = token_mask.cumsum(dim = -1)
+
+            sub_seq_boundaries = (token_sub_seq_arange % max_token_size == 0)
+            sub_seq_boundaries = (sub_seq_boundaries & token_mask).any(dim = 1)
+
+            boundaries = boundaries | sub_seq_boundaries
+
+        # number of tokens
+
+        num_tokens = boundaries.sum(dim = -1)
+
+        # get number of tokens as well as derived indices

         indices = arange_plus_one[boundaries].split(num_tokens.tolist())

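To make the added max_token_size block easier to follow, here is a small standalone sketch (not part of the package) that runs the same cumsum / einx.equal steps from the hunk above on a hand-written boundaries tensor. A True in boundaries marks the last position of a token; with max_token_size = 3, the length-7 token below receives an extra boundary every 3 positions and is split into segments of 3, 3 and 1.

import torch
import torch.nn.functional as F
import einx

max_token_size = 3
device = 'cpu'

# one sequence of length 10 with entropy boundaries after positions 2 and 9,
# i.e. two tokens of lengths 3 and 7 (the second exceeds max_token_size)
boundaries = torch.tensor([[False, False, True, False, False, False, False, False, False, True]])

token_ids = boundaries.cumsum(dim = -1)
token_ids = F.pad(token_ids, (1, -1), value = 0)              # token id of every position

max_num_tokens = boundaries.sum(dim = -1).amax().item()
token_ids_seq = torch.arange(max_num_tokens, device = device)

token_mask = einx.equal('j, b i -> b j i', token_ids_seq, token_ids)   # (batch, token, position)

token_sub_seq_arange = token_mask.cumsum(dim = -1)            # 1-based offset of each position within its token

sub_seq_boundaries = (token_sub_seq_arange % max_token_size == 0)
sub_seq_boundaries = (sub_seq_boundaries & token_mask).any(dim = 1)

boundaries = boundaries | sub_seq_boundaries                  # extra boundary every max_token_size positions

num_tokens = boundaries.sum(dim = -1)
print(num_tokens)   # tensor([4]) - the length-7 token was split into 3 + 3 + 1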