x-transformers 2.2.8__tar.gz → 2.2.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. {x_transformers-2.2.8 → x_transformers-2.2.10}/PKG-INFO +1 -1
  2. {x_transformers-2.2.8 → x_transformers-2.2.10}/pyproject.toml +1 -1
  3. {x_transformers-2.2.8 → x_transformers-2.2.10}/tests/test_x_transformers.py +0 -1
  4. {x_transformers-2.2.8 → x_transformers-2.2.10}/x_transformers/entropy_based_tokenizer.py +29 -3
  5. {x_transformers-2.2.8 → x_transformers-2.2.10}/.github/FUNDING.yml +0 -0
  6. {x_transformers-2.2.8 → x_transformers-2.2.10}/.github/workflows/python-publish.yml +0 -0
  7. {x_transformers-2.2.8 → x_transformers-2.2.10}/.github/workflows/python-test.yaml +0 -0
  8. {x_transformers-2.2.8 → x_transformers-2.2.10}/.gitignore +0 -0
  9. {x_transformers-2.2.8 → x_transformers-2.2.10}/LICENSE +0 -0
  10. {x_transformers-2.2.8 → x_transformers-2.2.10}/README.md +0 -0
  11. {x_transformers-2.2.8 → x_transformers-2.2.10}/data/README.md +0 -0
  12. {x_transformers-2.2.8 → x_transformers-2.2.10}/data/enwik8.gz +0 -0
  13. {x_transformers-2.2.8 → x_transformers-2.2.10}/images/all-attention.png +0 -0
  14. {x_transformers-2.2.8 → x_transformers-2.2.10}/images/attention-on-attention.png +0 -0
  15. {x_transformers-2.2.8 → x_transformers-2.2.10}/images/cosine-sim-attention.png +0 -0
  16. {x_transformers-2.2.8 → x_transformers-2.2.10}/images/deepnorm.png +0 -0
  17. {x_transformers-2.2.8 → x_transformers-2.2.10}/images/dynamic-pos-bias-linear.png +0 -0
  18. {x_transformers-2.2.8 → x_transformers-2.2.10}/images/dynamic-pos-bias-log.png +0 -0
  19. {x_transformers-2.2.8 → x_transformers-2.2.10}/images/dynamic-pos-bias-sinusoidal.png +0 -0
  20. {x_transformers-2.2.8 → x_transformers-2.2.10}/images/dynamic-pos-bias.png +0 -0
  21. {x_transformers-2.2.8 → x_transformers-2.2.10}/images/enhanced-recurrence.png +0 -0
  22. {x_transformers-2.2.8 → x_transformers-2.2.10}/images/fcm.png +0 -0
  23. {x_transformers-2.2.8 → x_transformers-2.2.10}/images/ffglu.png +0 -0
  24. {x_transformers-2.2.8 → x_transformers-2.2.10}/images/flash-attention.png +0 -0
  25. {x_transformers-2.2.8 → x_transformers-2.2.10}/images/gate_values.png +0 -0
  26. {x_transformers-2.2.8 → x_transformers-2.2.10}/images/gating.png +0 -0
  27. {x_transformers-2.2.8 → x_transformers-2.2.10}/images/length-extrapolation-scale.png +0 -0
  28. {x_transformers-2.2.8 → x_transformers-2.2.10}/images/macaron-1.png +0 -0
  29. {x_transformers-2.2.8 → x_transformers-2.2.10}/images/macaron-2.png +0 -0
  30. {x_transformers-2.2.8 → x_transformers-2.2.10}/images/memory-transformer.png +0 -0
  31. {x_transformers-2.2.8 → x_transformers-2.2.10}/images/normformer.png +0 -0
  32. {x_transformers-2.2.8 → x_transformers-2.2.10}/images/pia.png +0 -0
  33. {x_transformers-2.2.8 → x_transformers-2.2.10}/images/qknorm-analysis.png +0 -0
  34. {x_transformers-2.2.8 → x_transformers-2.2.10}/images/resi_dual.png +0 -0
  35. {x_transformers-2.2.8 → x_transformers-2.2.10}/images/residual_attn.png +0 -0
  36. {x_transformers-2.2.8 → x_transformers-2.2.10}/images/rezero.png +0 -0
  37. {x_transformers-2.2.8 → x_transformers-2.2.10}/images/rotary.png +0 -0
  38. {x_transformers-2.2.8 → x_transformers-2.2.10}/images/sandwich-2.png +0 -0
  39. {x_transformers-2.2.8 → x_transformers-2.2.10}/images/sandwich.png +0 -0
  40. {x_transformers-2.2.8 → x_transformers-2.2.10}/images/sandwich_norm.png +0 -0
  41. {x_transformers-2.2.8 → x_transformers-2.2.10}/images/scalenorm.png +0 -0
  42. {x_transformers-2.2.8 → x_transformers-2.2.10}/images/talking-heads.png +0 -0
  43. {x_transformers-2.2.8 → x_transformers-2.2.10}/images/topk-attention.png +0 -0
  44. {x_transformers-2.2.8 → x_transformers-2.2.10}/images/xval.png +0 -0
  45. {x_transformers-2.2.8 → x_transformers-2.2.10}/train_belief_state.py +0 -0
  46. {x_transformers-2.2.8 → x_transformers-2.2.10}/train_copy.py +0 -0
  47. {x_transformers-2.2.8 → x_transformers-2.2.10}/train_entropy_tokenizer.py +0 -0
  48. {x_transformers-2.2.8 → x_transformers-2.2.10}/train_enwik8.py +0 -0
  49. {x_transformers-2.2.8 → x_transformers-2.2.10}/train_length_extrapolate.py +0 -0
  50. {x_transformers-2.2.8 → x_transformers-2.2.10}/train_parity.py +0 -0
  51. {x_transformers-2.2.8 → x_transformers-2.2.10}/x_transformers/__init__.py +0 -0
  52. {x_transformers-2.2.8 → x_transformers-2.2.10}/x_transformers/attend.py +0 -0
  53. {x_transformers-2.2.8 → x_transformers-2.2.10}/x_transformers/autoregressive_wrapper.py +0 -0
  54. {x_transformers-2.2.8 → x_transformers-2.2.10}/x_transformers/belief_state_wrapper.py +0 -0
  55. {x_transformers-2.2.8 → x_transformers-2.2.10}/x_transformers/continuous.py +0 -0
  56. {x_transformers-2.2.8 → x_transformers-2.2.10}/x_transformers/dpo.py +0 -0
  57. {x_transformers-2.2.8 → x_transformers-2.2.10}/x_transformers/multi_input.py +0 -0
  58. {x_transformers-2.2.8 → x_transformers-2.2.10}/x_transformers/neo_mlp.py +0 -0
  59. {x_transformers-2.2.8 → x_transformers-2.2.10}/x_transformers/nonautoregressive_wrapper.py +0 -0
  60. {x_transformers-2.2.8 → x_transformers-2.2.10}/x_transformers/x_transformers.py +0 -0
  61. {x_transformers-2.2.8 → x_transformers-2.2.10}/x_transformers/xl_autoregressive_wrapper.py +0 -0
  62. {x_transformers-2.2.8 → x_transformers-2.2.10}/x_transformers/xval.py +0 -0
--- x_transformers-2.2.8/PKG-INFO
+++ x_transformers-2.2.10/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: x-transformers
-Version: 2.2.8
+Version: 2.2.10
 Summary: X-Transformers
 Project-URL: Homepage, https://pypi.org/project/x-transformers/
 Project-URL: Repository, https://github.com/lucidrains/x-transformers
--- x_transformers-2.2.8/pyproject.toml
+++ x_transformers-2.2.10/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "x-transformers"
-version = "2.2.8"
+version = "2.2.10"
 description = "X-Transformers"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
--- x_transformers-2.2.8/tests/test_x_transformers.py
+++ x_transformers-2.2.10/tests/test_x_transformers.py
@@ -819,4 +819,3 @@ def test_custom_ff_activation():
     logits = model(seq)

     assert logits.shape == (2, 1024, 20000)
-
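The remaining hunks are all in x_transformers/entropy_based_tokenizer.py and add an optional max_token_size argument that caps how long a single entropy-derived segment may grow. Before the hunks themselves, here is a minimal usage sketch. It is illustrative only: the TransformerWrapper/Decoder setup, the hyperparameter values, and the assumption that the tokenizer returns per-sample segment lengths are not taken from this diff (the lens keyword does appear in the hunks below).

import torch
from x_transformers import TransformerWrapper, Decoder
from x_transformers.entropy_based_tokenizer import EntropyBasedTokenizer

# the decoder supplies next-byte logits; the entropies of its predictions
# decide where segment boundaries fall (in practice it would be trained)
decoder = TransformerWrapper(
    num_tokens = 256,            # byte-level vocabulary (illustrative)
    max_seq_len = 1024,
    attn_layers = Decoder(dim = 512, depth = 6, heads = 8)
)

tokenizer = EntropyBasedTokenizer(
    decoder,
    entropy_threshold = 2.5,     # boundary wherever predictive entropy exceeds this (illustrative value)
    max_token_size = 8           # new in 2.2.10: no segment may exceed 8 positions
)

seq = torch.randint(0, 256, (2, 1024))
lens = torch.tensor([1024, 768])      # optional per-sample valid lengths, as used in the hunks below

out = tokenizer(seq, lens = lens)     # assumed to return the per-sample segment lengths
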
--- x_transformers-2.2.8/x_transformers/entropy_based_tokenizer.py
+++ x_transformers-2.2.10/x_transformers/entropy_based_tokenizer.py
@@ -1,3 +1,4 @@
+from __future__ import annotations
 from itertools import zip_longest

 import torch
@@ -33,12 +34,15 @@ class EntropyBasedTokenizer(Module):
     def __init__(
         self,
         decoder: Module,
-        entropy_threshold: float
+        entropy_threshold: float,
+        max_token_size: int | None = None
     ):
         super().__init__()
         self.decoder = decoder
         self.entropy_threshold = entropy_threshold

+        self.max_token_size = max_token_size
+
     @torch.no_grad()
     def forward(
         self,
@@ -53,7 +57,7 @@ class EntropyBasedTokenizer(Module):
         self.decoder.eval()

         is_var_length = exists(lens)
-        batch, seq_len, device = *seq.shape, seq.device
+        batch, seq_len, device, max_token_size = *seq.shape, seq.device, self.max_token_size

         arange = torch.arange(seq_len, device = device)

@@ -94,7 +98,29 @@ class EntropyBasedTokenizer(Module):
             scatter_indices = rearrange(lens - 1, 'b -> b 1')
             boundaries.scatter_(-1, scatter_indices, True)

-        num_tokens = boundaries.sum(dim = -1) # number of tokens
+        # handle max token size - technique has the flaw that repeating subsequences are grouped into one large token
+
+        if exists(max_token_size):
+            token_ids = boundaries.cumsum(dim = -1)
+            token_ids = F.pad(token_ids, (1, -1), value = 0)
+
+            max_num_tokens = boundaries.sum(dim = -1).amax().item()
+            token_ids_seq = torch.arange(max_num_tokens, device = device)
+
+            token_mask = einx.equal('j, b i -> b j i', token_ids_seq, token_ids)
+
+            token_sub_seq_arange = token_mask.cumsum(dim = -1)
+
+            sub_seq_boundaries = (token_sub_seq_arange % max_token_size == 0)
+            sub_seq_boundaries = (sub_seq_boundaries & token_mask).any(dim = 1)
+
+            boundaries = boundaries | sub_seq_boundaries
+
+        # number of tokens
+
+        num_tokens = boundaries.sum(dim = -1)
+
+        # get number of tokens as well as derived indices

         indices = arange_plus_one[boundaries].split(num_tokens.tolist())

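The new branch in the last hunk exists because a long run of easily predicted (low-entropy) positions, such as a repeating subsequence, never crosses the entropy threshold and would otherwise collapse into a single oversized token. Below is a loop-based sketch of just that capping step, on one toy sample; it illustrates the effect under the same assumptions, it is not the library's vectorised implementation.

import torch

# one sample of length 10 where the entropy criterion placed only the final boundary,
# i.e. the whole sequence would otherwise become a single token
boundaries = torch.tensor([[False] * 9 + [True]])
max_token_size = 4

capped = boundaries.clone()
for b in range(boundaries.shape[0]):
    start = 0
    for end in boundaries[b].nonzero().flatten().tolist():
        # force an extra boundary every `max_token_size` positions inside the token
        for pos in range(start + max_token_size - 1, end, max_token_size):
            capped[b, pos] = True
        start = end + 1

print(capped.int())   # boundaries now at positions 3, 7, 9 -> segment lengths 4, 4, 2

The vectorised version in the hunk reaches the same boundaries without Python loops: it numbers each token with a cumulative sum, builds a per-token mask via einx.equal, counts positions within each token, and marks every multiple of max_token_size, keeping the whole operation batched.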