x-transformers 2.1.1-py3-none-any.whl → 2.1.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,142 @@
+ # Belief State Transformer
+
+ # https://arxiv.org/abs/2410.23506
+ # https://www.youtube.com/watch?v=aqhbRtB2Fyg
+
+ import torch
+ from torch.autograd import Function
+ from torch.nn import Module, ModuleList
+ from torch import nn, cat, stack, arange, cartesian_prod
+ import torch.nn.functional as F
+
+ from x_transformers.x_transformers import (
+     Decoder,
+     TransformerWrapper
+ )
+
+ import einx
+ from einops import rearrange, repeat
+
+ # helper functions
+
+ def exists(v):
+     return v is not None
+
+ def default(v, d):
+     return v if exists(v) else d
+
+ # wrappers
+
+ class BeliefStateWrapper(Module):
+     """
+     Figure 13. in https://arxiv.org/abs/2410.23506
+     """
+
+     def __init__(
+         self,
+         forward_decoder: TransformerWrapper,
+         backward_decoder: TransformerWrapper
+     ):
+         super().__init__()
+         assert forward_decoder.emb_dim == backward_decoder.emb_dim, 'forward and backwards model must have the same embedding dimension'
+         assert forward_decoder.num_tokens == backward_decoder.num_tokens, 'forward and backwards model must have the same number of tokens'
+
+         dim = forward_decoder.emb_dim
+         num_tokens = forward_decoder.num_tokens
+
+         # the suffix token
+
+         self.suffix_token = nn.Parameter(torch.zeros(dim))
+
+         # the text prediction head, which predicts for the combinations of prefix and suffix the next and previous token for forwards and backward sequences
+
+         self.text_head = nn.Sequential(
+             nn.Linear(dim * 2, dim),
+             nn.LeakyReLU(),
+             nn.Linear(dim, num_tokens * 2),
+         )
+
+         # the two decoders, one which is causal forward, the other causal backwards
+
+         self.forward_decoder = forward_decoder
+         self.backward_decoder = backward_decoder
+
+     def forward(
+         self,
+         seq
+     ):
+         batch, seq_len, device = *seq.shape, seq.device
+
+         # forward autoregressive
+
+         forward_embeds = self.forward_decoder(seq, return_embeddings = True)
+
+         # backward autoregressive
+
+         backward_seq = seq.flip(1)
+
+         suffix_tokens = repeat(self.suffix_token, 'd -> b 1 d', b = batch)
+
+         backward_embeds = self.backward_decoder(
+             backward_seq,
+             prepend_embeds = suffix_tokens,
+             return_embeddings = True
+         )
+
+         backward_embeds = backward_embeds.flip(1)
+
+         # trick to reduce memory on backwards pass
+
+         orig_forward_embeds, forward_embeds = forward_embeds, forward_embeds.detach()
+         orig_backward_embeds, backward_embeds = backward_embeds, backward_embeds.detach()
+
+         forward_embeds.requires_grad_()
+         backward_embeds.requires_grad_()
+
+         # belief state objective
+
+         seq_arange = arange(seq_len, device = device)
+
+         fb_pairs = cartesian_prod(seq_arange, seq_arange)
+
+         # filter down to valid pairs, as in figure 11
+         # f - forward, b - backward, i - indices
+
+         fi, bi = fb_pairs.unbind(dim = -1)
+         valid_mask = (bi - fi) >= 2
+
+         fb_pairs = fb_pairs[valid_mask]
+
+         # get labels for both
+
+         fi, bi = fb_pairs.unbind(dim = -1)
+
+         labels_fi, labels_bi = (fi + 1), bi
+
+         forward_labels, backward_labels = seq[:, labels_fi], seq[:, labels_bi]
+         labels = stack((forward_labels, backward_labels), dim = -1)
+
+         # get the forward and backward embedding pairs and feed them through the text head for both forward and backward predictions
+
+         fb_embeds = cat((
+             forward_embeds[:, fi],
+             backward_embeds[:, bi]
+         ), dim = -1)
+
+         logits = self.text_head(fb_embeds)
+
+         # cross entropy loss
+
+         fb_loss = F.cross_entropy(
+             rearrange(logits, 'b n (fb l) -> b l (fb n)', fb = 2),
+             rearrange(labels, 'b n fb -> b (fb n)')
+         )
+
+         # backwards
+
+         fb_loss.backward()
+
+         orig_forward_embeds.backward(forward_embeds.grad)
+         orig_backward_embeds.backward(backward_embeds.grad)
+
+         return fb_loss
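
For orientation, here is a minimal, hypothetical training sketch for the new wrapper. It is not part of the diff: the import path `x_transformers.belief_state` is inferred from the RECORD entry below (the package `__init__.py` is unchanged in this release), the `TransformerWrapper` / `Decoder` construction follows the package README, and the sizes, optimizer, and random data are illustrative assumptions only.

```python
# hypothetical usage sketch (not from the package) for BeliefStateWrapper;
# model sizes, optimizer and data below are illustrative assumptions
import torch
from x_transformers import TransformerWrapper, Decoder
from x_transformers.belief_state import BeliefStateWrapper

forward_model = TransformerWrapper(
    num_tokens = 256,
    max_seq_len = 1024,
    attn_layers = Decoder(dim = 512, depth = 6, heads = 8)
)

backward_model = TransformerWrapper(
    num_tokens = 256,
    max_seq_len = 1024,
    attn_layers = Decoder(dim = 512, depth = 6, heads = 8)
)

wrapper = BeliefStateWrapper(
    forward_decoder = forward_model,
    backward_decoder = backward_model
)

optim = torch.optim.Adam(wrapper.parameters(), lr = 3e-4)

seq = torch.randint(0, 256, (2, 64))  # (batch, seq_len) token ids

loss = wrapper(seq)  # note: forward() already calls .backward() on the belief state loss
optim.step()
optim.zero_grad()
```

Because the objective enumerates every (prefix, suffix) pair with a gap of at least two tokens, the text head processes O(n²) pairs per sequence; the "trick to reduce memory" in the code detaches the two decoders' outputs, backpropagates the pairwise loss into those detached tensors first, and only then pushes the accumulated gradients through the decoders.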
@@ -970,6 +970,7 @@ class DynamicLIMe(Module):
          dim,
          num_layers,
          num_views = 1,
+         norm = True,
          use_softmax = True
      ):
          super().__init__()
@@ -977,6 +978,7 @@ class DynamicLIMe(Module):
          self.multiple_views = num_views > 1
 
          self.to_weights = Sequential(
+             RMSNorm(dim) if norm else None,
              nn.Linear(dim, num_views * num_layers),
              Rearrange('... (views layers) -> views ... layers', views = num_views),
              nn.Softmax(dim = -1) if use_softmax else nn.ReLU()
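
The new `norm` flag simply gates an `RMSNorm` in front of the weight projection, presumably so the layer-mixing weights are less sensitive to the scale of the incoming residual stream; the `RMSNorm(dim) if norm else None` entry relies on the library's `Sequential` helper dropping `None` modules. Below is a standalone sketch of the same pattern in plain PyTorch (illustrative only, not the package's code; `nn.RMSNorm` requires PyTorch ≥ 2.4):

```python
# standalone sketch of the optional-norm pattern used in DynamicLIMe.to_weights;
# names and sizes are illustrative, not the package's code
import torch
from torch import nn

def filtered_sequential(*modules):
    # mimic a Sequential helper that silently drops None entries
    return nn.Sequential(*[m for m in modules if m is not None])

dim, num_layers, norm = 512, 6, True

to_weights = filtered_sequential(
    nn.RMSNorm(dim) if norm else None,  # optional pre-normalization, toggled by `norm`
    nn.Linear(dim, num_layers),
    nn.Softmax(dim = -1)                # mixing weights over the hidden layers
)

x = torch.randn(2, 16, dim)
print(to_weights(x).shape)  # torch.Size([2, 16, 6]) - per-token weights over 6 layers
```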
@@ -987,6 +989,7 @@ class DynamicLIMe(Module):
          x,
          hiddens
      ):
+
          if not is_tensor(hiddens):
              hiddens = stack(hiddens)
 
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: x-transformers
- Version: 2.1.1
+ Version: 2.1.4
  Summary: X-Transformers
  Project-URL: Homepage, https://pypi.org/project/x-transformers/
  Project-URL: Repository, https://github.com/lucidrains/x-transformers
@@ -41,7 +41,6 @@ Requires-Dist: packaging>=21.0
  Requires-Dist: torch>=2.0
  Provides-Extra: examples
  Requires-Dist: lion-pytorch; extra == 'examples'
- Requires-Dist: torchvision; extra == 'examples'
  Requires-Dist: tqdm; extra == 'examples'
  Provides-Extra: test
  Requires-Dist: pytest; extra == 'test'
@@ -2436,4 +2435,13 @@ ids_out, num_out, is_number_mask = model.generate(start_ids, start_nums, 17)
  }
  ```
 
+ ```bibtex
+ @inproceedings{Hu2024TheBS,
+     title  = {The Belief State Transformer},
+     author = {Edward S. Hu and Kwangjun Ahn and Qinghua Liu and Haoran Xu and Manan Tomar and Ada Langford and Dinesh Jayaraman and Alex Lamb and John Langford},
+     year   = {2024},
+     url    = {https://api.semanticscholar.org/CorpusID:273707334}
+ }
+ ```
+
  *solve intelligence... then use that to solve everything else.* - Demis Hassabis
@@ -1,15 +1,16 @@
  x_transformers/__init__.py,sha256=l0dom8ZYkRzFvnDdgzDboXqrI1tKav3beVE7TN2nHko,844
  x_transformers/attend.py,sha256=-5BWWhFsp7tvZTdN91Ay5SqOjyj9uOs-122vFvoO6b4,17253
  x_transformers/autoregressive_wrapper.py,sha256=reLCno9Z9pchVU79tBF8OMo21LwSZ67KAeB83jqkyAc,10505
+ x_transformers/belief_state.py,sha256=5E_08m6kvXROYgROazTJFxuUsDPNjIVM3AJxg3CJNmU,3966
  x_transformers/continuous.py,sha256=p0sCAiH1na236ygwgL1Yyhu36eZBf9cZvoW1JyP_fFE,7073
  x_transformers/dpo.py,sha256=xt4OuOWhU8pN3OKN2LZAaC2NC8iiEnchqqcrPWVqf0o,3521
  x_transformers/multi_input.py,sha256=tCh-fTJDj2ib4SMGtsa-AM8MxKzJAQSwqAXOu3HU2mg,9252
  x_transformers/neo_mlp.py,sha256=XCNnnop9WLarcxap1kGuYc1x8GHvwkZiDRnXOxSl3Po,3452
  x_transformers/nonautoregressive_wrapper.py,sha256=2NU58hYMgn-4Jzg3mie-mXb0XH_dCN7fjlzd3K1rLUY,10510
- x_transformers/x_transformers.py,sha256=bIlP-NHj0SB2joklpxicoaD1HVpRMGIulMF8WYEsOAQ,110076
+ x_transformers/x_transformers.py,sha256=-80N4sqUr3sR51Ms4wCfc4jhxnPwf0ApNR4xfIsasfQ,110142
  x_transformers/xl_autoregressive_wrapper.py,sha256=CvZMJ6A6PA-Y_bQAhnORwjJBSl6Vjq2IdW5KTdk8NI8,4195
  x_transformers/xval.py,sha256=7S00kCuab4tWQa-vf-z-XfzADjVj48MoFIr7VSIvttg,8575
- x_transformers-2.1.1.dist-info/METADATA,sha256=BBGKnocyDvj_ynWM5dtrbyX1iodI4eWEnn9TWrw38kc,87275
- x_transformers-2.1.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- x_transformers-2.1.1.dist-info/licenses/LICENSE,sha256=As9u198X-U-vph5noInuUfqsAG2zX_oXPHDmdjwlPPY,1066
- x_transformers-2.1.1.dist-info/RECORD,,
+ x_transformers-2.1.4.dist-info/METADATA,sha256=-jme9jyXVeVlo1T7nPi2iGFfoxfjSwdj2D33_dr4yxQ,87570
+ x_transformers-2.1.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ x_transformers-2.1.4.dist-info/licenses/LICENSE,sha256=As9u198X-U-vph5noInuUfqsAG2zX_oXPHDmdjwlPPY,1066
+ x_transformers-2.1.4.dist-info/RECORD,,