x-transformers 1.22.10__py3-none-any.whl → 1.22.12__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- x_transformers/autoregressive_wrapper.py +4 -15
- x_transformers/x_transformers.py +3 -1
- {x_transformers-1.22.10.dist-info → x_transformers-1.22.12.dist-info}/METADATA +1 -1
- {x_transformers-1.22.10.dist-info → x_transformers-1.22.12.dist-info}/RECORD +7 -7
- {x_transformers-1.22.10.dist-info → x_transformers-1.22.12.dist-info}/LICENSE +0 -0
- {x_transformers-1.22.10.dist-info → x_transformers-1.22.12.dist-info}/WHEEL +0 -0
- {x_transformers-1.22.10.dist-info → x_transformers-1.22.12.dist-info}/top_level.txt +0 -0
x_transformers/autoregressive_wrapper.py
CHANGED
@@ -28,8 +28,7 @@ def eval_decorator(fn):
 
 # for variable lengthed prefixes
 
-def align(t, lens, pad_id = 0, left = False, right = False):
-    assert left ^ right
+def align_right(t, lens, pad_id = 0):
     batch, seq_len, device, dtype = *t.shape, t.device, t.dtype
 
     assert lens.ndim == 1 and lens.shape[0] == batch
@@ -41,14 +40,9 @@ def align(t, lens, pad_id = 0, left = False, right = False):
     batch_arange = torch.arange(batch, device = device, dtype = torch.long)[..., None]
     prompt_len_arange = torch.arange(seq_len, device = device, dtype = torch.long)
 
-    if left:
-        padding = (0, max_pad_len)
-        offset = pad_lens
-    elif right:
-        padding = (max_pad_len, 0)
-        offset = max_pad_len - pad_lens
+    t = F.pad(t, (max_pad_len, 0), value = 0)
+    offset = max_pad_len - pad_lens
 
-    t = F.pad(t, padding, value = 0)
     aligned = t[batch_arange, prompt_len_arange + offset[..., None]]
     return aligned
 
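Below is a self-contained sketch of what the new right-alignment does on a toy batch. The helper mirrors align_right from the hunk above, except that max_pad_len is cast to a Python int for F.pad and the padding uses pad_id rather than the literal 0; the prompt tensor and lengths are made up for illustration.

import torch
import torch.nn.functional as F

def align_right(t, lens, pad_id = 0):
    # left-pad every row by the maximum pad amount, then gather with a
    # per-row offset so each prompt ends at the last position
    batch, seq_len, device = *t.shape, t.device

    assert lens.ndim == 1 and lens.shape[0] == batch
    assert lens.amax() <= seq_len

    pad_lens = seq_len - lens
    max_pad_len = int(pad_lens.amax())    # python int so F.pad accepts it

    batch_arange = torch.arange(batch, device = device, dtype = torch.long)[..., None]
    prompt_len_arange = torch.arange(seq_len, device = device, dtype = torch.long)

    t = F.pad(t, (max_pad_len, 0), value = pad_id)
    offset = max_pad_len - pad_lens

    return t[batch_arange, prompt_len_arange + offset[..., None]]

prompts = torch.tensor([
    [5, 6, 7, 0],    # prompt of length 3, right-padded
    [8, 9, 0, 0],    # prompt of length 2
])
lens = torch.tensor([3, 2])

print(align_right(prompts, lens))
# tensor([[0, 5, 6, 7],
#         [0, 0, 8, 9]])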
@@ -157,7 +151,7 @@ class AutoregressiveWrapper(Module):
 
         seq_start_pos = None
         if exists(prompt_lens):
-            prompts = align(prompts, prompt_lens, pad_id = self.pad_value, right = True)
+            prompts = align_right(prompts, prompt_lens, pad_id = self.pad_value)
             seq_start_pos = t - prompt_lens
 
         # output from which sampled tokens appended to
@@ -244,11 +238,6 @@ class AutoregressiveWrapper(Module):
                     out = out.masked_fill(mask, self.pad_value)
                     break
 
-        # if variable lengthed, needs to realign
-
-        if exists(prompt_lens):
-            out = align(out, prompt_lens, pad_id = self.pad_value, left = True)
-
         out = out[:, t:]
 
         out, = unpack(out, ps, '* n')
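A toy illustration, not the wrapper's actual code, of why the trailing re-alignment block removed above is no longer needed under this scheme: prompts are right-aligned before sampling, newly sampled tokens are appended on the right, and out[:, t:] already yields only the generated positions. The values below are invented.

import torch

pad_value = 0
t = 4    # prompt width after right-alignment

prompts = torch.tensor([          # right-aligned, as produced by align_right
    [pad_value, 5, 6, 7],
    [pad_value, pad_value, 8, 9],
])
sampled = torch.tensor([          # pretend continuations sampled in the loop
    [11, 12],
    [13, 14],
])

out = torch.cat((prompts, sampled), dim = -1)
print(out[:, t:])
# tensor([[11, 12],
#         [13, 14]])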
x_transformers/x_transformers.py
CHANGED
@@ -434,7 +434,6 @@ class RotaryEmbedding(nn.Module):
 
         freqs = torch.einsum('... i , j -> ... i j', t, self.inv_freq)
         freqs = torch.cat((freqs, freqs), dim = -1)
-        freqs = rearrange(freqs, '... i j -> ... 1 i j')
 
         if not exists(self.scale):
             return freqs, 1.
@@ -455,6 +454,9 @@ def apply_rotary_pos_emb(t, freqs, scale = 1):
     rot_dim, seq_len = freqs.shape[-1], t.shape[-2]
     freqs = freqs[..., -seq_len:, :]
 
+    if t.ndim == 4 and freqs.ndim == 3:
+        freqs = rearrange(freqs, 'b n d -> b 1 n d')
+
     # partial rotary embeddings, Wang et al. GPT-J
     t, t_unrotated = t[..., :rot_dim], t[..., rot_dim:]
     t = (t * freqs.cos() * scale) + (rotate_half(t) * freqs.sin() * scale)
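For orientation, a simplified standalone sketch (not the library's exact code) of the shape handling added above: rotary frequencies shaped (batch, seq, rot_dim) gain a singleton head axis so they broadcast against a 4-dimensional query/key tensor shaped (batch, heads, seq, head_dim). The rotate_half helper here is a simplified equivalent, and the example shapes are made up.

import torch
from einops import rearrange

def rotate_half(x):
    # split the last dim into halves and swap them with a sign flip
    x1, x2 = x.chunk(2, dim = -1)
    return torch.cat((-x2, x1), dim = -1)

def apply_rotary_pos_emb(t, freqs, scale = 1):
    rot_dim, seq_len = freqs.shape[-1], t.shape[-2]
    freqs = freqs[..., -seq_len:, :]

    # the new check: add the head axis only when freqs lack it
    if t.ndim == 4 and freqs.ndim == 3:
        freqs = rearrange(freqs, 'b n d -> b 1 n d')

    # partial rotary: only the first rot_dim features are rotated
    t, t_unrotated = t[..., :rot_dim], t[..., rot_dim:]
    t = (t * freqs.cos() * scale) + (rotate_half(t) * freqs.sin() * scale)
    return torch.cat((t, t_unrotated), dim = -1)

q = torch.randn(2, 8, 16, 64)     # (batch, heads, seq, head_dim)
freqs = torch.randn(2, 16, 32)    # (batch, seq, rot_dim)
print(apply_rotary_pos_emb(q, freqs).shape)    # torch.Size([2, 8, 16, 64])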
{x_transformers-1.22.10.dist-info → x_transformers-1.22.12.dist-info}/RECORD
CHANGED
@@ -1,12 +1,12 @@
 x_transformers/__init__.py,sha256=FDb654rUx8FpXRd76B8q0diH8I7q-ZjTWEtEJ4UM21Y,701
 x_transformers/attend.py,sha256=xPa6RjnMDsc1jKliQdThETMTQeRX3ycmAlw5pgzLIf4,12605
-x_transformers/autoregressive_wrapper.py,sha256=
+x_transformers/autoregressive_wrapper.py,sha256=uUnwXP2uZ4oJSN4EVXfWQormKWv8c6yzrE5tDZUjSag,8480
 x_transformers/continuous_autoregressive_wrapper.py,sha256=pTiDqu6JRUlnQJQp_xHATYHy0lgSd6ERLqyiFO3pC-4,1575
 x_transformers/nonautoregressive_wrapper.py,sha256=AQLE4rA_Kh8VNoe9OzpwyeWson34sRkhks4dn4seNjI,10414
-x_transformers/x_transformers.py,sha256=
+x_transformers/x_transformers.py,sha256=mYveA7PqRUZg9-82ALFBpuhTfQirfCF5rxL6EUCdU5I,59075
 x_transformers/xl_autoregressive_wrapper.py,sha256=DCx4n0_c1tFai4nOqaWVnqx2p9eutsZsDMiMP1ckxNU,4117
-x_transformers-1.22.
-x_transformers-1.22.
-x_transformers-1.22.
-x_transformers-1.22.
-x_transformers-1.22.
+x_transformers-1.22.12.dist-info/LICENSE,sha256=As9u198X-U-vph5noInuUfqsAG2zX_oXPHDmdjwlPPY,1066
+x_transformers-1.22.12.dist-info/METADATA,sha256=lissCQf2eUs5Oalsk0PRw0gwTiybipzr9P5YRybgZdQ,662
+x_transformers-1.22.12.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
+x_transformers-1.22.12.dist-info/top_level.txt,sha256=hO6KGpFuGucRNEtRfme4A_rGcM53AKwGP7RVlRIxS5Q,15
+x_transformers-1.22.12.dist-info/RECORD,,
{x_transformers-1.22.10.dist-info → x_transformers-1.22.12.dist-info}/LICENSE
File without changes
{x_transformers-1.22.10.dist-info → x_transformers-1.22.12.dist-info}/WHEEL
File without changes
{x_transformers-1.22.10.dist-info → x_transformers-1.22.12.dist-info}/top_level.txt
File without changes