x-transformers 1.30.3__py3-none-any.whl → 1.30.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- x_transformers/attend.py +1 -0
- x_transformers/x_transformers.py +5 -1
- {x_transformers-1.30.3.dist-info → x_transformers-1.30.6.dist-info}/METADATA +1 -1
- {x_transformers-1.30.3.dist-info → x_transformers-1.30.6.dist-info}/RECORD +7 -7
- {x_transformers-1.30.3.dist-info → x_transformers-1.30.6.dist-info}/LICENSE +0 -0
- {x_transformers-1.30.3.dist-info → x_transformers-1.30.6.dist-info}/WHEEL +0 -0
- {x_transformers-1.30.3.dist-info → x_transformers-1.30.6.dist-info}/top_level.txt +0 -0
x_transformers/attend.py
CHANGED
@@ -22,6 +22,7 @@ class Intermediates:
     pre_softmax_attn: Tensor | None = None
     post_softmax_attn: Tensor | None = None
     cached_kv: Tuple[Tensor, Tensor] | None = None
+    layer_type: str | None = None
 
     def to_tuple(self):
         return (self.qk_similarities, self.pre_softmax_attn, self.post_softmax_attn)
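For orientation, here is a minimal sketch of what the Intermediates dataclass looks like around this hunk, assuming the fields visible in the diff are the complete picture; the 'a'/'c' values mentioned in the comment come from the x_transformers.py hunks below, and anything beyond that should be checked against the actual source.

from dataclasses import dataclass
from typing import Tuple

from torch import Tensor

@dataclass
class Intermediates:
    qk_similarities: Tensor | None = None
    pre_softmax_attn: Tensor | None = None
    post_softmax_attn: Tensor | None = None
    cached_kv: Tuple[Tensor, Tensor] | None = None
    layer_type: str | None = None  # new in 1.30.6: set to 'a' (self-attention) or 'c' (cross-attention) by AttentionLayers

    def to_tuple(self):
        return (self.qk_similarities, self.pre_softmax_attn, self.post_softmax_attn)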
x_transformers/x_transformers.py
CHANGED
@@ -868,7 +868,7 @@ class Attention(Module):
 
         k, v, r = map(lambda t: maybe(rearrange)(t, 'b n (h d) -> b h n d', h = kv_h), (k, v, r))
 
-        if exists(cache)
+        if exists(cache):
             ck, cv = cache.cached_kv
 
         if exists(mem):
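The removed line is missing its trailing colon, which Python rejects as a syntax error, so the fix simply completes the if statement. As a rough, hedged illustration of what that branch is for (not the library's exact code), the keys and values carried in Intermediates.cached_kv from a previous decoding step are concatenated in front of the keys/values computed for the new tokens:

import torch

def append_cached_kv(cached_kv, k, v):
    # cached_kv mirrors Intermediates.cached_kv: a (keys, values) pair saved from
    # the previous decoding step, shaped (batch, heads, seq, dim_head)
    if cached_kv is not None:
        ck, cv = cached_kv
        k = torch.cat((ck, k), dim = -2)  # prepend cached keys along the sequence axis
        v = torch.cat((cv, v), dim = -2)  # prepend cached values along the sequence axis
    return k, v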
@@ -1338,6 +1338,9 @@ class AttentionLayers(Module):
         if exists(cache):
             assert not self.training and self.causal and not any([*map(exists, (mask, attn_mask))])
 
+            if exists(context):
+                context = context[:, :0]
+
             if cache_age > 0:
                 x = x[:, -cache_age:] # for spec decoding, may be greater than 1
 
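The new guard empties the cross-attention context whenever a kv cache is being reused. A plausible reading (not stated in the diff itself) is that the context keys/values are already held in the cached intermediates, so re-projecting the full context on every cached decoding step would be redundant. The slice keeps the batch and feature dimensions but drops all tokens:

import torch

context = torch.randn(2, 77, 512)  # (batch, context length, dim) - illustrative shapes only
empty_context = context[:, :0]     # zero tokens, batch and feature dims preserved
print(empty_context.shape)         # torch.Size([2, 0, 512])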
@@ -1407,6 +1410,7 @@ class AttentionLayers(Module):
             x = residual_fn(out, inner_residual)
 
             if layer_type in ('a', 'c') and return_hiddens:
+                inter.layer_type = layer_type
                 intermediates.append(inter)
 
             if layer_type == 'a' and self.residual_attn:
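With the new tag in place, code that consumes the returned intermediates (via return_hiddens) can separate self-attention entries from cross-attention ones. split_by_layer_type below is a hypothetical helper sketched for illustration, not part of the library:

def split_by_layer_type(intermediates):
    # `intermediates` is the list assembled in the hunk above; each entry now records
    # whether it came from a self-attention ('a') or cross-attention ('c') block
    self_attn = [inter for inter in intermediates if inter.layer_type == 'a']
    cross_attn = [inter for inter in intermediates if inter.layer_type == 'c']
    return self_attn, cross_attn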
{x_transformers-1.30.3.dist-info → x_transformers-1.30.6.dist-info}/RECORD
CHANGED
@@ -1,14 +1,14 @@
 x_transformers/__init__.py,sha256=8LQl-dNL6vj8VHRx5LMSOlRDTXQvYOuM21PDXz8WdiI,703
-x_transformers/attend.py,sha256=
+x_transformers/attend.py,sha256=8opOeCQddi440WcH73B_wB5vtL0jaEQwBL-DIWq2lCs,10713
 x_transformers/autoregressive_wrapper.py,sha256=uX8Mb0zLsQrZECt_9UGt35g7tC05Rk3nPqO6xp2FFCc,9619
 x_transformers/continuous.py,sha256=WO52n9lFAXv5-SGadi2cApGF8dkouN8QSTEOuC7erj8,6180
 x_transformers/dpo.py,sha256=LjvWgCkqTl-UuehrzQ8nkX5guLr4whYwsmm7SKSwdls,3450
 x_transformers/nonautoregressive_wrapper.py,sha256=ys_p8obc7lTeeodCqvkRKxOXQ1C9T3j5Jwr-JbVgnXk,10432
-x_transformers/x_transformers.py,sha256=
+x_transformers/x_transformers.py,sha256=pXckFcDL6kTghYEUjIamZiR5H8dV6aIEPQTIYAGgqxA,66388
 x_transformers/xl_autoregressive_wrapper.py,sha256=DCx4n0_c1tFai4nOqaWVnqx2p9eutsZsDMiMP1ckxNU,4117
 x_transformers/xval.py,sha256=QE1ltYZTR_eGgIHPP2BrMWVWVLqMW-OpDZh87BSmQEg,8563
-x_transformers-1.30.
-x_transformers-1.30.
-x_transformers-1.30.
-x_transformers-1.30.
-x_transformers-1.30.
+x_transformers-1.30.6.dist-info/LICENSE,sha256=As9u198X-U-vph5noInuUfqsAG2zX_oXPHDmdjwlPPY,1066
+x_transformers-1.30.6.dist-info/METADATA,sha256=1Xq9oSctaCQ5TOpdM3j6lJENYStLuda5VqEzOtq1B0c,661
+x_transformers-1.30.6.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+x_transformers-1.30.6.dist-info/top_level.txt,sha256=hO6KGpFuGucRNEtRfme4A_rGcM53AKwGP7RVlRIxS5Q,15
+x_transformers-1.30.6.dist-info/RECORD,,
{x_transformers-1.30.3.dist-info → x_transformers-1.30.6.dist-info}/LICENSE
File without changes
{x_transformers-1.30.3.dist-info → x_transformers-1.30.6.dist-info}/WHEEL
File without changes
{x_transformers-1.30.3.dist-info → x_transformers-1.30.6.dist-info}/top_level.txt
File without changes