x-transformers 1.42.25__py3-none-any.whl → 1.42.27__py3-none-any.whl
- x_transformers/x_transformers.py +4 -5
- {x_transformers-1.42.25.dist-info → x_transformers-1.42.27.dist-info}/METADATA +1 -1
- {x_transformers-1.42.25.dist-info → x_transformers-1.42.27.dist-info}/RECORD +6 -6
- {x_transformers-1.42.25.dist-info → x_transformers-1.42.27.dist-info}/LICENSE +0 -0
- {x_transformers-1.42.25.dist-info → x_transformers-1.42.27.dist-info}/WHEEL +0 -0
- {x_transformers-1.42.25.dist-info → x_transformers-1.42.27.dist-info}/top_level.txt +0 -0
x_transformers/x_transformers.py
CHANGED
@@ -51,8 +51,8 @@ def default(val, d):
         return val
     return d() if callable(d) else d

-def first(it):
-    return it[0]
+def first(it, default = None):
+    return it[0] if len(it) > 0 else default

 def is_empty(x):
     return len(x) == 0
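The change to first above gives the helper an explicit fallback instead of assuming a non-empty sequence. Below is a minimal standalone sketch of the new behavior; the helper is copied from the diff, and the example calls are purely illustrative:

# first() as updated in this release; example calls are illustrative only
def first(it, default = None):
    return it[0] if len(it) > 0 else default

assert first([3, 5, 8]) == 3        # non-empty input: same result as before
assert first([]) is None            # empty input: falls back to the default rather than raising IndexError
assert first([], default = 0) == 0  # an explicit fallback can be supplied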
@@ -1077,7 +1077,7 @@ class Attention(Module):
         logit_softclamp_value = 50.,
         neutreno_value_residual = False, # Nguyen et al. https://arxiv.org/abs/2312.00751
         neutreno_alpha = 0.4,
-        learned_value_residual_mix =
+        learned_value_residual_mix = True,
         laser = False, # https://arxiv.org/abs/2411.03493v1
         laser_softclamp_value = 15.,
         onnxable = False,
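The diff above flips the default of learned_value_residual_mix on the Attention module to True, so the learned value-residual mixing is now enabled unless the caller opts out. The snippet below is a hedged sketch of how a user might restore the previous behavior; it assumes x-transformers' usual convention that attn_-prefixed kwargs on Decoder are forwarded to each Attention layer, which is not shown in this diff:

# Sketch only - routing this particular kwarg via the attn_ prefix is assumed, not confirmed by the diff
from x_transformers import TransformerWrapper, Decoder

model = TransformerWrapper(
    num_tokens = 256,
    max_seq_len = 1024,
    attn_layers = Decoder(
        dim = 512,
        depth = 6,
        heads = 8,
        attn_learned_value_residual_mix = False  # opt out of the new True default
    )
)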
@@ -1357,7 +1357,6 @@ class Attention(Module):
             k = k * self.qk_norm_k_scale

         if exists(rotary_pos_emb):
-
             freqs, xpos_scale = rotary_pos_emb
             q_xpos_scale, k_xpos_scale = (xpos_scale, xpos_scale ** -1.) if exists(xpos_scale) else (1., 1.)

@@ -1989,7 +1988,7 @@ class AttentionLayers(Module):

         if exists(self.rotary_pos_emb):
             if not exists(rotary_pos_emb):
-                maybe_mem = mems
+                maybe_mem = first(mems, None) # todo - handle edge case where different layers get different memory lengths. don't think this will ever come up but who knows
                 mem_len = maybe_mem.shape[1] if exists(maybe_mem) else 0

             if not exists(pos):
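This AttentionLayers change uses the updated first helper: the memory length for the rotary position offset is now taken from the first entry of mems, falling back to None when no memory is present, so mem_len cleanly resolves to 0. Below is a standalone sketch of the pattern, where first is copied from the diff, exists is assumed to be the library's usual "is not None" check, and the empty-mems case is an assumed example scenario:

# Standalone illustration - exists() and the empty-mems scenario are assumptions
def exists(v):
    return v is not None

def first(it, default = None):
    return it[0] if len(it) > 0 else default

mems = []  # assumed case: no cached memories supplied for this forward pass
maybe_mem = first(mems, None)
mem_len = maybe_mem.shape[1] if exists(maybe_mem) else 0
assert mem_len == 0  # resolves to zero instead of erroring on the empty list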
{x_transformers-1.42.25.dist-info → x_transformers-1.42.27.dist-info}/RECORD
CHANGED
@@ -6,11 +6,11 @@ x_transformers/dpo.py,sha256=xt4OuOWhU8pN3OKN2LZAaC2NC8iiEnchqqcrPWVqf0o,3521
 x_transformers/multi_input.py,sha256=tCh-fTJDj2ib4SMGtsa-AM8MxKzJAQSwqAXOu3HU2mg,9252
 x_transformers/neo_mlp.py,sha256=XCNnnop9WLarcxap1kGuYc1x8GHvwkZiDRnXOxSl3Po,3452
 x_transformers/nonautoregressive_wrapper.py,sha256=2NU58hYMgn-4Jzg3mie-mXb0XH_dCN7fjlzd3K1rLUY,10510
-x_transformers/x_transformers.py,sha256=
+x_transformers/x_transformers.py,sha256=mLAqXQuZynqueJDkTEBs-kE9Uk8mSq_DF8UG9oY65Ns,96695
 x_transformers/xl_autoregressive_wrapper.py,sha256=CvZMJ6A6PA-Y_bQAhnORwjJBSl6Vjq2IdW5KTdk8NI8,4195
 x_transformers/xval.py,sha256=7S00kCuab4tWQa-vf-z-XfzADjVj48MoFIr7VSIvttg,8575
-x_transformers-1.42.
-x_transformers-1.42.
-x_transformers-1.42.
-x_transformers-1.42.
-x_transformers-1.42.
+x_transformers-1.42.27.dist-info/LICENSE,sha256=As9u198X-U-vph5noInuUfqsAG2zX_oXPHDmdjwlPPY,1066
+x_transformers-1.42.27.dist-info/METADATA,sha256=g6KI8a3WyHUyq9w5Tq3aQatgr89gpc5IMZ5c1zAGlHU,739
+x_transformers-1.42.27.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+x_transformers-1.42.27.dist-info/top_level.txt,sha256=hO6KGpFuGucRNEtRfme4A_rGcM53AKwGP7RVlRIxS5Q,15
+x_transformers-1.42.27.dist-info/RECORD,,
{x_transformers-1.42.25.dist-info → x_transformers-1.42.27.dist-info}/LICENSE
File without changes
{x_transformers-1.42.25.dist-info → x_transformers-1.42.27.dist-info}/WHEEL
File without changes
{x_transformers-1.42.25.dist-info → x_transformers-1.42.27.dist-info}/top_level.txt
File without changes