x-transformers 1.42.27__tar.gz → 1.42.28__tar.gz
- {x_transformers-1.42.27 → x_transformers-1.42.28}/PKG-INFO +1 -1
- {x_transformers-1.42.27 → x_transformers-1.42.28}/setup.py +1 -1
- {x_transformers-1.42.27 → x_transformers-1.42.28}/x_transformers/x_transformers.py +2 -2
- {x_transformers-1.42.27 → x_transformers-1.42.28}/x_transformers.egg-info/PKG-INFO +1 -1
- {x_transformers-1.42.27 → x_transformers-1.42.28}/LICENSE +0 -0
- {x_transformers-1.42.27 → x_transformers-1.42.28}/README.md +0 -0
- {x_transformers-1.42.27 → x_transformers-1.42.28}/setup.cfg +0 -0
- {x_transformers-1.42.27 → x_transformers-1.42.28}/tests/test_x_transformers.py +0 -0
- {x_transformers-1.42.27 → x_transformers-1.42.28}/x_transformers/__init__.py +0 -0
- {x_transformers-1.42.27 → x_transformers-1.42.28}/x_transformers/attend.py +0 -0
- {x_transformers-1.42.27 → x_transformers-1.42.28}/x_transformers/autoregressive_wrapper.py +0 -0
- {x_transformers-1.42.27 → x_transformers-1.42.28}/x_transformers/continuous.py +0 -0
- {x_transformers-1.42.27 → x_transformers-1.42.28}/x_transformers/dpo.py +0 -0
- {x_transformers-1.42.27 → x_transformers-1.42.28}/x_transformers/multi_input.py +0 -0
- {x_transformers-1.42.27 → x_transformers-1.42.28}/x_transformers/neo_mlp.py +0 -0
- {x_transformers-1.42.27 → x_transformers-1.42.28}/x_transformers/nonautoregressive_wrapper.py +0 -0
- {x_transformers-1.42.27 → x_transformers-1.42.28}/x_transformers/xl_autoregressive_wrapper.py +0 -0
- {x_transformers-1.42.27 → x_transformers-1.42.28}/x_transformers/xval.py +0 -0
- {x_transformers-1.42.27 → x_transformers-1.42.28}/x_transformers.egg-info/SOURCES.txt +0 -0
- {x_transformers-1.42.27 → x_transformers-1.42.28}/x_transformers.egg-info/dependency_links.txt +0 -0
- {x_transformers-1.42.27 → x_transformers-1.42.28}/x_transformers.egg-info/requires.txt +0 -0
- {x_transformers-1.42.27 → x_transformers-1.42.28}/x_transformers.egg-info/top_level.txt +0 -0
@@ -1077,7 +1077,7 @@ class Attention(Module):
         logit_softclamp_value = 50.,
         neutreno_value_residual = False, # Nguyen et al. https://arxiv.org/abs/2312.00751
         neutreno_alpha = 0.4,
-        learned_value_residual_mix =
+        learned_value_residual_mix = False,
         laser = False, # https://arxiv.org/abs/2411.03493v1
         laser_softclamp_value = 15.,
         onnxable = False,
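For context on what the `learned_value_residual_mix` flag above toggles: the value-residual comments in the next hunk describe blending each attention layer's values with the values from the first layer, with the mixing weight learned per token. Below is a minimal, illustrative PyTorch sketch of that idea; the module name `ValueResidualMix` and the single-unit linear gate are assumptions for illustration only, not the library's actual implementation.

```python
import torch
from torch import nn

class ValueResidualMix(nn.Module):
    # Illustrative sketch only - not x-transformers' actual code.
    # Blends the current layer's attention values with the values cached
    # from the first layer, using a per-token gate predicted from the input.
    def __init__(self, dim):
        super().__init__()
        self.to_mix = nn.Linear(dim, 1)  # hypothetical per-token gate projection

    def forward(self, x, values, first_values):
        # x:            (batch, seq, dim)    tokens entering this attention layer
        # values:       (batch, seq, dim_v)  values computed by this layer
        # first_values: (batch, seq, dim_v)  values saved from the first layer
        mix = self.to_mix(x).sigmoid()       # (batch, seq, 1), learned per token
        return values * mix + first_values * (1. - mix)

# quick shape check
mixer = ValueResidualMix(dim = 512)
x, v, v0 = torch.randn(2, 128, 512), torch.randn(2, 128, 64), torch.randn(2, 128, 64)
assert mixer(x, v, v0).shape == (2, 128, 64)
```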
@@ -1584,7 +1584,7 @@ class AttentionLayers(Module):
         unet_skips = False,
         reinject_input = False, # seen first in DEQ paper https://arxiv.org/abs/1909.01377, but later used in a number of papers trying to achieve depthwise generalization https://arxiv.org/abs/2410.03020v1
         add_value_residual = False, # resformer from Zhou et al - https://arxiv.org/abs/2410.17897v1
-        learned_value_residual_mix =
+        learned_value_residual_mix = True, # seeing big improvements when the value residual mix value is learned per token - credit goes to @faresobeid for taking the first step with learned scalar mix, then @Blinkdl for taking it a step further with data dependent. here we will use per token learned
         rel_pos_kwargs: dict = dict(),
         **kwargs
     ):
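At the user-facing level, the AttentionLayers kwargs in the hunk above are reachable through the usual TransformerWrapper / Decoder entry points. A minimal sketch, assuming the standard README-style constructor arguments; the two value-residual kwargs are spelled exactly as in the signature above, and since `learned_value_residual_mix = True` is now the default, passing it explicitly is only for visibility.

```python
import torch
from x_transformers import TransformerWrapper, Decoder

model = TransformerWrapper(
    num_tokens = 20000,
    max_seq_len = 1024,
    attn_layers = Decoder(
        dim = 512,
        depth = 6,
        heads = 8,
        add_value_residual = True,          # resformer-style value residual (still off by default)
        learned_value_residual_mix = True   # per-token learned mix, the new default in 1.42.28
    )
)

x = torch.randint(0, 20000, (1, 256))
logits = model(x)   # (1, 256, 20000)
```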