x-transformers 1.42.22__tar.gz → 1.42.24__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (22)
  1. {x_transformers-1.42.22/x_transformers.egg-info → x_transformers-1.42.24}/PKG-INFO +1 -1
  2. {x_transformers-1.42.22 → x_transformers-1.42.24}/setup.py +1 -1
  3. {x_transformers-1.42.22 → x_transformers-1.42.24}/x_transformers/x_transformers.py +5 -5
  4. {x_transformers-1.42.22 → x_transformers-1.42.24/x_transformers.egg-info}/PKG-INFO +1 -1
  5. {x_transformers-1.42.22 → x_transformers-1.42.24}/LICENSE +0 -0
  6. {x_transformers-1.42.22 → x_transformers-1.42.24}/README.md +0 -0
  7. {x_transformers-1.42.22 → x_transformers-1.42.24}/setup.cfg +0 -0
  8. {x_transformers-1.42.22 → x_transformers-1.42.24}/tests/test_x_transformers.py +0 -0
  9. {x_transformers-1.42.22 → x_transformers-1.42.24}/x_transformers/__init__.py +0 -0
  10. {x_transformers-1.42.22 → x_transformers-1.42.24}/x_transformers/attend.py +0 -0
  11. {x_transformers-1.42.22 → x_transformers-1.42.24}/x_transformers/autoregressive_wrapper.py +0 -0
  12. {x_transformers-1.42.22 → x_transformers-1.42.24}/x_transformers/continuous.py +0 -0
  13. {x_transformers-1.42.22 → x_transformers-1.42.24}/x_transformers/dpo.py +0 -0
  14. {x_transformers-1.42.22 → x_transformers-1.42.24}/x_transformers/multi_input.py +0 -0
  15. {x_transformers-1.42.22 → x_transformers-1.42.24}/x_transformers/neo_mlp.py +0 -0
  16. {x_transformers-1.42.22 → x_transformers-1.42.24}/x_transformers/nonautoregressive_wrapper.py +0 -0
  17. {x_transformers-1.42.22 → x_transformers-1.42.24}/x_transformers/xl_autoregressive_wrapper.py +0 -0
  18. {x_transformers-1.42.22 → x_transformers-1.42.24}/x_transformers/xval.py +0 -0
  19. {x_transformers-1.42.22 → x_transformers-1.42.24}/x_transformers.egg-info/SOURCES.txt +0 -0
  20. {x_transformers-1.42.22 → x_transformers-1.42.24}/x_transformers.egg-info/dependency_links.txt +0 -0
  21. {x_transformers-1.42.22 → x_transformers-1.42.24}/x_transformers.egg-info/requires.txt +0 -0
  22. {x_transformers-1.42.22 → x_transformers-1.42.24}/x_transformers.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: x-transformers
3
- Version: 1.42.22
3
+ Version: 1.42.24
4
4
  Summary: X-Transformers - Pytorch
5
5
  Home-page: https://github.com/lucidrains/x-transformers
6
6
  Author: Phil Wang
@@ -3,7 +3,7 @@ from setuptools import setup, find_packages
3
3
  setup(
4
4
  name = 'x-transformers',
5
5
  packages = find_packages(exclude=['examples']),
6
- version = '1.42.22',
6
+ version = '1.42.24',
7
7
  license='MIT',
8
8
  description = 'X-Transformers - Pytorch',
9
9
  author = 'Phil Wang',
@@ -1079,6 +1079,7 @@ class Attention(Module):
1079
1079
  neutreno_alpha = 0.4,
1080
1080
  learned_value_residual_mix = False,
1081
1081
  laser = False, # https://arxiv.org/abs/2411.03493v1
1082
+ laser_softclamp_value = 15.,
1082
1083
  onnxable = False,
1083
1084
  attend_sdp_kwargs: dict = dict(
1084
1085
  enable_flash = True,
@@ -1119,9 +1120,9 @@ class Attention(Module):
1119
1120
  self.to_v = LinearNoBias(dim_kv, v_dim) if not shared_kv else None
1120
1121
 
1121
1122
  # enhancing gradients to attention through exponentiated values
1122
- # todo - compare it to `attn = attn * large_value + attn.detach() * (1. - large_value)`
1123
1123
 
1124
1124
  self.laser = laser
1125
+ self.laser_softclamp_value = laser_softclamp_value
1125
1126
 
1126
1127
  # relations projection from tp-attention
1127
1128
 
@@ -1449,8 +1450,7 @@ class Attention(Module):
1449
1450
  attn_bias = pad_at_dim(attn_bias, (num_mem_kv, 0))
1450
1451
 
1451
1452
  if self.laser:
1452
- values_max = v.amax(dim = -2, keepdim = True).detach() # numerical stability
1453
- v = v - values_max
1453
+ v = softclamp(v, self.laser_softclamp_value)
1454
1454
  v = v.exp()
1455
1455
 
1456
1456
  # attention is all we need
@@ -1465,7 +1465,7 @@ class Attention(Module):
1465
1465
  # laser
1466
1466
 
1467
1467
  if self.laser:
1468
- out = log(out) + values_max
1468
+ out = log(out)
1469
1469
 
1470
1470
  # store the values for resformer or Neutreno
1471
1471
 
@@ -1849,7 +1849,7 @@ class AttentionLayers(Module):
1849
1849
  is_first_self_attn = False
1850
1850
  elif layer_type == 'c':
1851
1851
  cross_attn_learned_value_residual = learned_value_residual_mix and not is_first_cross_attn
1852
- layer = Attention(dim, heads = heads, learned_value_residual_mix = cross_attn_learned_value_residual, **{**attn_kwargs, **cross_attn_kwargs})
1852
+ layer = Attention(dim, heads = heads, **{**attn_kwargs, **cross_attn_kwargs})
1853
1853
  is_first_cross_attn = False
1854
1854
  elif layer_type == 'f':
1855
1855
  layer = FeedForward(dim, **ff_kwargs)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: x-transformers
3
- Version: 1.42.22
3
+ Version: 1.42.24
4
4
  Summary: X-Transformers - Pytorch
5
5
  Home-page: https://github.com/lucidrains/x-transformers
6
6
  Author: Phil Wang