x-transformers 2.1.0 → 2.1.2 (py3-none-any.whl)

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registries.
x_transformers/x_transformers.py

@@ -970,6 +970,7 @@ class DynamicLIMe(Module):
         dim,
         num_layers,
         num_views = 1,
+        norm = True,
         use_softmax = True
     ):
         super().__init__()
@@ -977,6 +978,7 @@ class DynamicLIMe(Module):
         self.multiple_views = num_views > 1
 
         self.to_weights = Sequential(
+            RMSNorm(dim) if norm else None,
             nn.Linear(dim, num_views * num_layers),
             Rearrange('... (views layers) -> views ... layers', views = num_views),
             nn.Softmax(dim = -1) if use_softmax else nn.ReLU()
@@ -987,6 +989,7 @@ class DynamicLIMe(Module):
         x,
         hiddens
     ):
+
         if not is_tensor(hiddens):
             hiddens = stack(hiddens)
 
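Taken together, these hunks give DynamicLIMe an optional norm flag (on by default): when set, an RMSNorm is prepended to the to_weights head, so hidden states are normalized before the per-layer mixing weights are computed; passing norm = False recovers the 2.1.0 behavior. Below is a minimal sketch of the updated head, assuming Sequential is x-transformers' None-filtering helper and substituting PyTorch's built-in nn.RMSNorm (available since torch 2.4) for the library's own RMSNorm:

import torch
from torch import nn
from einops.layers.torch import Rearrange

# stand-in for x-transformers' Sequential helper, which drops None entries
def Sequential(*mods):
    return nn.Sequential(*filter(None, mods))

dim, num_layers, num_views = 512, 4, 1
norm, use_softmax = True, True

to_weights = Sequential(
    nn.RMSNorm(dim) if norm else None,  # new in 2.1.2: optional pre-norm
    nn.Linear(dim, num_views * num_layers),
    Rearrange('... (views layers) -> views ... layers', views = num_views),
    nn.Softmax(dim = -1) if use_softmax else nn.ReLU()
)

weights = to_weights(torch.randn(2, 16, dim))
print(weights.shape)  # torch.Size([1, 2, 16, 4]): (views, batch, seq, layers)

Since the flag defaults to True, existing DynamicLIMe call sites pick up the norm automatically on upgrade.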
@@ -2200,11 +2203,12 @@ class AttentionLayers(Module):
 
             # attention, cross attention, feedforward
 
+            layer_qkv_receives_diff_view = layer_type == 'a' and qkv_receive_diff_residuals and not (is_first_self_attn and integrate_layers)
+
             if layer_type == 'a':
                 self_attn_learned_value_residual = learned_value_residual_mix and not is_first_self_attn
-                qkv_receives_diff_view = qkv_receive_diff_residuals and not (is_first_self_attn and integrate_layers)
 
-                layer = Attention(dim, heads = heads, causal = causal, qkv_receive_diff_residuals = qkv_receives_diff_view, learned_value_residual_mix = self_attn_learned_value_residual, rotate_num_heads = rotate_num_heads, **attn_kwargs)
+                layer = Attention(dim, heads = heads, causal = causal, qkv_receive_diff_residuals = layer_qkv_receives_diff_view, learned_value_residual_mix = self_attn_learned_value_residual, rotate_num_heads = rotate_num_heads, **attn_kwargs)
                 is_first_self_attn = False
 
             elif layer_type == 'c':
@@ -2230,7 +2234,7 @@ class AttentionLayers(Module):
 
             if integrate_layers:
                 num_layer_hiddens = ind + 1
-                layer_integrate_num_view = 3 if layer_type == 'a' and qkv_receives_diff_view else 1
+                layer_integrate_num_view = 3 if layer_qkv_receives_diff_view else 1
 
                 layer_integrate = DynamicLIMe(dim, num_layer_hiddens, num_views = layer_integrate_num_view, use_softmax = layer_integrate_use_softmax)
 
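These two hunks are a tidy-up rather than a behavior change: the view flag that 2.1.0 assigned inside the layer_type == 'a' branch (and re-guarded with layer_type == 'a' at its second use) is hoisted above the branch as layer_qkv_receives_diff_view, computed once per layer with the layer-type check folded in. Both consumers, the Attention constructor and the DynamicLIMe view count, now read the same per-layer value instead of relying on a name that leaked out of a conditional branch. A self-contained sketch of the hoisted flag, using a simplified, hypothetical layer loop:

qkv_receive_diff_residuals = True
integrate_layers = True

is_first_self_attn = True
for ind, layer_type in enumerate('afaf'):  # 'a' = self-attention, 'f' = feedforward
    # 2.1.2: one per-layer flag with the layer-type check folded in;
    # with integrate_layers on, only a non-first self-attention layer is True
    layer_qkv_receives_diff_view = (
        layer_type == 'a'
        and qkv_receive_diff_residuals
        and not (is_first_self_attn and integrate_layers)
    )

    if layer_type == 'a':
        # the flag is passed to Attention(...) here in the real code
        is_first_self_attn = False

    if integrate_layers:
        # ... and reused here, without re-testing layer_type == 'a'
        num_views = 3 if layer_qkv_receives_diff_view else 1
        print(ind, layer_type, num_views)  # 0 a 1 / 1 f 1 / 2 a 3 / 3 f 1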
x_transformers-2.1.0.dist-info/METADATA → x_transformers-2.1.2.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: x-transformers
-Version: 2.1.0
+Version: 2.1.2
 Summary: X-Transformers
 Project-URL: Homepage, https://pypi.org/project/x-transformers/
 Project-URL: Repository, https://github.com/lucidrains/x-transformers
@@ -41,7 +41,6 @@ Requires-Dist: packaging>=21.0
 Requires-Dist: torch>=2.0
 Provides-Extra: examples
 Requires-Dist: lion-pytorch; extra == 'examples'
-Requires-Dist: torchvision; extra == 'examples'
 Requires-Dist: tqdm; extra == 'examples'
 Provides-Extra: test
 Requires-Dist: pytest; extra == 'test'
x_transformers-2.1.0.dist-info/RECORD → x_transformers-2.1.2.dist-info/RECORD

@@ -6,10 +6,10 @@ x_transformers/dpo.py,sha256=xt4OuOWhU8pN3OKN2LZAaC2NC8iiEnchqqcrPWVqf0o,3521
 x_transformers/multi_input.py,sha256=tCh-fTJDj2ib4SMGtsa-AM8MxKzJAQSwqAXOu3HU2mg,9252
 x_transformers/neo_mlp.py,sha256=XCNnnop9WLarcxap1kGuYc1x8GHvwkZiDRnXOxSl3Po,3452
 x_transformers/nonautoregressive_wrapper.py,sha256=2NU58hYMgn-4Jzg3mie-mXb0XH_dCN7fjlzd3K1rLUY,10510
-x_transformers/x_transformers.py,sha256=PhwkSTxLYxFPLn-mjVe6t5LrrZEKsS8P03u7Q9v_KYM,110061
+x_transformers/x_transformers.py,sha256=-80N4sqUr3sR51Ms4wCfc4jhxnPwf0ApNR4xfIsasfQ,110142
 x_transformers/xl_autoregressive_wrapper.py,sha256=CvZMJ6A6PA-Y_bQAhnORwjJBSl6Vjq2IdW5KTdk8NI8,4195
 x_transformers/xval.py,sha256=7S00kCuab4tWQa-vf-z-XfzADjVj48MoFIr7VSIvttg,8575
-x_transformers-2.1.0.dist-info/METADATA,sha256=BzidlkOJz0xRRSpZjOxcph8e_K16QOpELw6JUeDS9mQ,87275
-x_transformers-2.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-x_transformers-2.1.0.dist-info/licenses/LICENSE,sha256=As9u198X-U-vph5noInuUfqsAG2zX_oXPHDmdjwlPPY,1066
-x_transformers-2.1.0.dist-info/RECORD,,
+x_transformers-2.1.2.dist-info/METADATA,sha256=-LsNGhf7qKzttPNU7VOSVqigs61_Nuw4r0LBlZDT_Qo,87227
+x_transformers-2.1.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+x_transformers-2.1.2.dist-info/licenses/LICENSE,sha256=As9u198X-U-vph5noInuUfqsAG2zX_oXPHDmdjwlPPY,1066
+x_transformers-2.1.2.dist-info/RECORD,,