x-transformers 1.28.5__tar.gz → 1.29.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {x_transformers-1.28.5/x_transformers.egg-info → x_transformers-1.29.0}/PKG-INFO +1 -1
- {x_transformers-1.28.5 → x_transformers-1.29.0}/README.md +49 -0
- {x_transformers-1.28.5 → x_transformers-1.29.0}/setup.py +1 -1
- {x_transformers-1.28.5 → x_transformers-1.29.0}/x_transformers/x_transformers.py +7 -1
- {x_transformers-1.28.5 → x_transformers-1.29.0/x_transformers.egg-info}/PKG-INFO +1 -1
- {x_transformers-1.28.5 → x_transformers-1.29.0}/LICENSE +0 -0
- {x_transformers-1.28.5 → x_transformers-1.29.0}/setup.cfg +0 -0
- {x_transformers-1.28.5 → x_transformers-1.29.0}/x_transformers/__init__.py +0 -0
- {x_transformers-1.28.5 → x_transformers-1.29.0}/x_transformers/attend.py +0 -0
- {x_transformers-1.28.5 → x_transformers-1.29.0}/x_transformers/autoregressive_wrapper.py +0 -0
- {x_transformers-1.28.5 → x_transformers-1.29.0}/x_transformers/continuous.py +0 -0
- {x_transformers-1.28.5 → x_transformers-1.29.0}/x_transformers/dpo.py +0 -0
- {x_transformers-1.28.5 → x_transformers-1.29.0}/x_transformers/nonautoregressive_wrapper.py +0 -0
- {x_transformers-1.28.5 → x_transformers-1.29.0}/x_transformers/xl_autoregressive_wrapper.py +0 -0
- {x_transformers-1.28.5 → x_transformers-1.29.0}/x_transformers/xval.py +0 -0
- {x_transformers-1.28.5 → x_transformers-1.29.0}/x_transformers.egg-info/SOURCES.txt +0 -0
- {x_transformers-1.28.5 → x_transformers-1.29.0}/x_transformers.egg-info/dependency_links.txt +0 -0
- {x_transformers-1.28.5 → x_transformers-1.29.0}/x_transformers.egg-info/requires.txt +0 -0
- {x_transformers-1.28.5 → x_transformers-1.29.0}/x_transformers.egg-info/top_level.txt +0 -0
@@ -674,6 +674,55 @@ model = TransformerWrapper(
 )
 ```
 
+### Weight-tied Layers
+
+In the early days of the Cambrian explosion of BERT, a paper explored tying the weights across all of the layers; the model was named <a href="https://arxiv.org/abs/1909.11942">ALBERT</a>. You can use it by setting `weight_tie_layers = True`.
+
+```python
+import torch
+from x_transformers import TransformerWrapper, Encoder
+
+model = TransformerWrapper(
+    num_tokens = 20000,
+    max_seq_len = 1024,
+    attn_layers = Encoder(
+        dim = 512,
+        depth = 12,
+        weight_tie_layers = True   # set this to True to weight tie all the layers
+    )
+)
+```
+
+If you wish to do something more sophisticated, say 3 layers, with each layer applied recurrently 4 times before moving on to the next, that is possible as well.
+
+```python
+import torch
+from x_transformers import TransformerWrapper, Decoder
+
+model = TransformerWrapper(
+    num_tokens = 20000,
+    max_seq_len = 1024,
+    attn_layers = Decoder(
+        dim = 512,
+        custom_layers = (
+            'a', 'f',       # 3 sets of attention and feedforward
+            'a', 'f',
+            'a', 'f'
+        ),
+        layers_execute_order = (
+            *((0, 1) * 4),  # each block executed 4 times before passing on to the next, but you can probably imagine some more interesting configurations...
+            *((2, 3) * 4),
+            *((4, 5) * 4),
+        )
+    )
+)
+
+x = torch.randint(0, 256, (1, 1024))
+
+model(x) # (1, 1024, 20000)
+```
+
 ### Understanding and Improving Transformer From a Multi-Particle Dynamic System Point of View
 
 <img src="./images/macaron-1.png"></img>
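A quick way to sanity-check the weight tying shown in the README addition above is to compare unique parameter counts between a tied and an untied encoder. This is only a sketch: the `count_params` helper is hypothetical, and it assumes (as ALBERT-style tying implies) that the tied model reuses one attention/feedforward block rather than allocating twelve copies.

```python
import torch
from x_transformers import TransformerWrapper, Encoder

def count_params(module: torch.nn.Module) -> int:
    # hypothetical helper: Module.parameters() yields each tensor once,
    # so weight-tied (shared) parameters are only counted a single time
    return sum(p.numel() for p in module.parameters())

untied = TransformerWrapper(
    num_tokens = 20000,
    max_seq_len = 1024,
    attn_layers = Encoder(dim = 512, depth = 12)
)

tied = TransformerWrapper(
    num_tokens = 20000,
    max_seq_len = 1024,
    attn_layers = Encoder(dim = 512, depth = 12, weight_tie_layers = True)
)

# the tied model should report roughly one layer's worth of transformer weights
print(count_params(untied), count_params(tied))
```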
@@ -1001,7 +1001,7 @@ class AttentionLayers(Module):
     def __init__(
         self,
         dim,
-        depth,
+        depth = None,
         heads = 8,
         causal = False,
         cross_attend = False,
@@ -1054,6 +1054,8 @@ class AttentionLayers(Module):
         attn_kwargs, kwargs = groupby_prefix_and_trim('attn_', kwargs)
         cross_attn_kwargs, kwargs = groupby_prefix_and_trim('cross_attn_', kwargs)
 
+        assert len(kwargs) == 0, f'unrecognized kwargs passed in {kwargs.keys()}'
+
         dim_head = attn_kwargs.get('dim_head', DEFAULT_DIM_HEAD)
 
         self.dim = dim
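The new `assert len(kwargs) == 0` guard means that keyword arguments the attention layers do not recognize now fail loudly instead of being silently dropped. A minimal sketch of that behavior, using `weight_tie` as a deliberately misspelled (hypothetical) version of `weight_tie_layers`:

```python
from x_transformers import TransformerWrapper, Encoder

try:
    model = TransformerWrapper(
        num_tokens = 20000,
        max_seq_len = 1024,
        attn_layers = Encoder(
            dim = 512,
            depth = 6,
            weight_tie = True   # hypothetical typo for `weight_tie_layers`
        )
    )
except AssertionError as err:
    # expected to read something like: unrecognized kwargs passed in dict_keys(['weight_tie'])
    print(err)
```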
@@ -1138,9 +1140,12 @@ class AttentionLayers(Module):
 
         # setup weight tying, which is a special case of `layer_execute_order`
 
+        assert not (exists(layers_execute_order) and exists(custom_layers) and exists(depth)), 'depth should not be passed in if using custom layers and custom layer execution order'
+
         assert not (weight_tie_layers and any([*map(exists, (custom_layers, par_ratio, sandwich_coef))]))
 
         if weight_tie_layers:
+            assert exists(depth), 'depth must be passed in with `weight_tie_layers` = True'
             assert not exists(layers_execute_order)
             layers_execute_order = tuple(range(len(default_block))) * depth
             depth = 1
@@ -1164,6 +1169,7 @@ class AttentionLayers(Module):
             assert sandwich_coef > 0 and sandwich_coef <= depth, 'sandwich coefficient should be less than the depth'
             layer_types = ('a',) * sandwich_coef + default_block * (depth - sandwich_coef) + ('f',) * sandwich_coef
         else:
+            assert exists(depth), '`depth` must be passed in for `Decoder` or `Encoder`'
             layer_types = default_block * depth
 
         self.layer_types = layer_types
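Taken together, these `AttentionLayers.__init__` changes make `depth` optional when `custom_layers` and `layers_execute_order` describe the stack explicitly (as in the new README example), while still requiring it, now with explicit error messages, for plain `Encoder`/`Decoder` use and for `weight_tie_layers = True`. A rough sketch of both sides of that contract, reusing the arguments from the README example:

```python
from x_transformers import TransformerWrapper, Decoder, Encoder

# depth can now be omitted when the layer structure is spelled out explicitly
model = TransformerWrapper(
    num_tokens = 20000,
    max_seq_len = 1024,
    attn_layers = Decoder(
        dim = 512,
        custom_layers = ('a', 'f', 'a', 'f', 'a', 'f'),
        layers_execute_order = (*((0, 1) * 4), *((2, 3) * 4), *((4, 5) * 4))
    )
)

# a plain Encoder without depth should now trip the new assertion
try:
    Encoder(dim = 512)
except AssertionError as err:
    print(err)   # `depth` must be passed in for `Decoder` or `Encoder`

# and weight tying still needs depth to know how many times to repeat the block
try:
    Encoder(dim = 512, weight_tie_layers = True)
except AssertionError as err:
    print(err)   # depth must be passed in with `weight_tie_layers` = True
```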