x-transformers 2.5.1__tar.gz → 2.5.2__tar.gz
This diff shows the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
- {x_transformers-2.5.1 → x_transformers-2.5.2}/PKG-INFO +1 -1
- {x_transformers-2.5.1 → x_transformers-2.5.2}/pyproject.toml +1 -1
- {x_transformers-2.5.1 → x_transformers-2.5.2}/x_transformers/x_transformers.py +5 -1
- {x_transformers-2.5.1 → x_transformers-2.5.2}/.github/FUNDING.yml +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/.github/workflows/python-publish.yml +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/.github/workflows/python-test.yaml +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/.gitignore +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/LICENSE +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/README.md +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/data/README.md +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/data/enwik8.gz +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/images/all-attention.png +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/images/attention-on-attention.png +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/images/cosine-sim-attention.png +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/images/deepnorm.png +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/images/dynamic-pos-bias-linear.png +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/images/dynamic-pos-bias-log.png +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/images/dynamic-pos-bias-sinusoidal.png +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/images/dynamic-pos-bias.png +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/images/enhanced-recurrence.png +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/images/fcm.png +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/images/ffglu.png +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/images/flash-attention.png +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/images/gate_values.png +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/images/gating.png +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/images/length-extrapolation-scale.png +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/images/macaron-1.png +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/images/macaron-2.png +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/images/memory-transformer.png +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/images/normformer.png +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/images/pia.png +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/images/qknorm-analysis.png +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/images/resi_dual.png +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/images/residual_attn.png +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/images/rezero.png +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/images/rotary.png +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/images/sandwich-2.png +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/images/sandwich.png +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/images/sandwich_norm.png +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/images/scalenorm.png +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/images/talking-heads.png +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/images/topk-attention.png +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/images/xval.png +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/tests/test_x_transformers.py +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/train_belief_state.py +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/train_copy.py +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/train_entropy_tokenizer.py +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/train_enwik8.py +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/train_length_extrapolate.py +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/train_parity.py +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/x_transformers/__init__.py +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/x_transformers/attend.py +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/x_transformers/autoregressive_wrapper.py +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/x_transformers/belief_state_wrapper.py +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/x_transformers/continuous.py +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/x_transformers/dpo.py +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/x_transformers/entropy_based_tokenizer.py +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/x_transformers/multi_input.py +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/x_transformers/neo_mlp.py +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/x_transformers/nonautoregressive_wrapper.py +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/x_transformers/up_wrapper.py +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/x_transformers/xl_autoregressive_wrapper.py +0 -0
- {x_transformers-2.5.1 → x_transformers-2.5.2}/x_transformers/xval.py +0 -0
x_transformers/x_transformers.py

@@ -2763,6 +2763,7 @@ class AttentionPool(Module):
         depth = 1,
         heads = 8,
         dim_head = 64,
+        use_transformer_blocks = None,
         squeeze_output = None,
         attn_kwargs: dict = dict()
     ):
@@ -2772,9 +2773,12 @@ class AttentionPool(Module):
         squeeze_output = default(squeeze_output, False)
         assert not (squeeze_output and num_pooled_tokens > 1)

+        use_transformer_blocks = default(use_transformer_blocks, depth > 1)
+        assert use_transformer_blocks or depth == 1
+
         self.queries = nn.Parameter(torch.randn(num_pooled_tokens, dim) * 1e-2)

-        if depth > 1:
+        if use_transformer_blocks:
             assert not add_residual, 'residual already in effect when doing a full cross attention based transformer for pooling'
             attn_kwargs = {f'attn_{k}': v for k, v in attn_kwargs.items()}
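For context, the change above adds a use_transformer_blocks flag to AttentionPool: when left at None it defaults to depth > 1, so prior behavior is preserved, and the new assert forbids depth > 1 without the transformer-block path. Below is a minimal usage sketch of the new argument; the import path, the (batch, seq, dim) input convention, the illustrative attn_kwargs entry, and the output shapes are assumptions not confirmed by this diff.

# Usage sketch for the new use_transformer_blocks argument (assumptions noted inline).
import torch
from x_transformers.x_transformers import AttentionPool  # assumed import path

# depth = 1 with use_transformer_blocks = None -> defaults to (depth > 1), i.e. False,
# keeping the pre-2.5.2 single cross-attention pooling path.
pool = AttentionPool(
    dim = 512,
    num_pooled_tokens = 1,
    depth = 1,
    heads = 8,
    dim_head = 64,
    squeeze_output = True
)

# depth > 1 now requires use_transformer_blocks (per the new assert); attn_kwargs are
# re-keyed with an attn_ prefix on this path. The dropout kwarg is illustrative only.
deep_pool = AttentionPool(
    dim = 512,
    num_pooled_tokens = 4,
    depth = 2,
    use_transformer_blocks = True,
    attn_kwargs = dict(dropout = 0.1)
)

tokens = torch.randn(2, 128, 512)  # assumed (batch, seq, dim) input convention
pooled = pool(tokens)              # expected (2, 512) here, since squeeze_output = True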