x-transformers 2.7.0.tar.gz → 2.7.1.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {x_transformers-2.7.0 → x_transformers-2.7.1}/PKG-INFO +1 -1
- {x_transformers-2.7.0 → x_transformers-2.7.1}/pyproject.toml +1 -1
- {x_transformers-2.7.0 → x_transformers-2.7.1}/x_transformers/continuous.py +10 -1
- {x_transformers-2.7.0 → x_transformers-2.7.1}/.github/FUNDING.yml +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/.github/workflows/python-publish.yml +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/.github/workflows/python-test.yaml +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/.gitignore +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/LICENSE +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/README.md +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/data/README.md +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/data/enwik8.gz +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/images/all-attention.png +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/images/attention-on-attention.png +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/images/cosine-sim-attention.png +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/images/deepnorm.png +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/images/dynamic-pos-bias-linear.png +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/images/dynamic-pos-bias-log.png +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/images/dynamic-pos-bias-sinusoidal.png +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/images/dynamic-pos-bias.png +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/images/enhanced-recurrence.png +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/images/fcm.png +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/images/ffglu.png +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/images/flash-attention.png +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/images/gate_values.png +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/images/gating.png +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/images/length-extrapolation-scale.png +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/images/macaron-1.png +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/images/macaron-2.png +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/images/memory-transformer.png +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/images/normformer.png +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/images/pia.png +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/images/qknorm-analysis.png +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/images/resi_dual.png +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/images/residual_attn.png +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/images/rezero.png +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/images/rotary.png +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/images/sandwich-2.png +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/images/sandwich.png +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/images/sandwich_norm.png +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/images/scalenorm.png +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/images/talking-heads.png +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/images/topk-attention.png +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/images/xval.png +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/tests/test_x_transformers.py +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/train_belief_state.py +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/train_copy.py +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/train_entropy_tokenizer.py +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/train_enwik8.py +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/train_length_extrapolate.py +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/train_parity.py +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/x_transformers/__init__.py +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/x_transformers/attend.py +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/x_transformers/autoregressive_wrapper.py +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/x_transformers/belief_state_wrapper.py +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/x_transformers/dpo.py +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/x_transformers/entropy_based_tokenizer.py +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/x_transformers/multi_input.py +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/x_transformers/neo_mlp.py +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/x_transformers/nonautoregressive_wrapper.py +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/x_transformers/up_wrapper.py +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/x_transformers/x_transformers.py +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/x_transformers/xl_autoregressive_wrapper.py +0 -0
- {x_transformers-2.7.0 → x_transformers-2.7.1}/x_transformers/xval.py +0 -0
{x_transformers-2.7.0 → x_transformers-2.7.1}/x_transformers/continuous.py

@@ -241,6 +241,7 @@ class ContinuousAutoregressiveWrapper(Module):
         self,
         net: ContinuousTransformerWrapper,
         loss_fn: Module | None = None,
+        use_l1_loss = False,
         equal_loss_weight_batch = False, # setting this to True, if the mask is passed in and sequences are variable in length, each sequence will be weighted the same (as opposed to each token)
     ):
         super().__init__()

@@ -250,7 +251,15 @@ class ContinuousAutoregressiveWrapper(Module):
         probabilistic = net.probabilistic
         self.probabilistic = probabilistic

-
+        # default loss function
+
+        if not exists(loss_fn):
+            if probabilistic:
+                loss_fn = GaussianNLL()
+            elif use_l1_loss:
+                loss_fn = nn.L1Loss(reduction = 'none')
+            else:
+                loss_fn = nn.MSELoss(reduction = 'none')

         self.loss_fn = loss_fn
         self.equal_loss_weight_batch = equal_loss_weight_batch