x-transformers 1.32.14__tar.gz → 1.34.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {x_transformers-1.32.14/x_transformers.egg-info → x_transformers-1.34.0}/PKG-INFO +1 -1
- {x_transformers-1.32.14 → x_transformers-1.34.0}/README.md +11 -0
- {x_transformers-1.32.14 → x_transformers-1.34.0}/setup.py +1 -1
- {x_transformers-1.32.14 → x_transformers-1.34.0}/x_transformers/autoregressive_wrapper.py +31 -3
- {x_transformers-1.32.14 → x_transformers-1.34.0}/x_transformers/x_transformers.py +3 -3
- {x_transformers-1.32.14 → x_transformers-1.34.0/x_transformers.egg-info}/PKG-INFO +1 -1
- {x_transformers-1.32.14 → x_transformers-1.34.0}/LICENSE +0 -0
- {x_transformers-1.32.14 → x_transformers-1.34.0}/setup.cfg +0 -0
- {x_transformers-1.32.14 → x_transformers-1.34.0}/tests/test_x_transformers.py +0 -0
- {x_transformers-1.32.14 → x_transformers-1.34.0}/x_transformers/__init__.py +0 -0
- {x_transformers-1.32.14 → x_transformers-1.34.0}/x_transformers/attend.py +0 -0
- {x_transformers-1.32.14 → x_transformers-1.34.0}/x_transformers/continuous.py +0 -0
- {x_transformers-1.32.14 → x_transformers-1.34.0}/x_transformers/dpo.py +0 -0
- {x_transformers-1.32.14 → x_transformers-1.34.0}/x_transformers/multi_input.py +0 -0
- {x_transformers-1.32.14 → x_transformers-1.34.0}/x_transformers/nonautoregressive_wrapper.py +0 -0
- {x_transformers-1.32.14 → x_transformers-1.34.0}/x_transformers/xl_autoregressive_wrapper.py +0 -0
- {x_transformers-1.32.14 → x_transformers-1.34.0}/x_transformers/xval.py +0 -0
- {x_transformers-1.32.14 → x_transformers-1.34.0}/x_transformers.egg-info/SOURCES.txt +0 -0
- {x_transformers-1.32.14 → x_transformers-1.34.0}/x_transformers.egg-info/dependency_links.txt +0 -0
- {x_transformers-1.32.14 → x_transformers-1.34.0}/x_transformers.egg-info/requires.txt +0 -0
- {x_transformers-1.32.14 → x_transformers-1.34.0}/x_transformers.egg-info/top_level.txt +0 -0
{x_transformers-1.32.14 → x_transformers-1.34.0}/README.md

@@ -2187,4 +2187,15 @@ ids_out, num_out, is_number_mask = model.generate(start_ids, start_nums, 17)
 }
 ```
 
+```bibtex
+@article{Nguyen2024MinPS,
+    title   = {Min P Sampling: Balancing Creativity and Coherence at High Temperature},
+    author  = {Minh Nguyen and Andrew Baker and Andreas Kirsch and Clement Neo},
+    journal = {ArXiv},
+    year    = {2024},
+    volume  = {abs/2407.01082},
+    url     = {https://api.semanticscholar.org/CorpusID:270870613}
+}
+```
+
 *solve intelligence... then use that to solve everything else.* - Demis Hassabis
{x_transformers-1.32.14 → x_transformers-1.34.0}/x_transformers/autoregressive_wrapper.py

@@ -19,6 +19,9 @@ def default(val, d):
 def identity(t, *args, **kwargs):
     return t
 
+def join(arr, delimiter = ', '):
+    return delimiter.join(arr)
+
 def cast_tuple(t, length = 1):
     return t if isinstance(t, tuple) else (t,) * length
 
@@ -79,11 +82,29 @@ def top_k(logits, frac_num_tokens = 0.1, k = None):
 # top_a
 
 def top_a(logits, min_p_pow = 2.0, min_p_ratio = 0.02):
-    probs =
-    max_probs =
+    probs = logits.softmax(dim = -1)
+    max_probs = probs.amax(dim = -1, keepdim = True)
     limit = torch.pow(max_probs, min_p_pow) * min_p_ratio
     return torch.where(probs < limit, float('-inf'), logits)
 
+# min_p
+# https://arxiv.org/abs/2407.01082
+
+def min_p(logits, min_p = 0.1):
+    probs = logits.softmax(dim = -1)
+    max_probs = probs.amax(dim = -1, keepdim = True)
+    limit = min_p * max_probs
+    return torch.where(probs < limit, float('-inf'), logits)
+
+# filter logits functions dict[str -> Callable]
+
+FILTER_LOGITS_FN = dict(
+    top_p = top_p,
+    top_k = top_k,
+    top_a = top_a,
+    min_p = min_p
+)
+
 # contrastive decoding function
 
 def contrastive_decode_fn(
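For context, the newly added `min_p` filter keeps only tokens whose probability is at least `min_p` times the probability of the most likely token, so the cutoff adapts to how confident the model is. A minimal sketch of calling it directly on raw logits (the tensor shapes are illustrative, not taken from the package):

```python
import torch
from x_transformers.autoregressive_wrapper import min_p

logits = torch.randn(2, 256)           # (batch, vocab size) scores from some model
filtered = min_p(logits, min_p = 0.1)  # anything below 0.1 * max probability becomes -inf
probs = filtered.softmax(dim = -1)     # renormalize over the surviving tokens
sample = torch.multinomial(probs, 1)   # draw one token id per batch row
```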
@@ -136,7 +157,7 @@ class AutoregressiveWrapper(Module):
         eos_token = None,
         temperature = 1.,
         prompt_lens: Tensor | None = None,
-        filter_logits_fn: Callable = top_k,
+        filter_logits_fn: str | Callable = top_k,
         restrict_to_max_seq_len = True,
         amateur_model: Module | Tuple[Module] | None = None,
         filter_kwargs: dict = dict(),
@@ -153,6 +174,13 @@ class AutoregressiveWrapper(Module):
 
         b, t = prompts.shape
 
+        # handle filter logits fn given as string
+
+        if isinstance(filter_logits_fn, str):
+            assert filter_logits_fn in FILTER_LOGITS_FN, f"only {join(FILTER_LOGITS_FN.keys())} are available"
+
+            filter_logits_fn = FILTER_LOGITS_FN[filter_logits_fn]
+
         # handle variable lengthed prompts (prefixes)
 
         seq_start_pos = None
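Combined with the `str | Callable` signature change above, `generate` can now look the filter up by name. A hedged usage sketch mirroring the README-style examples (the model dimensions and prompt are made up for illustration):

```python
import torch
from x_transformers import TransformerWrapper, Decoder, AutoregressiveWrapper

model = AutoregressiveWrapper(TransformerWrapper(
    num_tokens = 256,
    max_seq_len = 1024,
    attn_layers = Decoder(dim = 512, depth = 6, heads = 8)
))

prompts = torch.randint(0, 256, (1, 4))

# filter_logits_fn accepts any key of FILTER_LOGITS_FN: 'top_p', 'top_k', 'top_a', 'min_p'
sampled = model.generate(
    prompts,
    100,
    filter_logits_fn = 'min_p',
    filter_kwargs = dict(min_p = 0.1)
)
```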
{x_transformers-1.32.14 → x_transformers-1.34.0}/x_transformers/x_transformers.py

@@ -8,7 +8,7 @@ import torch
 import torch.nn.functional as F
 from torch import nn, einsum, Tensor
 from torch.nn import Module, ModuleList, ModuleDict
-from torch.cuda.amp import autocast
+from torch.amp import autocast
 
 from functools import partial, wraps
 from collections import namedtuple
@@ -521,7 +521,7 @@ class RotaryEmbedding(Module):
         t = torch.arange(seq_len, device = device)
         return self.forward(t)
 
-    @autocast(enabled = False)
+    @autocast('cuda', enabled = False)
     def forward(self, t):
         max_pos = t.max() + 1
 
@@ -545,7 +545,7 @@ def rotate_half(x):
     x = torch.stack((-x2, x1), dim = -1)
     return rearrange(x, '... d r -> ... (d r)')
 
-@autocast(enabled = False)
+@autocast('cuda', enabled = False)
 def apply_rotary_pos_emb(t, freqs, scale = 1):
     rot_dim, seq_len, orig_dtype = freqs.shape[-1], t.shape[-2], t.dtype
 
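The autocast edits above track PyTorch's move from the deprecated `torch.cuda.amp.autocast` to the device-aware `torch.amp.autocast`, which takes the device type as its first argument. A minimal sketch of the new decorator form, with a made-up helper function (not from the package):

```python
import torch
from torch.amp import autocast

# torch.amp.autocast takes the device type explicitly; the old
# torch.cuda.amp.autocast form it replaces implied 'cuda'
@autocast('cuda', enabled = False)
def rotate_pair(t):
    # stays in full precision even inside an outer mixed-precision region
    return torch.stack((-t, t), dim = -1)
```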