x-transformers 2.3.17__tar.gz → 2.3.19__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. {x_transformers-2.3.17 → x_transformers-2.3.19}/PKG-INFO +1 -1
  2. {x_transformers-2.3.17 → x_transformers-2.3.19}/pyproject.toml +1 -1
  3. {x_transformers-2.3.17 → x_transformers-2.3.19}/tests/test_x_transformers.py +33 -3
  4. {x_transformers-2.3.17 → x_transformers-2.3.19}/x_transformers/attend.py +1 -0
  5. {x_transformers-2.3.17 → x_transformers-2.3.19}/x_transformers/continuous.py +17 -6
  6. {x_transformers-2.3.17 → x_transformers-2.3.19}/x_transformers/x_transformers.py +16 -3
  7. {x_transformers-2.3.17 → x_transformers-2.3.19}/.github/FUNDING.yml +0 -0
  8. {x_transformers-2.3.17 → x_transformers-2.3.19}/.github/workflows/python-publish.yml +0 -0
  9. {x_transformers-2.3.17 → x_transformers-2.3.19}/.github/workflows/python-test.yaml +0 -0
  10. {x_transformers-2.3.17 → x_transformers-2.3.19}/.gitignore +0 -0
  11. {x_transformers-2.3.17 → x_transformers-2.3.19}/LICENSE +0 -0
  12. {x_transformers-2.3.17 → x_transformers-2.3.19}/README.md +0 -0
  13. {x_transformers-2.3.17 → x_transformers-2.3.19}/data/README.md +0 -0
  14. {x_transformers-2.3.17 → x_transformers-2.3.19}/data/enwik8.gz +0 -0
  15. {x_transformers-2.3.17 → x_transformers-2.3.19}/images/all-attention.png +0 -0
  16. {x_transformers-2.3.17 → x_transformers-2.3.19}/images/attention-on-attention.png +0 -0
  17. {x_transformers-2.3.17 → x_transformers-2.3.19}/images/cosine-sim-attention.png +0 -0
  18. {x_transformers-2.3.17 → x_transformers-2.3.19}/images/deepnorm.png +0 -0
  19. {x_transformers-2.3.17 → x_transformers-2.3.19}/images/dynamic-pos-bias-linear.png +0 -0
  20. {x_transformers-2.3.17 → x_transformers-2.3.19}/images/dynamic-pos-bias-log.png +0 -0
  21. {x_transformers-2.3.17 → x_transformers-2.3.19}/images/dynamic-pos-bias-sinusoidal.png +0 -0
  22. {x_transformers-2.3.17 → x_transformers-2.3.19}/images/dynamic-pos-bias.png +0 -0
  23. {x_transformers-2.3.17 → x_transformers-2.3.19}/images/enhanced-recurrence.png +0 -0
  24. {x_transformers-2.3.17 → x_transformers-2.3.19}/images/fcm.png +0 -0
  25. {x_transformers-2.3.17 → x_transformers-2.3.19}/images/ffglu.png +0 -0
  26. {x_transformers-2.3.17 → x_transformers-2.3.19}/images/flash-attention.png +0 -0
  27. {x_transformers-2.3.17 → x_transformers-2.3.19}/images/gate_values.png +0 -0
  28. {x_transformers-2.3.17 → x_transformers-2.3.19}/images/gating.png +0 -0
  29. {x_transformers-2.3.17 → x_transformers-2.3.19}/images/length-extrapolation-scale.png +0 -0
  30. {x_transformers-2.3.17 → x_transformers-2.3.19}/images/macaron-1.png +0 -0
  31. {x_transformers-2.3.17 → x_transformers-2.3.19}/images/macaron-2.png +0 -0
  32. {x_transformers-2.3.17 → x_transformers-2.3.19}/images/memory-transformer.png +0 -0
  33. {x_transformers-2.3.17 → x_transformers-2.3.19}/images/normformer.png +0 -0
  34. {x_transformers-2.3.17 → x_transformers-2.3.19}/images/pia.png +0 -0
  35. {x_transformers-2.3.17 → x_transformers-2.3.19}/images/qknorm-analysis.png +0 -0
  36. {x_transformers-2.3.17 → x_transformers-2.3.19}/images/resi_dual.png +0 -0
  37. {x_transformers-2.3.17 → x_transformers-2.3.19}/images/residual_attn.png +0 -0
  38. {x_transformers-2.3.17 → x_transformers-2.3.19}/images/rezero.png +0 -0
  39. {x_transformers-2.3.17 → x_transformers-2.3.19}/images/rotary.png +0 -0
  40. {x_transformers-2.3.17 → x_transformers-2.3.19}/images/sandwich-2.png +0 -0
  41. {x_transformers-2.3.17 → x_transformers-2.3.19}/images/sandwich.png +0 -0
  42. {x_transformers-2.3.17 → x_transformers-2.3.19}/images/sandwich_norm.png +0 -0
  43. {x_transformers-2.3.17 → x_transformers-2.3.19}/images/scalenorm.png +0 -0
  44. {x_transformers-2.3.17 → x_transformers-2.3.19}/images/talking-heads.png +0 -0
  45. {x_transformers-2.3.17 → x_transformers-2.3.19}/images/topk-attention.png +0 -0
  46. {x_transformers-2.3.17 → x_transformers-2.3.19}/images/xval.png +0 -0
  47. {x_transformers-2.3.17 → x_transformers-2.3.19}/train_belief_state.py +0 -0
  48. {x_transformers-2.3.17 → x_transformers-2.3.19}/train_copy.py +0 -0
  49. {x_transformers-2.3.17 → x_transformers-2.3.19}/train_entropy_tokenizer.py +0 -0
  50. {x_transformers-2.3.17 → x_transformers-2.3.19}/train_enwik8.py +0 -0
  51. {x_transformers-2.3.17 → x_transformers-2.3.19}/train_length_extrapolate.py +0 -0
  52. {x_transformers-2.3.17 → x_transformers-2.3.19}/train_parity.py +0 -0
  53. {x_transformers-2.3.17 → x_transformers-2.3.19}/x_transformers/__init__.py +0 -0
  54. {x_transformers-2.3.17 → x_transformers-2.3.19}/x_transformers/autoregressive_wrapper.py +0 -0
  55. {x_transformers-2.3.17 → x_transformers-2.3.19}/x_transformers/belief_state_wrapper.py +0 -0
  56. {x_transformers-2.3.17 → x_transformers-2.3.19}/x_transformers/dpo.py +0 -0
  57. {x_transformers-2.3.17 → x_transformers-2.3.19}/x_transformers/entropy_based_tokenizer.py +0 -0
  58. {x_transformers-2.3.17 → x_transformers-2.3.19}/x_transformers/multi_input.py +0 -0
  59. {x_transformers-2.3.17 → x_transformers-2.3.19}/x_transformers/neo_mlp.py +0 -0
  60. {x_transformers-2.3.17 → x_transformers-2.3.19}/x_transformers/nonautoregressive_wrapper.py +0 -0
  61. {x_transformers-2.3.17 → x_transformers-2.3.19}/x_transformers/xl_autoregressive_wrapper.py +0 -0
  62. {x_transformers-2.3.17 → x_transformers-2.3.19}/x_transformers/xval.py +0 -0

{x_transformers-2.3.17 → x_transformers-2.3.19}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: x-transformers
-Version: 2.3.17
+Version: 2.3.19
 Summary: X-Transformers
 Project-URL: Homepage, https://pypi.org/project/x-transformers/
 Project-URL: Repository, https://github.com/lucidrains/x-transformers

{x_transformers-2.3.17 → x_transformers-2.3.19}/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "x-transformers"
-version = "2.3.17"
+version = "2.3.19"
 description = "X-Transformers"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }

{x_transformers-2.3.17 → x_transformers-2.3.19}/tests/test_x_transformers.py
@@ -651,6 +651,39 @@ def test_hybrid(hybrid_axial_dim):
     mask = torch.randint(0, 2, (2, 1024)).bool()
     embed = enc(x, mask = mask)
 
+def test_hybrid_cache():
+    from torch.nn import GRU
+
+    model = TransformerWrapper(
+        num_tokens = 20000,
+        max_seq_len = 1024,
+        attn_layers = Decoder(
+            dim = 128,
+            depth = 6,
+            heads = 8,
+            attn_dim_head = 64,
+            attn_hybrid_fold_axial_dim = 1,
+            attn_hybrid_module = GRU(128, 64 * 8, batch_first = True)
+        )
+    )
+
+    x = torch.randint(0, 20000, (2, 4))
+
+    # parallel
+
+    out_parallel = model(x)
+
+    # sequential
+
+    x_without_last = x[:, :-1]
+
+    out1, cache = model(x_without_last, return_intermediates = True)
+    out2 = model(x, cache = cache)
+
+    out_seq = torch.cat((out1, out2), dim = 1)
+
+    assert torch.allclose(out_parallel, out_seq, atol = 1e-5)
+
 def test_multi_latent_attention():
     model = TransformerWrapper(
         num_tokens = 20000,
@@ -876,9 +909,6 @@ def test_continuous(
     cache_kv,
     rollout_steps
 ):
-    if probabilistic and rollout_steps > 1:
-        pytest.skip()
-
     from x_transformers import (
         ContinuousTransformerWrapper,
         Decoder,
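
The skip removed above existed because probabilistic models could not yet do multi-step rollouts; the continuous.py changes below add that by sampling each next continuous token from the predicted (mean, variance) pair and feeding it back in. A rough, self-contained sketch of that feedback pattern in plain PyTorch follows; toy_net is a hypothetical stand-in for a probabilistic continuous model, not the package's API.

import torch

def toy_net(seq):
    # hypothetical stand-in: predicts a (mean, variance) pair per position
    return seq * 0.5, torch.ones_like(seq)

seq = torch.randn(1, 3, 16)        # prompt of 3 continuous tokens
preds = []

for _ in range(4):                 # rollout steps
    mean, variance = toy_net(seq)
    last_mean, last_var = mean[:, -1:], variance[:, -1:]

    # sample the next token from the predicted distribution, then feed it back
    std = last_var.clamp(min = 1e-5).sqrt()
    nxt = torch.normal(last_mean, std)

    preds.append(nxt)
    seq = torch.cat((seq, nxt), dim = 1)

rollout = torch.cat(preds, dim = 1)    # (1, 4, 16)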

{x_transformers-2.3.17 → x_transformers-2.3.19}/x_transformers/attend.py
@@ -25,6 +25,7 @@ class Intermediates:
     values: Tensor | None = None
     cached_kv: Tuple[Tensor, Tensor] | None = None
     layer_type: str | None = None
+    hybrid_hidden: Tensor | None = None
 
     def to_tuple(self):
         return (self.qk_similarities, self.pre_softmax_attn, self.post_softmax_attn)

{x_transformers-2.3.17 → x_transformers-2.3.19}/x_transformers/continuous.py
@@ -32,6 +32,15 @@ def default(val, d):
         return val
     return d() if not isinstance(d, Module) and callable(d) else d
 
+def sample_from_mean_variance(
+    mean,
+    variance,
+    eps = 1e-5,
+    temperature = 1.
+):
+    std = variance.clamp(min = eps).sqrt()
+    return torch.normal(mean, std * temperature)
+
 def masked_mean(t, mask):
     t = einx.where('b n, b n d, -> b n d', mask, t, 0.)
 
@@ -274,9 +283,7 @@ class ContinuousAutoregressiveWrapper(Module):
 
             if self.probabilistic:
                 mean, var = last_output
-                stddev = var.clamp(min = 1e-5).sqrt()
-
-                last_output = torch.normal(mean, stddev * temperature)
+                last_output = sample_from_mean_variance(mean, var, temperature = temperature)
 
             out = cat((out, last_output), dim = -2)
 
@@ -298,7 +305,6 @@ class ContinuousAutoregressiveWrapper(Module):
         **kwargs
     ):
         assert rollout_steps > 1
-        assert not self.probabilistic, 'probabilistic not supported yet'
 
         steps = rollout_steps
 
@@ -369,8 +375,13 @@ class ContinuousAutoregressiveWrapper(Module):
                 **kwargs
             )
 
-            last_pred = out[:, -1:]
-            inp = last_pred
+            last_pred = out[..., -1:, :]
+
+            if self.probabilistic:
+                mean, var = last_pred
+                inp = sample_from_mean_variance(mean, var)
+            else:
+                inp = last_pred
 
             preds.append(last_pred)
 

{x_transformers-2.3.17 → x_transformers-2.3.19}/x_transformers/x_transformers.py
@@ -1079,10 +1079,11 @@ class FoldAxially(Module):
     def forward(
         self,
         x,
+        *args,
         **kwargs
     ):
         if self.axial_dim == 1:
-            return self.fn(x, **kwargs)
+            return self.fn(x, *args, **kwargs)
 
         seq_len, axial_dim = x.shape[1], self.axial_dim
 
@@ -1091,7 +1092,7 @@ class FoldAxially(Module):
 
         x = rearrange(x, 'b (n axial_dim) ... -> (b axial_dim) n ...', axial_dim = axial_dim)
 
-        out = self.fn(x, **kwargs)
+        out = self.fn(x, *args, **kwargs)
 
         (out, *rest_out), tree_spec = tree_flatten(out)
 
@@ -1857,9 +1858,17 @@ class Attention(Module):
             if not self.causal and exists(self.hybrid_mask_kwarg):
                 hybrid_forward_kwargs = {self.hybrid_mask_kwarg: mask}
 
+            # handle maybe hybrid cache
+
+            hybrid_forward_args = ()
+
+            if exists(cache) and exists(cache.hybrid_hidden):
+                hybrid_hiddens = cache.hybrid_hidden
+                hybrid_forward_args = (hybrid_hiddens,)
+
             # hybrid forward
 
-            hybrid_outputs = self.hybrid_module(x, **hybrid_forward_kwargs)
+            hybrid_outputs = self.hybrid_module(x, *hybrid_forward_args, **hybrid_forward_kwargs)
 
             # handle hybrid out
 
@@ -1870,6 +1879,10 @@ class Attention(Module):
             if hybrid_out.ndim == 3:
                 hybrid_out = rearrange(hybrid_out, 'b n (h d) -> b h n d', h = h)
 
+            if len(rest_hybrid_outs) > 0:
+                hybrid_hidden = first(rest_hybrid_outs)
+                intermediates.hybrid_hidden = hybrid_hidden
+
             out_norm, hybrid_out_norm = self.hybrid_norms
 
             out = out_norm(out)
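
The hybrid cache plumbing above (the hybrid_hidden field on Intermediates, the *args passthrough in FoldAxially, and the cache read/write in Attention) relies on a recurrent hybrid module reproducing its full-sequence output when resumed from its stored hidden state, which is what test_hybrid_cache asserts. A quick standalone check of that property with torch.nn.GRU; the sizes are arbitrary and only for illustration.

import torch
from torch.nn import GRU

gru = GRU(128, 512, batch_first = True)
x = torch.randn(2, 4, 128)

# whole sequence in one parallel pass
out_parallel, _ = gru(x)

# same sequence split in two, carrying the hidden state across the split
out_prefix, hidden = gru(x[:, :-1])
out_last, _ = gru(x[:, -1:], hidden)

out_seq = torch.cat((out_prefix, out_last), dim = 1)
assert torch.allclose(out_parallel, out_seq, atol = 1e-5)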