x-transformers 1.32.15__py3-none-any.whl → 1.34.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- x_transformers/attend.py +21 -3
- x_transformers/x_transformers.py +3 -3
- {x_transformers-1.32.15.dist-info → x_transformers-1.34.1.dist-info}/METADATA +1 -1
- {x_transformers-1.32.15.dist-info → x_transformers-1.34.1.dist-info}/RECORD +7 -7
- {x_transformers-1.32.15.dist-info → x_transformers-1.34.1.dist-info}/WHEEL +1 -1
- {x_transformers-1.32.15.dist-info → x_transformers-1.34.1.dist-info}/LICENSE +0 -0
- {x_transformers-1.32.15.dist-info → x_transformers-1.34.1.dist-info}/top_level.txt +0 -0
x_transformers/attend.py
CHANGED
```diff
@@ -138,9 +138,27 @@ class Attend(Module):
         # flash attention

         self.flash = flash
-        assert not (flash and version.parse(torch.__version__) < version.parse('2.0.0')), 'in order to use flash attention, you must be using pytorch 2.0 or above'

-
+        torch_version = version.parse(torch.__version__)
+        assert not (flash and torch_version < version.parse('2.0.0')), 'in order to use flash attention, you must be using pytorch 2.0 or above'
+
+        # torch 2.3 uses new backend and context manager
+
+        if torch_version >= version.parse('2.3'):
+            from torch.nn.attention import SDPBackend
+
+            str_to_backend = dict(
+                enable_flash = SDPBackend.FLASH_ATTENTION,
+                enable_mem_efficient = SDPBackend.EFFICIENT_ATTENTION,
+                enable_math = SDPBackend.MATH,
+                enable_cudnn = SDPBackend.CUDNN_ATTENTION
+            )
+
+            sdpa_backends = [str_to_backend[enable_str] for enable_str, enable in sdp_kwargs.items() if enable]
+
+            self.sdp_context_manager = partial(torch.nn.attention.sdpa_kernel, sdpa_backends)
+        else:
+            self.sdp_context_manager = partial(torch.backends.cuda.sdp_kernel, **sdp_kwargs)

     def flash_attn(
         self,
```
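The constructor change above gates backend selection on the PyTorch version: on 2.3+ the boolean `sdp_kwargs` flags are translated into `torch.nn.attention.SDPBackend` members and handed to `torch.nn.attention.sdpa_kernel`, otherwise the older `torch.backends.cuda.sdp_kernel` keyword API is kept. Below is a minimal standalone sketch of the same idea; `make_sdp_context_manager` and its default flags are hypothetical illustrations rather than library code, and the defensive `getattr` is an extra precaution because the cuDNN enum member may not be present on every 2.3.x build.

```python
from functools import partial

import torch
from packaging import version


def make_sdp_context_manager(sdp_kwargs = dict(
    enable_flash = True,
    enable_math = True,
    enable_mem_efficient = True
)):
    # hypothetical helper mirroring the version-gated logic in the diff
    torch_version = version.parse(torch.__version__)

    if torch_version >= version.parse('2.3'):
        # torch >= 2.3: pick backends through the SDPBackend enum and sdpa_kernel
        from torch.nn.attention import SDPBackend

        str_to_backend = dict(
            enable_flash = SDPBackend.FLASH_ATTENTION,
            enable_mem_efficient = SDPBackend.EFFICIENT_ATTENTION,
            enable_math = SDPBackend.MATH,
            # cuDNN attention is only exposed on newer builds, so look it up defensively
            enable_cudnn = getattr(SDPBackend, 'CUDNN_ATTENTION', None)
        )

        backends = [
            str_to_backend[name]
            for name, enabled in sdp_kwargs.items()
            if enabled and str_to_backend.get(name) is not None
        ]
        return partial(torch.nn.attention.sdpa_kernel, backends)

    # older torch: boolean keyword flags on the (now deprecated) context manager
    return partial(torch.backends.cuda.sdp_kernel, **sdp_kwargs)
```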
```diff
@@ -231,7 +249,7 @@ class Attend(Module):

         # pytorch 2.0 flash attn: q, k, v, mask, dropout, causal, softmax_scale

-        with
+        with self.sdp_context_manager():
             out = F.scaled_dot_product_attention(
                 q, k, v,
                 attn_mask = mask,
```
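At the call site in `flash_attn`, the inline `with` block is replaced by the prebuilt `self.sdp_context_manager()`, so the attention call itself no longer needs to know which PyTorch API is in use. A rough usage sketch, assuming a CUDA device and reusing the hypothetical helper from the previous sketch:

```python
import torch
import torch.nn.functional as F

sdp_context_manager = make_sdp_context_manager()  # hypothetical helper sketched above

# (batch, heads, seq, dim_head) half-precision tensors, purely illustrative shapes
q = torch.randn(2, 8, 1024, 64, device = 'cuda', dtype = torch.float16)
k = torch.randn(2, 8, 1024, 64, device = 'cuda', dtype = torch.float16)
v = torch.randn(2, 8, 1024, 64, device = 'cuda', dtype = torch.float16)

# restrict which fused kernels scaled_dot_product_attention may dispatch to
with sdp_context_manager():
    out = F.scaled_dot_product_attention(q, k, v, is_causal = True)
```

Both branches produce a context manager, so the `with` block at the call site reads the same regardless of the installed PyTorch version.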
x_transformers/x_transformers.py
CHANGED
```diff
@@ -8,7 +8,7 @@ import torch
 import torch.nn.functional as F
 from torch import nn, einsum, Tensor
 from torch.nn import Module, ModuleList, ModuleDict
-from torch.cuda.amp import autocast
+from torch.amp import autocast

 from functools import partial, wraps
 from collections import namedtuple
```
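The import swap reflects PyTorch consolidating autocast under the device-agnostic `torch.amp` namespace, where the device type is passed as the first argument instead of being implied by the module path. A small illustrative sketch, assuming a CUDA device:

```python
import torch
from torch.amp import autocast

x = torch.randn(4, 4, device = 'cuda')

# new-style autocast: the device type is explicit
with autocast('cuda', dtype = torch.float16):
    y = x @ x  # matmul runs in float16 under autocast

# the older torch.cuda.amp.autocast(enabled = ...) spelling behaves the same
# but is CUDA-specific and deprecated in recent PyTorch releases
```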
```diff
@@ -521,7 +521,7 @@ class RotaryEmbedding(Module):
         t = torch.arange(seq_len, device = device)
         return self.forward(t)

-    @autocast(enabled = False)
+    @autocast('cuda', enabled = False)
     def forward(self, t):
         max_pos = t.max() + 1

```
```diff
@@ -545,7 +545,7 @@ def rotate_half(x):
     x = torch.stack((-x2, x1), dim = -1)
     return rearrange(x, '... d r -> ... (d r)')

-@autocast(enabled = False)
+@autocast('cuda', enabled = False)
 def apply_rotary_pos_emb(t, freqs, scale = 1):
     rot_dim, seq_len, orig_dtype = freqs.shape[-1], t.shape[-2], t.dtype

```
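Both rotary-embedding call sites keep autocast disabled, only switching to the new explicit device argument, so the frequency math stays in float32 even when the surrounding forward pass runs under mixed precision. A hedged sketch of that pattern; `toy_rotary_freqs` is a simplified stand-in rather than the library's implementation, and it assumes a CUDA device:

```python
import torch
from torch.amp import autocast


@autocast('cuda', enabled = False)   # opt out of mixed precision inside this function
def toy_rotary_freqs(seq_len, dim, theta = 10000, device = 'cuda'):
    # standard rotary frequency construction, kept in float32 for numerical stability
    inv_freq = 1. / (theta ** (torch.arange(0, dim, 2, device = device).float() / dim))
    t = torch.arange(seq_len, device = device).float()
    return torch.einsum('i, j -> i j', t, inv_freq)


with autocast('cuda', dtype = torch.float16):
    freqs = toy_rotary_freqs(1024, 64)
    assert freqs.dtype == torch.float32   # unaffected by the surrounding autocast region
```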
{x_transformers-1.32.15.dist-info → x_transformers-1.34.1.dist-info}/RECORD
CHANGED
```diff
@@ -1,15 +1,15 @@
 x_transformers/__init__.py,sha256=-MkQrSc37cTVDX7AOykxunYnqVtFlQ7lb0Cse5dsGWU,793
-x_transformers/attend.py,sha256=
+x_transformers/attend.py,sha256=7q996VGYHGIsc0FQnN8WNiwHn3xny3i1biRwx7yW5vg,12090
 x_transformers/autoregressive_wrapper.py,sha256=ka_iiej5lEBOcbutWQgGrFVMDilz2PFWzLhBh5_tmmg,10366
 x_transformers/continuous.py,sha256=cIVEdhfei258__ziV7kQBrJMxCel54bExBTDrO9rfCI,6450
 x_transformers/dpo.py,sha256=LjvWgCkqTl-UuehrzQ8nkX5guLr4whYwsmm7SKSwdls,3450
 x_transformers/multi_input.py,sha256=tCh-fTJDj2ib4SMGtsa-AM8MxKzJAQSwqAXOu3HU2mg,9252
 x_transformers/nonautoregressive_wrapper.py,sha256=ys_p8obc7lTeeodCqvkRKxOXQ1C9T3j5Jwr-JbVgnXk,10432
-x_transformers/x_transformers.py,sha256=
+x_transformers/x_transformers.py,sha256=hs9j-lHukVGYLlpbBhn4CZhSzI7s0x6bYtEhCc33ftE,78680
 x_transformers/xl_autoregressive_wrapper.py,sha256=DCx4n0_c1tFai4nOqaWVnqx2p9eutsZsDMiMP1ckxNU,4117
 x_transformers/xval.py,sha256=QE1ltYZTR_eGgIHPP2BrMWVWVLqMW-OpDZh87BSmQEg,8563
-x_transformers-1.
-x_transformers-1.
-x_transformers-1.
-x_transformers-1.
-x_transformers-1.
+x_transformers-1.34.1.dist-info/LICENSE,sha256=As9u198X-U-vph5noInuUfqsAG2zX_oXPHDmdjwlPPY,1066
+x_transformers-1.34.1.dist-info/METADATA,sha256=jSsnjS0ptrIpH-nc9h7fNMjzAvpmQGOkXYqTSWyUvGQ,661
+x_transformers-1.34.1.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
+x_transformers-1.34.1.dist-info/top_level.txt,sha256=hO6KGpFuGucRNEtRfme4A_rGcM53AKwGP7RVlRIxS5Q,15
+x_transformers-1.34.1.dist-info/RECORD,,
```
{x_transformers-1.32.15.dist-info → x_transformers-1.34.1.dist-info}/LICENSE
File without changes

{x_transformers-1.32.15.dist-info → x_transformers-1.34.1.dist-info}/top_level.txt
File without changes