triton-windows 3.5.0.post21__cp314-cp314-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of triton-windows might be problematic. Click here for more details.

Files changed (217)
  1. triton/_C/libtriton.pyd +0 -0
  2. triton/__init__.py +82 -0
  3. triton/_filecheck.py +97 -0
  4. triton/_internal_testing.py +255 -0
  5. triton/_utils.py +126 -0
  6. triton/backends/__init__.py +47 -0
  7. triton/backends/amd/__init__.py +0 -0
  8. triton/backends/amd/compiler.py +461 -0
  9. triton/backends/amd/driver.c +283 -0
  10. triton/backends/amd/driver.py +724 -0
  11. triton/backends/amd/lib/asanrtl.bc +0 -0
  12. triton/backends/amd/lib/ockl.bc +0 -0
  13. triton/backends/amd/lib/ocml.bc +0 -0
  14. triton/backends/compiler.py +90 -0
  15. triton/backends/driver.py +66 -0
  16. triton/backends/nvidia/__init__.py +0 -0
  17. triton/backends/nvidia/bin/ptxas.exe +0 -0
  18. triton/backends/nvidia/compiler.py +533 -0
  19. triton/backends/nvidia/driver.c +517 -0
  20. triton/backends/nvidia/driver.py +799 -0
  21. triton/backends/nvidia/include/cuda.h +26280 -0
  22. triton/backends/nvidia/lib/libdevice.10.bc +0 -0
  23. triton/backends/nvidia/lib/x64/cuda.lib +0 -0
  24. triton/compiler/__init__.py +7 -0
  25. triton/compiler/code_generator.py +1614 -0
  26. triton/compiler/compiler.py +509 -0
  27. triton/compiler/errors.py +51 -0
  28. triton/compiler/make_launcher.py +0 -0
  29. triton/errors.py +5 -0
  30. triton/experimental/__init__.py +0 -0
  31. triton/experimental/gluon/__init__.py +5 -0
  32. triton/experimental/gluon/_compiler.py +0 -0
  33. triton/experimental/gluon/_runtime.py +102 -0
  34. triton/experimental/gluon/language/__init__.py +119 -0
  35. triton/experimental/gluon/language/_core.py +490 -0
  36. triton/experimental/gluon/language/_layouts.py +583 -0
  37. triton/experimental/gluon/language/_math.py +20 -0
  38. triton/experimental/gluon/language/_semantic.py +380 -0
  39. triton/experimental/gluon/language/_standard.py +80 -0
  40. triton/experimental/gluon/language/amd/__init__.py +4 -0
  41. triton/experimental/gluon/language/amd/_layouts.py +96 -0
  42. triton/experimental/gluon/language/amd/cdna3/__init__.py +100 -0
  43. triton/experimental/gluon/language/amd/cdna4/__init__.py +48 -0
  44. triton/experimental/gluon/language/amd/cdna4/async_copy.py +151 -0
  45. triton/experimental/gluon/language/extra/__init__.py +3 -0
  46. triton/experimental/gluon/language/nvidia/__init__.py +4 -0
  47. triton/experimental/gluon/language/nvidia/ampere/__init__.py +3 -0
  48. triton/experimental/gluon/language/nvidia/ampere/async_copy.py +74 -0
  49. triton/experimental/gluon/language/nvidia/ampere/mbarrier.py +80 -0
  50. triton/experimental/gluon/language/nvidia/blackwell/__init__.py +387 -0
  51. triton/experimental/gluon/language/nvidia/blackwell/tma.py +52 -0
  52. triton/experimental/gluon/language/nvidia/hopper/__init__.py +132 -0
  53. triton/experimental/gluon/language/nvidia/hopper/mbarrier.py +34 -0
  54. triton/experimental/gluon/language/nvidia/hopper/tma.py +97 -0
  55. triton/experimental/gluon/nvidia/__init__.py +4 -0
  56. triton/experimental/gluon/nvidia/blackwell.py +3 -0
  57. triton/experimental/gluon/nvidia/hopper.py +45 -0
  58. triton/knobs.py +546 -0
  59. triton/language/__init__.py +342 -0
  60. triton/language/core.py +3405 -0
  61. triton/language/extra/__init__.py +26 -0
  62. triton/language/extra/cuda/__init__.py +16 -0
  63. triton/language/extra/cuda/gdc.py +42 -0
  64. triton/language/extra/cuda/libdevice.py +1629 -0
  65. triton/language/extra/cuda/utils.py +109 -0
  66. triton/language/extra/hip/__init__.py +5 -0
  67. triton/language/extra/hip/libdevice.py +491 -0
  68. triton/language/extra/hip/utils.py +35 -0
  69. triton/language/extra/libdevice.py +790 -0
  70. triton/language/math.py +249 -0
  71. triton/language/random.py +218 -0
  72. triton/language/semantic.py +1939 -0
  73. triton/language/standard.py +534 -0
  74. triton/language/target_info.py +54 -0
  75. triton/runtime/__init__.py +23 -0
  76. triton/runtime/_allocation.py +44 -0
  77. triton/runtime/_async_compile.py +55 -0
  78. triton/runtime/autotuner.py +476 -0
  79. triton/runtime/build.py +168 -0
  80. triton/runtime/cache.py +317 -0
  81. triton/runtime/driver.py +38 -0
  82. triton/runtime/errors.py +36 -0
  83. triton/runtime/interpreter.py +1414 -0
  84. triton/runtime/jit.py +1107 -0
  85. triton/runtime/tcc/include/_mingw.h +168 -0
  86. triton/runtime/tcc/include/assert.h +62 -0
  87. triton/runtime/tcc/include/conio.h +409 -0
  88. triton/runtime/tcc/include/ctype.h +281 -0
  89. triton/runtime/tcc/include/dir.h +31 -0
  90. triton/runtime/tcc/include/direct.h +68 -0
  91. triton/runtime/tcc/include/dirent.h +135 -0
  92. triton/runtime/tcc/include/dos.h +55 -0
  93. triton/runtime/tcc/include/errno.h +75 -0
  94. triton/runtime/tcc/include/excpt.h +123 -0
  95. triton/runtime/tcc/include/fcntl.h +52 -0
  96. triton/runtime/tcc/include/fenv.h +108 -0
  97. triton/runtime/tcc/include/float.h +75 -0
  98. triton/runtime/tcc/include/inttypes.h +297 -0
  99. triton/runtime/tcc/include/io.h +418 -0
  100. triton/runtime/tcc/include/iso646.h +36 -0
  101. triton/runtime/tcc/include/limits.h +116 -0
  102. triton/runtime/tcc/include/locale.h +91 -0
  103. triton/runtime/tcc/include/malloc.h +181 -0
  104. triton/runtime/tcc/include/math.h +497 -0
  105. triton/runtime/tcc/include/mem.h +13 -0
  106. triton/runtime/tcc/include/memory.h +40 -0
  107. triton/runtime/tcc/include/process.h +176 -0
  108. triton/runtime/tcc/include/sec_api/conio_s.h +42 -0
  109. triton/runtime/tcc/include/sec_api/crtdbg_s.h +19 -0
  110. triton/runtime/tcc/include/sec_api/io_s.h +33 -0
  111. triton/runtime/tcc/include/sec_api/mbstring_s.h +52 -0
  112. triton/runtime/tcc/include/sec_api/search_s.h +25 -0
  113. triton/runtime/tcc/include/sec_api/stdio_s.h +145 -0
  114. triton/runtime/tcc/include/sec_api/stdlib_s.h +67 -0
  115. triton/runtime/tcc/include/sec_api/stralign_s.h +30 -0
  116. triton/runtime/tcc/include/sec_api/string_s.h +41 -0
  117. triton/runtime/tcc/include/sec_api/sys/timeb_s.h +34 -0
  118. triton/runtime/tcc/include/sec_api/tchar_s.h +266 -0
  119. triton/runtime/tcc/include/sec_api/time_s.h +61 -0
  120. triton/runtime/tcc/include/sec_api/wchar_s.h +128 -0
  121. triton/runtime/tcc/include/setjmp.h +160 -0
  122. triton/runtime/tcc/include/share.h +28 -0
  123. triton/runtime/tcc/include/signal.h +63 -0
  124. triton/runtime/tcc/include/stdalign.h +16 -0
  125. triton/runtime/tcc/include/stdarg.h +14 -0
  126. triton/runtime/tcc/include/stdatomic.h +171 -0
  127. triton/runtime/tcc/include/stdbool.h +11 -0
  128. triton/runtime/tcc/include/stddef.h +42 -0
  129. triton/runtime/tcc/include/stdint.h +212 -0
  130. triton/runtime/tcc/include/stdio.h +429 -0
  131. triton/runtime/tcc/include/stdlib.h +591 -0
  132. triton/runtime/tcc/include/stdnoreturn.h +7 -0
  133. triton/runtime/tcc/include/string.h +164 -0
  134. triton/runtime/tcc/include/sys/fcntl.h +13 -0
  135. triton/runtime/tcc/include/sys/file.h +14 -0
  136. triton/runtime/tcc/include/sys/locking.h +30 -0
  137. triton/runtime/tcc/include/sys/stat.h +290 -0
  138. triton/runtime/tcc/include/sys/time.h +69 -0
  139. triton/runtime/tcc/include/sys/timeb.h +133 -0
  140. triton/runtime/tcc/include/sys/types.h +123 -0
  141. triton/runtime/tcc/include/sys/unistd.h +14 -0
  142. triton/runtime/tcc/include/sys/utime.h +146 -0
  143. triton/runtime/tcc/include/tcc/tcc_libm.h +618 -0
  144. triton/runtime/tcc/include/tccdefs.h +342 -0
  145. triton/runtime/tcc/include/tcclib.h +80 -0
  146. triton/runtime/tcc/include/tchar.h +1102 -0
  147. triton/runtime/tcc/include/tgmath.h +89 -0
  148. triton/runtime/tcc/include/time.h +287 -0
  149. triton/runtime/tcc/include/uchar.h +33 -0
  150. triton/runtime/tcc/include/unistd.h +1 -0
  151. triton/runtime/tcc/include/vadefs.h +11 -0
  152. triton/runtime/tcc/include/values.h +4 -0
  153. triton/runtime/tcc/include/varargs.h +12 -0
  154. triton/runtime/tcc/include/wchar.h +873 -0
  155. triton/runtime/tcc/include/wctype.h +172 -0
  156. triton/runtime/tcc/include/winapi/basetsd.h +149 -0
  157. triton/runtime/tcc/include/winapi/basetyps.h +85 -0
  158. triton/runtime/tcc/include/winapi/guiddef.h +156 -0
  159. triton/runtime/tcc/include/winapi/poppack.h +8 -0
  160. triton/runtime/tcc/include/winapi/pshpack1.h +8 -0
  161. triton/runtime/tcc/include/winapi/pshpack2.h +8 -0
  162. triton/runtime/tcc/include/winapi/pshpack4.h +8 -0
  163. triton/runtime/tcc/include/winapi/pshpack8.h +8 -0
  164. triton/runtime/tcc/include/winapi/qos.h +72 -0
  165. triton/runtime/tcc/include/winapi/shellapi.h +59 -0
  166. triton/runtime/tcc/include/winapi/winbase.h +2958 -0
  167. triton/runtime/tcc/include/winapi/wincon.h +309 -0
  168. triton/runtime/tcc/include/winapi/windef.h +293 -0
  169. triton/runtime/tcc/include/winapi/windows.h +127 -0
  170. triton/runtime/tcc/include/winapi/winerror.h +3166 -0
  171. triton/runtime/tcc/include/winapi/wingdi.h +4080 -0
  172. triton/runtime/tcc/include/winapi/winnls.h +778 -0
  173. triton/runtime/tcc/include/winapi/winnt.h +5837 -0
  174. triton/runtime/tcc/include/winapi/winreg.h +272 -0
  175. triton/runtime/tcc/include/winapi/winsock2.h +1474 -0
  176. triton/runtime/tcc/include/winapi/winuser.h +5651 -0
  177. triton/runtime/tcc/include/winapi/winver.h +160 -0
  178. triton/runtime/tcc/include/winapi/ws2ipdef.h +21 -0
  179. triton/runtime/tcc/include/winapi/ws2tcpip.h +391 -0
  180. triton/runtime/tcc/lib/cuda.def +697 -0
  181. triton/runtime/tcc/lib/gdi32.def +337 -0
  182. triton/runtime/tcc/lib/kernel32.def +770 -0
  183. triton/runtime/tcc/lib/libtcc1.a +0 -0
  184. triton/runtime/tcc/lib/msvcrt.def +1399 -0
  185. triton/runtime/tcc/lib/python3.def +810 -0
  186. triton/runtime/tcc/lib/python310.def +1610 -0
  187. triton/runtime/tcc/lib/python311.def +1633 -0
  188. triton/runtime/tcc/lib/python312.def +1703 -0
  189. triton/runtime/tcc/lib/python313.def +1651 -0
  190. triton/runtime/tcc/lib/python313t.def +1656 -0
  191. triton/runtime/tcc/lib/python314.def +1800 -0
  192. triton/runtime/tcc/lib/python314t.def +1809 -0
  193. triton/runtime/tcc/lib/python39.def +1644 -0
  194. triton/runtime/tcc/lib/python3t.def +905 -0
  195. triton/runtime/tcc/lib/user32.def +658 -0
  196. triton/runtime/tcc/libtcc.dll +0 -0
  197. triton/runtime/tcc/tcc.exe +0 -0
  198. triton/testing.py +543 -0
  199. triton/tools/__init__.py +0 -0
  200. triton/tools/build_extern.py +365 -0
  201. triton/tools/compile.py +210 -0
  202. triton/tools/disasm.py +143 -0
  203. triton/tools/extra/cuda/compile.c +70 -0
  204. triton/tools/extra/cuda/compile.h +14 -0
  205. triton/tools/extra/hip/compile.cpp +66 -0
  206. triton/tools/extra/hip/compile.h +13 -0
  207. triton/tools/link.py +322 -0
  208. triton/tools/mxfp.py +301 -0
  209. triton/tools/ragged_tma.py +92 -0
  210. triton/tools/tensor_descriptor.py +34 -0
  211. triton/windows_utils.py +405 -0
  212. triton_windows-3.5.0.post21.dist-info/METADATA +46 -0
  213. triton_windows-3.5.0.post21.dist-info/RECORD +217 -0
  214. triton_windows-3.5.0.post21.dist-info/WHEEL +5 -0
  215. triton_windows-3.5.0.post21.dist-info/entry_points.txt +3 -0
  216. triton_windows-3.5.0.post21.dist-info/licenses/LICENSE +23 -0
  217. triton_windows-3.5.0.post21.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1629 @@
1
+ from triton.language import core
2
+
3
+
4
@core.extern
def clz(arg0, _semantic=None):
    """Elementwise extern call: int32 -> ``__nv_clz``, int64 -> ``__nv_clzll``; both yield int32."""
    i32, i64 = core.dtype("int32"), core.dtype("int64")
    overloads = {
        (i32, ): ("__nv_clz", i32),
        (i64, ): ("__nv_clzll", i32),
    }
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
11
+
12
+
13
@core.extern
def popc(arg0, _semantic=None):
    """Elementwise extern call: int32 -> ``__nv_popc``, int64 -> ``__nv_popcll``; both yield int32."""
    i32, i64 = core.dtype("int32"), core.dtype("int64")
    overloads = {
        (i32, ): ("__nv_popc", i32),
        (i64, ): ("__nv_popcll", i32),
    }
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
20
+
21
+
22
@core.extern
def byte_perm(arg0, arg1, arg2, _semantic=None):
    """Elementwise extern call to ``__nv_byte_perm`` on three int32 operands, yielding int32."""
    i32 = core.dtype("int32")
    overloads = {(i32, i32, i32): ("__nv_byte_perm", i32)}
    operands = [arg0, arg1, arg2]
    return core.extern_elementwise("", "", operands, overloads, is_pure=True, _semantic=_semantic)
27
+
28
+
29
@core.extern
def mulhi(arg0, arg1, _semantic=None):
    """Elementwise extern call selecting a ``mulhi`` symbol by operand dtype.

    int32 -> ``__nv_mulhi``, uint32 -> ``__nv_umulhi``,
    int64 -> ``__nv_mul64hi``, uint64 -> ``__nv_umul64hi``.
    The result dtype equals the operand dtype in every overload.
    """
    i32, u32 = core.dtype("int32"), core.dtype("uint32")
    i64, u64 = core.dtype("int64"), core.dtype("uint64")
    overloads = {
        (i32, i32): ("__nv_mulhi", i32),
        (u32, u32): ("__nv_umulhi", u32),
        (i64, i64): ("__nv_mul64hi", i64),
        (u64, u64): ("__nv_umul64hi", u64),
    }
    return core.extern_elementwise("", "", [arg0, arg1], overloads, is_pure=True, _semantic=_semantic)
38
+
39
+
40
@core.extern
def mul24(arg0, arg1, _semantic=None):
    """Elementwise extern call: int32 pair -> ``__nv_mul24``, uint32 pair -> ``__nv_umul24``."""
    i32, u32 = core.dtype("int32"), core.dtype("uint32")
    overloads = {
        (i32, i32): ("__nv_mul24", i32),
        (u32, u32): ("__nv_umul24", u32),
    }
    return core.extern_elementwise("", "", [arg0, arg1], overloads, is_pure=True, _semantic=_semantic)
47
+
48
+
49
@core.extern
def brev(arg0, _semantic=None):
    """Elementwise extern call: int32 -> ``__nv_brev``, int64 -> ``__nv_brevll``; result dtype matches input."""
    i32, i64 = core.dtype("int32"), core.dtype("int64")
    overloads = {
        (i32, ): ("__nv_brev", i32),
        (i64, ): ("__nv_brevll", i64),
    }
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
56
+
57
+
58
@core.extern
def sad(arg0, arg1, arg2, _semantic=None):
    """Elementwise extern call to ``__nv_sad`` / ``__nv_usad``.

    (int32, int32, uint32) -> ``__nv_sad`` yielding int32;
    (uint32, uint32, uint32) -> ``__nv_usad`` yielding uint32.
    """
    i32, u32 = core.dtype("int32"), core.dtype("uint32")
    overloads = {
        (i32, i32, u32): ("__nv_sad", i32),
        (u32, u32, u32): ("__nv_usad", u32),
    }
    operands = [arg0, arg1, arg2]
    return core.extern_elementwise("", "", operands, overloads, is_pure=True, _semantic=_semantic)
65
+
66
+
67
@core.extern
def abs(arg0, _semantic=None):
    """Elementwise extern call selecting an ``abs`` symbol by operand dtype.

    int32 -> ``__nv_abs``, int64 -> ``__nv_llabs``,
    fp32 -> ``__nv_fabsf``, fp64 -> ``__nv_fabs``.
    The result dtype equals the operand dtype in every overload.
    """
    i32, i64 = core.dtype("int32"), core.dtype("int64")
    f32, f64 = core.dtype("fp32"), core.dtype("fp64")
    overloads = {
        (i32, ): ("__nv_abs", i32),
        (i64, ): ("__nv_llabs", i64),
        (f32, ): ("__nv_fabsf", f32),
        (f64, ): ("__nv_fabs", f64),
    }
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
76
+
77
+
78
@core.extern
def floor(arg0, _semantic=None):
    """Elementwise extern call: fp32 -> ``__nv_floorf``, fp64 -> ``__nv_floor``."""
    f32, f64 = core.dtype("fp32"), core.dtype("fp64")
    overloads = {
        (f32, ): ("__nv_floorf", f32),
        (f64, ): ("__nv_floor", f64),
    }
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
85
+
86
+
87
@core.extern
def rcp64h(arg0, _semantic=None):
    """Elementwise extern call to ``__nv_rcp64h`` (fp64 operand, fp64 result)."""
    f64 = core.dtype("fp64")
    overloads = {(f64, ): ("__nv_rcp64h", f64)}
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
92
+
93
+
94
@core.extern
def rsqrt(arg0, _semantic=None):
    """Elementwise extern call: fp32 -> ``__nv_rsqrtf``, fp64 -> ``__nv_rsqrt``."""
    f32, f64 = core.dtype("fp32"), core.dtype("fp64")
    overloads = {
        (f32, ): ("__nv_rsqrtf", f32),
        (f64, ): ("__nv_rsqrt", f64),
    }
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
101
+
102
+
103
@core.extern
def ceil(arg0, _semantic=None):
    """Elementwise extern call: fp64 -> ``__nv_ceil``, fp32 -> ``__nv_ceilf``."""
    f64, f32 = core.dtype("fp64"), core.dtype("fp32")
    overloads = {
        (f64, ): ("__nv_ceil", f64),
        (f32, ): ("__nv_ceilf", f32),
    }
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
110
+
111
+
112
@core.extern
def trunc(arg0, _semantic=None):
    """Elementwise extern call: fp64 -> ``__nv_trunc``, fp32 -> ``__nv_truncf``."""
    f64, f32 = core.dtype("fp64"), core.dtype("fp32")
    overloads = {
        (f64, ): ("__nv_trunc", f64),
        (f32, ): ("__nv_truncf", f32),
    }
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
119
+
120
+
121
@core.extern
def exp2(arg0, _semantic=None):
    """Elementwise extern call: fp32 -> ``__nv_exp2f``, fp64 -> ``__nv_exp2``."""
    f32, f64 = core.dtype("fp32"), core.dtype("fp64")
    overloads = {
        (f32, ): ("__nv_exp2f", f32),
        (f64, ): ("__nv_exp2", f64),
    }
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
128
+
129
+
130
@core.extern
def saturatef(arg0, _semantic=None):
    """Elementwise extern call to ``__nv_saturatef`` (fp32 operand, fp32 result)."""
    f32 = core.dtype("fp32")
    overloads = {(f32, ): ("__nv_saturatef", f32)}
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
135
+
136
+
137
@core.extern
def fma_rn(arg0, arg1, arg2, _semantic=None):
    """Elementwise extern call: fp32 triple -> ``__nv_fmaf_rn``, fp64 triple -> ``__nv_fma_rn``."""
    f32, f64 = core.dtype("fp32"), core.dtype("fp64")
    overloads = {
        (f32, f32, f32): ("__nv_fmaf_rn", f32),
        (f64, f64, f64): ("__nv_fma_rn", f64),
    }
    operands = [arg0, arg1, arg2]
    return core.extern_elementwise("", "", operands, overloads, is_pure=True, _semantic=_semantic)
144
+
145
+
146
@core.extern
def fma_rz(arg0, arg1, arg2, _semantic=None):
    """Elementwise extern call: fp32 triple -> ``__nv_fmaf_rz``, fp64 triple -> ``__nv_fma_rz``."""
    f32, f64 = core.dtype("fp32"), core.dtype("fp64")
    overloads = {
        (f32, f32, f32): ("__nv_fmaf_rz", f32),
        (f64, f64, f64): ("__nv_fma_rz", f64),
    }
    operands = [arg0, arg1, arg2]
    return core.extern_elementwise("", "", operands, overloads, is_pure=True, _semantic=_semantic)
153
+
154
+
155
@core.extern
def fma_rd(arg0, arg1, arg2, _semantic=None):
    """Elementwise extern call: fp32 triple -> ``__nv_fmaf_rd``, fp64 triple -> ``__nv_fma_rd``."""
    f32, f64 = core.dtype("fp32"), core.dtype("fp64")
    overloads = {
        (f32, f32, f32): ("__nv_fmaf_rd", f32),
        (f64, f64, f64): ("__nv_fma_rd", f64),
    }
    operands = [arg0, arg1, arg2]
    return core.extern_elementwise("", "", operands, overloads, is_pure=True, _semantic=_semantic)
162
+
163
+
164
@core.extern
def fma_ru(arg0, arg1, arg2, _semantic=None):
    """Elementwise extern call: fp32 triple -> ``__nv_fmaf_ru``, fp64 triple -> ``__nv_fma_ru``."""
    f32, f64 = core.dtype("fp32"), core.dtype("fp64")
    overloads = {
        (f32, f32, f32): ("__nv_fmaf_ru", f32),
        (f64, f64, f64): ("__nv_fma_ru", f64),
    }
    operands = [arg0, arg1, arg2]
    return core.extern_elementwise("", "", operands, overloads, is_pure=True, _semantic=_semantic)
171
+
172
+
173
@core.extern
def fast_dividef(arg0, arg1, _semantic=None):
    """Elementwise extern call to ``__nv_fast_fdividef`` on two fp32 operands, yielding fp32."""
    f32 = core.dtype("fp32")
    overloads = {(f32, f32): ("__nv_fast_fdividef", f32)}
    return core.extern_elementwise("", "", [arg0, arg1], overloads, is_pure=True, _semantic=_semantic)
178
+
179
+
180
@core.extern
def div_rn(arg0, arg1, _semantic=None):
    """Elementwise extern call: fp32 pair -> ``__nv_fdiv_rn``, fp64 pair -> ``__nv_ddiv_rn``."""
    f32, f64 = core.dtype("fp32"), core.dtype("fp64")
    overloads = {
        (f32, f32): ("__nv_fdiv_rn", f32),
        (f64, f64): ("__nv_ddiv_rn", f64),
    }
    return core.extern_elementwise("", "", [arg0, arg1], overloads, is_pure=True, _semantic=_semantic)
187
+
188
+
189
@core.extern
def div_rz(arg0, arg1, _semantic=None):
    """Elementwise extern call: fp32 pair -> ``__nv_fdiv_rz``, fp64 pair -> ``__nv_ddiv_rz``."""
    f32, f64 = core.dtype("fp32"), core.dtype("fp64")
    overloads = {
        (f32, f32): ("__nv_fdiv_rz", f32),
        (f64, f64): ("__nv_ddiv_rz", f64),
    }
    return core.extern_elementwise("", "", [arg0, arg1], overloads, is_pure=True, _semantic=_semantic)
196
+
197
+
198
@core.extern
def div_rd(arg0, arg1, _semantic=None):
    """Elementwise extern call: fp32 pair -> ``__nv_fdiv_rd``, fp64 pair -> ``__nv_ddiv_rd``."""
    f32, f64 = core.dtype("fp32"), core.dtype("fp64")
    overloads = {
        (f32, f32): ("__nv_fdiv_rd", f32),
        (f64, f64): ("__nv_ddiv_rd", f64),
    }
    return core.extern_elementwise("", "", [arg0, arg1], overloads, is_pure=True, _semantic=_semantic)
205
+
206
+
207
@core.extern
def div_ru(arg0, arg1, _semantic=None):
    """Elementwise extern call: fp32 pair -> ``__nv_fdiv_ru``, fp64 pair -> ``__nv_ddiv_ru``."""
    f32, f64 = core.dtype("fp32"), core.dtype("fp64")
    overloads = {
        (f32, f32): ("__nv_fdiv_ru", f32),
        (f64, f64): ("__nv_ddiv_ru", f64),
    }
    return core.extern_elementwise("", "", [arg0, arg1], overloads, is_pure=True, _semantic=_semantic)
214
+
215
+
216
@core.extern
def rcp_rn(arg0, _semantic=None):
    """Elementwise extern call: fp32 -> ``__nv_frcp_rn``, fp64 -> ``__nv_drcp_rn``."""
    f32, f64 = core.dtype("fp32"), core.dtype("fp64")
    overloads = {
        (f32, ): ("__nv_frcp_rn", f32),
        (f64, ): ("__nv_drcp_rn", f64),
    }
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
223
+
224
+
225
@core.extern
def rcp_rz(arg0, _semantic=None):
    """Elementwise extern call: fp32 -> ``__nv_frcp_rz``, fp64 -> ``__nv_drcp_rz``."""
    f32, f64 = core.dtype("fp32"), core.dtype("fp64")
    overloads = {
        (f32, ): ("__nv_frcp_rz", f32),
        (f64, ): ("__nv_drcp_rz", f64),
    }
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
232
+
233
+
234
@core.extern
def rcp_rd(arg0, _semantic=None):
    """Elementwise extern call: fp32 -> ``__nv_frcp_rd``, fp64 -> ``__nv_drcp_rd``."""
    f32, f64 = core.dtype("fp32"), core.dtype("fp64")
    overloads = {
        (f32, ): ("__nv_frcp_rd", f32),
        (f64, ): ("__nv_drcp_rd", f64),
    }
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
241
+
242
+
243
@core.extern
def rcp_ru(arg0, _semantic=None):
    """Elementwise extern call: fp32 -> ``__nv_frcp_ru``, fp64 -> ``__nv_drcp_ru``."""
    f32, f64 = core.dtype("fp32"), core.dtype("fp64")
    overloads = {
        (f32, ): ("__nv_frcp_ru", f32),
        (f64, ): ("__nv_drcp_ru", f64),
    }
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
250
+
251
+
252
@core.extern
def sqrt_rn(arg0, _semantic=None):
    """Elementwise extern call: fp32 -> ``__nv_fsqrt_rn``, fp64 -> ``__nv_dsqrt_rn``."""
    f32, f64 = core.dtype("fp32"), core.dtype("fp64")
    overloads = {
        (f32, ): ("__nv_fsqrt_rn", f32),
        (f64, ): ("__nv_dsqrt_rn", f64),
    }
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
259
+
260
+
261
@core.extern
def sqrt_rz(arg0, _semantic=None):
    """Elementwise extern call: fp32 -> ``__nv_fsqrt_rz``, fp64 -> ``__nv_dsqrt_rz``."""
    f32, f64 = core.dtype("fp32"), core.dtype("fp64")
    overloads = {
        (f32, ): ("__nv_fsqrt_rz", f32),
        (f64, ): ("__nv_dsqrt_rz", f64),
    }
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
268
+
269
+
270
@core.extern
def sqrt_rd(arg0, _semantic=None):
    """Elementwise extern call: fp32 -> ``__nv_fsqrt_rd``, fp64 -> ``__nv_dsqrt_rd``."""
    f32, f64 = core.dtype("fp32"), core.dtype("fp64")
    overloads = {
        (f32, ): ("__nv_fsqrt_rd", f32),
        (f64, ): ("__nv_dsqrt_rd", f64),
    }
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
277
+
278
+
279
@core.extern
def sqrt_ru(arg0, _semantic=None):
    """Elementwise extern call: fp32 -> ``__nv_fsqrt_ru``, fp64 -> ``__nv_dsqrt_ru``."""
    f32, f64 = core.dtype("fp32"), core.dtype("fp64")
    overloads = {
        (f32, ): ("__nv_fsqrt_ru", f32),
        (f64, ): ("__nv_dsqrt_ru", f64),
    }
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
286
+
287
+
288
@core.extern
def sqrt(arg0, _semantic=None):
    """Elementwise extern call: fp32 -> ``__nv_sqrtf``, fp64 -> ``__nv_sqrt``."""
    f32, f64 = core.dtype("fp32"), core.dtype("fp64")
    overloads = {
        (f32, ): ("__nv_sqrtf", f32),
        (f64, ): ("__nv_sqrt", f64),
    }
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
295
+
296
+
297
@core.extern
def add_rn(arg0, arg1, _semantic=None):
    """Elementwise extern call: fp64 pair -> ``__nv_dadd_rn``, fp32 pair -> ``__nv_fadd_rn``."""
    f64, f32 = core.dtype("fp64"), core.dtype("fp32")
    overloads = {
        (f64, f64): ("__nv_dadd_rn", f64),
        (f32, f32): ("__nv_fadd_rn", f32),
    }
    return core.extern_elementwise("", "", [arg0, arg1], overloads, is_pure=True, _semantic=_semantic)
304
+
305
+
306
@core.extern
def add_rz(arg0, arg1, _semantic=None):
    """Elementwise extern call: fp64 pair -> ``__nv_dadd_rz``, fp32 pair -> ``__nv_fadd_rz``."""
    f64, f32 = core.dtype("fp64"), core.dtype("fp32")
    overloads = {
        (f64, f64): ("__nv_dadd_rz", f64),
        (f32, f32): ("__nv_fadd_rz", f32),
    }
    return core.extern_elementwise("", "", [arg0, arg1], overloads, is_pure=True, _semantic=_semantic)
313
+
314
+
315
@core.extern
def add_rd(arg0, arg1, _semantic=None):
    """Elementwise extern call: fp64 pair -> ``__nv_dadd_rd``, fp32 pair -> ``__nv_fadd_rd``."""
    f64, f32 = core.dtype("fp64"), core.dtype("fp32")
    overloads = {
        (f64, f64): ("__nv_dadd_rd", f64),
        (f32, f32): ("__nv_fadd_rd", f32),
    }
    return core.extern_elementwise("", "", [arg0, arg1], overloads, is_pure=True, _semantic=_semantic)
322
+
323
+
324
@core.extern
def add_ru(arg0, arg1, _semantic=None):
    """Elementwise extern call: fp64 pair -> ``__nv_dadd_ru``, fp32 pair -> ``__nv_fadd_ru``."""
    f64, f32 = core.dtype("fp64"), core.dtype("fp32")
    overloads = {
        (f64, f64): ("__nv_dadd_ru", f64),
        (f32, f32): ("__nv_fadd_ru", f32),
    }
    return core.extern_elementwise("", "", [arg0, arg1], overloads, is_pure=True, _semantic=_semantic)
331
+
332
+
333
@core.extern
def mul_rn(arg0, arg1, _semantic=None):
    """Elementwise extern call: fp64 pair -> ``__nv_dmul_rn``, fp32 pair -> ``__nv_fmul_rn``."""
    f64, f32 = core.dtype("fp64"), core.dtype("fp32")
    overloads = {
        (f64, f64): ("__nv_dmul_rn", f64),
        (f32, f32): ("__nv_fmul_rn", f32),
    }
    return core.extern_elementwise("", "", [arg0, arg1], overloads, is_pure=True, _semantic=_semantic)
340
+
341
+
342
@core.extern
def mul_rz(arg0, arg1, _semantic=None):
    """Elementwise extern call: fp64 pair -> ``__nv_dmul_rz``, fp32 pair -> ``__nv_fmul_rz``."""
    f64, f32 = core.dtype("fp64"), core.dtype("fp32")
    overloads = {
        (f64, f64): ("__nv_dmul_rz", f64),
        (f32, f32): ("__nv_fmul_rz", f32),
    }
    return core.extern_elementwise("", "", [arg0, arg1], overloads, is_pure=True, _semantic=_semantic)
349
+
350
+
351
@core.extern
def mul_rd(arg0, arg1, _semantic=None):
    """Elementwise extern call: fp64 pair -> ``__nv_dmul_rd``, fp32 pair -> ``__nv_fmul_rd``."""
    f64, f32 = core.dtype("fp64"), core.dtype("fp32")
    overloads = {
        (f64, f64): ("__nv_dmul_rd", f64),
        (f32, f32): ("__nv_fmul_rd", f32),
    }
    return core.extern_elementwise("", "", [arg0, arg1], overloads, is_pure=True, _semantic=_semantic)
358
+
359
+
360
@core.extern
def mul_ru(arg0, arg1, _semantic=None):
    """Elementwise extern call: fp64 pair -> ``__nv_dmul_ru``, fp32 pair -> ``__nv_fmul_ru``."""
    f64, f32 = core.dtype("fp64"), core.dtype("fp32")
    overloads = {
        (f64, f64): ("__nv_dmul_ru", f64),
        (f32, f32): ("__nv_fmul_ru", f32),
    }
    return core.extern_elementwise("", "", [arg0, arg1], overloads, is_pure=True, _semantic=_semantic)
376
+
377
+
378
@core.extern
def double2float_rn(arg0, _semantic=None):
    """Elementwise extern call to ``__nv_double2float_rn`` (fp64 operand, fp32 result)."""
    overloads = {(core.dtype("fp64"), ): ("__nv_double2float_rn", core.dtype("fp32"))}
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
383
+
384
+
385
@core.extern
def double2float_rz(arg0, _semantic=None):
    """Elementwise extern call to ``__nv_double2float_rz`` (fp64 operand, fp32 result)."""
    overloads = {(core.dtype("fp64"), ): ("__nv_double2float_rz", core.dtype("fp32"))}
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
390
+
391
+
392
@core.extern
def double2float_rd(arg0, _semantic=None):
    """Elementwise extern call to ``__nv_double2float_rd`` (fp64 operand, fp32 result)."""
    overloads = {(core.dtype("fp64"), ): ("__nv_double2float_rd", core.dtype("fp32"))}
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
397
+
398
+
399
@core.extern
def double2float_ru(arg0, _semantic=None):
    """Elementwise extern call to ``__nv_double2float_ru`` (fp64 operand, fp32 result)."""
    overloads = {(core.dtype("fp64"), ): ("__nv_double2float_ru", core.dtype("fp32"))}
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
404
+
405
+
406
@core.extern
def double2int_rn(arg0, _semantic=None):
    """Elementwise extern call to ``__nv_double2int_rn`` (fp64 operand, int32 result)."""
    overloads = {(core.dtype("fp64"), ): ("__nv_double2int_rn", core.dtype("int32"))}
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
411
+
412
+
413
@core.extern
def double2int_rz(arg0, _semantic=None):
    """Elementwise extern call to ``__nv_double2int_rz`` (fp64 operand, int32 result)."""
    overloads = {(core.dtype("fp64"), ): ("__nv_double2int_rz", core.dtype("int32"))}
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
418
+
419
+
420
@core.extern
def double2int_rd(arg0, _semantic=None):
    """Elementwise extern call to ``__nv_double2int_rd`` (fp64 operand, int32 result)."""
    overloads = {(core.dtype("fp64"), ): ("__nv_double2int_rd", core.dtype("int32"))}
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
425
+
426
+
427
@core.extern
def double2int_ru(arg0, _semantic=None):
    """Elementwise extern call to ``__nv_double2int_ru`` (fp64 operand, int32 result)."""
    overloads = {(core.dtype("fp64"), ): ("__nv_double2int_ru", core.dtype("int32"))}
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
432
+
433
+
434
@core.extern
def double2uint_rn(arg0, _semantic=None):
    """Elementwise extern call to ``__nv_double2uint_rn`` (fp64 operand, int32 result)."""
    # NOTE(review): the result is declared int32 although the symbol name says "uint";
    # confirm the signed dtype is intended before relying on values above INT32_MAX.
    overloads = {(core.dtype("fp64"), ): ("__nv_double2uint_rn", core.dtype("int32"))}
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
439
+
440
+
441
@core.extern
def double2uint_rz(arg0, _semantic=None):
    """Elementwise extern call to ``__nv_double2uint_rz`` (fp64 operand, int32 result)."""
    # NOTE(review): the result is declared int32 although the symbol name says "uint";
    # confirm the signed dtype is intended before relying on values above INT32_MAX.
    overloads = {(core.dtype("fp64"), ): ("__nv_double2uint_rz", core.dtype("int32"))}
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
446
+
447
+
448
@core.extern
def double2uint_rd(arg0, _semantic=None):
    """Elementwise extern call to ``__nv_double2uint_rd`` (fp64 operand, int32 result)."""
    # NOTE(review): the result is declared int32 although the symbol name says "uint";
    # confirm the signed dtype is intended before relying on values above INT32_MAX.
    overloads = {(core.dtype("fp64"), ): ("__nv_double2uint_rd", core.dtype("int32"))}
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
453
+
454
+
455
@core.extern
def double2uint_ru(arg0, _semantic=None):
    """Elementwise extern call to ``__nv_double2uint_ru`` (fp64 operand, int32 result)."""
    # NOTE(review): the result is declared int32 although the symbol name says "uint";
    # confirm the signed dtype is intended before relying on values above INT32_MAX.
    overloads = {(core.dtype("fp64"), ): ("__nv_double2uint_ru", core.dtype("int32"))}
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
460
+
461
+
462
@core.extern
def int2double_rn(arg0, _semantic=None):
    """Elementwise extern call to ``__nv_int2double_rn`` (int32 operand, fp64 result)."""
    overloads = {(core.dtype("int32"), ): ("__nv_int2double_rn", core.dtype("fp64"))}
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
467
+
468
+
469
@core.extern
def uint2double_rn(arg0, _semantic=None):
    """Elementwise extern call to ``__nv_uint2double_rn`` (uint32 operand, fp64 result)."""
    overloads = {(core.dtype("uint32"), ): ("__nv_uint2double_rn", core.dtype("fp64"))}
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
474
+
475
+
476
@core.extern
def float2int_rn(arg0, _semantic=None):
    """Elementwise extern call to ``__nv_float2int_rn`` (fp32 operand, int32 result)."""
    overloads = {(core.dtype("fp32"), ): ("__nv_float2int_rn", core.dtype("int32"))}
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
481
+
482
+
483
@core.extern
def float2int_rz(arg0, _semantic=None):
    """Elementwise extern call to ``__nv_float2int_rz`` (fp32 operand, int32 result)."""
    overloads = {(core.dtype("fp32"), ): ("__nv_float2int_rz", core.dtype("int32"))}
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
488
+
489
+
490
@core.extern
def float2int_rd(arg0, _semantic=None):
    """Elementwise extern call to ``__nv_float2int_rd`` (fp32 operand, int32 result)."""
    overloads = {(core.dtype("fp32"), ): ("__nv_float2int_rd", core.dtype("int32"))}
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
495
+
496
+
497
@core.extern
def float2int_ru(arg0, _semantic=None):
    """Elementwise extern call to ``__nv_float2int_ru`` (fp32 operand, int32 result)."""
    overloads = {(core.dtype("fp32"), ): ("__nv_float2int_ru", core.dtype("int32"))}
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
502
+
503
+
504
@core.extern
def float2uint_rn(arg0, _semantic=None):
    """Elementwise extern call to ``__nv_float2uint_rn`` (fp32 operand, int32 result)."""
    # NOTE(review): the result is declared int32 although the symbol name says "uint";
    # confirm the signed dtype is intended before relying on values above INT32_MAX.
    overloads = {(core.dtype("fp32"), ): ("__nv_float2uint_rn", core.dtype("int32"))}
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
509
+
510
+
511
@core.extern
def float2uint_rz(arg0, _semantic=None):
    """Elementwise extern call to ``__nv_float2uint_rz`` (fp32 operand, int32 result)."""
    # NOTE(review): the result is declared int32 although the symbol name says "uint";
    # confirm the signed dtype is intended before relying on values above INT32_MAX.
    overloads = {(core.dtype("fp32"), ): ("__nv_float2uint_rz", core.dtype("int32"))}
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
516
+
517
+
518
@core.extern
def float2uint_rd(arg0, _semantic=None):
    """Elementwise extern call to ``__nv_float2uint_rd`` (fp32 operand, int32 result)."""
    # NOTE(review): the result is declared int32 although the symbol name says "uint";
    # confirm the signed dtype is intended before relying on values above INT32_MAX.
    overloads = {(core.dtype("fp32"), ): ("__nv_float2uint_rd", core.dtype("int32"))}
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
523
+
524
+
525
@core.extern
def float2uint_ru(arg0, _semantic=None):
    """Elementwise extern call to ``__nv_float2uint_ru`` (fp32 operand, int32 result)."""
    # NOTE(review): the result is declared int32 although the symbol name says "uint";
    # confirm the signed dtype is intended before relying on values above INT32_MAX.
    overloads = {(core.dtype("fp32"), ): ("__nv_float2uint_ru", core.dtype("int32"))}
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
530
+
531
+
532
@core.extern
def int2float_rn(arg0, _semantic=None):
    """Elementwise extern call to ``__nv_int2float_rn`` (int32 operand, fp32 result)."""
    overloads = {(core.dtype("int32"), ): ("__nv_int2float_rn", core.dtype("fp32"))}
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
537
+
538
+
539
@core.extern
def int2float_rz(arg0, _semantic=None):
    """Elementwise extern call to ``__nv_int2float_rz`` (int32 operand, fp32 result)."""
    overloads = {(core.dtype("int32"), ): ("__nv_int2float_rz", core.dtype("fp32"))}
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
544
+
545
+
546
@core.extern
def int2float_rd(arg0, _semantic=None):
    """Elementwise extern call to ``__nv_int2float_rd`` (int32 operand, fp32 result)."""
    overloads = {(core.dtype("int32"), ): ("__nv_int2float_rd", core.dtype("fp32"))}
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
551
+
552
+
553
@core.extern
def int2float_ru(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping int32 to ``__nv_int2float_ru`` (fp32 result)."""
    dispatch = {
        (core.dtype("int32"), ): ("__nv_int2float_ru", core.dtype("fp32")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
558
+
559
+
560
@core.extern
def uint2float_rn(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping uint32 to ``__nv_uint2float_rn`` (fp32 result)."""
    dispatch = {
        (core.dtype("uint32"), ): ("__nv_uint2float_rn", core.dtype("fp32")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
565
+
566
+
567
@core.extern
def uint2float_rz(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping uint32 to ``__nv_uint2float_rz`` (fp32 result)."""
    dispatch = {
        (core.dtype("uint32"), ): ("__nv_uint2float_rz", core.dtype("fp32")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
572
+
573
+
574
@core.extern
def uint2float_rd(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping uint32 to ``__nv_uint2float_rd`` (fp32 result)."""
    dispatch = {
        (core.dtype("uint32"), ): ("__nv_uint2float_rd", core.dtype("fp32")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
579
+
580
+
581
@core.extern
def uint2float_ru(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping uint32 to ``__nv_uint2float_ru`` (fp32 result)."""
    dispatch = {
        (core.dtype("uint32"), ): ("__nv_uint2float_ru", core.dtype("fp32")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
586
+
587
+
588
@core.extern
def hiloint2double(arg0, arg1, _semantic=None):
    """Call ``core.extern_elementwise`` on ``(arg0, arg1)``, mapping (int32, int32) to ``__nv_hiloint2double`` (fp64 result)."""
    dispatch = {
        (core.dtype("int32"), core.dtype("int32")): ("__nv_hiloint2double", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0, arg1], dispatch, is_pure=True, _semantic=_semantic)
593
+
594
+
595
@core.extern
def double2loint(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping fp64 to ``__nv_double2loint`` (int32 result)."""
    dispatch = {
        (core.dtype("fp64"), ): ("__nv_double2loint", core.dtype("int32")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
600
+
601
+
602
@core.extern
def double2hiint(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping fp64 to ``__nv_double2hiint`` (int32 result)."""
    dispatch = {
        (core.dtype("fp64"), ): ("__nv_double2hiint", core.dtype("int32")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
607
+
608
+
609
@core.extern
def float2ll_rn(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping fp32 to ``__nv_float2ll_rn`` (int64 result)."""
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_float2ll_rn", core.dtype("int64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
614
+
615
+
616
@core.extern
def float2ll_rz(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping fp32 to ``__nv_float2ll_rz`` (int64 result)."""
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_float2ll_rz", core.dtype("int64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
621
+
622
+
623
@core.extern
def float2ll_rd(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping fp32 to ``__nv_float2ll_rd`` (int64 result)."""
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_float2ll_rd", core.dtype("int64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
628
+
629
+
630
@core.extern
def float2ll_ru(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping fp32 to ``__nv_float2ll_ru`` (int64 result)."""
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_float2ll_ru", core.dtype("int64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
635
+
636
+
637
@core.extern
def float2ull_rn(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping fp32 to ``__nv_float2ull_rn`` (int64 result).

    NOTE(review): the declared result dtype is signed int64 although the symbol
    name says "ull" -- confirm this is intended.
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_float2ull_rn", core.dtype("int64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
642
+
643
+
644
@core.extern
def float2ull_rz(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping fp32 to ``__nv_float2ull_rz`` (int64 result).

    NOTE(review): the declared result dtype is signed int64 although the symbol
    name says "ull" -- confirm this is intended.
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_float2ull_rz", core.dtype("int64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
649
+
650
+
651
@core.extern
def float2ull_rd(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping fp32 to ``__nv_float2ull_rd`` (int64 result).

    NOTE(review): the declared result dtype is signed int64 although the symbol
    name says "ull" -- confirm this is intended.
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_float2ull_rd", core.dtype("int64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
656
+
657
+
658
@core.extern
def float2ull_ru(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping fp32 to ``__nv_float2ull_ru`` (int64 result).

    NOTE(review): the declared result dtype is signed int64 although the symbol
    name says "ull" -- confirm this is intended.
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_float2ull_ru", core.dtype("int64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
663
+
664
+
665
@core.extern
def double2ll_rn(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping fp64 to ``__nv_double2ll_rn`` (int64 result)."""
    dispatch = {
        (core.dtype("fp64"), ): ("__nv_double2ll_rn", core.dtype("int64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
670
+
671
+
672
@core.extern
def double2ll_rz(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping fp64 to ``__nv_double2ll_rz`` (int64 result)."""
    dispatch = {
        (core.dtype("fp64"), ): ("__nv_double2ll_rz", core.dtype("int64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
677
+
678
+
679
@core.extern
def double2ll_rd(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping fp64 to ``__nv_double2ll_rd`` (int64 result)."""
    dispatch = {
        (core.dtype("fp64"), ): ("__nv_double2ll_rd", core.dtype("int64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
684
+
685
+
686
@core.extern
def double2ll_ru(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping fp64 to ``__nv_double2ll_ru`` (int64 result)."""
    dispatch = {
        (core.dtype("fp64"), ): ("__nv_double2ll_ru", core.dtype("int64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
691
+
692
+
693
@core.extern
def double2ull_rn(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping fp64 to ``__nv_double2ull_rn`` (int64 result).

    NOTE(review): the declared result dtype is signed int64 although the symbol
    name says "ull" -- confirm this is intended.
    """
    dispatch = {
        (core.dtype("fp64"), ): ("__nv_double2ull_rn", core.dtype("int64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
698
+
699
+
700
@core.extern
def double2ull_rz(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping fp64 to ``__nv_double2ull_rz`` (int64 result).

    NOTE(review): the declared result dtype is signed int64 although the symbol
    name says "ull" -- confirm this is intended.
    """
    dispatch = {
        (core.dtype("fp64"), ): ("__nv_double2ull_rz", core.dtype("int64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
705
+
706
+
707
@core.extern
def double2ull_rd(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping fp64 to ``__nv_double2ull_rd`` (int64 result).

    NOTE(review): the declared result dtype is signed int64 although the symbol
    name says "ull" -- confirm this is intended.
    """
    dispatch = {
        (core.dtype("fp64"), ): ("__nv_double2ull_rd", core.dtype("int64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
712
+
713
+
714
@core.extern
def double2ull_ru(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping fp64 to ``__nv_double2ull_ru`` (int64 result).

    NOTE(review): the declared result dtype is signed int64 although the symbol
    name says "ull" -- confirm this is intended.
    """
    dispatch = {
        (core.dtype("fp64"), ): ("__nv_double2ull_ru", core.dtype("int64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
719
+
720
+
721
@core.extern
def ll2float_rn(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping int64 to ``__nv_ll2float_rn`` (fp32 result)."""
    dispatch = {
        (core.dtype("int64"), ): ("__nv_ll2float_rn", core.dtype("fp32")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
726
+
727
+
728
@core.extern
def ll2float_rz(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping int64 to ``__nv_ll2float_rz`` (fp32 result)."""
    dispatch = {
        (core.dtype("int64"), ): ("__nv_ll2float_rz", core.dtype("fp32")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
733
+
734
+
735
@core.extern
def ll2float_rd(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping int64 to ``__nv_ll2float_rd`` (fp32 result)."""
    dispatch = {
        (core.dtype("int64"), ): ("__nv_ll2float_rd", core.dtype("fp32")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
740
+
741
+
742
@core.extern
def ll2float_ru(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping int64 to ``__nv_ll2float_ru`` (fp32 result)."""
    dispatch = {
        (core.dtype("int64"), ): ("__nv_ll2float_ru", core.dtype("fp32")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
747
+
748
+
749
@core.extern
def ull2float_rn(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping uint64 to ``__nv_ull2float_rn`` (fp32 result)."""
    dispatch = {
        (core.dtype("uint64"), ): ("__nv_ull2float_rn", core.dtype("fp32")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
754
+
755
+
756
@core.extern
def ull2float_rz(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping uint64 to ``__nv_ull2float_rz`` (fp32 result)."""
    dispatch = {
        (core.dtype("uint64"), ): ("__nv_ull2float_rz", core.dtype("fp32")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
761
+
762
+
763
@core.extern
def ull2float_rd(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping uint64 to ``__nv_ull2float_rd`` (fp32 result)."""
    dispatch = {
        (core.dtype("uint64"), ): ("__nv_ull2float_rd", core.dtype("fp32")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
768
+
769
+
770
@core.extern
def ull2float_ru(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping uint64 to ``__nv_ull2float_ru`` (fp32 result)."""
    dispatch = {
        (core.dtype("uint64"), ): ("__nv_ull2float_ru", core.dtype("fp32")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
775
+
776
+
777
@core.extern
def ll2double_rn(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping int64 to ``__nv_ll2double_rn`` (fp64 result)."""
    dispatch = {
        (core.dtype("int64"), ): ("__nv_ll2double_rn", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
782
+
783
+
784
@core.extern
def ll2double_rz(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping int64 to ``__nv_ll2double_rz`` (fp64 result)."""
    dispatch = {
        (core.dtype("int64"), ): ("__nv_ll2double_rz", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
789
+
790
+
791
@core.extern
def ll2double_rd(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping int64 to ``__nv_ll2double_rd`` (fp64 result)."""
    dispatch = {
        (core.dtype("int64"), ): ("__nv_ll2double_rd", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
796
+
797
+
798
@core.extern
def ll2double_ru(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping int64 to ``__nv_ll2double_ru`` (fp64 result)."""
    dispatch = {
        (core.dtype("int64"), ): ("__nv_ll2double_ru", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
803
+
804
+
805
@core.extern
def ull2double_rn(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping uint64 to ``__nv_ull2double_rn`` (fp64 result)."""
    dispatch = {
        (core.dtype("uint64"), ): ("__nv_ull2double_rn", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
810
+
811
+
812
@core.extern
def ull2double_rz(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping uint64 to ``__nv_ull2double_rz`` (fp64 result)."""
    dispatch = {
        (core.dtype("uint64"), ): ("__nv_ull2double_rz", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
817
+
818
+
819
@core.extern
def ull2double_rd(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping uint64 to ``__nv_ull2double_rd`` (fp64 result)."""
    dispatch = {
        (core.dtype("uint64"), ): ("__nv_ull2double_rd", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
824
+
825
+
826
@core.extern
def ull2double_ru(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping uint64 to ``__nv_ull2double_ru`` (fp64 result)."""
    dispatch = {
        (core.dtype("uint64"), ): ("__nv_ull2double_ru", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
831
+
832
+
833
@core.extern
def int_as_float(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping int32 to ``__nv_int_as_float`` (fp32 result)."""
    dispatch = {
        (core.dtype("int32"), ): ("__nv_int_as_float", core.dtype("fp32")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
838
+
839
+
840
@core.extern
def float_as_int(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping fp32 to ``__nv_float_as_int`` (int32 result)."""
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_float_as_int", core.dtype("int32")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
845
+
846
+
847
@core.extern
def uint_as_float(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping uint32 to ``__nv_uint_as_float`` (fp32 result)."""
    dispatch = {
        (core.dtype("uint32"), ): ("__nv_uint_as_float", core.dtype("fp32")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
852
+
853
+
854
@core.extern
def float_as_uint(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping fp32 to ``__nv_float_as_uint`` (int32 result).

    NOTE(review): the declared result dtype is signed int32 although the symbol
    name says "uint" -- confirm this is intended.
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_float_as_uint", core.dtype("int32")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
859
+
860
+
861
@core.extern
def longlong_as_double(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping int64 to ``__nv_longlong_as_double`` (fp64 result)."""
    dispatch = {
        (core.dtype("int64"), ): ("__nv_longlong_as_double", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
866
+
867
+
868
@core.extern
def double_as_longlong(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping fp64 to ``__nv_double_as_longlong`` (int64 result)."""
    dispatch = {
        (core.dtype("fp64"), ): ("__nv_double_as_longlong", core.dtype("int64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
873
+
874
+
875
@core.extern
def fast_sinf(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping fp32 to ``__nv_fast_sinf`` (fp32 result)."""
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_fast_sinf", core.dtype("fp32")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
880
+
881
+
882
@core.extern
def fast_cosf(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping fp32 to ``__nv_fast_cosf`` (fp32 result)."""
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_fast_cosf", core.dtype("fp32")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
887
+
888
+
889
@core.extern
def fast_log2f(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping fp32 to ``__nv_fast_log2f`` (fp32 result)."""
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_fast_log2f", core.dtype("fp32")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
894
+
895
+
896
@core.extern
def fast_logf(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping fp32 to ``__nv_fast_logf`` (fp32 result)."""
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_fast_logf", core.dtype("fp32")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
901
+
902
+
903
@core.extern
def fast_expf(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping fp32 to ``__nv_fast_expf`` (fp32 result)."""
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_fast_expf", core.dtype("fp32")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
908
+
909
+
910
@core.extern
def fast_tanf(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping fp32 to ``__nv_fast_tanf`` (fp32 result)."""
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_fast_tanf", core.dtype("fp32")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
915
+
916
+
917
@core.extern
def fast_exp10f(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping fp32 to ``__nv_fast_exp10f`` (fp32 result)."""
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_fast_exp10f", core.dtype("fp32")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
922
+
923
+
924
@core.extern
def fast_log10f(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping fp32 to ``__nv_fast_log10f`` (fp32 result)."""
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_fast_log10f", core.dtype("fp32")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
929
+
930
+
931
@core.extern
def fast_powf(arg0, arg1, _semantic=None):
    """Call ``core.extern_elementwise`` on ``(arg0, arg1)``, mapping (fp32, fp32) to ``__nv_fast_powf`` (fp32 result)."""
    dispatch = {
        (core.dtype("fp32"), core.dtype("fp32")): ("__nv_fast_powf", core.dtype("fp32")),
    }
    return core.extern_elementwise("", "", [arg0, arg1], dispatch, is_pure=True, _semantic=_semantic)
936
+
937
+
938
@core.extern
def hadd(arg0, arg1, _semantic=None):
    """Call ``core.extern_elementwise`` on ``(arg0, arg1)``.

    An int32 pair maps to ``__nv_hadd`` (int32 result); a uint32 pair maps to
    ``__nv_uhadd`` (uint32 result).
    """
    dispatch = {
        (core.dtype("int32"), core.dtype("int32")): ("__nv_hadd", core.dtype("int32")),
        (core.dtype("uint32"), core.dtype("uint32")): ("__nv_uhadd", core.dtype("uint32")),
    }
    return core.extern_elementwise("", "", [arg0, arg1], dispatch, is_pure=True, _semantic=_semantic)
945
+
946
+
947
@core.extern
def rhadd(arg0, arg1, _semantic=None):
    """Call ``core.extern_elementwise`` on ``(arg0, arg1)``.

    An int32 pair maps to ``__nv_rhadd`` (int32 result); a uint32 pair maps to
    ``__nv_urhadd`` (uint32 result).
    """
    dispatch = {
        (core.dtype("int32"), core.dtype("int32")): ("__nv_rhadd", core.dtype("int32")),
        (core.dtype("uint32"), core.dtype("uint32")): ("__nv_urhadd", core.dtype("uint32")),
    }
    return core.extern_elementwise("", "", [arg0, arg1], dispatch, is_pure=True, _semantic=_semantic)
954
+
955
+
956
@core.extern
def sub_rn(arg0, arg1, _semantic=None):
    """Call ``core.extern_elementwise`` on ``(arg0, arg1)``.

    An fp32 pair maps to ``__nv_fsub_rn`` (fp32 result); an fp64 pair maps to
    ``__nv_dsub_rn`` (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), core.dtype("fp32")): ("__nv_fsub_rn", core.dtype("fp32")),
        (core.dtype("fp64"), core.dtype("fp64")): ("__nv_dsub_rn", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0, arg1], dispatch, is_pure=True, _semantic=_semantic)
963
+
964
+
965
@core.extern
def sub_rz(arg0, arg1, _semantic=None):
    """Call ``core.extern_elementwise`` on ``(arg0, arg1)``.

    An fp32 pair maps to ``__nv_fsub_rz`` (fp32 result); an fp64 pair maps to
    ``__nv_dsub_rz`` (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), core.dtype("fp32")): ("__nv_fsub_rz", core.dtype("fp32")),
        (core.dtype("fp64"), core.dtype("fp64")): ("__nv_dsub_rz", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0, arg1], dispatch, is_pure=True, _semantic=_semantic)
972
+
973
+
974
@core.extern
def sub_rd(arg0, arg1, _semantic=None):
    """Call ``core.extern_elementwise`` on ``(arg0, arg1)``.

    An fp32 pair maps to ``__nv_fsub_rd`` (fp32 result); an fp64 pair maps to
    ``__nv_dsub_rd`` (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), core.dtype("fp32")): ("__nv_fsub_rd", core.dtype("fp32")),
        (core.dtype("fp64"), core.dtype("fp64")): ("__nv_dsub_rd", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0, arg1], dispatch, is_pure=True, _semantic=_semantic)
981
+
982
+
983
@core.extern
def sub_ru(arg0, arg1, _semantic=None):
    """Call ``core.extern_elementwise`` on ``(arg0, arg1)``.

    An fp32 pair maps to ``__nv_fsub_ru`` (fp32 result); an fp64 pair maps to
    ``__nv_dsub_ru`` (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), core.dtype("fp32")): ("__nv_fsub_ru", core.dtype("fp32")),
        (core.dtype("fp64"), core.dtype("fp64")): ("__nv_dsub_ru", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0, arg1], dispatch, is_pure=True, _semantic=_semantic)
990
+
991
+
992
@core.extern
def rsqrt_rn(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``, mapping fp32 to ``__nv_frsqrt_rn`` (fp32 result)."""
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_frsqrt_rn", core.dtype("fp32")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
999
+
1000
+
1001
@core.extern
def ffs(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``.

    int32 maps to ``__nv_ffs`` and int64 maps to ``__nv_ffsll``; both entries
    declare an int32 result.
    """
    dispatch = {
        (core.dtype("int32"), ): ("__nv_ffs", core.dtype("int32")),
        (core.dtype("int64"), ): ("__nv_ffsll", core.dtype("int32")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
1010
+
1011
+
1012
@core.extern
def rint(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``.

    fp32 maps to ``__nv_rintf`` (fp32 result); fp64 maps to ``__nv_rint``
    (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_rintf", core.dtype("fp32")),
        (core.dtype("fp64"), ): ("__nv_rint", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
1021
+
1022
+
1023
@core.extern
def llrint(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``.

    fp32 maps to ``__nv_llrintf`` and fp64 maps to ``__nv_llrint``; both
    entries declare an int64 result.
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_llrintf", core.dtype("int64")),
        (core.dtype("fp64"), ): ("__nv_llrint", core.dtype("int64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
1032
+
1033
+
1034
@core.extern
def nearbyint(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``.

    fp32 maps to ``__nv_nearbyintf`` (fp32 result); fp64 maps to
    ``__nv_nearbyint`` (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_nearbyintf", core.dtype("fp32")),
        (core.dtype("fp64"), ): ("__nv_nearbyint", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
1043
+
1044
+
1045
@core.extern
def isnan(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0`` and convert the result to ``core.int1``.

    fp32 maps to ``__nv_isnanf`` and fp64 maps to ``__nv_isnand``; both entries
    declare an int32 result, which is then passed through ``.to(core.int1)``.
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_isnanf", core.dtype("int32")),
        (core.dtype("fp64"), ): ("__nv_isnand", core.dtype("int32")),
    }
    flags = core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
    return flags.to(core.int1, _semantic=_semantic)
1054
+
1055
+
1056
@core.extern
def signbit(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``.

    fp32 maps to ``__nv_signbitf`` and fp64 maps to ``__nv_signbitd``; both
    entries declare an int32 result, which is returned without further
    conversion.
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_signbitf", core.dtype("int32")),
        (core.dtype("fp64"), ): ("__nv_signbitd", core.dtype("int32")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
1065
+
1066
+
1067
@core.extern
def copysign(arg0, arg1, _semantic=None):
    """Call ``core.extern_elementwise`` on ``(arg0, arg1)``.

    An fp32 pair maps to ``__nv_copysignf`` (fp32 result); an fp64 pair maps to
    ``__nv_copysign`` (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), core.dtype("fp32")): ("__nv_copysignf", core.dtype("fp32")),
        (core.dtype("fp64"), core.dtype("fp64")): ("__nv_copysign", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0, arg1], dispatch, is_pure=True, _semantic=_semantic)
1074
+
1075
+
1076
@core.extern
def finitef(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0`` and convert the result to ``core.int1``.

    fp32 maps to ``__nv_finitef`` with a declared int32 result, which is then
    passed through ``.to(core.int1)``.
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_finitef", core.dtype("int32")),
    }
    flags = core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
    return flags.to(core.int1, _semantic=_semantic)
1081
+
1082
+
1083
@core.extern
def isinf(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0`` and convert the result to ``core.int1``.

    fp32 maps to ``__nv_isinff`` and fp64 maps to ``__nv_isinfd``; both entries
    declare an int32 result, which is then passed through ``.to(core.int1)``.
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_isinff", core.dtype("int32")),
        (core.dtype("fp64"), ): ("__nv_isinfd", core.dtype("int32")),
    }
    flags = core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
    return flags.to(core.int1, _semantic=_semantic)
1090
+
1091
+
1092
@core.extern
def nextafter(arg0, arg1, _semantic=None):
    """Call ``core.extern_elementwise`` on ``(arg0, arg1)``.

    An fp32 pair maps to ``__nv_nextafterf`` (fp32 result); an fp64 pair maps
    to ``__nv_nextafter`` (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), core.dtype("fp32")): ("__nv_nextafterf", core.dtype("fp32")),
        (core.dtype("fp64"), core.dtype("fp64")): ("__nv_nextafter", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0, arg1], dispatch, is_pure=True, _semantic=_semantic)
1099
+
1100
+
1101
@core.extern
def sin(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``.

    fp32 maps to ``__nv_sinf`` (fp32 result); fp64 maps to ``__nv_sin``
    (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_sinf", core.dtype("fp32")),
        (core.dtype("fp64"), ): ("__nv_sin", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
1108
+
1109
+
1110
@core.extern
def cos(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``.

    fp32 maps to ``__nv_cosf`` (fp32 result); fp64 maps to ``__nv_cos``
    (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_cosf", core.dtype("fp32")),
        (core.dtype("fp64"), ): ("__nv_cos", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
1117
+
1118
+
1119
@core.extern
def sinpi(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``.

    fp32 maps to ``__nv_sinpif`` (fp32 result); fp64 maps to ``__nv_sinpi``
    (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_sinpif", core.dtype("fp32")),
        (core.dtype("fp64"), ): ("__nv_sinpi", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
1126
+
1127
+
1128
@core.extern
def cospi(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``.

    fp32 maps to ``__nv_cospif`` (fp32 result); fp64 maps to ``__nv_cospi``
    (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_cospif", core.dtype("fp32")),
        (core.dtype("fp64"), ): ("__nv_cospi", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
1135
+
1136
+
1137
@core.extern
def tan(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``.

    fp32 maps to ``__nv_tanf`` (fp32 result); fp64 maps to ``__nv_tan``
    (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_tanf", core.dtype("fp32")),
        (core.dtype("fp64"), ): ("__nv_tan", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
1144
+
1145
+
1146
@core.extern
def log2(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``.

    fp32 maps to ``__nv_log2f`` (fp32 result); fp64 maps to ``__nv_log2``
    (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_log2f", core.dtype("fp32")),
        (core.dtype("fp64"), ): ("__nv_log2", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
1153
+
1154
+
1155
@core.extern
def exp(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``.

    fp32 maps to ``__nv_expf`` (fp32 result); fp64 maps to ``__nv_exp``
    (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_expf", core.dtype("fp32")),
        (core.dtype("fp64"), ): ("__nv_exp", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
1162
+
1163
+
1164
@core.extern
def exp10(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``.

    fp32 maps to ``__nv_exp10f`` (fp32 result); fp64 maps to ``__nv_exp10``
    (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_exp10f", core.dtype("fp32")),
        (core.dtype("fp64"), ): ("__nv_exp10", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
1171
+
1172
+
1173
@core.extern
def cosh(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``.

    fp32 maps to ``__nv_coshf`` (fp32 result); fp64 maps to ``__nv_cosh``
    (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_coshf", core.dtype("fp32")),
        (core.dtype("fp64"), ): ("__nv_cosh", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
1180
+
1181
+
1182
@core.extern
def sinh(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``.

    fp32 maps to ``__nv_sinhf`` (fp32 result); fp64 maps to ``__nv_sinh``
    (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_sinhf", core.dtype("fp32")),
        (core.dtype("fp64"), ): ("__nv_sinh", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
1189
+
1190
+
1191
@core.extern
def tanh(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``.

    fp32 maps to ``__nv_tanhf`` (fp32 result); fp64 maps to ``__nv_tanh``
    (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_tanhf", core.dtype("fp32")),
        (core.dtype("fp64"), ): ("__nv_tanh", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
1198
+
1199
+
1200
@core.extern
def atan2(arg0, arg1, _semantic=None):
    """Call ``core.extern_elementwise`` on ``(arg0, arg1)``.

    An fp32 pair maps to ``__nv_atan2f`` (fp32 result); an fp64 pair maps to
    ``__nv_atan2`` (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), core.dtype("fp32")): ("__nv_atan2f", core.dtype("fp32")),
        (core.dtype("fp64"), core.dtype("fp64")): ("__nv_atan2", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0, arg1], dispatch, is_pure=True, _semantic=_semantic)
1207
+
1208
+
1209
@core.extern
def atan(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``.

    fp32 maps to ``__nv_atanf`` (fp32 result); fp64 maps to ``__nv_atan``
    (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_atanf", core.dtype("fp32")),
        (core.dtype("fp64"), ): ("__nv_atan", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
1216
+
1217
+
1218
@core.extern
def asin(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``.

    fp32 maps to ``__nv_asinf`` (fp32 result); fp64 maps to ``__nv_asin``
    (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_asinf", core.dtype("fp32")),
        (core.dtype("fp64"), ): ("__nv_asin", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
1225
+
1226
+
1227
@core.extern
def acos(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``.

    fp32 maps to ``__nv_acosf`` (fp32 result); fp64 maps to ``__nv_acos``
    (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_acosf", core.dtype("fp32")),
        (core.dtype("fp64"), ): ("__nv_acos", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
1234
+
1235
+
1236
@core.extern
def log(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``.

    fp32 maps to ``__nv_logf`` (fp32 result); fp64 maps to ``__nv_log``
    (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_logf", core.dtype("fp32")),
        (core.dtype("fp64"), ): ("__nv_log", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
1243
+
1244
+
1245
@core.extern
def log10(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``.

    fp32 maps to ``__nv_log10f`` (fp32 result); fp64 maps to ``__nv_log10``
    (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_log10f", core.dtype("fp32")),
        (core.dtype("fp64"), ): ("__nv_log10", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
1252
+
1253
+
1254
@core.extern
def log1p(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``.

    fp32 maps to ``__nv_log1pf`` (fp32 result); fp64 maps to ``__nv_log1p``
    (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_log1pf", core.dtype("fp32")),
        (core.dtype("fp64"), ): ("__nv_log1p", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
1261
+
1262
+
1263
@core.extern
def acosh(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``.

    fp32 maps to ``__nv_acoshf`` (fp32 result); fp64 maps to ``__nv_acosh``
    (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_acoshf", core.dtype("fp32")),
        (core.dtype("fp64"), ): ("__nv_acosh", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
1270
+
1271
+
1272
@core.extern
def asinh(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``.

    fp32 maps to ``__nv_asinhf`` (fp32 result); fp64 maps to ``__nv_asinh``
    (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_asinhf", core.dtype("fp32")),
        (core.dtype("fp64"), ): ("__nv_asinh", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
1279
+
1280
+
1281
@core.extern
def atanh(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``.

    fp32 maps to ``__nv_atanhf`` (fp32 result); fp64 maps to ``__nv_atanh``
    (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_atanhf", core.dtype("fp32")),
        (core.dtype("fp64"), ): ("__nv_atanh", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
1288
+
1289
+
1290
@core.extern
def expm1(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``.

    fp32 maps to ``__nv_expm1f`` (fp32 result); fp64 maps to ``__nv_expm1``
    (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_expm1f", core.dtype("fp32")),
        (core.dtype("fp64"), ): ("__nv_expm1", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
1297
+
1298
+
1299
@core.extern
def hypot(arg0, arg1, _semantic=None):
    """Call ``core.extern_elementwise`` on ``(arg0, arg1)``.

    An fp32 pair maps to ``__nv_hypotf`` (fp32 result); an fp64 pair maps to
    ``__nv_hypot`` (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), core.dtype("fp32")): ("__nv_hypotf", core.dtype("fp32")),
        (core.dtype("fp64"), core.dtype("fp64")): ("__nv_hypot", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0, arg1], dispatch, is_pure=True, _semantic=_semantic)
1306
+
1307
+
1308
@core.extern
def rhypot(arg0, arg1, _semantic=None):
    """Call ``core.extern_elementwise`` on ``(arg0, arg1)``.

    An fp32 pair maps to ``__nv_rhypotf`` (fp32 result); an fp64 pair maps to
    ``__nv_rhypot`` (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), core.dtype("fp32")): ("__nv_rhypotf", core.dtype("fp32")),
        (core.dtype("fp64"), core.dtype("fp64")): ("__nv_rhypot", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0, arg1], dispatch, is_pure=True, _semantic=_semantic)
1315
+
1316
+
1317
@core.extern
def norm3d(arg0, arg1, arg2, _semantic=None):
    """Call ``core.extern_elementwise`` on ``(arg0, arg1, arg2)``.

    Three fp32 operands map to ``__nv_norm3df`` (fp32 result); three fp64
    operands map to ``__nv_norm3d`` (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), core.dtype("fp32"), core.dtype("fp32")): ("__nv_norm3df", core.dtype("fp32")),
        (core.dtype("fp64"), core.dtype("fp64"), core.dtype("fp64")): ("__nv_norm3d", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0, arg1, arg2], dispatch, is_pure=True, _semantic=_semantic)
1324
+
1325
+
1326
@core.extern
def rnorm3d(arg0, arg1, arg2, _semantic=None):
    """Call ``core.extern_elementwise`` on ``(arg0, arg1, arg2)``.

    Three fp32 operands map to ``__nv_rnorm3df`` (fp32 result); three fp64
    operands map to ``__nv_rnorm3d`` (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), core.dtype("fp32"), core.dtype("fp32")): ("__nv_rnorm3df", core.dtype("fp32")),
        (core.dtype("fp64"), core.dtype("fp64"), core.dtype("fp64")): ("__nv_rnorm3d", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0, arg1, arg2], dispatch, is_pure=True, _semantic=_semantic)
1333
+
1334
+
1335
@core.extern
def norm4d(arg0, arg1, arg2, arg3, _semantic=None):
    """Call ``core.extern_elementwise`` on ``(arg0, arg1, arg2, arg3)``.

    Four fp32 operands map to ``__nv_norm4df`` (fp32 result); four fp64
    operands map to ``__nv_norm4d`` (fp64 result).
    """
    fp32_key = (core.dtype("fp32"), core.dtype("fp32"), core.dtype("fp32"), core.dtype("fp32"))
    fp64_key = (core.dtype("fp64"), core.dtype("fp64"), core.dtype("fp64"), core.dtype("fp64"))
    dispatch = {
        fp32_key: ("__nv_norm4df", core.dtype("fp32")),
        fp64_key: ("__nv_norm4d", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0, arg1, arg2, arg3], dispatch, is_pure=True, _semantic=_semantic)
1344
+
1345
+
1346
@core.extern
def rnorm4d(arg0, arg1, arg2, arg3, _semantic=None):
    """Call ``core.extern_elementwise`` on ``(arg0, arg1, arg2, arg3)``.

    Four fp32 operands map to ``__nv_rnorm4df`` (fp32 result); four fp64
    operands map to ``__nv_rnorm4d`` (fp64 result).
    """
    fp32_key = (core.dtype("fp32"), core.dtype("fp32"), core.dtype("fp32"), core.dtype("fp32"))
    fp64_key = (core.dtype("fp64"), core.dtype("fp64"), core.dtype("fp64"), core.dtype("fp64"))
    dispatch = {
        fp32_key: ("__nv_rnorm4df", core.dtype("fp32")),
        fp64_key: ("__nv_rnorm4d", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0, arg1, arg2, arg3], dispatch, is_pure=True, _semantic=_semantic)
1355
+
1356
+
1357
@core.extern
def cbrt(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``.

    fp32 maps to ``__nv_cbrtf`` (fp32 result); fp64 maps to ``__nv_cbrt``
    (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_cbrtf", core.dtype("fp32")),
        (core.dtype("fp64"), ): ("__nv_cbrt", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
1364
+
1365
+
1366
@core.extern
def rcbrt(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``.

    fp32 maps to ``__nv_rcbrtf`` (fp32 result); fp64 maps to ``__nv_rcbrt``
    (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_rcbrtf", core.dtype("fp32")),
        (core.dtype("fp64"), ): ("__nv_rcbrt", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
1373
+
1374
+
1375
@core.extern
def j0(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``.

    fp32 maps to ``__nv_j0f`` (fp32 result); fp64 maps to ``__nv_j0``
    (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_j0f", core.dtype("fp32")),
        (core.dtype("fp64"), ): ("__nv_j0", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
1381
+
1382
+
1383
@core.extern
def j1(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``.

    fp32 maps to ``__nv_j1f`` (fp32 result); fp64 maps to ``__nv_j1``
    (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_j1f", core.dtype("fp32")),
        (core.dtype("fp64"), ): ("__nv_j1", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
1389
+
1390
+
1391
@core.extern
def y0(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``.

    fp32 maps to ``__nv_y0f`` (fp32 result); fp64 maps to ``__nv_y0``
    (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_y0f", core.dtype("fp32")),
        (core.dtype("fp64"), ): ("__nv_y0", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
1397
+
1398
+
1399
@core.extern
def y1(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``.

    fp32 maps to ``__nv_y1f`` (fp32 result); fp64 maps to ``__nv_y1``
    (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_y1f", core.dtype("fp32")),
        (core.dtype("fp64"), ): ("__nv_y1", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
1405
+
1406
+
1407
@core.extern
def yn(arg0, arg1, _semantic=None):
    """Call ``core.extern_elementwise`` on ``(arg0, arg1)``.

    (int32, fp32) maps to ``__nv_ynf`` (fp32 result); (int32, fp64) maps to
    ``__nv_yn`` (fp64 result).
    """
    dispatch = {
        (core.dtype("int32"), core.dtype("fp32")): ("__nv_ynf", core.dtype("fp32")),
        (core.dtype("int32"), core.dtype("fp64")): ("__nv_yn", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0, arg1], dispatch, is_pure=True, _semantic=_semantic)
1414
+
1415
+
1416
@core.extern
def jn(arg0, arg1, _semantic=None):
    """Call ``core.extern_elementwise`` on ``(arg0, arg1)``.

    (int32, fp32) maps to ``__nv_jnf`` (fp32 result); (int32, fp64) maps to
    ``__nv_jn`` (fp64 result).
    """
    dispatch = {
        (core.dtype("int32"), core.dtype("fp32")): ("__nv_jnf", core.dtype("fp32")),
        (core.dtype("int32"), core.dtype("fp64")): ("__nv_jn", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0, arg1], dispatch, is_pure=True, _semantic=_semantic)
1423
+
1424
+
1425
@core.extern
def cyl_bessel_i0(arg0, _semantic=None):
    """Call ``core.extern_elementwise`` on ``arg0``.

    fp32 maps to ``__nv_cyl_bessel_i0f`` (fp32 result); fp64 maps to
    ``__nv_cyl_bessel_i0`` (fp64 result).
    """
    dispatch = {
        (core.dtype("fp32"), ): ("__nv_cyl_bessel_i0f", core.dtype("fp32")),
        (core.dtype("fp64"), ): ("__nv_cyl_bessel_i0", core.dtype("fp64")),
    }
    return core.extern_elementwise("", "", [arg0], dispatch, is_pure=True, _semantic=_semantic)
1432
+
1433
+
1434
@core.extern
def cyl_bessel_i1(arg0, _semantic=None):
    """Element-wise extern call: fp32 -> ``__nv_cyl_bessel_i1f``, fp64 -> ``__nv_cyl_bessel_i1``.

    The declared return dtype matches the operand dtype; the call is marked pure.
    """
    f32 = core.dtype("fp32")
    f64 = core.dtype("fp64")
    overloads = {
        (f32, ): ("__nv_cyl_bessel_i1f", f32),
        (f64, ): ("__nv_cyl_bessel_i1", f64),
    }
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
1441
+
1442
+
1443
@core.extern
def erf(arg0, _semantic=None):
    """Element-wise extern call: fp32 -> ``__nv_erff``, fp64 -> ``__nv_erf``.

    The declared return dtype matches the operand dtype; the call is marked pure.
    """
    f32 = core.dtype("fp32")
    f64 = core.dtype("fp64")
    overloads = {
        (f32, ): ("__nv_erff", f32),
        (f64, ): ("__nv_erf", f64),
    }
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
1450
+
1451
+
1452
@core.extern
def erfinv(arg0, _semantic=None):
    """Element-wise extern call: fp32 -> ``__nv_erfinvf``, fp64 -> ``__nv_erfinv``.

    The declared return dtype matches the operand dtype; the call is marked pure.
    """
    f32 = core.dtype("fp32")
    f64 = core.dtype("fp64")
    overloads = {
        (f32, ): ("__nv_erfinvf", f32),
        (f64, ): ("__nv_erfinv", f64),
    }
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
1459
+
1460
+
1461
@core.extern
def erfc(arg0, _semantic=None):
    """Element-wise extern call: fp32 -> ``__nv_erfcf``, fp64 -> ``__nv_erfc``.

    The declared return dtype matches the operand dtype; the call is marked pure.
    """
    f32 = core.dtype("fp32")
    f64 = core.dtype("fp64")
    overloads = {
        (f32, ): ("__nv_erfcf", f32),
        (f64, ): ("__nv_erfc", f64),
    }
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
1468
+
1469
+
1470
@core.extern
def erfcx(arg0, _semantic=None):
    """Element-wise extern call: fp32 -> ``__nv_erfcxf``, fp64 -> ``__nv_erfcx``.

    The declared return dtype matches the operand dtype; the call is marked pure.
    """
    f32 = core.dtype("fp32")
    f64 = core.dtype("fp64")
    overloads = {
        (f32, ): ("__nv_erfcxf", f32),
        (f64, ): ("__nv_erfcx", f64),
    }
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
1477
+
1478
+
1479
@core.extern
def erfcinv(arg0, _semantic=None):
    """Element-wise extern call: fp32 -> ``__nv_erfcinvf``, fp64 -> ``__nv_erfcinv``.

    The declared return dtype matches the operand dtype; the call is marked pure.
    """
    f32 = core.dtype("fp32")
    f64 = core.dtype("fp64")
    overloads = {
        (f32, ): ("__nv_erfcinvf", f32),
        (f64, ): ("__nv_erfcinv", f64),
    }
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
1486
+
1487
+
1488
@core.extern
def normcdfinv(arg0, _semantic=None):
    """Element-wise extern call: fp32 -> ``__nv_normcdfinvf``, fp64 -> ``__nv_normcdfinv``.

    The declared return dtype matches the operand dtype; the call is marked pure.
    """
    f32 = core.dtype("fp32")
    f64 = core.dtype("fp64")
    overloads = {
        (f32, ): ("__nv_normcdfinvf", f32),
        (f64, ): ("__nv_normcdfinv", f64),
    }
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
1495
+
1496
+
1497
@core.extern
def normcdf(arg0, _semantic=None):
    """Element-wise extern call: fp32 -> ``__nv_normcdff``, fp64 -> ``__nv_normcdf``.

    The declared return dtype matches the operand dtype; the call is marked pure.
    """
    f32 = core.dtype("fp32")
    f64 = core.dtype("fp64")
    overloads = {
        (f32, ): ("__nv_normcdff", f32),
        (f64, ): ("__nv_normcdf", f64),
    }
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
1504
+
1505
+
1506
@core.extern
def lgamma(arg0, _semantic=None):
    """Element-wise extern call: fp32 -> ``__nv_lgammaf``, fp64 -> ``__nv_lgamma``.

    The declared return dtype matches the operand dtype; the call is marked pure.
    """
    f32 = core.dtype("fp32")
    f64 = core.dtype("fp64")
    overloads = {
        (f32, ): ("__nv_lgammaf", f32),
        (f64, ): ("__nv_lgamma", f64),
    }
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
1513
+
1514
+
1515
@core.extern
def ldexp(arg0, arg1, _semantic=None):
    """Element-wise extern call taking a float first operand and an int32 second operand.

    Overloads: (fp32, int32) -> ``__nv_ldexpf`` returning fp32,
    (fp64, int32) -> ``__nv_ldexp`` returning fp64. The call is marked pure.
    """
    i32 = core.dtype("int32")
    f32 = core.dtype("fp32")
    f64 = core.dtype("fp64")
    overloads = {
        (f32, i32): ("__nv_ldexpf", f32),
        (f64, i32): ("__nv_ldexp", f64),
    }
    operands = [arg0, arg1]
    return core.extern_elementwise("", "", operands, overloads, is_pure=True, _semantic=_semantic)
1522
+
1523
+
1524
@core.extern
def scalbn(arg0, arg1, _semantic=None):
    """Element-wise extern call taking a float first operand and an int32 second operand.

    Overloads: (fp32, int32) -> ``__nv_scalbnf`` returning fp32,
    (fp64, int32) -> ``__nv_scalbn`` returning fp64. The call is marked pure.
    """
    i32 = core.dtype("int32")
    f32 = core.dtype("fp32")
    f64 = core.dtype("fp64")
    overloads = {
        (f32, i32): ("__nv_scalbnf", f32),
        (f64, i32): ("__nv_scalbn", f64),
    }
    operands = [arg0, arg1]
    return core.extern_elementwise("", "", operands, overloads, is_pure=True, _semantic=_semantic)
1531
+
1532
+
1533
@core.extern
def fmod(arg0, arg1, _semantic=None):
    """Element-wise binary extern call on two operands of the same float dtype.

    Overloads: (fp32, fp32) -> ``__nv_fmodf`` returning fp32,
    (fp64, fp64) -> ``__nv_fmod`` returning fp64. The call is marked pure.
    """
    f32 = core.dtype("fp32")
    f64 = core.dtype("fp64")
    overloads = {
        (f32, f32): ("__nv_fmodf", f32),
        (f64, f64): ("__nv_fmod", f64),
    }
    operands = [arg0, arg1]
    return core.extern_elementwise("", "", operands, overloads, is_pure=True, _semantic=_semantic)
1540
+
1541
+
1542
@core.extern
def remainder(arg0, arg1, _semantic=None):
    """Element-wise binary extern call on two operands of the same float dtype.

    Overloads: (fp32, fp32) -> ``__nv_remainderf`` returning fp32,
    (fp64, fp64) -> ``__nv_remainder`` returning fp64. The call is marked pure.
    """
    f32 = core.dtype("fp32")
    f64 = core.dtype("fp64")
    overloads = {
        (f32, f32): ("__nv_remainderf", f32),
        (f64, f64): ("__nv_remainder", f64),
    }
    operands = [arg0, arg1]
    return core.extern_elementwise("", "", operands, overloads, is_pure=True, _semantic=_semantic)
1549
+
1550
+
1551
@core.extern
def fma(arg0, arg1, arg2, _semantic=None):
    """Element-wise ternary extern call on three operands of the same float dtype.

    Overloads: (fp32, fp32, fp32) -> ``__nv_fmaf`` returning fp32,
    (fp64, fp64, fp64) -> ``__nv_fma`` returning fp64. The call is marked pure.
    """
    f32 = core.dtype("fp32")
    f64 = core.dtype("fp64")
    overloads = {
        (f32, f32, f32): ("__nv_fmaf", f32),
        (f64, f64, f64): ("__nv_fma", f64),
    }
    operands = [arg0, arg1, arg2]
    return core.extern_elementwise("", "", operands, overloads, is_pure=True, _semantic=_semantic)
1558
+
1559
+
1560
@core.extern
def pow(arg0, arg1, _semantic=None):
    """Element-wise binary extern call with integer-exponent and float-exponent overloads.

    Overloads, in table order:
    (fp32, int32) -> ``__nv_powif`` returning fp32,
    (fp64, int32) -> ``__nv_powi`` returning fp64,
    (fp32, fp32) -> ``__nv_powf`` returning fp32,
    (fp64, fp64) -> ``__nv_pow`` returning fp64.
    The call is marked pure.
    """
    i32 = core.dtype("int32")
    f32 = core.dtype("fp32")
    f64 = core.dtype("fp64")
    overloads = {
        (f32, i32): ("__nv_powif", f32),
        (f64, i32): ("__nv_powi", f64),
        (f32, f32): ("__nv_powf", f32),
        (f64, f64): ("__nv_pow", f64),
    }
    operands = [arg0, arg1]
    return core.extern_elementwise("", "", operands, overloads, is_pure=True, _semantic=_semantic)
1569
+
1570
+
1571
@core.extern
def tgamma(arg0, _semantic=None):
    """Element-wise extern call: fp32 -> ``__nv_tgammaf``, fp64 -> ``__nv_tgamma``.

    The declared return dtype matches the operand dtype; the call is marked pure.
    """
    f32 = core.dtype("fp32")
    f64 = core.dtype("fp64")
    overloads = {
        (f32, ): ("__nv_tgammaf", f32),
        (f64, ): ("__nv_tgamma", f64),
    }
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
1578
+
1579
+
1580
@core.extern
def round(arg0, _semantic=None):
    """Element-wise extern call: fp32 -> ``__nv_roundf``, fp64 -> ``__nv_round``.

    The declared return dtype matches the operand dtype; the call is marked pure.
    """
    f32 = core.dtype("fp32")
    f64 = core.dtype("fp64")
    overloads = {
        (f32, ): ("__nv_roundf", f32),
        (f64, ): ("__nv_round", f64),
    }
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
1587
+
1588
+
1589
@core.extern
def llround(arg0, _semantic=None):
    """Element-wise extern call: fp32 -> ``__nv_llroundf``, fp64 -> ``__nv_llround``.

    Both overloads declare an int64 return dtype; the call is marked pure.
    """
    i64 = core.dtype("int64")
    overloads = {
        (core.dtype("fp32"), ): ("__nv_llroundf", i64),
        (core.dtype("fp64"), ): ("__nv_llround", i64),
    }
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
1596
+
1597
+
1598
@core.extern
def fdim(arg0, arg1, _semantic=None):
    """Element-wise binary extern call on two operands of the same float dtype.

    Overloads: (fp32, fp32) -> ``__nv_fdimf`` returning fp32,
    (fp64, fp64) -> ``__nv_fdim`` returning fp64. The call is marked pure.
    """
    f32 = core.dtype("fp32")
    f64 = core.dtype("fp64")
    overloads = {
        (f32, f32): ("__nv_fdimf", f32),
        (f64, f64): ("__nv_fdim", f64),
    }
    operands = [arg0, arg1]
    return core.extern_elementwise("", "", operands, overloads, is_pure=True, _semantic=_semantic)
1605
+
1606
+
1607
@core.extern
def ilogb(arg0, _semantic=None):
    """Element-wise extern call: fp32 -> ``__nv_ilogbf``, fp64 -> ``__nv_ilogb``.

    Both overloads declare an int32 return dtype; the call is marked pure.
    """
    i32 = core.dtype("int32")
    overloads = {
        (core.dtype("fp32"), ): ("__nv_ilogbf", i32),
        (core.dtype("fp64"), ): ("__nv_ilogb", i32),
    }
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
1614
+
1615
+
1616
@core.extern
def logb(arg0, _semantic=None):
    """Element-wise extern call: fp32 -> ``__nv_logbf``, fp64 -> ``__nv_logb``.

    The declared return dtype matches the operand dtype; the call is marked pure.
    """
    f32 = core.dtype("fp32")
    f64 = core.dtype("fp64")
    overloads = {
        (f32, ): ("__nv_logbf", f32),
        (f64, ): ("__nv_logb", f64),
    }
    return core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
1623
+
1624
+
1625
@core.extern
def isfinited(arg0, _semantic=None):
    """Element-wise extern call with a single fp64 overload, ``__nv_isfinited``.

    The extern is declared to return int32; that result is then converted
    to ``core.int1`` before being returned. The call is marked pure.
    """
    overloads = {
        (core.dtype("fp64"), ): ("__nv_isfinited", core.dtype("int32")),
    }
    raw_flags = core.extern_elementwise("", "", [arg0], overloads, is_pure=True, _semantic=_semantic)
    # Convert the int32 result of the extern call to int1.
    return raw_flags.to(core.int1, _semantic=_semantic)