triton-windows 3.3.1.post19__cp310-cp310-win_amd64.whl → 3.5.0.post21__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of triton-windows might be problematic. Click here for more details.
- triton/_C/libtriton.pyd +0 -0
- triton/__init__.py +11 -2
- triton/_filecheck.py +97 -0
- triton/_internal_testing.py +95 -18
- triton/_utils.py +112 -21
- triton/backends/__init__.py +20 -23
- triton/backends/amd/__init__.py +0 -0
- triton/backends/amd/compiler.py +161 -119
- triton/backends/amd/driver.c +118 -46
- triton/backends/amd/driver.py +274 -96
- triton/backends/compiler.py +7 -21
- triton/backends/driver.py +13 -0
- triton/backends/nvidia/bin/ptxas.exe +0 -0
- triton/backends/nvidia/compiler.py +163 -106
- triton/backends/nvidia/driver.c +166 -101
- triton/backends/nvidia/driver.py +384 -202
- triton/compiler/__init__.py +5 -2
- triton/compiler/code_generator.py +439 -231
- triton/compiler/compiler.py +152 -84
- triton/experimental/__init__.py +0 -0
- triton/experimental/gluon/__init__.py +5 -0
- triton/experimental/gluon/_compiler.py +0 -0
- triton/experimental/gluon/_runtime.py +102 -0
- triton/experimental/gluon/language/__init__.py +119 -0
- triton/experimental/gluon/language/_core.py +490 -0
- triton/experimental/gluon/language/_layouts.py +583 -0
- triton/experimental/gluon/language/_math.py +20 -0
- triton/experimental/gluon/language/_semantic.py +380 -0
- triton/experimental/gluon/language/_standard.py +80 -0
- triton/experimental/gluon/language/amd/__init__.py +4 -0
- triton/experimental/gluon/language/amd/_layouts.py +96 -0
- triton/experimental/gluon/language/amd/cdna3/__init__.py +100 -0
- triton/experimental/gluon/language/amd/cdna4/__init__.py +48 -0
- triton/experimental/gluon/language/amd/cdna4/async_copy.py +151 -0
- triton/experimental/gluon/language/extra/__init__.py +3 -0
- triton/experimental/gluon/language/nvidia/__init__.py +4 -0
- triton/experimental/gluon/language/nvidia/ampere/__init__.py +3 -0
- triton/experimental/gluon/language/nvidia/ampere/async_copy.py +74 -0
- triton/experimental/gluon/language/nvidia/ampere/mbarrier.py +80 -0
- triton/experimental/gluon/language/nvidia/blackwell/__init__.py +387 -0
- triton/experimental/gluon/language/nvidia/blackwell/tma.py +52 -0
- triton/experimental/gluon/language/nvidia/hopper/__init__.py +132 -0
- triton/experimental/gluon/language/nvidia/hopper/mbarrier.py +34 -0
- triton/experimental/gluon/language/nvidia/hopper/tma.py +97 -0
- triton/experimental/gluon/nvidia/__init__.py +4 -0
- triton/experimental/gluon/nvidia/blackwell.py +3 -0
- triton/experimental/gluon/nvidia/hopper.py +45 -0
- triton/knobs.py +546 -0
- triton/language/__init__.py +50 -19
- triton/language/core.py +909 -572
- triton/language/extra/cuda/__init__.py +10 -7
- triton/language/extra/cuda/gdc.py +42 -0
- triton/language/extra/cuda/libdevice.py +394 -394
- triton/language/extra/cuda/utils.py +21 -21
- triton/language/extra/hip/__init__.py +3 -1
- triton/language/extra/hip/libdevice.py +120 -104
- triton/language/extra/hip/utils.py +35 -0
- triton/language/extra/libdevice.py +4 -0
- triton/language/math.py +65 -66
- triton/language/random.py +12 -2
- triton/language/semantic.py +1757 -1768
- triton/language/standard.py +127 -62
- triton/language/target_info.py +54 -0
- triton/runtime/_allocation.py +15 -3
- triton/runtime/_async_compile.py +55 -0
- triton/runtime/autotuner.py +117 -60
- triton/runtime/build.py +83 -17
- triton/runtime/cache.py +61 -47
- triton/runtime/driver.py +25 -47
- triton/runtime/interpreter.py +95 -50
- triton/runtime/jit.py +445 -248
- triton/runtime/tcc/include/_mingw.h +8 -10
- triton/runtime/tcc/include/assert.h +5 -0
- triton/runtime/tcc/include/errno.h +1 -1
- triton/runtime/tcc/include/float.h +21 -3
- triton/runtime/tcc/include/iso646.h +36 -0
- triton/runtime/tcc/include/limits.h +5 -0
- triton/runtime/tcc/include/malloc.h +2 -2
- triton/runtime/tcc/include/math.h +21 -261
- triton/runtime/tcc/include/stdalign.h +16 -0
- triton/runtime/tcc/include/stdarg.h +5 -70
- triton/runtime/tcc/include/stdatomic.h +171 -0
- triton/runtime/tcc/include/stddef.h +7 -19
- triton/runtime/tcc/include/stdlib.h +15 -4
- triton/runtime/tcc/include/stdnoreturn.h +7 -0
- triton/runtime/tcc/include/sys/stat.h +2 -2
- triton/runtime/tcc/include/sys/types.h +5 -0
- triton/runtime/tcc/include/tcc/tcc_libm.h +444 -27
- triton/runtime/tcc/include/tccdefs.h +342 -0
- triton/runtime/tcc/include/tgmath.h +89 -0
- triton/runtime/tcc/include/uchar.h +33 -0
- triton/runtime/tcc/include/unistd.h +1 -0
- triton/runtime/tcc/include/winapi/qos.h +72 -0
- triton/runtime/tcc/include/winapi/shellapi.h +59 -0
- triton/runtime/tcc/include/winapi/winbase.h +9 -2
- triton/runtime/tcc/include/winapi/wincon.h +8 -0
- triton/runtime/tcc/include/winapi/windows.h +1 -1
- triton/runtime/tcc/include/winapi/winnls.h +778 -0
- triton/runtime/tcc/include/winapi/winnt.h +9 -7
- triton/runtime/tcc/include/winapi/winsock2.h +1474 -0
- triton/runtime/tcc/include/winapi/ws2ipdef.h +21 -0
- triton/runtime/tcc/include/winapi/ws2tcpip.h +391 -0
- triton/runtime/tcc/lib/libtcc1.a +0 -0
- triton/runtime/tcc/lib/python314.def +1800 -0
- triton/runtime/tcc/lib/python314t.def +1809 -0
- triton/runtime/tcc/libtcc.dll +0 -0
- triton/runtime/tcc/tcc.exe +0 -0
- triton/testing.py +16 -12
- triton/tools/compile.py +62 -14
- triton/tools/disasm.py +3 -4
- triton/tools/extra/cuda/compile.c +1 -0
- triton/tools/extra/hip/compile.cpp +66 -0
- triton/tools/extra/hip/compile.h +13 -0
- triton/tools/ragged_tma.py +92 -0
- triton/tools/tensor_descriptor.py +34 -0
- triton/windows_utils.py +52 -81
- {triton_windows-3.3.1.post19.dist-info → triton_windows-3.5.0.post21.dist-info}/METADATA +8 -4
- triton_windows-3.5.0.post21.dist-info/RECORD +217 -0
- triton_windows-3.5.0.post21.dist-info/entry_points.txt +3 -0
- triton_windows-3.5.0.post21.dist-info/licenses/LICENSE +23 -0
- triton_windows-3.5.0.post21.dist-info/top_level.txt +1 -0
- triton/backends/amd/include/hip/amd_detail/amd_channel_descriptor.h +0 -358
- triton/backends/amd/include/hip/amd_detail/amd_device_functions.h +0 -1010
- triton/backends/amd/include/hip/amd_detail/amd_hip_atomic.h +0 -1638
- triton/backends/amd/include/hip/amd_detail/amd_hip_bf16.h +0 -1814
- triton/backends/amd/include/hip/amd_detail/amd_hip_bfloat16.h +0 -293
- triton/backends/amd/include/hip/amd_detail/amd_hip_common.h +0 -32
- triton/backends/amd/include/hip/amd_detail/amd_hip_complex.h +0 -174
- triton/backends/amd/include/hip/amd_detail/amd_hip_cooperative_groups.h +0 -835
- triton/backends/amd/include/hip/amd_detail/amd_hip_fp16.h +0 -1809
- triton/backends/amd/include/hip/amd_detail/amd_hip_fp8.h +0 -1391
- triton/backends/amd/include/hip/amd_detail/amd_hip_gl_interop.h +0 -108
- triton/backends/amd/include/hip/amd_detail/amd_hip_math_constants.h +0 -124
- triton/backends/amd/include/hip/amd_detail/amd_hip_runtime.h +0 -405
- triton/backends/amd/include/hip/amd_detail/amd_hip_runtime_pt_api.h +0 -196
- triton/backends/amd/include/hip/amd_detail/amd_hip_unsafe_atomics.h +0 -565
- triton/backends/amd/include/hip/amd_detail/amd_hip_vector_types.h +0 -2226
- triton/backends/amd/include/hip/amd_detail/amd_math_functions.h +0 -104
- triton/backends/amd/include/hip/amd_detail/amd_surface_functions.h +0 -244
- triton/backends/amd/include/hip/amd_detail/amd_warp_functions.h +0 -538
- triton/backends/amd/include/hip/amd_detail/amd_warp_sync_functions.h +0 -288
- triton/backends/amd/include/hip/amd_detail/concepts.hpp +0 -30
- triton/backends/amd/include/hip/amd_detail/device_library_decls.h +0 -133
- triton/backends/amd/include/hip/amd_detail/functional_grid_launch.hpp +0 -218
- triton/backends/amd/include/hip/amd_detail/grid_launch.h +0 -67
- triton/backends/amd/include/hip/amd_detail/grid_launch.hpp +0 -50
- triton/backends/amd/include/hip/amd_detail/grid_launch_GGL.hpp +0 -26
- triton/backends/amd/include/hip/amd_detail/helpers.hpp +0 -137
- triton/backends/amd/include/hip/amd_detail/hip_api_trace.hpp +0 -1446
- triton/backends/amd/include/hip/amd_detail/hip_assert.h +0 -101
- triton/backends/amd/include/hip/amd_detail/hip_cooperative_groups_helper.h +0 -242
- triton/backends/amd/include/hip/amd_detail/hip_fp16_gcc.h +0 -254
- triton/backends/amd/include/hip/amd_detail/hip_fp16_math_fwd.h +0 -96
- triton/backends/amd/include/hip/amd_detail/hip_ldg.h +0 -100
- triton/backends/amd/include/hip/amd_detail/hip_prof_str.h +0 -10570
- triton/backends/amd/include/hip/amd_detail/hip_runtime_prof.h +0 -78
- triton/backends/amd/include/hip/amd_detail/host_defines.h +0 -184
- triton/backends/amd/include/hip/amd_detail/hsa_helpers.hpp +0 -102
- triton/backends/amd/include/hip/amd_detail/macro_based_grid_launch.hpp +0 -798
- triton/backends/amd/include/hip/amd_detail/math_fwd.h +0 -698
- triton/backends/amd/include/hip/amd_detail/ockl_image.h +0 -177
- triton/backends/amd/include/hip/amd_detail/program_state.hpp +0 -107
- triton/backends/amd/include/hip/amd_detail/texture_fetch_functions.h +0 -491
- triton/backends/amd/include/hip/amd_detail/texture_indirect_functions.h +0 -478
- triton/backends/amd/include/hip/channel_descriptor.h +0 -39
- triton/backends/amd/include/hip/device_functions.h +0 -38
- triton/backends/amd/include/hip/driver_types.h +0 -468
- triton/backends/amd/include/hip/hip_bf16.h +0 -36
- triton/backends/amd/include/hip/hip_bfloat16.h +0 -44
- triton/backends/amd/include/hip/hip_common.h +0 -100
- triton/backends/amd/include/hip/hip_complex.h +0 -38
- triton/backends/amd/include/hip/hip_cooperative_groups.h +0 -46
- triton/backends/amd/include/hip/hip_deprecated.h +0 -95
- triton/backends/amd/include/hip/hip_ext.h +0 -161
- triton/backends/amd/include/hip/hip_fp16.h +0 -36
- triton/backends/amd/include/hip/hip_fp8.h +0 -33
- triton/backends/amd/include/hip/hip_gl_interop.h +0 -32
- triton/backends/amd/include/hip/hip_hcc.h +0 -24
- triton/backends/amd/include/hip/hip_math_constants.h +0 -36
- triton/backends/amd/include/hip/hip_profile.h +0 -27
- triton/backends/amd/include/hip/hip_runtime.h +0 -75
- triton/backends/amd/include/hip/hip_runtime_api.h +0 -9261
- triton/backends/amd/include/hip/hip_texture_types.h +0 -29
- triton/backends/amd/include/hip/hip_vector_types.h +0 -41
- triton/backends/amd/include/hip/hip_version.h +0 -17
- triton/backends/amd/include/hip/hiprtc.h +0 -421
- triton/backends/amd/include/hip/library_types.h +0 -78
- triton/backends/amd/include/hip/math_functions.h +0 -42
- triton/backends/amd/include/hip/surface_types.h +0 -63
- triton/backends/amd/include/hip/texture_types.h +0 -194
- triton/backends/amd/include/hsa/Brig.h +0 -1131
- triton/backends/amd/include/hsa/amd_hsa_common.h +0 -91
- triton/backends/amd/include/hsa/amd_hsa_elf.h +0 -462
- triton/backends/amd/include/hsa/amd_hsa_kernel_code.h +0 -269
- triton/backends/amd/include/hsa/amd_hsa_queue.h +0 -109
- triton/backends/amd/include/hsa/amd_hsa_signal.h +0 -80
- triton/backends/amd/include/hsa/hsa.h +0 -5738
- triton/backends/amd/include/hsa/hsa_amd_tool.h +0 -91
- triton/backends/amd/include/hsa/hsa_api_trace.h +0 -579
- triton/backends/amd/include/hsa/hsa_api_trace_version.h +0 -68
- triton/backends/amd/include/hsa/hsa_ext_amd.h +0 -3146
- triton/backends/amd/include/hsa/hsa_ext_finalize.h +0 -531
- triton/backends/amd/include/hsa/hsa_ext_image.h +0 -1454
- triton/backends/amd/include/hsa/hsa_ven_amd_aqlprofile.h +0 -488
- triton/backends/amd/include/hsa/hsa_ven_amd_loader.h +0 -667
- triton/backends/amd/include/hsa/hsa_ven_amd_pc_sampling.h +0 -416
- triton/backends/amd/include/roctracer/ext/prof_protocol.h +0 -107
- triton/backends/amd/include/roctracer/hip_ostream_ops.h +0 -4515
- triton/backends/amd/include/roctracer/hsa_ostream_ops.h +0 -1727
- triton/backends/amd/include/roctracer/hsa_prof_str.h +0 -3059
- triton/backends/amd/include/roctracer/roctracer.h +0 -779
- triton/backends/amd/include/roctracer/roctracer_ext.h +0 -81
- triton/backends/amd/include/roctracer/roctracer_hcc.h +0 -24
- triton/backends/amd/include/roctracer/roctracer_hip.h +0 -37
- triton/backends/amd/include/roctracer/roctracer_hsa.h +0 -112
- triton/backends/amd/include/roctracer/roctracer_plugin.h +0 -137
- triton/backends/amd/include/roctracer/roctracer_roctx.h +0 -67
- triton/backends/amd/include/roctracer/roctx.h +0 -229
- triton/language/_utils.py +0 -21
- triton/language/extra/cuda/_experimental_tma.py +0 -106
- triton/runtime/tcc/lib/libtcc1-64.a +0 -0
- triton/tools/experimental_descriptor.py +0 -32
- triton_windows-3.3.1.post19.dist-info/RECORD +0 -260
- triton_windows-3.3.1.post19.dist-info/top_level.txt +0 -14
- {triton_windows-3.3.1.post19.dist-info → triton_windows-3.5.0.post21.dist-info}/WHEEL +0 -0
|
@@ -2,474 +2,490 @@ from triton.language import core
|
|
|
2
2
|
|
|
3
3
|
|
|
4
4
|
@core.extern
|
|
5
|
-
def abs(arg0, _builder=None):
|
|
5
|
+
def abs(arg0, _semantic=None):
|
|
6
6
|
return core.extern_elementwise(
|
|
7
7
|
"", "", [arg0], {
|
|
8
8
|
(core.dtype("int32"), ): ("__triton_hip_iabs", core.dtype("int32")),
|
|
9
9
|
(core.dtype("int64"), ): ("__triton_hip_iabs", core.dtype("int64")),
|
|
10
10
|
(core.dtype("fp32"), ): ("__triton_hip_fabs", core.dtype("fp32")),
|
|
11
11
|
(core.dtype("fp64"), ): ("__triton_hip_fabs", core.dtype("fp64")),
|
|
12
|
-
}, is_pure=True, _builder=_builder)
|
|
12
|
+
}, is_pure=True, _semantic=_semantic)
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
@core.extern
|
|
16
|
-
def floor(arg0, _builder=None):
|
|
16
|
+
def floor(arg0, _semantic=None):
|
|
17
17
|
return core.extern_elementwise(
|
|
18
18
|
"", "", [arg0], {
|
|
19
19
|
(core.dtype("fp32"), ): ("__ocml_floor_f32", core.dtype("fp32")),
|
|
20
20
|
(core.dtype("fp64"), ): ("__ocml_floor_f64", core.dtype("fp64")),
|
|
21
|
-
}, is_pure=True, _builder=_builder)
|
|
21
|
+
}, is_pure=True, _semantic=_semantic)
|
|
22
22
|
|
|
23
23
|
|
|
24
24
|
@core.extern
|
|
25
|
-
def rsqrt(arg0, _builder=None):
|
|
25
|
+
def rsqrt(arg0, _semantic=None):
|
|
26
26
|
return core.extern_elementwise(
|
|
27
27
|
"", "", [arg0], {
|
|
28
28
|
(core.dtype("fp32"), ): ("__ocml_rsqrt_f32", core.dtype("fp32")),
|
|
29
29
|
(core.dtype("fp64"), ): ("__ocml_rsqrt_f64", core.dtype("fp64")),
|
|
30
|
-
}, is_pure=True, _builder=_builder)
|
|
30
|
+
}, is_pure=True, _semantic=_semantic)
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
@core.extern
|
|
34
|
-
def ceil(arg0, _builder=None):
|
|
34
|
+
def ceil(arg0, _semantic=None):
|
|
35
35
|
return core.extern_elementwise(
|
|
36
36
|
"", "", [arg0], {
|
|
37
37
|
(core.dtype("fp32"), ): ("__ocml_ceil_f32", core.dtype("fp32")),
|
|
38
38
|
(core.dtype("fp64"), ): ("__ocml_ceil_f64", core.dtype("fp64")),
|
|
39
|
-
}, is_pure=True, _builder=_builder)
|
|
39
|
+
}, is_pure=True, _semantic=_semantic)
|
|
40
40
|
|
|
41
41
|
|
|
42
42
|
@core.extern
|
|
43
|
-
def trunc(arg0, _builder=None):
|
|
43
|
+
def trunc(arg0, _semantic=None):
|
|
44
44
|
return core.extern_elementwise(
|
|
45
45
|
"", "", [arg0], {
|
|
46
46
|
(core.dtype("fp32"), ): ("__ocml_trunc_f32", core.dtype("fp32")),
|
|
47
47
|
(core.dtype("fp64"), ): ("__ocml_trunc_f64", core.dtype("fp64")),
|
|
48
|
-
}, is_pure=True, _builder=_builder)
|
|
48
|
+
}, is_pure=True, _semantic=_semantic)
|
|
49
49
|
|
|
50
50
|
|
|
51
51
|
@core.extern
|
|
52
|
-
def exp2(arg0, _builder=None):
|
|
52
|
+
def exp2(arg0, _semantic=None):
|
|
53
53
|
return core.extern_elementwise(
|
|
54
54
|
"", "", [arg0], {
|
|
55
55
|
(core.dtype("fp32"), ): ("__ocml_exp2_f32", core.dtype("fp32")),
|
|
56
56
|
(core.dtype("fp64"), ): ("__ocml_exp2_f64", core.dtype("fp64")),
|
|
57
|
-
}, is_pure=True, _builder=_builder)
|
|
57
|
+
}, is_pure=True, _semantic=_semantic)
|
|
58
58
|
|
|
59
59
|
|
|
60
60
|
@core.extern
|
|
61
|
-
def exp(arg0, _builder=None):
|
|
61
|
+
def exp(arg0, _semantic=None):
|
|
62
62
|
return core.extern_elementwise(
|
|
63
63
|
"", "", [arg0], {
|
|
64
64
|
(core.dtype("fp32"), ): ("__ocml_exp_f32", core.dtype("fp32")),
|
|
65
65
|
(core.dtype("fp64"), ): ("__ocml_exp_f64", core.dtype("fp64")),
|
|
66
|
-
}, is_pure=True, _builder=_builder)
|
|
66
|
+
}, is_pure=True, _semantic=_semantic)
|
|
67
67
|
|
|
68
68
|
|
|
69
69
|
@core.extern
|
|
70
|
-
def fast_expf(arg0, _builder=None):
|
|
70
|
+
def fast_expf(arg0, _semantic=None):
|
|
71
71
|
return core.extern_elementwise("", "", [arg0], {
|
|
72
72
|
(core.dtype("fp32"), ): ("__triton_hip_fast_expf", core.dtype("fp32")),
|
|
73
|
-
}, is_pure=True, _builder=_builder)
|
|
73
|
+
}, is_pure=True, _semantic=_semantic)
|
|
74
74
|
|
|
75
75
|
|
|
76
76
|
@core.extern
|
|
77
|
-
def fast_dividef(arg0, arg1, _builder=None):
|
|
77
|
+
def fast_tanhf(arg0, _semantic=None):
|
|
78
|
+
return core.extern_elementwise("", "", [arg0], {
|
|
79
|
+
(core.dtype("fp32"), ): ("__triton_hip_fast_tanhf", core.dtype("fp32")),
|
|
80
|
+
}, is_pure=True, _semantic=_semantic)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@core.extern
|
|
84
|
+
def fast_dividef(arg0, arg1, _semantic=None):
|
|
78
85
|
return core.extern_elementwise("", "", [arg0, arg1], {
|
|
79
86
|
(core.dtype("fp32"), core.dtype("fp32")): ("__triton_hip_fast_fdividef", core.dtype("fp32")),
|
|
80
|
-
}, is_pure=True, _builder=_builder)
|
|
87
|
+
}, is_pure=True, _semantic=_semantic)
|
|
81
88
|
|
|
82
89
|
|
|
83
90
|
@core.extern
|
|
84
|
-
def sqrt(arg0, _builder=None):
|
|
91
|
+
def sqrt(arg0, _semantic=None):
|
|
85
92
|
return core.extern_elementwise(
|
|
86
93
|
"", "", [arg0], {
|
|
87
94
|
(core.dtype("fp32"), ): ("__ocml_sqrt_f32", core.dtype("fp32")),
|
|
88
95
|
(core.dtype("fp64"), ): ("__ocml_sqrt_f64", core.dtype("fp64")),
|
|
89
|
-
}, is_pure=True, _builder=_builder)
|
|
96
|
+
}, is_pure=True, _semantic=_semantic)
|
|
90
97
|
|
|
91
98
|
|
|
92
99
|
@core.extern
|
|
93
|
-
def llrint(arg0, _builder=None):
|
|
100
|
+
def llrint(arg0, _semantic=None):
|
|
94
101
|
return core.extern_elementwise(
|
|
95
102
|
"", "", [arg0], {
|
|
96
103
|
(core.dtype("fp32"), ): ("__triton_hip_llrint", core.dtype("int64")),
|
|
97
104
|
(core.dtype("fp64"), ): ("__triton_hip_llrint", core.dtype("int64")),
|
|
98
|
-
}, is_pure=True, _builder=_builder)
|
|
105
|
+
}, is_pure=True, _semantic=_semantic)
|
|
99
106
|
|
|
100
107
|
|
|
101
108
|
@core.extern
|
|
102
|
-
def nearbyint(arg0, _builder=None):
|
|
109
|
+
def nearbyint(arg0, _semantic=None):
|
|
103
110
|
return core.extern_elementwise(
|
|
104
111
|
"", "", [
|
|
105
112
|
arg0,
|
|
106
113
|
], {
|
|
107
114
|
(core.dtype("fp32"), ): ("__ocml_nearbyint_f32", core.dtype("fp32")),
|
|
108
115
|
(core.dtype("fp64"), ): ("__ocml_nearbyint_f64", core.dtype("fp64")),
|
|
109
|
-
}, is_pure=True, _builder=_builder)
|
|
116
|
+
}, is_pure=True, _semantic=_semantic)
|
|
110
117
|
|
|
111
118
|
|
|
112
119
|
@core.extern
|
|
113
|
-
def isnan(arg0, _builder=None):
|
|
120
|
+
def isnan(arg0, _semantic=None):
|
|
114
121
|
return core.extern_elementwise(
|
|
115
122
|
"", "", [
|
|
116
123
|
arg0,
|
|
117
124
|
], {
|
|
118
125
|
(core.dtype("fp32"), ): ("__ocml_isnan_f32", core.dtype("int32")),
|
|
119
126
|
(core.dtype("fp64"), ): ("__ocml_isnan_f64", core.dtype("int32")),
|
|
120
|
-
}, is_pure=True,
|
|
127
|
+
}, is_pure=True, _semantic=_semantic).to(core.int1, _semantic=_semantic)
|
|
121
128
|
|
|
122
129
|
|
|
123
130
|
@core.extern
|
|
124
|
-
def signbit(arg0, _builder=None):
|
|
131
|
+
def signbit(arg0, _semantic=None):
|
|
125
132
|
return core.extern_elementwise(
|
|
126
133
|
"", "", [
|
|
127
134
|
arg0,
|
|
128
135
|
], {
|
|
129
136
|
(core.dtype("fp32"), ): ("__ocml_signbit_f32", core.dtype("int32")),
|
|
130
137
|
(core.dtype("fp64"), ): ("__ocml_signbit_f64", core.dtype("int32")),
|
|
131
|
-
}, is_pure=True, _builder=_builder)
|
|
138
|
+
}, is_pure=True, _semantic=_semantic)
|
|
132
139
|
|
|
133
140
|
|
|
134
141
|
@core.extern
|
|
135
|
-
def copysign(arg0, arg1, _builder=None):
|
|
142
|
+
def copysign(arg0, arg1, _semantic=None):
|
|
136
143
|
return core.extern_elementwise(
|
|
137
144
|
"", "", [arg0, arg1], {
|
|
138
145
|
(core.dtype("fp32"), core.dtype("fp32")): ("__ocml_copysign_f32", core.dtype("fp32")),
|
|
139
146
|
(core.dtype("fp64"), core.dtype("fp64")): ("__ocml_copysign_f64", core.dtype("fp64")),
|
|
140
|
-
}, is_pure=True, _builder=_builder)
|
|
147
|
+
}, is_pure=True, _semantic=_semantic)
|
|
141
148
|
|
|
142
149
|
|
|
143
150
|
@core.extern
|
|
144
|
-
def isinf(arg0, _builder=None):
|
|
151
|
+
def isinf(arg0, _semantic=None):
|
|
145
152
|
return core.extern_elementwise(
|
|
146
153
|
"", "", [arg0], {
|
|
147
154
|
(core.dtype("fp32"), ): ("__ocml_isinf_f32", core.dtype("int32")),
|
|
148
155
|
(core.dtype("fp64"), ): ("__ocml_isinf_f64", core.dtype("int32")),
|
|
149
|
-
}, is_pure=True,
|
|
156
|
+
}, is_pure=True, _semantic=_semantic).to(core.int1, _semantic=_semantic)
|
|
150
157
|
|
|
151
158
|
|
|
152
159
|
@core.extern
|
|
153
|
-
def nextafter(arg0, arg1, _builder=None):
|
|
160
|
+
def nextafter(arg0, arg1, _semantic=None):
|
|
154
161
|
return core.extern_elementwise(
|
|
155
162
|
"", "", [arg0, arg1], {
|
|
156
163
|
(core.dtype("fp32"), core.dtype("fp32")): ("__ocml_nextafter_f32", core.dtype("fp32")),
|
|
157
164
|
(core.dtype("fp64"), core.dtype("fp64")): ("__ocml_nextafter_f64", core.dtype("fp64")),
|
|
158
|
-
}, is_pure=True, _builder=_builder)
|
|
165
|
+
}, is_pure=True, _semantic=_semantic)
|
|
159
166
|
|
|
160
167
|
|
|
161
168
|
@core.extern
|
|
162
|
-
def sin(arg0, _builder=None):
|
|
169
|
+
def sin(arg0, _semantic=None):
|
|
163
170
|
return core.extern_elementwise(
|
|
164
171
|
"", "", [arg0], {
|
|
165
172
|
(core.dtype("fp32"), ): ("__ocml_sin_f32", core.dtype("fp32")),
|
|
166
173
|
(core.dtype("fp64"), ): ("__ocml_sin_f64", core.dtype("fp64")),
|
|
167
|
-
}, is_pure=True, _builder=_builder)
|
|
174
|
+
}, is_pure=True, _semantic=_semantic)
|
|
168
175
|
|
|
169
176
|
|
|
170
177
|
@core.extern
|
|
171
|
-
def cos(arg0, _builder=None):
|
|
178
|
+
def cos(arg0, _semantic=None):
|
|
172
179
|
return core.extern_elementwise(
|
|
173
180
|
"", "", [arg0], {
|
|
174
181
|
(core.dtype("fp32"), ): ("__ocml_cos_f32", core.dtype("fp32")),
|
|
175
182
|
(core.dtype("fp64"), ): ("__ocml_cos_f64", core.dtype("fp64")),
|
|
176
|
-
}, is_pure=True, _builder=_builder)
|
|
183
|
+
}, is_pure=True, _semantic=_semantic)
|
|
177
184
|
|
|
178
185
|
|
|
179
186
|
@core.extern
|
|
180
|
-
def tan(arg0, _builder=None):
|
|
187
|
+
def tan(arg0, _semantic=None):
|
|
181
188
|
return core.extern_elementwise(
|
|
182
189
|
"", "", [arg0], {
|
|
183
190
|
(core.dtype("fp32"), ): ("__ocml_tan_f32", core.dtype("fp32")),
|
|
184
191
|
(core.dtype("fp64"), ): ("__ocml_tan_f64", core.dtype("fp64")),
|
|
185
|
-
}, is_pure=True, _builder=_builder)
|
|
192
|
+
}, is_pure=True, _semantic=_semantic)
|
|
186
193
|
|
|
187
194
|
|
|
188
195
|
@core.extern
|
|
189
|
-
def log2(arg0, _builder=None):
|
|
196
|
+
def log2(arg0, _semantic=None):
|
|
190
197
|
return core.extern_elementwise(
|
|
191
198
|
"", "", [arg0], {
|
|
192
199
|
(core.dtype("fp32"), ): ("__ocml_log2_f32", core.dtype("fp32")),
|
|
193
200
|
(core.dtype("fp64"), ): ("__ocml_log2_f64", core.dtype("fp64")),
|
|
194
|
-
}, is_pure=True, _builder=_builder)
|
|
201
|
+
}, is_pure=True, _semantic=_semantic)
|
|
195
202
|
|
|
196
203
|
|
|
197
204
|
@core.extern
|
|
198
|
-
def cosh(arg0, _builder=None):
|
|
205
|
+
def cosh(arg0, _semantic=None):
|
|
199
206
|
return core.extern_elementwise(
|
|
200
207
|
"", "", [arg0], {
|
|
201
208
|
(core.dtype("fp32"), ): ("__ocml_cosh_f32", core.dtype("fp32")),
|
|
202
209
|
(core.dtype("fp64"), ): ("__ocml_cosh_f64", core.dtype("fp64")),
|
|
203
|
-
}, is_pure=True, _builder=_builder)
|
|
210
|
+
}, is_pure=True, _semantic=_semantic)
|
|
204
211
|
|
|
205
212
|
|
|
206
213
|
@core.extern
|
|
207
|
-
def sinh(arg0, _builder=None):
|
|
214
|
+
def sinh(arg0, _semantic=None):
|
|
208
215
|
return core.extern_elementwise(
|
|
209
216
|
"", "", [arg0], {
|
|
210
217
|
(core.dtype("fp32"), ): ("__ocml_sinh_f32", core.dtype("fp32")),
|
|
211
218
|
(core.dtype("fp64"), ): ("__ocml_sinh_f64", core.dtype("fp64")),
|
|
212
|
-
}, is_pure=True, _builder=_builder)
|
|
219
|
+
}, is_pure=True, _semantic=_semantic)
|
|
213
220
|
|
|
214
221
|
|
|
215
222
|
@core.extern
|
|
216
|
-
def tanh(arg0, _builder=None):
|
|
223
|
+
def tanh(arg0, _semantic=None):
|
|
217
224
|
return core.extern_elementwise(
|
|
218
225
|
"", "", [arg0], {
|
|
219
226
|
(core.dtype("fp32"), ): ("__ocml_tanh_f32", core.dtype("fp32")),
|
|
220
227
|
(core.dtype("fp64"), ): ("__ocml_tanh_f64", core.dtype("fp64")),
|
|
221
|
-
}, is_pure=True, _builder=_builder)
|
|
228
|
+
}, is_pure=True, _semantic=_semantic)
|
|
222
229
|
|
|
223
230
|
|
|
224
231
|
@core.extern
|
|
225
|
-
def atan2(arg0, arg1, _builder=None):
|
|
232
|
+
def atan2(arg0, arg1, _semantic=None):
|
|
226
233
|
return core.extern_elementwise(
|
|
227
234
|
"", "", [arg0, arg1], {
|
|
228
235
|
(core.dtype("fp32"), core.dtype("fp32")): ("__ocml_atan2_f32", core.dtype("fp32")),
|
|
229
236
|
(core.dtype("fp64"), core.dtype("fp64")): ("__ocml_atan2_f64", core.dtype("fp64")),
|
|
230
|
-
}, is_pure=True, _builder=_builder)
|
|
237
|
+
}, is_pure=True, _semantic=_semantic)
|
|
231
238
|
|
|
232
239
|
|
|
233
240
|
@core.extern
|
|
234
|
-
def atan(arg0, _builder=None):
|
|
241
|
+
def atan(arg0, _semantic=None):
|
|
235
242
|
return core.extern_elementwise(
|
|
236
243
|
"", "", [arg0], {
|
|
237
244
|
(core.dtype("fp32"), ): ("__ocml_atan_f32", core.dtype("fp32")),
|
|
238
245
|
(core.dtype("fp64"), ): ("__ocml_atan_f64", core.dtype("fp64")),
|
|
239
|
-
}, is_pure=True, _builder=_builder)
|
|
246
|
+
}, is_pure=True, _semantic=_semantic)
|
|
240
247
|
|
|
241
248
|
|
|
242
249
|
@core.extern
|
|
243
|
-
def asin(arg0, _builder=None):
|
|
250
|
+
def asin(arg0, _semantic=None):
|
|
244
251
|
return core.extern_elementwise(
|
|
245
252
|
"", "", [arg0], {
|
|
246
253
|
(core.dtype("fp32"), ): ("__ocml_asin_f32", core.dtype("fp32")),
|
|
247
254
|
(core.dtype("fp64"), ): ("__ocml_asin_f64", core.dtype("fp64")),
|
|
248
|
-
}, is_pure=True, _builder=_builder)
|
|
255
|
+
}, is_pure=True, _semantic=_semantic)
|
|
249
256
|
|
|
250
257
|
|
|
251
258
|
@core.extern
|
|
252
|
-
def acos(arg0, _builder=None):
|
|
259
|
+
def acos(arg0, _semantic=None):
|
|
253
260
|
return core.extern_elementwise(
|
|
254
261
|
"", "", [arg0], {
|
|
255
262
|
(core.dtype("fp32"), ): ("__ocml_acos_f32", core.dtype("fp32")),
|
|
256
263
|
(core.dtype("fp64"), ): ("__ocml_acos_f64", core.dtype("fp64")),
|
|
257
|
-
}, is_pure=True, _builder=_builder)
|
|
264
|
+
}, is_pure=True, _semantic=_semantic)
|
|
258
265
|
|
|
259
266
|
|
|
260
267
|
@core.extern
|
|
261
|
-
def log(arg0, _builder=None):
|
|
268
|
+
def log(arg0, _semantic=None):
|
|
262
269
|
return core.extern_elementwise(
|
|
263
270
|
"", "", [arg0], {
|
|
264
271
|
(core.dtype("fp32"), ): ("__ocml_log_f32", core.dtype("fp32")),
|
|
265
272
|
(core.dtype("fp64"), ): ("__ocml_log_f64", core.dtype("fp64")),
|
|
266
|
-
}, is_pure=True, _builder=_builder)
|
|
273
|
+
}, is_pure=True, _semantic=_semantic)
|
|
267
274
|
|
|
268
275
|
|
|
269
276
|
@core.extern
|
|
270
|
-
def log10(arg0, _builder=None):
|
|
277
|
+
def log10(arg0, _semantic=None):
|
|
271
278
|
return core.extern_elementwise(
|
|
272
279
|
"", "", [arg0], {
|
|
273
280
|
(core.dtype("fp32"), ): ("__ocml_log10_f32", core.dtype("fp32")),
|
|
274
281
|
(core.dtype("fp64"), ): ("__ocml_log10_f64", core.dtype("fp64")),
|
|
275
|
-
}, is_pure=True, _builder=_builder)
|
|
282
|
+
}, is_pure=True, _semantic=_semantic)
|
|
276
283
|
|
|
277
284
|
|
|
278
285
|
@core.extern
|
|
279
|
-
def log1p(arg0, _builder=None):
|
|
286
|
+
def log1p(arg0, _semantic=None):
|
|
280
287
|
return core.extern_elementwise(
|
|
281
288
|
"", "", [arg0], {
|
|
282
289
|
(core.dtype("fp32"), ): ("__ocml_log1p_f32", core.dtype("fp32")),
|
|
283
290
|
(core.dtype("fp64"), ): ("__ocml_log1p_f64", core.dtype("fp64")),
|
|
284
|
-
}, is_pure=True, _builder=_builder)
|
|
291
|
+
}, is_pure=True, _semantic=_semantic)
|
|
285
292
|
|
|
286
293
|
|
|
287
294
|
@core.extern
|
|
288
|
-
def acosh(arg0, _builder=None):
|
|
295
|
+
def acosh(arg0, _semantic=None):
|
|
289
296
|
return core.extern_elementwise(
|
|
290
297
|
"", "", [arg0], {
|
|
291
298
|
(core.dtype("fp32"), ): ("__ocml_acosh_f32", core.dtype("fp32")),
|
|
292
299
|
(core.dtype("fp64"), ): ("__ocml_acosh_f64", core.dtype("fp64")),
|
|
293
|
-
}, is_pure=True, _builder=_builder)
|
|
300
|
+
}, is_pure=True, _semantic=_semantic)
|
|
294
301
|
|
|
295
302
|
|
|
296
303
|
@core.extern
|
|
297
|
-
def asinh(arg0, _builder=None):
|
|
304
|
+
def asinh(arg0, _semantic=None):
|
|
298
305
|
return core.extern_elementwise(
|
|
299
306
|
"", "", [arg0], {
|
|
300
307
|
(core.dtype("fp32"), ): ("__ocml_asinh_f32", core.dtype("fp32")),
|
|
301
308
|
(core.dtype("fp64"), ): ("__ocml_asinh_f64", core.dtype("fp64")),
|
|
302
|
-
}, is_pure=True, _builder=_builder)
|
|
309
|
+
}, is_pure=True, _semantic=_semantic)
|
|
303
310
|
|
|
304
311
|
|
|
305
312
|
@core.extern
|
|
306
|
-
def atanh(arg0, _builder=None):
|
|
313
|
+
def atanh(arg0, _semantic=None):
|
|
307
314
|
return core.extern_elementwise(
|
|
308
315
|
"", "", [arg0], {
|
|
309
316
|
(core.dtype("fp32"), ): ("__ocml_atanh_f32", core.dtype("fp32")),
|
|
310
317
|
(core.dtype("fp64"), ): ("__ocml_atanh_f64", core.dtype("fp64")),
|
|
311
|
-
}, is_pure=True, _builder=_builder)
|
|
318
|
+
}, is_pure=True, _semantic=_semantic)
|
|
312
319
|
|
|
313
320
|
|
|
314
321
|
@core.extern
|
|
315
|
-
def expm1(arg0, _builder=None):
|
|
322
|
+
def expm1(arg0, _semantic=None):
|
|
316
323
|
return core.extern_elementwise(
|
|
317
324
|
"", "", [arg0], {
|
|
318
325
|
(core.dtype("fp32"), ): ("__ocml_expm1_f32", core.dtype("fp32")),
|
|
319
326
|
(core.dtype("fp64"), ): ("__ocml_expm1_f64", core.dtype("fp64")),
|
|
320
|
-
}, is_pure=True, _builder=_builder)
|
|
327
|
+
}, is_pure=True, _semantic=_semantic)
|
|
321
328
|
|
|
322
329
|
|
|
323
330
|
@core.extern
|
|
324
|
-
def hypot(arg0, arg1, _builder=None):
|
|
331
|
+
def hypot(arg0, arg1, _semantic=None):
|
|
325
332
|
return core.extern_elementwise(
|
|
326
333
|
"", "", [arg0, arg1], {
|
|
327
334
|
(core.dtype("fp32"), core.dtype("fp32")): ("__ocml_hypot_f32", core.dtype("fp32")),
|
|
328
335
|
(core.dtype("fp64"), core.dtype("fp64")): ("__ocml_hypot_f64", core.dtype("fp64")),
|
|
329
|
-
}, is_pure=True, _builder=_builder)
|
|
336
|
+
}, is_pure=True, _semantic=_semantic)
|
|
330
337
|
|
|
331
338
|
|
|
332
339
|
@core.extern
|
|
333
|
-
def j0(arg0, _builder=None):
|
|
340
|
+
def j0(arg0, _semantic=None):
|
|
334
341
|
return core.extern_elementwise(
|
|
335
342
|
"", "", [arg0], {
|
|
336
343
|
(core.dtype("fp32"), ): ("__ocml_j0_f32", core.dtype("fp32")),
|
|
337
344
|
(core.dtype("fp64"), ): ("__ocml_j0_f64", core.dtype("fp64")),
|
|
338
|
-
}, is_pure=True, _builder=_builder)
|
|
345
|
+
}, is_pure=True, _semantic=_semantic)
|
|
339
346
|
|
|
340
347
|
|
|
341
348
|
@core.extern
|
|
342
|
-
def j1(arg0, _builder=None):
|
|
349
|
+
def j1(arg0, _semantic=None):
|
|
343
350
|
return core.extern_elementwise(
|
|
344
351
|
"", "", [arg0], {
|
|
345
352
|
(core.dtype("fp32"), ): ("__ocml_j1_f32", core.dtype("fp32")),
|
|
346
353
|
(core.dtype("fp64"), ): ("__ocml_j1_f64", core.dtype("fp64")),
|
|
347
|
-
}, is_pure=True, _builder=_builder)
|
|
354
|
+
}, is_pure=True, _semantic=_semantic)
|
|
348
355
|
|
|
349
356
|
|
|
350
357
|
@core.extern
|
|
351
|
-
def y0(arg0, _builder=None):
|
|
358
|
+
def y0(arg0, _semantic=None):
|
|
352
359
|
return core.extern_elementwise(
|
|
353
360
|
"", "", [arg0], {
|
|
354
361
|
(core.dtype("fp32"), ): ("__ocml_y0_f32", core.dtype("fp32")),
|
|
355
362
|
(core.dtype("fp64"), ): ("__ocml_y0_f64", core.dtype("fp64")),
|
|
356
|
-
}, is_pure=True, _builder=_builder)
|
|
363
|
+
}, is_pure=True, _semantic=_semantic)
|
|
357
364
|
|
|
358
365
|
|
|
359
366
|
@core.extern
|
|
360
|
-
def y1(arg0, _builder=None):
|
|
367
|
+
def y1(arg0, _semantic=None):
|
|
361
368
|
return core.extern_elementwise(
|
|
362
369
|
"", "", [arg0], {
|
|
363
370
|
(core.dtype("fp32"), ): ("__ocml_y1_f32", core.dtype("fp32")),
|
|
364
371
|
(core.dtype("fp64"), ): ("__ocml_y1_f64", core.dtype("fp64")),
|
|
365
|
-
}, is_pure=True, _builder=_builder)
|
|
372
|
+
}, is_pure=True, _semantic=_semantic)
|
|
366
373
|
|
|
367
374
|
|
|
368
375
|
@core.extern
|
|
369
|
-
def cyl_bessel_i0(arg0, _builder=None):
|
|
376
|
+
def cyl_bessel_i0(arg0, _semantic=None):
|
|
370
377
|
return core.extern_elementwise(
|
|
371
378
|
"", "", [arg0], {
|
|
372
379
|
(core.dtype("fp32"), ): ("__ocml_i0_f32", core.dtype("fp32")),
|
|
373
380
|
(core.dtype("fp64"), ): ("__ocml_i0_f64", core.dtype("fp64")),
|
|
374
|
-
}, is_pure=True, _builder=_builder)
|
|
381
|
+
}, is_pure=True, _semantic=_semantic)
|
|
375
382
|
|
|
376
383
|
|
|
377
384
|
@core.extern
|
|
378
|
-
def cyl_bessel_i1(arg0, _builder=None):
|
|
385
|
+
def cyl_bessel_i1(arg0, _semantic=None):
|
|
379
386
|
return core.extern_elementwise(
|
|
380
387
|
"", "", [arg0], {
|
|
381
388
|
(core.dtype("fp32"), ): ("__ocml_i1_f32", core.dtype("fp32")),
|
|
382
389
|
(core.dtype("fp64"), ): ("__ocml_i1_f64", core.dtype("fp64")),
|
|
383
|
-
}, is_pure=True,
|
|
390
|
+
}, is_pure=True, _semantic=_semantic)
|
|
384
391
|
|
|
385
392
|
|
|
386
393
|
@core.extern
|
|
387
|
-
def erf(arg0,
|
|
394
|
+
def erf(arg0, _semantic=None):
|
|
388
395
|
return core.extern_elementwise(
|
|
389
396
|
"", "", [arg0], {
|
|
390
397
|
(core.dtype("fp32"), ): ("__ocml_erf_f32", core.dtype("fp32")),
|
|
391
398
|
(core.dtype("fp64"), ): ("__ocml_erf_f64", core.dtype("fp64")),
|
|
392
|
-
}, is_pure=True,
|
|
399
|
+
}, is_pure=True, _semantic=_semantic)
|
|
393
400
|
|
|
394
401
|
|
|
395
402
|
@core.extern
|
|
396
|
-
def erfinv(arg0,
|
|
403
|
+
def erfinv(arg0, _semantic=None):
|
|
397
404
|
return core.extern_elementwise(
|
|
398
405
|
"", "", [arg0], {
|
|
399
406
|
(core.dtype("fp32"), ): ("__ocml_erfinv_f32", core.dtype("fp32")),
|
|
400
407
|
(core.dtype("fp64"), ): ("__ocml_erfinv_f64", core.dtype("fp64")),
|
|
401
|
-
}, is_pure=True,
|
|
408
|
+
}, is_pure=True, _semantic=_semantic)
|
|
402
409
|
|
|
403
410
|
|
|
404
411
|
@core.extern
|
|
405
|
-
def erfc(arg0,
|
|
412
|
+
def erfc(arg0, _semantic=None):
|
|
406
413
|
return core.extern_elementwise(
|
|
407
414
|
"", "", [arg0], {
|
|
408
415
|
(core.dtype("fp32"), ): ("__ocml_erfc_f32", core.dtype("fp32")),
|
|
409
416
|
(core.dtype("fp64"), ): ("__ocml_erfc_f64", core.dtype("fp64")),
|
|
410
|
-
}, is_pure=True,
|
|
417
|
+
}, is_pure=True, _semantic=_semantic)
|
|
411
418
|
|
|
412
419
|
|
|
413
420
|
@core.extern
|
|
414
|
-
def erfcx(arg0,
|
|
421
|
+
def erfcx(arg0, _semantic=None):
|
|
415
422
|
return core.extern_elementwise(
|
|
416
423
|
"", "", [arg0], {
|
|
417
424
|
(core.dtype("fp32"), ): ("__ocml_erfcx_f32", core.dtype("fp32")),
|
|
418
425
|
(core.dtype("fp64"), ): ("__ocml_erfcx_f64", core.dtype("fp64")),
|
|
419
|
-
}, is_pure=True,
|
|
426
|
+
}, is_pure=True, _semantic=_semantic)
|
|
420
427
|
|
|
421
428
|
|
|
422
429
|
@core.extern
|
|
423
|
-
def lgamma(arg0,
|
|
430
|
+
def lgamma(arg0, _semantic=None):
|
|
424
431
|
return core.extern_elementwise(
|
|
425
432
|
"", "", [arg0], {
|
|
426
433
|
(core.dtype("fp32"), ): ("__ocml_lgamma_f32", core.dtype("fp32")),
|
|
427
434
|
(core.dtype("fp64"), ): ("__ocml_lgamma_f64", core.dtype("fp64")),
|
|
428
|
-
}, is_pure=True,
|
|
435
|
+
}, is_pure=True, _semantic=_semantic)
|
|
429
436
|
|
|
430
437
|
|
|
431
438
|
@core.extern
|
|
432
|
-
def ldexp(arg0, arg1,
|
|
439
|
+
def ldexp(arg0, arg1, _semantic=None):
|
|
433
440
|
return core.extern_elementwise(
|
|
434
441
|
"", "", [arg0, arg1], {
|
|
435
442
|
(core.dtype("fp32"), core.dtype("int32")): ("__ocml_ldexp_f32", core.dtype("fp32")),
|
|
436
443
|
(core.dtype("fp64"), core.dtype("int32")): ("__ocml_ldexp_f64", core.dtype("fp64")),
|
|
437
|
-
}, is_pure=True,
|
|
444
|
+
}, is_pure=True, _semantic=_semantic)
|
|
438
445
|
|
|
439
446
|
|
|
440
447
|
@core.extern
|
|
441
|
-
def fmod(arg0, arg1,
|
|
448
|
+
def fmod(arg0, arg1, _semantic=None):
|
|
442
449
|
return core.extern_elementwise(
|
|
443
450
|
"", "", [arg0, arg1], {
|
|
444
451
|
(core.dtype("fp32"), core.dtype("fp32")): ("__ocml_fmod_f32", core.dtype("fp32")),
|
|
445
452
|
(core.dtype("fp64"), core.dtype("fp64")): ("__ocml_fmod_f64", core.dtype("fp64")),
|
|
446
|
-
}, is_pure=True,
|
|
453
|
+
}, is_pure=True, _semantic=_semantic)
|
|
447
454
|
|
|
448
455
|
|
|
449
456
|
@core.extern
|
|
450
|
-
def fma(arg0, arg1, arg2,
|
|
457
|
+
def fma(arg0, arg1, arg2, _semantic=None):
|
|
451
458
|
return core.extern_elementwise(
|
|
452
459
|
"", "", [arg0, arg1, arg2], {
|
|
453
460
|
(core.dtype("fp32"), core.dtype("fp32"), core.dtype("fp32")): ("__ocml_fma_f32", core.dtype("fp32")),
|
|
454
461
|
(core.dtype("fp64"), core.dtype("fp64"), core.dtype("fp64")): ("__ocml_fma_f64", core.dtype("fp64")),
|
|
455
|
-
}, is_pure=True,
|
|
462
|
+
}, is_pure=True, _semantic=_semantic)
|
|
456
463
|
|
|
457
464
|
|
|
458
465
|
@core.extern
|
|
459
|
-
def pow(arg0, arg1,
|
|
466
|
+
def pow(arg0, arg1, _semantic=None):
|
|
460
467
|
return core.extern_elementwise(
|
|
461
468
|
"", "", [arg0, arg1], {
|
|
462
469
|
(core.dtype("fp32"), core.dtype("int32")): ("__ocml_pown_f32", core.dtype("fp32")),
|
|
463
470
|
(core.dtype("fp64"), core.dtype("int32")): ("__ocml_pown_f64", core.dtype("fp64")),
|
|
464
471
|
(core.dtype("fp32"), core.dtype("fp32")): ("__ocml_pow_f32", core.dtype("fp32")),
|
|
465
472
|
(core.dtype("fp64"), core.dtype("fp64")): ("__ocml_pow_f64", core.dtype("fp64")),
|
|
466
|
-
}, is_pure=True,
|
|
473
|
+
}, is_pure=True, _semantic=_semantic)
|
|
467
474
|
|
|
468
475
|
|
|
469
476
|
@core.extern
|
|
470
|
-
def ilogb(arg0,
|
|
477
|
+
def ilogb(arg0, _semantic=None):
|
|
471
478
|
return core.extern_elementwise(
|
|
472
479
|
"", "", [arg0], {
|
|
473
480
|
(core.dtype("fp32"), ): ("__ocml_ilogb_f32", core.dtype("int32")),
|
|
474
481
|
(core.dtype("fp64"), ): ("__ocml_ilogb_f64", core.dtype("int32")),
|
|
475
|
-
}, is_pure=True,
|
|
482
|
+
}, is_pure=True, _semantic=_semantic)
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
@core.extern
|
|
486
|
+
def round(arg0, _semantic=None):
|
|
487
|
+
return core.extern_elementwise(
|
|
488
|
+
"", "", [arg0], {
|
|
489
|
+
(core.dtype("fp32"), ): ("__ocml_round_f32", core.dtype("fp32")),
|
|
490
|
+
(core.dtype("fp64"), ): ("__ocml_round_f64", core.dtype("fp64")),
|
|
491
|
+
}, is_pure=True, _semantic=_semantic)
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from triton.language import core
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
@core.extern
|
|
5
|
+
def memrealtime(_semantic=None):
|
|
6
|
+
"""
|
|
7
|
+
Returns a 64-bit real time-counter value
|
|
8
|
+
"""
|
|
9
|
+
target_arch = _semantic.builder.options.arch
|
|
10
|
+
if 'gfx11' in target_arch or 'gfx12' in target_arch:
|
|
11
|
+
return core.inline_asm_elementwise(
|
|
12
|
+
"""
|
|
13
|
+
s_sendmsg_rtn_b64 $0, sendmsg(MSG_RTN_GET_REALTIME)
|
|
14
|
+
s_waitcnt lgkmcnt(0)
|
|
15
|
+
""",
|
|
16
|
+
"=r",
|
|
17
|
+
[],
|
|
18
|
+
dtype=core.int64,
|
|
19
|
+
is_pure=False,
|
|
20
|
+
pack=1,
|
|
21
|
+
_semantic=_semantic,
|
|
22
|
+
)
|
|
23
|
+
else:
|
|
24
|
+
return core.inline_asm_elementwise(
|
|
25
|
+
"""
|
|
26
|
+
s_memrealtime $0
|
|
27
|
+
s_waitcnt vmcnt(0)
|
|
28
|
+
""",
|
|
29
|
+
"=r",
|
|
30
|
+
[],
|
|
31
|
+
dtype=core.int64,
|
|
32
|
+
is_pure=False,
|
|
33
|
+
pack=1,
|
|
34
|
+
_semantic=_semantic,
|
|
35
|
+
)
|