triton-windows 3.3.1.post19__cp313-cp313-win_amd64.whl → 3.4.0.post20__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of triton-windows might be problematic. Click here for more details.
- triton/_C/libtriton.pyd +0 -0
- triton/__init__.py +4 -1
- triton/_filecheck.py +87 -0
- triton/_internal_testing.py +26 -15
- triton/_utils.py +110 -21
- triton/backends/__init__.py +20 -23
- triton/backends/amd/__init__.py +0 -0
- triton/backends/amd/compiler.py +112 -78
- triton/backends/amd/driver.c +5 -2
- triton/backends/amd/driver.py +149 -47
- triton/backends/compiler.py +7 -21
- triton/backends/nvidia/bin/ptxas.exe +0 -0
- triton/backends/nvidia/compiler.py +92 -93
- triton/backends/nvidia/driver.c +90 -98
- triton/backends/nvidia/driver.py +303 -128
- triton/compiler/code_generator.py +212 -111
- triton/compiler/compiler.py +110 -25
- triton/experimental/__init__.py +0 -0
- triton/experimental/gluon/__init__.py +4 -0
- triton/experimental/gluon/_compiler.py +0 -0
- triton/experimental/gluon/_runtime.py +99 -0
- triton/experimental/gluon/language/__init__.py +18 -0
- triton/experimental/gluon/language/_core.py +312 -0
- triton/experimental/gluon/language/_layouts.py +230 -0
- triton/experimental/gluon/language/_math.py +12 -0
- triton/experimental/gluon/language/_semantic.py +287 -0
- triton/experimental/gluon/language/_standard.py +47 -0
- triton/experimental/gluon/language/nvidia/__init__.py +4 -0
- triton/experimental/gluon/language/nvidia/blackwell/__init__.py +202 -0
- triton/experimental/gluon/language/nvidia/blackwell/tma.py +32 -0
- triton/experimental/gluon/language/nvidia/hopper/__init__.py +11 -0
- triton/experimental/gluon/language/nvidia/hopper/mbarrier.py +51 -0
- triton/experimental/gluon/language/nvidia/hopper/tma.py +96 -0
- triton/experimental/gluon/nvidia/__init__.py +4 -0
- triton/experimental/gluon/nvidia/blackwell.py +3 -0
- triton/experimental/gluon/nvidia/hopper.py +40 -0
- triton/knobs.py +481 -0
- triton/language/__init__.py +39 -14
- triton/language/core.py +794 -537
- triton/language/extra/cuda/__init__.py +10 -7
- triton/language/extra/cuda/gdc.py +42 -0
- triton/language/extra/cuda/libdevice.py +394 -394
- triton/language/extra/cuda/utils.py +21 -21
- triton/language/extra/hip/libdevice.py +113 -104
- triton/language/math.py +65 -66
- triton/language/random.py +12 -2
- triton/language/semantic.py +1706 -1770
- triton/language/standard.py +116 -51
- triton/runtime/autotuner.py +117 -59
- triton/runtime/build.py +76 -12
- triton/runtime/cache.py +18 -47
- triton/runtime/driver.py +32 -29
- triton/runtime/interpreter.py +72 -35
- triton/runtime/jit.py +146 -110
- triton/testing.py +16 -12
- triton/tools/disasm.py +3 -4
- triton/tools/tensor_descriptor.py +36 -0
- triton/windows_utils.py +14 -6
- {triton_windows-3.3.1.post19.dist-info → triton_windows-3.4.0.post20.dist-info}/METADATA +7 -2
- triton_windows-3.4.0.post20.dist-info/RECORD +186 -0
- triton_windows-3.4.0.post20.dist-info/entry_points.txt +3 -0
- triton_windows-3.4.0.post20.dist-info/licenses/LICENSE +23 -0
- triton_windows-3.4.0.post20.dist-info/top_level.txt +1 -0
- triton/backends/amd/include/hip/amd_detail/amd_channel_descriptor.h +0 -358
- triton/backends/amd/include/hip/amd_detail/amd_device_functions.h +0 -1010
- triton/backends/amd/include/hip/amd_detail/amd_hip_atomic.h +0 -1638
- triton/backends/amd/include/hip/amd_detail/amd_hip_bf16.h +0 -1814
- triton/backends/amd/include/hip/amd_detail/amd_hip_bfloat16.h +0 -293
- triton/backends/amd/include/hip/amd_detail/amd_hip_common.h +0 -32
- triton/backends/amd/include/hip/amd_detail/amd_hip_complex.h +0 -174
- triton/backends/amd/include/hip/amd_detail/amd_hip_cooperative_groups.h +0 -835
- triton/backends/amd/include/hip/amd_detail/amd_hip_fp16.h +0 -1809
- triton/backends/amd/include/hip/amd_detail/amd_hip_fp8.h +0 -1391
- triton/backends/amd/include/hip/amd_detail/amd_hip_gl_interop.h +0 -108
- triton/backends/amd/include/hip/amd_detail/amd_hip_math_constants.h +0 -124
- triton/backends/amd/include/hip/amd_detail/amd_hip_runtime.h +0 -405
- triton/backends/amd/include/hip/amd_detail/amd_hip_runtime_pt_api.h +0 -196
- triton/backends/amd/include/hip/amd_detail/amd_hip_unsafe_atomics.h +0 -565
- triton/backends/amd/include/hip/amd_detail/amd_hip_vector_types.h +0 -2226
- triton/backends/amd/include/hip/amd_detail/amd_math_functions.h +0 -104
- triton/backends/amd/include/hip/amd_detail/amd_surface_functions.h +0 -244
- triton/backends/amd/include/hip/amd_detail/amd_warp_functions.h +0 -538
- triton/backends/amd/include/hip/amd_detail/amd_warp_sync_functions.h +0 -288
- triton/backends/amd/include/hip/amd_detail/concepts.hpp +0 -30
- triton/backends/amd/include/hip/amd_detail/device_library_decls.h +0 -133
- triton/backends/amd/include/hip/amd_detail/functional_grid_launch.hpp +0 -218
- triton/backends/amd/include/hip/amd_detail/grid_launch.h +0 -67
- triton/backends/amd/include/hip/amd_detail/grid_launch.hpp +0 -50
- triton/backends/amd/include/hip/amd_detail/grid_launch_GGL.hpp +0 -26
- triton/backends/amd/include/hip/amd_detail/helpers.hpp +0 -137
- triton/backends/amd/include/hip/amd_detail/hip_api_trace.hpp +0 -1446
- triton/backends/amd/include/hip/amd_detail/hip_assert.h +0 -101
- triton/backends/amd/include/hip/amd_detail/hip_cooperative_groups_helper.h +0 -242
- triton/backends/amd/include/hip/amd_detail/hip_fp16_gcc.h +0 -254
- triton/backends/amd/include/hip/amd_detail/hip_fp16_math_fwd.h +0 -96
- triton/backends/amd/include/hip/amd_detail/hip_ldg.h +0 -100
- triton/backends/amd/include/hip/amd_detail/hip_prof_str.h +0 -10570
- triton/backends/amd/include/hip/amd_detail/hip_runtime_prof.h +0 -78
- triton/backends/amd/include/hip/amd_detail/host_defines.h +0 -184
- triton/backends/amd/include/hip/amd_detail/hsa_helpers.hpp +0 -102
- triton/backends/amd/include/hip/amd_detail/macro_based_grid_launch.hpp +0 -798
- triton/backends/amd/include/hip/amd_detail/math_fwd.h +0 -698
- triton/backends/amd/include/hip/amd_detail/ockl_image.h +0 -177
- triton/backends/amd/include/hip/amd_detail/program_state.hpp +0 -107
- triton/backends/amd/include/hip/amd_detail/texture_fetch_functions.h +0 -491
- triton/backends/amd/include/hip/amd_detail/texture_indirect_functions.h +0 -478
- triton/backends/amd/include/hip/channel_descriptor.h +0 -39
- triton/backends/amd/include/hip/device_functions.h +0 -38
- triton/backends/amd/include/hip/driver_types.h +0 -468
- triton/backends/amd/include/hip/hip_bf16.h +0 -36
- triton/backends/amd/include/hip/hip_bfloat16.h +0 -44
- triton/backends/amd/include/hip/hip_common.h +0 -100
- triton/backends/amd/include/hip/hip_complex.h +0 -38
- triton/backends/amd/include/hip/hip_cooperative_groups.h +0 -46
- triton/backends/amd/include/hip/hip_deprecated.h +0 -95
- triton/backends/amd/include/hip/hip_ext.h +0 -161
- triton/backends/amd/include/hip/hip_fp16.h +0 -36
- triton/backends/amd/include/hip/hip_fp8.h +0 -33
- triton/backends/amd/include/hip/hip_gl_interop.h +0 -32
- triton/backends/amd/include/hip/hip_hcc.h +0 -24
- triton/backends/amd/include/hip/hip_math_constants.h +0 -36
- triton/backends/amd/include/hip/hip_profile.h +0 -27
- triton/backends/amd/include/hip/hip_runtime.h +0 -75
- triton/backends/amd/include/hip/hip_runtime_api.h +0 -9261
- triton/backends/amd/include/hip/hip_texture_types.h +0 -29
- triton/backends/amd/include/hip/hip_vector_types.h +0 -41
- triton/backends/amd/include/hip/hip_version.h +0 -17
- triton/backends/amd/include/hip/hiprtc.h +0 -421
- triton/backends/amd/include/hip/library_types.h +0 -78
- triton/backends/amd/include/hip/math_functions.h +0 -42
- triton/backends/amd/include/hip/surface_types.h +0 -63
- triton/backends/amd/include/hip/texture_types.h +0 -194
- triton/backends/amd/include/hsa/Brig.h +0 -1131
- triton/backends/amd/include/hsa/amd_hsa_common.h +0 -91
- triton/backends/amd/include/hsa/amd_hsa_elf.h +0 -462
- triton/backends/amd/include/hsa/amd_hsa_kernel_code.h +0 -269
- triton/backends/amd/include/hsa/amd_hsa_queue.h +0 -109
- triton/backends/amd/include/hsa/amd_hsa_signal.h +0 -80
- triton/backends/amd/include/hsa/hsa.h +0 -5738
- triton/backends/amd/include/hsa/hsa_amd_tool.h +0 -91
- triton/backends/amd/include/hsa/hsa_api_trace.h +0 -579
- triton/backends/amd/include/hsa/hsa_api_trace_version.h +0 -68
- triton/backends/amd/include/hsa/hsa_ext_amd.h +0 -3146
- triton/backends/amd/include/hsa/hsa_ext_finalize.h +0 -531
- triton/backends/amd/include/hsa/hsa_ext_image.h +0 -1454
- triton/backends/amd/include/hsa/hsa_ven_amd_aqlprofile.h +0 -488
- triton/backends/amd/include/hsa/hsa_ven_amd_loader.h +0 -667
- triton/backends/amd/include/hsa/hsa_ven_amd_pc_sampling.h +0 -416
- triton/backends/amd/include/roctracer/ext/prof_protocol.h +0 -107
- triton/backends/amd/include/roctracer/hip_ostream_ops.h +0 -4515
- triton/backends/amd/include/roctracer/hsa_ostream_ops.h +0 -1727
- triton/backends/amd/include/roctracer/hsa_prof_str.h +0 -3059
- triton/backends/amd/include/roctracer/roctracer.h +0 -779
- triton/backends/amd/include/roctracer/roctracer_ext.h +0 -81
- triton/backends/amd/include/roctracer/roctracer_hcc.h +0 -24
- triton/backends/amd/include/roctracer/roctracer_hip.h +0 -37
- triton/backends/amd/include/roctracer/roctracer_hsa.h +0 -112
- triton/backends/amd/include/roctracer/roctracer_plugin.h +0 -137
- triton/backends/amd/include/roctracer/roctracer_roctx.h +0 -67
- triton/backends/amd/include/roctracer/roctx.h +0 -229
- triton/language/_utils.py +0 -21
- triton/language/extra/cuda/_experimental_tma.py +0 -106
- triton/tools/experimental_descriptor.py +0 -32
- triton_windows-3.3.1.post19.dist-info/RECORD +0 -260
- triton_windows-3.3.1.post19.dist-info/top_level.txt +0 -14
- {triton_windows-3.3.1.post19.dist-info → triton_windows-3.4.0.post20.dist-info}/WHEEL +0 -0
|
triton/language/extra/hip/libdevice.py +113 -104
@@ -2,474 +2,483 @@ from triton.language import core
|
|
|
2
2
|
|
|
3
3
|
|
|
4
4
|
@core.extern
|
|
5
|
-
def abs(arg0,
|
|
5
|
+
def abs(arg0, _semantic=None):
|
|
6
6
|
return core.extern_elementwise(
|
|
7
7
|
"", "", [arg0], {
|
|
8
8
|
(core.dtype("int32"), ): ("__triton_hip_iabs", core.dtype("int32")),
|
|
9
9
|
(core.dtype("int64"), ): ("__triton_hip_iabs", core.dtype("int64")),
|
|
10
10
|
(core.dtype("fp32"), ): ("__triton_hip_fabs", core.dtype("fp32")),
|
|
11
11
|
(core.dtype("fp64"), ): ("__triton_hip_fabs", core.dtype("fp64")),
|
|
12
|
-
}, is_pure=True,
|
|
12
|
+
}, is_pure=True, _semantic=_semantic)
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
@core.extern
|
|
16
|
-
def floor(arg0,
|
|
16
|
+
def floor(arg0, _semantic=None):
|
|
17
17
|
return core.extern_elementwise(
|
|
18
18
|
"", "", [arg0], {
|
|
19
19
|
(core.dtype("fp32"), ): ("__ocml_floor_f32", core.dtype("fp32")),
|
|
20
20
|
(core.dtype("fp64"), ): ("__ocml_floor_f64", core.dtype("fp64")),
|
|
21
|
-
}, is_pure=True,
|
|
21
|
+
}, is_pure=True, _semantic=_semantic)
|
|
22
22
|
|
|
23
23
|
|
|
24
24
|
@core.extern
|
|
25
|
-
def rsqrt(arg0,
|
|
25
|
+
def rsqrt(arg0, _semantic=None):
|
|
26
26
|
return core.extern_elementwise(
|
|
27
27
|
"", "", [arg0], {
|
|
28
28
|
(core.dtype("fp32"), ): ("__ocml_rsqrt_f32", core.dtype("fp32")),
|
|
29
29
|
(core.dtype("fp64"), ): ("__ocml_rsqrt_f64", core.dtype("fp64")),
|
|
30
|
-
}, is_pure=True,
|
|
30
|
+
}, is_pure=True, _semantic=_semantic)
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
@core.extern
|
|
34
|
-
def ceil(arg0,
|
|
34
|
+
def ceil(arg0, _semantic=None):
|
|
35
35
|
return core.extern_elementwise(
|
|
36
36
|
"", "", [arg0], {
|
|
37
37
|
(core.dtype("fp32"), ): ("__ocml_ceil_f32", core.dtype("fp32")),
|
|
38
38
|
(core.dtype("fp64"), ): ("__ocml_ceil_f64", core.dtype("fp64")),
|
|
39
|
-
}, is_pure=True,
|
|
39
|
+
}, is_pure=True, _semantic=_semantic)
|
|
40
40
|
|
|
41
41
|
|
|
42
42
|
@core.extern
|
|
43
|
-
def trunc(arg0,
|
|
43
|
+
def trunc(arg0, _semantic=None):
|
|
44
44
|
return core.extern_elementwise(
|
|
45
45
|
"", "", [arg0], {
|
|
46
46
|
(core.dtype("fp32"), ): ("__ocml_trunc_f32", core.dtype("fp32")),
|
|
47
47
|
(core.dtype("fp64"), ): ("__ocml_trunc_f64", core.dtype("fp64")),
|
|
48
|
-
}, is_pure=True,
|
|
48
|
+
}, is_pure=True, _semantic=_semantic)
|
|
49
49
|
|
|
50
50
|
|
|
51
51
|
@core.extern
|
|
52
|
-
def exp2(arg0,
|
|
52
|
+
def exp2(arg0, _semantic=None):
|
|
53
53
|
return core.extern_elementwise(
|
|
54
54
|
"", "", [arg0], {
|
|
55
55
|
(core.dtype("fp32"), ): ("__ocml_exp2_f32", core.dtype("fp32")),
|
|
56
56
|
(core.dtype("fp64"), ): ("__ocml_exp2_f64", core.dtype("fp64")),
|
|
57
|
-
}, is_pure=True,
|
|
57
|
+
}, is_pure=True, _semantic=_semantic)
|
|
58
58
|
|
|
59
59
|
|
|
60
60
|
@core.extern
|
|
61
|
-
def exp(arg0,
|
|
61
|
+
def exp(arg0, _semantic=None):
|
|
62
62
|
return core.extern_elementwise(
|
|
63
63
|
"", "", [arg0], {
|
|
64
64
|
(core.dtype("fp32"), ): ("__ocml_exp_f32", core.dtype("fp32")),
|
|
65
65
|
(core.dtype("fp64"), ): ("__ocml_exp_f64", core.dtype("fp64")),
|
|
66
|
-
}, is_pure=True,
|
|
66
|
+
}, is_pure=True, _semantic=_semantic)
|
|
67
67
|
|
|
68
68
|
|
|
69
69
|
@core.extern
|
|
70
|
-
def fast_expf(arg0,
|
|
70
|
+
def fast_expf(arg0, _semantic=None):
|
|
71
71
|
return core.extern_elementwise("", "", [arg0], {
|
|
72
72
|
(core.dtype("fp32"), ): ("__triton_hip_fast_expf", core.dtype("fp32")),
|
|
73
|
-
}, is_pure=True,
|
|
73
|
+
}, is_pure=True, _semantic=_semantic)
|
|
74
74
|
|
|
75
75
|
|
|
76
76
|
@core.extern
|
|
77
|
-
def fast_dividef(arg0, arg1,
|
|
77
|
+
def fast_dividef(arg0, arg1, _semantic=None):
|
|
78
78
|
return core.extern_elementwise("", "", [arg0, arg1], {
|
|
79
79
|
(core.dtype("fp32"), core.dtype("fp32")): ("__triton_hip_fast_fdividef", core.dtype("fp32")),
|
|
80
|
-
}, is_pure=True,
|
|
80
|
+
}, is_pure=True, _semantic=_semantic)
|
|
81
81
|
|
|
82
82
|
|
|
83
83
|
@core.extern
|
|
84
|
-
def sqrt(arg0,
|
|
84
|
+
def sqrt(arg0, _semantic=None):
|
|
85
85
|
return core.extern_elementwise(
|
|
86
86
|
"", "", [arg0], {
|
|
87
87
|
(core.dtype("fp32"), ): ("__ocml_sqrt_f32", core.dtype("fp32")),
|
|
88
88
|
(core.dtype("fp64"), ): ("__ocml_sqrt_f64", core.dtype("fp64")),
|
|
89
|
-
}, is_pure=True,
|
|
89
|
+
}, is_pure=True, _semantic=_semantic)
|
|
90
90
|
|
|
91
91
|
|
|
92
92
|
@core.extern
|
|
93
|
-
def llrint(arg0,
|
|
93
|
+
def llrint(arg0, _semantic=None):
|
|
94
94
|
return core.extern_elementwise(
|
|
95
95
|
"", "", [arg0], {
|
|
96
96
|
(core.dtype("fp32"), ): ("__triton_hip_llrint", core.dtype("int64")),
|
|
97
97
|
(core.dtype("fp64"), ): ("__triton_hip_llrint", core.dtype("int64")),
|
|
98
|
-
}, is_pure=True,
|
|
98
|
+
}, is_pure=True, _semantic=_semantic)
|
|
99
99
|
|
|
100
100
|
|
|
101
101
|
@core.extern
|
|
102
|
-
def nearbyint(arg0,
|
|
102
|
+
def nearbyint(arg0, _semantic=None):
|
|
103
103
|
return core.extern_elementwise(
|
|
104
104
|
"", "", [
|
|
105
105
|
arg0,
|
|
106
106
|
], {
|
|
107
107
|
(core.dtype("fp32"), ): ("__ocml_nearbyint_f32", core.dtype("fp32")),
|
|
108
108
|
(core.dtype("fp64"), ): ("__ocml_nearbyint_f64", core.dtype("fp64")),
|
|
109
|
-
}, is_pure=True,
|
|
109
|
+
}, is_pure=True, _semantic=_semantic)
|
|
110
110
|
|
|
111
111
|
|
|
112
112
|
@core.extern
|
|
113
|
-
def isnan(arg0,
|
|
113
|
+
def isnan(arg0, _semantic=None):
|
|
114
114
|
return core.extern_elementwise(
|
|
115
115
|
"", "", [
|
|
116
116
|
arg0,
|
|
117
117
|
], {
|
|
118
118
|
(core.dtype("fp32"), ): ("__ocml_isnan_f32", core.dtype("int32")),
|
|
119
119
|
(core.dtype("fp64"), ): ("__ocml_isnan_f64", core.dtype("int32")),
|
|
120
|
-
}, is_pure=True,
|
|
120
|
+
}, is_pure=True, _semantic=_semantic).to(core.int1, _semantic=_semantic)
|
|
121
121
|
|
|
122
122
|
|
|
123
123
|
@core.extern
|
|
124
|
-
def signbit(arg0,
|
|
124
|
+
def signbit(arg0, _semantic=None):
|
|
125
125
|
return core.extern_elementwise(
|
|
126
126
|
"", "", [
|
|
127
127
|
arg0,
|
|
128
128
|
], {
|
|
129
129
|
(core.dtype("fp32"), ): ("__ocml_signbit_f32", core.dtype("int32")),
|
|
130
130
|
(core.dtype("fp64"), ): ("__ocml_signbit_f64", core.dtype("int32")),
|
|
131
|
-
}, is_pure=True,
|
|
131
|
+
}, is_pure=True, _semantic=_semantic)
|
|
132
132
|
|
|
133
133
|
|
|
134
134
|
@core.extern
|
|
135
|
-
def copysign(arg0, arg1,
|
|
135
|
+
def copysign(arg0, arg1, _semantic=None):
|
|
136
136
|
return core.extern_elementwise(
|
|
137
137
|
"", "", [arg0, arg1], {
|
|
138
138
|
(core.dtype("fp32"), core.dtype("fp32")): ("__ocml_copysign_f32", core.dtype("fp32")),
|
|
139
139
|
(core.dtype("fp64"), core.dtype("fp64")): ("__ocml_copysign_f64", core.dtype("fp64")),
|
|
140
|
-
}, is_pure=True,
|
|
140
|
+
}, is_pure=True, _semantic=_semantic)
|
|
141
141
|
|
|
142
142
|
|
|
143
143
|
@core.extern
|
|
144
|
-
def isinf(arg0,
|
|
144
|
+
def isinf(arg0, _semantic=None):
|
|
145
145
|
return core.extern_elementwise(
|
|
146
146
|
"", "", [arg0], {
|
|
147
147
|
(core.dtype("fp32"), ): ("__ocml_isinf_f32", core.dtype("int32")),
|
|
148
148
|
(core.dtype("fp64"), ): ("__ocml_isinf_f64", core.dtype("int32")),
|
|
149
|
-
}, is_pure=True,
|
|
149
|
+
}, is_pure=True, _semantic=_semantic).to(core.int1, _semantic=_semantic)
|
|
150
150
|
|
|
151
151
|
|
|
152
152
|
@core.extern
|
|
153
|
-
def nextafter(arg0, arg1,
|
|
153
|
+
def nextafter(arg0, arg1, _semantic=None):
|
|
154
154
|
return core.extern_elementwise(
|
|
155
155
|
"", "", [arg0, arg1], {
|
|
156
156
|
(core.dtype("fp32"), core.dtype("fp32")): ("__ocml_nextafter_f32", core.dtype("fp32")),
|
|
157
157
|
(core.dtype("fp64"), core.dtype("fp64")): ("__ocml_nextafter_f64", core.dtype("fp64")),
|
|
158
|
-
}, is_pure=True,
|
|
158
|
+
}, is_pure=True, _semantic=_semantic)
|
|
159
159
|
|
|
160
160
|
|
|
161
161
|
@core.extern
|
|
162
|
-
def sin(arg0,
|
|
162
|
+
def sin(arg0, _semantic=None):
|
|
163
163
|
return core.extern_elementwise(
|
|
164
164
|
"", "", [arg0], {
|
|
165
165
|
(core.dtype("fp32"), ): ("__ocml_sin_f32", core.dtype("fp32")),
|
|
166
166
|
(core.dtype("fp64"), ): ("__ocml_sin_f64", core.dtype("fp64")),
|
|
167
|
-
}, is_pure=True,
|
|
167
|
+
}, is_pure=True, _semantic=_semantic)
|
|
168
168
|
|
|
169
169
|
|
|
170
170
|
@core.extern
|
|
171
|
-
def cos(arg0,
|
|
171
|
+
def cos(arg0, _semantic=None):
|
|
172
172
|
return core.extern_elementwise(
|
|
173
173
|
"", "", [arg0], {
|
|
174
174
|
(core.dtype("fp32"), ): ("__ocml_cos_f32", core.dtype("fp32")),
|
|
175
175
|
(core.dtype("fp64"), ): ("__ocml_cos_f64", core.dtype("fp64")),
|
|
176
|
-
}, is_pure=True,
|
|
176
|
+
}, is_pure=True, _semantic=_semantic)
|
|
177
177
|
|
|
178
178
|
|
|
179
179
|
@core.extern
|
|
180
|
-
def tan(arg0,
|
|
180
|
+
def tan(arg0, _semantic=None):
|
|
181
181
|
return core.extern_elementwise(
|
|
182
182
|
"", "", [arg0], {
|
|
183
183
|
(core.dtype("fp32"), ): ("__ocml_tan_f32", core.dtype("fp32")),
|
|
184
184
|
(core.dtype("fp64"), ): ("__ocml_tan_f64", core.dtype("fp64")),
|
|
185
|
-
}, is_pure=True,
|
|
185
|
+
}, is_pure=True, _semantic=_semantic)
|
|
186
186
|
|
|
187
187
|
|
|
188
188
|
@core.extern
|
|
189
|
-
def log2(arg0,
|
|
189
|
+
def log2(arg0, _semantic=None):
|
|
190
190
|
return core.extern_elementwise(
|
|
191
191
|
"", "", [arg0], {
|
|
192
192
|
(core.dtype("fp32"), ): ("__ocml_log2_f32", core.dtype("fp32")),
|
|
193
193
|
(core.dtype("fp64"), ): ("__ocml_log2_f64", core.dtype("fp64")),
|
|
194
|
-
}, is_pure=True,
|
|
194
|
+
}, is_pure=True, _semantic=_semantic)
|
|
195
195
|
|
|
196
196
|
|
|
197
197
|
@core.extern
|
|
198
|
-
def cosh(arg0,
|
|
198
|
+
def cosh(arg0, _semantic=None):
|
|
199
199
|
return core.extern_elementwise(
|
|
200
200
|
"", "", [arg0], {
|
|
201
201
|
(core.dtype("fp32"), ): ("__ocml_cosh_f32", core.dtype("fp32")),
|
|
202
202
|
(core.dtype("fp64"), ): ("__ocml_cosh_f64", core.dtype("fp64")),
|
|
203
|
-
}, is_pure=True,
|
|
203
|
+
}, is_pure=True, _semantic=_semantic)
|
|
204
204
|
|
|
205
205
|
|
|
206
206
|
@core.extern
|
|
207
|
-
def sinh(arg0,
|
|
207
|
+
def sinh(arg0, _semantic=None):
|
|
208
208
|
return core.extern_elementwise(
|
|
209
209
|
"", "", [arg0], {
|
|
210
210
|
(core.dtype("fp32"), ): ("__ocml_sinh_f32", core.dtype("fp32")),
|
|
211
211
|
(core.dtype("fp64"), ): ("__ocml_sinh_f64", core.dtype("fp64")),
|
|
212
|
-
}, is_pure=True,
|
|
212
|
+
}, is_pure=True, _semantic=_semantic)
|
|
213
213
|
|
|
214
214
|
|
|
215
215
|
@core.extern
|
|
216
|
-
def tanh(arg0,
|
|
216
|
+
def tanh(arg0, _semantic=None):
|
|
217
217
|
return core.extern_elementwise(
|
|
218
218
|
"", "", [arg0], {
|
|
219
219
|
(core.dtype("fp32"), ): ("__ocml_tanh_f32", core.dtype("fp32")),
|
|
220
220
|
(core.dtype("fp64"), ): ("__ocml_tanh_f64", core.dtype("fp64")),
|
|
221
|
-
}, is_pure=True,
|
|
221
|
+
}, is_pure=True, _semantic=_semantic)
|
|
222
222
|
|
|
223
223
|
|
|
224
224
|
@core.extern
|
|
225
|
-
def atan2(arg0, arg1,
|
|
225
|
+
def atan2(arg0, arg1, _semantic=None):
|
|
226
226
|
return core.extern_elementwise(
|
|
227
227
|
"", "", [arg0, arg1], {
|
|
228
228
|
(core.dtype("fp32"), core.dtype("fp32")): ("__ocml_atan2_f32", core.dtype("fp32")),
|
|
229
229
|
(core.dtype("fp64"), core.dtype("fp64")): ("__ocml_atan2_f64", core.dtype("fp64")),
|
|
230
|
-
}, is_pure=True,
|
|
230
|
+
}, is_pure=True, _semantic=_semantic)
|
|
231
231
|
|
|
232
232
|
|
|
233
233
|
@core.extern
|
|
234
|
-
def atan(arg0,
|
|
234
|
+
def atan(arg0, _semantic=None):
|
|
235
235
|
return core.extern_elementwise(
|
|
236
236
|
"", "", [arg0], {
|
|
237
237
|
(core.dtype("fp32"), ): ("__ocml_atan_f32", core.dtype("fp32")),
|
|
238
238
|
(core.dtype("fp64"), ): ("__ocml_atan_f64", core.dtype("fp64")),
|
|
239
|
-
}, is_pure=True,
|
|
239
|
+
}, is_pure=True, _semantic=_semantic)
|
|
240
240
|
|
|
241
241
|
|
|
242
242
|
@core.extern
|
|
243
|
-
def asin(arg0,
|
|
243
|
+
def asin(arg0, _semantic=None):
|
|
244
244
|
return core.extern_elementwise(
|
|
245
245
|
"", "", [arg0], {
|
|
246
246
|
(core.dtype("fp32"), ): ("__ocml_asin_f32", core.dtype("fp32")),
|
|
247
247
|
(core.dtype("fp64"), ): ("__ocml_asin_f64", core.dtype("fp64")),
|
|
248
|
-
}, is_pure=True,
|
|
248
|
+
}, is_pure=True, _semantic=_semantic)
|
|
249
249
|
|
|
250
250
|
|
|
251
251
|
@core.extern
|
|
252
|
-
def acos(arg0,
|
|
252
|
+
def acos(arg0, _semantic=None):
|
|
253
253
|
return core.extern_elementwise(
|
|
254
254
|
"", "", [arg0], {
|
|
255
255
|
(core.dtype("fp32"), ): ("__ocml_acos_f32", core.dtype("fp32")),
|
|
256
256
|
(core.dtype("fp64"), ): ("__ocml_acos_f64", core.dtype("fp64")),
|
|
257
|
-
}, is_pure=True,
|
|
257
|
+
}, is_pure=True, _semantic=_semantic)
|
|
258
258
|
|
|
259
259
|
|
|
260
260
|
@core.extern
|
|
261
|
-
def log(arg0,
|
|
261
|
+
def log(arg0, _semantic=None):
|
|
262
262
|
return core.extern_elementwise(
|
|
263
263
|
"", "", [arg0], {
|
|
264
264
|
(core.dtype("fp32"), ): ("__ocml_log_f32", core.dtype("fp32")),
|
|
265
265
|
(core.dtype("fp64"), ): ("__ocml_log_f64", core.dtype("fp64")),
|
|
266
|
-
}, is_pure=True,
|
|
266
|
+
}, is_pure=True, _semantic=_semantic)
|
|
267
267
|
|
|
268
268
|
|
|
269
269
|
@core.extern
|
|
270
|
-
def log10(arg0,
|
|
270
|
+
def log10(arg0, _semantic=None):
|
|
271
271
|
return core.extern_elementwise(
|
|
272
272
|
"", "", [arg0], {
|
|
273
273
|
(core.dtype("fp32"), ): ("__ocml_log10_f32", core.dtype("fp32")),
|
|
274
274
|
(core.dtype("fp64"), ): ("__ocml_log10_f64", core.dtype("fp64")),
|
|
275
|
-
}, is_pure=True,
|
|
275
|
+
}, is_pure=True, _semantic=_semantic)
|
|
276
276
|
|
|
277
277
|
|
|
278
278
|
@core.extern
|
|
279
|
-
def log1p(arg0,
|
|
279
|
+
def log1p(arg0, _semantic=None):
|
|
280
280
|
return core.extern_elementwise(
|
|
281
281
|
"", "", [arg0], {
|
|
282
282
|
(core.dtype("fp32"), ): ("__ocml_log1p_f32", core.dtype("fp32")),
|
|
283
283
|
(core.dtype("fp64"), ): ("__ocml_log1p_f64", core.dtype("fp64")),
|
|
284
|
-
}, is_pure=True,
|
|
284
|
+
}, is_pure=True, _semantic=_semantic)
|
|
285
285
|
|
|
286
286
|
|
|
287
287
|
@core.extern
|
|
288
|
-
def acosh(arg0,
|
|
288
|
+
def acosh(arg0, _semantic=None):
|
|
289
289
|
return core.extern_elementwise(
|
|
290
290
|
"", "", [arg0], {
|
|
291
291
|
(core.dtype("fp32"), ): ("__ocml_acosh_f32", core.dtype("fp32")),
|
|
292
292
|
(core.dtype("fp64"), ): ("__ocml_acosh_f64", core.dtype("fp64")),
|
|
293
|
-
}, is_pure=True,
|
|
293
|
+
}, is_pure=True, _semantic=_semantic)
|
|
294
294
|
|
|
295
295
|
|
|
296
296
|
@core.extern
|
|
297
|
-
def asinh(arg0,
|
|
297
|
+
def asinh(arg0, _semantic=None):
|
|
298
298
|
return core.extern_elementwise(
|
|
299
299
|
"", "", [arg0], {
|
|
300
300
|
(core.dtype("fp32"), ): ("__ocml_asinh_f32", core.dtype("fp32")),
|
|
301
301
|
(core.dtype("fp64"), ): ("__ocml_asinh_f64", core.dtype("fp64")),
|
|
302
|
-
}, is_pure=True,
|
|
302
|
+
}, is_pure=True, _semantic=_semantic)
|
|
303
303
|
|
|
304
304
|
|
|
305
305
|
@core.extern
|
|
306
|
-
def atanh(arg0,
|
|
306
|
+
def atanh(arg0, _semantic=None):
|
|
307
307
|
return core.extern_elementwise(
|
|
308
308
|
"", "", [arg0], {
|
|
309
309
|
(core.dtype("fp32"), ): ("__ocml_atanh_f32", core.dtype("fp32")),
|
|
310
310
|
(core.dtype("fp64"), ): ("__ocml_atanh_f64", core.dtype("fp64")),
|
|
311
|
-
}, is_pure=True,
|
|
311
|
+
}, is_pure=True, _semantic=_semantic)
|
|
312
312
|
|
|
313
313
|
|
|
314
314
|
@core.extern
|
|
315
|
-
def expm1(arg0,
|
|
315
|
+
def expm1(arg0, _semantic=None):
|
|
316
316
|
return core.extern_elementwise(
|
|
317
317
|
"", "", [arg0], {
|
|
318
318
|
(core.dtype("fp32"), ): ("__ocml_expm1_f32", core.dtype("fp32")),
|
|
319
319
|
(core.dtype("fp64"), ): ("__ocml_expm1_f64", core.dtype("fp64")),
|
|
320
|
-
}, is_pure=True,
|
|
320
|
+
}, is_pure=True, _semantic=_semantic)
|
|
321
321
|
|
|
322
322
|
|
|
323
323
|
@core.extern
|
|
324
|
-
def hypot(arg0, arg1,
|
|
324
|
+
def hypot(arg0, arg1, _semantic=None):
|
|
325
325
|
return core.extern_elementwise(
|
|
326
326
|
"", "", [arg0, arg1], {
|
|
327
327
|
(core.dtype("fp32"), core.dtype("fp32")): ("__ocml_hypot_f32", core.dtype("fp32")),
|
|
328
328
|
(core.dtype("fp64"), core.dtype("fp64")): ("__ocml_hypot_f64", core.dtype("fp64")),
|
|
329
|
-
}, is_pure=True,
|
|
329
|
+
}, is_pure=True, _semantic=_semantic)
|
|
330
330
|
|
|
331
331
|
|
|
332
332
|
@core.extern
|
|
333
|
-
def j0(arg0,
|
|
333
|
+
def j0(arg0, _semantic=None):
|
|
334
334
|
return core.extern_elementwise(
|
|
335
335
|
"", "", [arg0], {
|
|
336
336
|
(core.dtype("fp32"), ): ("__ocml_j0_f32", core.dtype("fp32")),
|
|
337
337
|
(core.dtype("fp64"), ): ("__ocml_j0_f64", core.dtype("fp64")),
|
|
338
|
-
}, is_pure=True,
|
|
338
|
+
}, is_pure=True, _semantic=_semantic)
|
|
339
339
|
|
|
340
340
|
|
|
341
341
|
@core.extern
|
|
342
|
-
def j1(arg0,
|
|
342
|
+
def j1(arg0, _semantic=None):
|
|
343
343
|
return core.extern_elementwise(
|
|
344
344
|
"", "", [arg0], {
|
|
345
345
|
(core.dtype("fp32"), ): ("__ocml_j1_f32", core.dtype("fp32")),
|
|
346
346
|
(core.dtype("fp64"), ): ("__ocml_j1_f64", core.dtype("fp64")),
|
|
347
|
-
}, is_pure=True,
|
|
347
|
+
}, is_pure=True, _semantic=_semantic)
|
|
348
348
|
|
|
349
349
|
|
|
350
350
|
@core.extern
|
|
351
|
-
def y0(arg0,
|
|
351
|
+
def y0(arg0, _semantic=None):
|
|
352
352
|
return core.extern_elementwise(
|
|
353
353
|
"", "", [arg0], {
|
|
354
354
|
(core.dtype("fp32"), ): ("__ocml_y0_f32", core.dtype("fp32")),
|
|
355
355
|
(core.dtype("fp64"), ): ("__ocml_y0_f64", core.dtype("fp64")),
|
|
356
|
-
}, is_pure=True,
|
|
356
|
+
}, is_pure=True, _semantic=_semantic)
|
|
357
357
|
|
|
358
358
|
|
|
359
359
|
@core.extern
|
|
360
|
-
def y1(arg0,
|
|
360
|
+
def y1(arg0, _semantic=None):
|
|
361
361
|
return core.extern_elementwise(
|
|
362
362
|
"", "", [arg0], {
|
|
363
363
|
(core.dtype("fp32"), ): ("__ocml_y1_f32", core.dtype("fp32")),
|
|
364
364
|
(core.dtype("fp64"), ): ("__ocml_y1_f64", core.dtype("fp64")),
|
|
365
|
-
}, is_pure=True,
|
|
365
|
+
}, is_pure=True, _semantic=_semantic)
|
|
366
366
|
|
|
367
367
|
|
|
368
368
|
@core.extern
|
|
369
|
-
def cyl_bessel_i0(arg0,
|
|
369
|
+
def cyl_bessel_i0(arg0, _semantic=None):
|
|
370
370
|
return core.extern_elementwise(
|
|
371
371
|
"", "", [arg0], {
|
|
372
372
|
(core.dtype("fp32"), ): ("__ocml_i0_f32", core.dtype("fp32")),
|
|
373
373
|
(core.dtype("fp64"), ): ("__ocml_i0_f64", core.dtype("fp64")),
|
|
374
|
-
}, is_pure=True,
|
|
374
|
+
}, is_pure=True, _semantic=_semantic)
|
|
375
375
|
|
|
376
376
|
|
|
377
377
|
@core.extern
|
|
378
|
-
def cyl_bessel_i1(arg0,
|
|
378
|
+
def cyl_bessel_i1(arg0, _semantic=None):
|
|
379
379
|
return core.extern_elementwise(
|
|
380
380
|
"", "", [arg0], {
|
|
381
381
|
(core.dtype("fp32"), ): ("__ocml_i1_f32", core.dtype("fp32")),
|
|
382
382
|
(core.dtype("fp64"), ): ("__ocml_i1_f64", core.dtype("fp64")),
|
|
383
|
-
}, is_pure=True,
|
|
383
|
+
}, is_pure=True, _semantic=_semantic)
|
|
384
384
|
|
|
385
385
|
|
|
386
386
|
@core.extern
|
|
387
|
-
def erf(arg0,
|
|
387
|
+
def erf(arg0, _semantic=None):
|
|
388
388
|
return core.extern_elementwise(
|
|
389
389
|
"", "", [arg0], {
|
|
390
390
|
(core.dtype("fp32"), ): ("__ocml_erf_f32", core.dtype("fp32")),
|
|
391
391
|
(core.dtype("fp64"), ): ("__ocml_erf_f64", core.dtype("fp64")),
|
|
392
|
-
}, is_pure=True,
|
|
392
|
+
}, is_pure=True, _semantic=_semantic)
|
|
393
393
|
|
|
394
394
|
|
|
395
395
|
@core.extern
|
|
396
|
-
def erfinv(arg0,
|
|
396
|
+
def erfinv(arg0, _semantic=None):
|
|
397
397
|
return core.extern_elementwise(
|
|
398
398
|
"", "", [arg0], {
|
|
399
399
|
(core.dtype("fp32"), ): ("__ocml_erfinv_f32", core.dtype("fp32")),
|
|
400
400
|
(core.dtype("fp64"), ): ("__ocml_erfinv_f64", core.dtype("fp64")),
|
|
401
|
-
}, is_pure=True,
|
|
401
|
+
}, is_pure=True, _semantic=_semantic)
|
|
402
402
|
|
|
403
403
|
|
|
404
404
|
@core.extern
|
|
405
|
-
def erfc(arg0,
|
|
405
|
+
def erfc(arg0, _semantic=None):
|
|
406
406
|
return core.extern_elementwise(
|
|
407
407
|
"", "", [arg0], {
|
|
408
408
|
(core.dtype("fp32"), ): ("__ocml_erfc_f32", core.dtype("fp32")),
|
|
409
409
|
(core.dtype("fp64"), ): ("__ocml_erfc_f64", core.dtype("fp64")),
|
|
410
|
-
}, is_pure=True,
|
|
410
|
+
}, is_pure=True, _semantic=_semantic)
|
|
411
411
|
|
|
412
412
|
|
|
413
413
|
@core.extern
|
|
414
|
-
def erfcx(arg0,
|
|
414
|
+
def erfcx(arg0, _semantic=None):
|
|
415
415
|
return core.extern_elementwise(
|
|
416
416
|
"", "", [arg0], {
|
|
417
417
|
(core.dtype("fp32"), ): ("__ocml_erfcx_f32", core.dtype("fp32")),
|
|
418
418
|
(core.dtype("fp64"), ): ("__ocml_erfcx_f64", core.dtype("fp64")),
|
|
419
|
-
}, is_pure=True,
|
|
419
|
+
}, is_pure=True, _semantic=_semantic)
|
|
420
420
|
|
|
421
421
|
|
|
422
422
|
@core.extern
|
|
423
|
-
def lgamma(arg0,
|
|
423
|
+
def lgamma(arg0, _semantic=None):
|
|
424
424
|
return core.extern_elementwise(
|
|
425
425
|
"", "", [arg0], {
|
|
426
426
|
(core.dtype("fp32"), ): ("__ocml_lgamma_f32", core.dtype("fp32")),
|
|
427
427
|
(core.dtype("fp64"), ): ("__ocml_lgamma_f64", core.dtype("fp64")),
|
|
428
|
-
}, is_pure=True,
|
|
428
|
+
}, is_pure=True, _semantic=_semantic)
|
|
429
429
|
|
|
430
430
|
|
|
431
431
|
@core.extern
|
|
432
|
-
def ldexp(arg0, arg1,
|
|
432
|
+
def ldexp(arg0, arg1, _semantic=None):
|
|
433
433
|
return core.extern_elementwise(
|
|
434
434
|
"", "", [arg0, arg1], {
|
|
435
435
|
(core.dtype("fp32"), core.dtype("int32")): ("__ocml_ldexp_f32", core.dtype("fp32")),
|
|
436
436
|
(core.dtype("fp64"), core.dtype("int32")): ("__ocml_ldexp_f64", core.dtype("fp64")),
|
|
437
|
-
}, is_pure=True,
|
|
437
|
+
}, is_pure=True, _semantic=_semantic)
|
|
438
438
|
|
|
439
439
|
|
|
440
440
|
@core.extern
|
|
441
|
-
def fmod(arg0, arg1,
|
|
441
|
+
def fmod(arg0, arg1, _semantic=None):
|
|
442
442
|
return core.extern_elementwise(
|
|
443
443
|
"", "", [arg0, arg1], {
|
|
444
444
|
(core.dtype("fp32"), core.dtype("fp32")): ("__ocml_fmod_f32", core.dtype("fp32")),
|
|
445
445
|
(core.dtype("fp64"), core.dtype("fp64")): ("__ocml_fmod_f64", core.dtype("fp64")),
|
|
446
|
-
}, is_pure=True,
|
|
446
|
+
}, is_pure=True, _semantic=_semantic)
|
|
447
447
|
|
|
448
448
|
|
|
449
449
|
@core.extern
|
|
450
|
-
def fma(arg0, arg1, arg2,
|
|
450
|
+
def fma(arg0, arg1, arg2, _semantic=None):
|
|
451
451
|
return core.extern_elementwise(
|
|
452
452
|
"", "", [arg0, arg1, arg2], {
|
|
453
453
|
(core.dtype("fp32"), core.dtype("fp32"), core.dtype("fp32")): ("__ocml_fma_f32", core.dtype("fp32")),
|
|
454
454
|
(core.dtype("fp64"), core.dtype("fp64"), core.dtype("fp64")): ("__ocml_fma_f64", core.dtype("fp64")),
|
|
455
|
-
}, is_pure=True,
|
|
455
|
+
}, is_pure=True, _semantic=_semantic)
|
|
456
456
|
|
|
457
457
|
|
|
458
458
|
@core.extern
|
|
459
|
-
def pow(arg0, arg1,
|
|
459
|
+
def pow(arg0, arg1, _semantic=None):
|
|
460
460
|
return core.extern_elementwise(
|
|
461
461
|
"", "", [arg0, arg1], {
|
|
462
462
|
(core.dtype("fp32"), core.dtype("int32")): ("__ocml_pown_f32", core.dtype("fp32")),
|
|
463
463
|
(core.dtype("fp64"), core.dtype("int32")): ("__ocml_pown_f64", core.dtype("fp64")),
|
|
464
464
|
(core.dtype("fp32"), core.dtype("fp32")): ("__ocml_pow_f32", core.dtype("fp32")),
|
|
465
465
|
(core.dtype("fp64"), core.dtype("fp64")): ("__ocml_pow_f64", core.dtype("fp64")),
|
|
466
|
-
}, is_pure=True,
|
|
466
|
+
}, is_pure=True, _semantic=_semantic)
|
|
467
467
|
|
|
468
468
|
|
|
469
469
|
@core.extern
|
|
470
|
-
def ilogb(arg0,
|
|
470
|
+
def ilogb(arg0, _semantic=None):
|
|
471
471
|
return core.extern_elementwise(
|
|
472
472
|
"", "", [arg0], {
|
|
473
473
|
(core.dtype("fp32"), ): ("__ocml_ilogb_f32", core.dtype("int32")),
|
|
474
474
|
(core.dtype("fp64"), ): ("__ocml_ilogb_f64", core.dtype("int32")),
|
|
475
|
-
}, is_pure=True,
|
|
475
|
+
}, is_pure=True, _semantic=_semantic)
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
@core.extern
|
|
479
|
+
def round(arg0, _semantic=None):
|
|
480
|
+
return core.extern_elementwise(
|
|
481
|
+
"", "", [arg0], {
|
|
482
|
+
(core.dtype("fp32"), ): ("__ocml_round_f32", core.dtype("fp32")),
|
|
483
|
+
(core.dtype("fp64"), ): ("__ocml_round_f64", core.dtype("fp64")),
|
|
484
|
+
}, is_pure=True, _semantic=_semantic)
|