triton-windows 3.3.1.post19__cp313-cp313-win_amd64.whl → 3.4.0.post20__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of triton-windows might be problematic. Click here for more details.
- triton/_C/libtriton.pyd +0 -0
- triton/__init__.py +4 -1
- triton/_filecheck.py +87 -0
- triton/_internal_testing.py +26 -15
- triton/_utils.py +110 -21
- triton/backends/__init__.py +20 -23
- triton/backends/amd/__init__.py +0 -0
- triton/backends/amd/compiler.py +112 -78
- triton/backends/amd/driver.c +5 -2
- triton/backends/amd/driver.py +149 -47
- triton/backends/compiler.py +7 -21
- triton/backends/nvidia/bin/ptxas.exe +0 -0
- triton/backends/nvidia/compiler.py +92 -93
- triton/backends/nvidia/driver.c +90 -98
- triton/backends/nvidia/driver.py +303 -128
- triton/compiler/code_generator.py +212 -111
- triton/compiler/compiler.py +110 -25
- triton/experimental/__init__.py +0 -0
- triton/experimental/gluon/__init__.py +4 -0
- triton/experimental/gluon/_compiler.py +0 -0
- triton/experimental/gluon/_runtime.py +99 -0
- triton/experimental/gluon/language/__init__.py +18 -0
- triton/experimental/gluon/language/_core.py +312 -0
- triton/experimental/gluon/language/_layouts.py +230 -0
- triton/experimental/gluon/language/_math.py +12 -0
- triton/experimental/gluon/language/_semantic.py +287 -0
- triton/experimental/gluon/language/_standard.py +47 -0
- triton/experimental/gluon/language/nvidia/__init__.py +4 -0
- triton/experimental/gluon/language/nvidia/blackwell/__init__.py +202 -0
- triton/experimental/gluon/language/nvidia/blackwell/tma.py +32 -0
- triton/experimental/gluon/language/nvidia/hopper/__init__.py +11 -0
- triton/experimental/gluon/language/nvidia/hopper/mbarrier.py +51 -0
- triton/experimental/gluon/language/nvidia/hopper/tma.py +96 -0
- triton/experimental/gluon/nvidia/__init__.py +4 -0
- triton/experimental/gluon/nvidia/blackwell.py +3 -0
- triton/experimental/gluon/nvidia/hopper.py +40 -0
- triton/knobs.py +481 -0
- triton/language/__init__.py +39 -14
- triton/language/core.py +794 -537
- triton/language/extra/cuda/__init__.py +10 -7
- triton/language/extra/cuda/gdc.py +42 -0
- triton/language/extra/cuda/libdevice.py +394 -394
- triton/language/extra/cuda/utils.py +21 -21
- triton/language/extra/hip/libdevice.py +113 -104
- triton/language/math.py +65 -66
- triton/language/random.py +12 -2
- triton/language/semantic.py +1706 -1770
- triton/language/standard.py +116 -51
- triton/runtime/autotuner.py +117 -59
- triton/runtime/build.py +76 -12
- triton/runtime/cache.py +18 -47
- triton/runtime/driver.py +32 -29
- triton/runtime/interpreter.py +72 -35
- triton/runtime/jit.py +146 -110
- triton/testing.py +16 -12
- triton/tools/disasm.py +3 -4
- triton/tools/tensor_descriptor.py +36 -0
- triton/windows_utils.py +14 -6
- {triton_windows-3.3.1.post19.dist-info → triton_windows-3.4.0.post20.dist-info}/METADATA +7 -2
- triton_windows-3.4.0.post20.dist-info/RECORD +186 -0
- triton_windows-3.4.0.post20.dist-info/entry_points.txt +3 -0
- triton_windows-3.4.0.post20.dist-info/licenses/LICENSE +23 -0
- triton_windows-3.4.0.post20.dist-info/top_level.txt +1 -0
- triton/backends/amd/include/hip/amd_detail/amd_channel_descriptor.h +0 -358
- triton/backends/amd/include/hip/amd_detail/amd_device_functions.h +0 -1010
- triton/backends/amd/include/hip/amd_detail/amd_hip_atomic.h +0 -1638
- triton/backends/amd/include/hip/amd_detail/amd_hip_bf16.h +0 -1814
- triton/backends/amd/include/hip/amd_detail/amd_hip_bfloat16.h +0 -293
- triton/backends/amd/include/hip/amd_detail/amd_hip_common.h +0 -32
- triton/backends/amd/include/hip/amd_detail/amd_hip_complex.h +0 -174
- triton/backends/amd/include/hip/amd_detail/amd_hip_cooperative_groups.h +0 -835
- triton/backends/amd/include/hip/amd_detail/amd_hip_fp16.h +0 -1809
- triton/backends/amd/include/hip/amd_detail/amd_hip_fp8.h +0 -1391
- triton/backends/amd/include/hip/amd_detail/amd_hip_gl_interop.h +0 -108
- triton/backends/amd/include/hip/amd_detail/amd_hip_math_constants.h +0 -124
- triton/backends/amd/include/hip/amd_detail/amd_hip_runtime.h +0 -405
- triton/backends/amd/include/hip/amd_detail/amd_hip_runtime_pt_api.h +0 -196
- triton/backends/amd/include/hip/amd_detail/amd_hip_unsafe_atomics.h +0 -565
- triton/backends/amd/include/hip/amd_detail/amd_hip_vector_types.h +0 -2226
- triton/backends/amd/include/hip/amd_detail/amd_math_functions.h +0 -104
- triton/backends/amd/include/hip/amd_detail/amd_surface_functions.h +0 -244
- triton/backends/amd/include/hip/amd_detail/amd_warp_functions.h +0 -538
- triton/backends/amd/include/hip/amd_detail/amd_warp_sync_functions.h +0 -288
- triton/backends/amd/include/hip/amd_detail/concepts.hpp +0 -30
- triton/backends/amd/include/hip/amd_detail/device_library_decls.h +0 -133
- triton/backends/amd/include/hip/amd_detail/functional_grid_launch.hpp +0 -218
- triton/backends/amd/include/hip/amd_detail/grid_launch.h +0 -67
- triton/backends/amd/include/hip/amd_detail/grid_launch.hpp +0 -50
- triton/backends/amd/include/hip/amd_detail/grid_launch_GGL.hpp +0 -26
- triton/backends/amd/include/hip/amd_detail/helpers.hpp +0 -137
- triton/backends/amd/include/hip/amd_detail/hip_api_trace.hpp +0 -1446
- triton/backends/amd/include/hip/amd_detail/hip_assert.h +0 -101
- triton/backends/amd/include/hip/amd_detail/hip_cooperative_groups_helper.h +0 -242
- triton/backends/amd/include/hip/amd_detail/hip_fp16_gcc.h +0 -254
- triton/backends/amd/include/hip/amd_detail/hip_fp16_math_fwd.h +0 -96
- triton/backends/amd/include/hip/amd_detail/hip_ldg.h +0 -100
- triton/backends/amd/include/hip/amd_detail/hip_prof_str.h +0 -10570
- triton/backends/amd/include/hip/amd_detail/hip_runtime_prof.h +0 -78
- triton/backends/amd/include/hip/amd_detail/host_defines.h +0 -184
- triton/backends/amd/include/hip/amd_detail/hsa_helpers.hpp +0 -102
- triton/backends/amd/include/hip/amd_detail/macro_based_grid_launch.hpp +0 -798
- triton/backends/amd/include/hip/amd_detail/math_fwd.h +0 -698
- triton/backends/amd/include/hip/amd_detail/ockl_image.h +0 -177
- triton/backends/amd/include/hip/amd_detail/program_state.hpp +0 -107
- triton/backends/amd/include/hip/amd_detail/texture_fetch_functions.h +0 -491
- triton/backends/amd/include/hip/amd_detail/texture_indirect_functions.h +0 -478
- triton/backends/amd/include/hip/channel_descriptor.h +0 -39
- triton/backends/amd/include/hip/device_functions.h +0 -38
- triton/backends/amd/include/hip/driver_types.h +0 -468
- triton/backends/amd/include/hip/hip_bf16.h +0 -36
- triton/backends/amd/include/hip/hip_bfloat16.h +0 -44
- triton/backends/amd/include/hip/hip_common.h +0 -100
- triton/backends/amd/include/hip/hip_complex.h +0 -38
- triton/backends/amd/include/hip/hip_cooperative_groups.h +0 -46
- triton/backends/amd/include/hip/hip_deprecated.h +0 -95
- triton/backends/amd/include/hip/hip_ext.h +0 -161
- triton/backends/amd/include/hip/hip_fp16.h +0 -36
- triton/backends/amd/include/hip/hip_fp8.h +0 -33
- triton/backends/amd/include/hip/hip_gl_interop.h +0 -32
- triton/backends/amd/include/hip/hip_hcc.h +0 -24
- triton/backends/amd/include/hip/hip_math_constants.h +0 -36
- triton/backends/amd/include/hip/hip_profile.h +0 -27
- triton/backends/amd/include/hip/hip_runtime.h +0 -75
- triton/backends/amd/include/hip/hip_runtime_api.h +0 -9261
- triton/backends/amd/include/hip/hip_texture_types.h +0 -29
- triton/backends/amd/include/hip/hip_vector_types.h +0 -41
- triton/backends/amd/include/hip/hip_version.h +0 -17
- triton/backends/amd/include/hip/hiprtc.h +0 -421
- triton/backends/amd/include/hip/library_types.h +0 -78
- triton/backends/amd/include/hip/math_functions.h +0 -42
- triton/backends/amd/include/hip/surface_types.h +0 -63
- triton/backends/amd/include/hip/texture_types.h +0 -194
- triton/backends/amd/include/hsa/Brig.h +0 -1131
- triton/backends/amd/include/hsa/amd_hsa_common.h +0 -91
- triton/backends/amd/include/hsa/amd_hsa_elf.h +0 -462
- triton/backends/amd/include/hsa/amd_hsa_kernel_code.h +0 -269
- triton/backends/amd/include/hsa/amd_hsa_queue.h +0 -109
- triton/backends/amd/include/hsa/amd_hsa_signal.h +0 -80
- triton/backends/amd/include/hsa/hsa.h +0 -5738
- triton/backends/amd/include/hsa/hsa_amd_tool.h +0 -91
- triton/backends/amd/include/hsa/hsa_api_trace.h +0 -579
- triton/backends/amd/include/hsa/hsa_api_trace_version.h +0 -68
- triton/backends/amd/include/hsa/hsa_ext_amd.h +0 -3146
- triton/backends/amd/include/hsa/hsa_ext_finalize.h +0 -531
- triton/backends/amd/include/hsa/hsa_ext_image.h +0 -1454
- triton/backends/amd/include/hsa/hsa_ven_amd_aqlprofile.h +0 -488
- triton/backends/amd/include/hsa/hsa_ven_amd_loader.h +0 -667
- triton/backends/amd/include/hsa/hsa_ven_amd_pc_sampling.h +0 -416
- triton/backends/amd/include/roctracer/ext/prof_protocol.h +0 -107
- triton/backends/amd/include/roctracer/hip_ostream_ops.h +0 -4515
- triton/backends/amd/include/roctracer/hsa_ostream_ops.h +0 -1727
- triton/backends/amd/include/roctracer/hsa_prof_str.h +0 -3059
- triton/backends/amd/include/roctracer/roctracer.h +0 -779
- triton/backends/amd/include/roctracer/roctracer_ext.h +0 -81
- triton/backends/amd/include/roctracer/roctracer_hcc.h +0 -24
- triton/backends/amd/include/roctracer/roctracer_hip.h +0 -37
- triton/backends/amd/include/roctracer/roctracer_hsa.h +0 -112
- triton/backends/amd/include/roctracer/roctracer_plugin.h +0 -137
- triton/backends/amd/include/roctracer/roctracer_roctx.h +0 -67
- triton/backends/amd/include/roctracer/roctx.h +0 -229
- triton/language/_utils.py +0 -21
- triton/language/extra/cuda/_experimental_tma.py +0 -106
- triton/tools/experimental_descriptor.py +0 -32
- triton_windows-3.3.1.post19.dist-info/RECORD +0 -260
- triton_windows-3.3.1.post19.dist-info/top_level.txt +0 -14
- {triton_windows-3.3.1.post19.dist-info → triton_windows-3.4.0.post20.dist-info}/WHEEL +0 -0
|
@@ -1,531 +0,0 @@
|
|
|
1
|
-
////////////////////////////////////////////////////////////////////////////////
|
|
2
|
-
//
|
|
3
|
-
// The University of Illinois/NCSA
|
|
4
|
-
// Open Source License (NCSA)
|
|
5
|
-
//
|
|
6
|
-
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
|
|
7
|
-
//
|
|
8
|
-
// Developed by:
|
|
9
|
-
//
|
|
10
|
-
// AMD Research and AMD HSA Software Development
|
|
11
|
-
//
|
|
12
|
-
// Advanced Micro Devices, Inc.
|
|
13
|
-
//
|
|
14
|
-
// www.amd.com
|
|
15
|
-
//
|
|
16
|
-
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
17
|
-
// of this software and associated documentation files (the "Software"), to
|
|
18
|
-
// deal with the Software without restriction, including without limitation
|
|
19
|
-
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
20
|
-
// and/or sell copies of the Software, and to permit persons to whom the
|
|
21
|
-
// Software is furnished to do so, subject to the following conditions:
|
|
22
|
-
//
|
|
23
|
-
// - Redistributions of source code must retain the above copyright notice,
|
|
24
|
-
// this list of conditions and the following disclaimers.
|
|
25
|
-
// - Redistributions in binary form must reproduce the above copyright
|
|
26
|
-
// notice, this list of conditions and the following disclaimers in
|
|
27
|
-
// the documentation and/or other materials provided with the distribution.
|
|
28
|
-
// - Neither the names of Advanced Micro Devices, Inc,
|
|
29
|
-
// nor the names of its contributors may be used to endorse or promote
|
|
30
|
-
// products derived from this Software without specific prior written
|
|
31
|
-
// permission.
|
|
32
|
-
//
|
|
33
|
-
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
34
|
-
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
35
|
-
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
36
|
-
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
37
|
-
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
38
|
-
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
39
|
-
// DEALINGS WITH THE SOFTWARE.
|
|
40
|
-
//
|
|
41
|
-
////////////////////////////////////////////////////////////////////////////////
|
|
42
|
-
|
|
43
|
-
#ifndef HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
|
|
44
|
-
#define HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
|
|
45
|
-
|
|
46
|
-
#include "hsa.h"
|
|
47
|
-
|
|
48
|
-
#undef HSA_API
|
|
49
|
-
#ifdef HSA_EXPORT_FINALIZER
|
|
50
|
-
#define HSA_API HSA_API_EXPORT
|
|
51
|
-
#else
|
|
52
|
-
#define HSA_API HSA_API_IMPORT
|
|
53
|
-
#endif
|
|
54
|
-
|
|
55
|
-
#ifdef __cplusplus
|
|
56
|
-
extern "C" {
|
|
57
|
-
#endif // __cplusplus
|
|
58
|
-
|
|
59
|
-
struct BrigModuleHeader;
|
|
60
|
-
typedef struct BrigModuleHeader* BrigModule_t;
|
|
61
|
-
|
|
62
|
-
/** \defgroup ext-alt-finalizer-extensions Finalization Extensions
|
|
63
|
-
* @{
|
|
64
|
-
*/
|
|
65
|
-
|
|
66
|
-
/**
|
|
67
|
-
* @brief Enumeration constants added to ::hsa_status_t by this extension.
|
|
68
|
-
*/
|
|
69
|
-
enum {
|
|
70
|
-
/**
|
|
71
|
-
* The HSAIL program is invalid.
|
|
72
|
-
*/
|
|
73
|
-
HSA_EXT_STATUS_ERROR_INVALID_PROGRAM = 0x2000,
|
|
74
|
-
/**
|
|
75
|
-
* The HSAIL module is invalid.
|
|
76
|
-
*/
|
|
77
|
-
HSA_EXT_STATUS_ERROR_INVALID_MODULE = 0x2001,
|
|
78
|
-
/**
|
|
79
|
-
* Machine model or profile of the HSAIL module do not match the machine model
|
|
80
|
-
* or profile of the HSAIL program.
|
|
81
|
-
*/
|
|
82
|
-
HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE = 0x2002,
|
|
83
|
-
/**
|
|
84
|
-
* The HSAIL module is already a part of the HSAIL program.
|
|
85
|
-
*/
|
|
86
|
-
HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED = 0x2003,
|
|
87
|
-
/**
|
|
88
|
-
* Compatibility mismatch between symbol declaration and symbol definition.
|
|
89
|
-
*/
|
|
90
|
-
HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH = 0x2004,
|
|
91
|
-
/**
|
|
92
|
-
* The finalization encountered an error while finalizing a kernel or
|
|
93
|
-
* indirect function.
|
|
94
|
-
*/
|
|
95
|
-
HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED = 0x2005,
|
|
96
|
-
/**
|
|
97
|
-
* Mismatch between a directive in the control directive structure and in
|
|
98
|
-
* the HSAIL kernel.
|
|
99
|
-
*/
|
|
100
|
-
HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH = 0x2006
|
|
101
|
-
};
|
|
102
|
-
|
|
103
|
-
/** @} */
|
|
104
|
-
|
|
105
|
-
/** \defgroup ext-alt-finalizer-program Finalization Program
|
|
106
|
-
* @{
|
|
107
|
-
*/
|
|
108
|
-
|
|
109
|
-
/**
|
|
110
|
-
* @brief HSAIL (BRIG) module. The HSA Programmer's Reference Manual contains
|
|
111
|
-
* the definition of the BrigModule_t type.
|
|
112
|
-
*/
|
|
113
|
-
typedef BrigModule_t hsa_ext_module_t;
|
|
114
|
-
|
|
115
|
-
/**
|
|
116
|
-
* @brief An opaque handle to a HSAIL program, which groups a set of HSAIL
|
|
117
|
-
* modules that collectively define functions and variables used by kernels and
|
|
118
|
-
* indirect functions.
|
|
119
|
-
*/
|
|
120
|
-
typedef struct hsa_ext_program_s {
|
|
121
|
-
/**
|
|
122
|
-
* Opaque handle.
|
|
123
|
-
*/
|
|
124
|
-
uint64_t handle;
|
|
125
|
-
} hsa_ext_program_t;
|
|
126
|
-
|
|
127
|
-
/**
|
|
128
|
-
* @brief Create an empty HSAIL program.
|
|
129
|
-
*
|
|
130
|
-
* @param[in] machine_model Machine model used in the HSAIL program.
|
|
131
|
-
*
|
|
132
|
-
* @param[in] profile Profile used in the HSAIL program.
|
|
133
|
-
*
|
|
134
|
-
* @param[in] default_float_rounding_mode Default float rounding mode used in
|
|
135
|
-
* the HSAIL program.
|
|
136
|
-
*
|
|
137
|
-
* @param[in] options Vendor-specific options. May be NULL.
|
|
138
|
-
*
|
|
139
|
-
* @param[out] program Memory location where the HSA runtime stores the newly
|
|
140
|
-
* created HSAIL program handle.
|
|
141
|
-
*
|
|
142
|
-
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
|
143
|
-
*
|
|
144
|
-
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
|
145
|
-
* initialized.
|
|
146
|
-
*
|
|
147
|
-
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
|
|
148
|
-
* resources required for the operation.
|
|
149
|
-
*
|
|
150
|
-
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p machine_model is invalid,
|
|
151
|
-
* @p profile is invalid, @p default_float_rounding_mode is invalid, or
|
|
152
|
-
* @p program is NULL.
|
|
153
|
-
*/
|
|
154
|
-
hsa_status_t HSA_API hsa_ext_program_create(
|
|
155
|
-
hsa_machine_model_t machine_model,
|
|
156
|
-
hsa_profile_t profile,
|
|
157
|
-
hsa_default_float_rounding_mode_t default_float_rounding_mode,
|
|
158
|
-
const char *options,
|
|
159
|
-
hsa_ext_program_t *program);
|
|
160
|
-
|
|
161
|
-
/**
|
|
162
|
-
* @brief Destroy a HSAIL program.
|
|
163
|
-
*
|
|
164
|
-
* @details The HSAIL program handle becomes invalid after it has been
|
|
165
|
-
* destroyed. Code object handles produced by ::hsa_ext_program_finalize are
|
|
166
|
-
* still valid after the HSAIL program has been destroyed, and can be used as
|
|
167
|
-
* intended. Resources allocated outside and associated with the HSAIL program
|
|
168
|
-
* (such as HSAIL modules that are added to the HSAIL program) can be released
|
|
169
|
-
* after the HSAIL program has been destroyed.
|
|
170
|
-
*
|
|
171
|
-
* @param[in] program HSAIL program.
|
|
172
|
-
*
|
|
173
|
-
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
|
174
|
-
*
|
|
175
|
-
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
|
176
|
-
* initialized.
|
|
177
|
-
*
|
|
178
|
-
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is
|
|
179
|
-
* invalid.
|
|
180
|
-
*/
|
|
181
|
-
hsa_status_t HSA_API hsa_ext_program_destroy(
|
|
182
|
-
hsa_ext_program_t program);
|
|
183
|
-
|
|
184
|
-
/**
|
|
185
|
-
* @brief Add a HSAIL module to an existing HSAIL program.
|
|
186
|
-
*
|
|
187
|
-
* @details The HSA runtime does not perform a deep copy of the HSAIL module
|
|
188
|
-
* upon addition. Instead, it stores a pointer to the HSAIL module. The
|
|
189
|
-
* ownership of the HSAIL module belongs to the application, which must ensure
|
|
190
|
-
* that @p module is not released before destroying the HSAIL program.
|
|
191
|
-
*
|
|
192
|
-
* The HSAIL module is successfully added to the HSAIL program if @p module is
|
|
193
|
-
* valid, if all the declarations and definitions for the same symbol are
|
|
194
|
-
* compatible, and if @p module specifies a machine model and profile that match
|
|
195
|
-
* the HSAIL program.
|
|
196
|
-
*
|
|
197
|
-
* @param[in] program HSAIL program.
|
|
198
|
-
*
|
|
199
|
-
* @param[in] module HSAIL module. The application can add the same HSAIL module
|
|
200
|
-
* to @p program at most once. The HSAIL module must specify the same machine
|
|
201
|
-
* model and profile as @p program. If the floating-point rounding mode of @p
|
|
202
|
-
* module is not default, then it should match that of @p program.
|
|
203
|
-
*
|
|
204
|
-
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
|
205
|
-
*
|
|
206
|
-
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
|
207
|
-
* initialized.
|
|
208
|
-
*
|
|
209
|
-
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
|
|
210
|
-
* resources required for the operation.
|
|
211
|
-
*
|
|
212
|
-
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is invalid.
|
|
213
|
-
*
|
|
214
|
-
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_MODULE The HSAIL module is invalid.
|
|
215
|
-
*
|
|
216
|
-
* @retval ::HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE The machine model of @p
|
|
217
|
-
* module does not match machine model of @p program, or the profile of @p
|
|
218
|
-
* module does not match profile of @p program.
|
|
219
|
-
*
|
|
220
|
-
* @retval ::HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED The HSAIL module is
|
|
221
|
-
* already a part of the HSAIL program.
|
|
222
|
-
*
|
|
223
|
-
* @retval ::HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH Symbol declaration and symbol
|
|
224
|
-
* definition compatibility mismatch. See the symbol compatibility rules in the
|
|
225
|
-
* HSA Programmer's Reference Manual.
|
|
226
|
-
*/
|
|
227
|
-
hsa_status_t HSA_API hsa_ext_program_add_module(
|
|
228
|
-
hsa_ext_program_t program,
|
|
229
|
-
hsa_ext_module_t module);
|
|
230
|
-
|
|
231
|
-
/**
|
|
232
|
-
* @brief Iterate over the HSAIL modules in a program, and invoke an
|
|
233
|
-
* application-defined callback on every iteration.
|
|
234
|
-
*
|
|
235
|
-
* @param[in] program HSAIL program.
|
|
236
|
-
*
|
|
237
|
-
* @param[in] callback Callback to be invoked once per HSAIL module in the
|
|
238
|
-
* program. The HSA runtime passes three arguments to the callback: the program,
|
|
239
|
-
* a HSAIL module, and the application data. If @p callback returns a status
|
|
240
|
-
* other than ::HSA_STATUS_SUCCESS for a particular iteration, the traversal
|
|
241
|
-
* stops and ::hsa_ext_program_iterate_modules returns that status value.
|
|
242
|
-
*
|
|
243
|
-
* @param[in] data Application data that is passed to @p callback on every
|
|
244
|
-
* iteration. May be NULL.
|
|
245
|
-
*
|
|
246
|
-
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
|
247
|
-
*
|
|
248
|
-
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
|
249
|
-
* initialized.
|
|
250
|
-
*
|
|
251
|
-
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The program is invalid.
|
|
252
|
-
*
|
|
253
|
-
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
|
|
254
|
-
*/
|
|
255
|
-
hsa_status_t HSA_API hsa_ext_program_iterate_modules(
|
|
256
|
-
hsa_ext_program_t program,
|
|
257
|
-
hsa_status_t (*callback)(hsa_ext_program_t program, hsa_ext_module_t module,
|
|
258
|
-
void* data),
|
|
259
|
-
void* data);
|
|
260
|
-
|
|
261
|
-
/**
|
|
262
|
-
* @brief HSAIL program attributes.
|
|
263
|
-
*/
|
|
264
|
-
typedef enum {
|
|
265
|
-
/**
|
|
266
|
-
* Machine model specified when the HSAIL program was created. The type
|
|
267
|
-
* of this attribute is ::hsa_machine_model_t.
|
|
268
|
-
*/
|
|
269
|
-
HSA_EXT_PROGRAM_INFO_MACHINE_MODEL = 0,
|
|
270
|
-
/**
|
|
271
|
-
* Profile specified when the HSAIL program was created. The type of
|
|
272
|
-
* this attribute is ::hsa_profile_t.
|
|
273
|
-
*/
|
|
274
|
-
HSA_EXT_PROGRAM_INFO_PROFILE = 1,
|
|
275
|
-
/**
|
|
276
|
-
* Default float rounding mode specified when the HSAIL program was
|
|
277
|
-
* created. The type of this attribute is ::hsa_default_float_rounding_mode_t.
|
|
278
|
-
*/
|
|
279
|
-
HSA_EXT_PROGRAM_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 2
|
|
280
|
-
} hsa_ext_program_info_t;
|
|
281
|
-
|
|
282
|
-
/**
|
|
283
|
-
* @brief Get the current value of an attribute for a given HSAIL program.
|
|
284
|
-
*
|
|
285
|
-
* @param[in] program HSAIL program.
|
|
286
|
-
*
|
|
287
|
-
* @param[in] attribute Attribute to query.
|
|
288
|
-
*
|
|
289
|
-
* @param[out] value Pointer to an application-allocated buffer where to store
|
|
290
|
-
* the value of the attribute. If the buffer passed by the application is not
|
|
291
|
-
* large enough to hold the value of @p attribute, the behaviour is undefined.
|
|
292
|
-
*
|
|
293
|
-
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
|
294
|
-
*
|
|
295
|
-
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
|
296
|
-
* initialized.
|
|
297
|
-
*
|
|
298
|
-
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is invalid.
|
|
299
|
-
*
|
|
300
|
-
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
|
|
301
|
-
* HSAIL program attribute, or @p value is NULL.
|
|
302
|
-
*/
|
|
303
|
-
hsa_status_t HSA_API hsa_ext_program_get_info(
|
|
304
|
-
hsa_ext_program_t program,
|
|
305
|
-
hsa_ext_program_info_t attribute,
|
|
306
|
-
void *value);
|
|
307
|
-
|
|
308
|
-
/**
|
|
309
|
-
* @brief Finalizer-determined call convention.
|
|
310
|
-
*/
|
|
311
|
-
typedef enum {
|
|
312
|
-
/**
|
|
313
|
-
* Finalizer-determined call convention.
|
|
314
|
-
*/
|
|
315
|
-
HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO = -1
|
|
316
|
-
} hsa_ext_finalizer_call_convention_t;
|
|
317
|
-
|
|
318
|
-
/**
|
|
319
|
-
* @brief Control directives specify low-level information about the
|
|
320
|
-
* finalization process.
|
|
321
|
-
*/
|
|
322
|
-
typedef struct hsa_ext_control_directives_s {
|
|
323
|
-
/**
|
|
324
|
-
* Bitset indicating which control directives are enabled. The bit assigned to
|
|
325
|
-
* a control directive is determined by the corresponding value in
|
|
326
|
-
* BrigControlDirective.
|
|
327
|
-
*
|
|
328
|
-
* If a control directive is disabled, its corresponding field value (if any)
|
|
329
|
-
* must be 0. Control directives that are only present or absent (such as
|
|
330
|
-
* partial workgroups) have no corresponding field as the presence of the bit
|
|
331
|
-
* in this mask is sufficient.
|
|
332
|
-
*/
|
|
333
|
-
uint64_t control_directives_mask;
|
|
334
|
-
/**
|
|
335
|
-
* Bitset of HSAIL exceptions that must have the BREAK policy enabled. The bit
|
|
336
|
-
* assigned to an HSAIL exception is determined by the corresponding value
|
|
337
|
-
* in BrigExceptionsMask. If the kernel contains an enablebreakexceptions
|
|
338
|
-
* control directive, the finalizer uses the union of the two masks.
|
|
339
|
-
*/
|
|
340
|
-
uint16_t break_exceptions_mask;
|
|
341
|
-
/**
|
|
342
|
-
* Bitset of HSAIL exceptions that must have the DETECT policy enabled. The
|
|
343
|
-
* bit assigned to an HSAIL exception is determined by the corresponding value
|
|
344
|
-
* in BrigExceptionsMask. If the kernel contains an enabledetectexceptions
|
|
345
|
-
* control directive, the finalizer uses the union of the two masks.
|
|
346
|
-
*/
|
|
347
|
-
uint16_t detect_exceptions_mask;
|
|
348
|
-
/**
|
|
349
|
-
* Maximum size (in bytes) of dynamic group memory that will be allocated by
|
|
350
|
-
* the application for any dispatch of the kernel. If the kernel contains a
|
|
351
|
-
* maxdynamicsize control directive, the two values should match.
|
|
352
|
-
*/
|
|
353
|
-
uint32_t max_dynamic_group_size;
|
|
354
|
-
/**
|
|
355
|
-
* Maximum number of grid work-items that will be used by the application to
|
|
356
|
-
* launch the kernel. If the kernel contains a maxflatgridsize control
|
|
357
|
-
* directive, the value of @a max_flat_grid_size must not be greater than the
|
|
358
|
-
* value of the directive, and takes precedence.
|
|
359
|
-
*
|
|
360
|
-
* The value specified for maximum absolute grid size must be greater than or
|
|
361
|
-
* equal to the product of the values specified by @a required_grid_size.
|
|
362
|
-
*
|
|
363
|
-
* If the bit at position BRIG_CONTROL_MAXFLATGRIDSIZE is set in @a
|
|
364
|
-
* control_directives_mask, this field must be greater than 0.
|
|
365
|
-
*/
|
|
366
|
-
uint64_t max_flat_grid_size;
|
|
367
|
-
/**
|
|
368
|
-
* Maximum number of work-group work-items that will be used by the
|
|
369
|
-
* application to launch the kernel. If the kernel contains a
|
|
370
|
-
* maxflatworkgroupsize control directive, the value of @a
|
|
371
|
-
* max_flat_workgroup_size must not be greater than the value of the
|
|
372
|
-
* directive, and takes precedence.
|
|
373
|
-
*
|
|
374
|
-
* The value specified for maximum absolute work-group size must be greater than or
|
|
375
|
-
* equal to the product of the values specified by @a required_workgroup_size.
|
|
376
|
-
*
|
|
377
|
-
* If the bit at position BRIG_CONTROL_MAXFLATWORKGROUPSIZE is set in @a
|
|
378
|
-
* control_directives_mask, this field must be greater than 0.
|
|
379
|
-
*/
|
|
380
|
-
uint32_t max_flat_workgroup_size;
|
|
381
|
-
/**
|
|
382
|
-
* Reserved. Must be 0.
|
|
383
|
-
*/
|
|
384
|
-
uint32_t reserved1;
|
|
385
|
-
/**
|
|
386
|
-
* Grid size that will be used by the application in any dispatch of the
|
|
387
|
-
* kernel. If the kernel contains a requiredgridsize control directive, the
|
|
388
|
-
* dimensions should match.
|
|
389
|
-
*
|
|
390
|
-
* The specified grid size must be consistent with @a required_workgroup_size
|
|
391
|
-
* and @a required_dim. Also, the product of the three dimensions must not
|
|
392
|
-
* exceed @a max_flat_grid_size. Note that the listed invariants must hold
|
|
393
|
-
* only if all the corresponding control directives are enabled.
|
|
394
|
-
*
|
|
395
|
-
* If the bit at position BRIG_CONTROL_REQUIREDGRIDSIZE is set in @a
|
|
396
|
-
* control_directives_mask, the three dimension values must be greater than 0.
|
|
397
|
-
*/
|
|
398
|
-
uint64_t required_grid_size[3];
|
|
399
|
-
/**
|
|
400
|
-
* Work-group size that will be used by the application in any dispatch of the
|
|
401
|
-
* kernel. If the kernel contains a requiredworkgroupsize control directive,
|
|
402
|
-
* the dimensions should match.
|
|
403
|
-
*
|
|
404
|
-
* The specified work-group size must be consistent with @a required_grid_size
|
|
405
|
-
* and @a required_dim. Also, the product of the three dimensions must not
|
|
406
|
-
* exceed @a max_flat_workgroup_size. Note that the listed invariants must
|
|
407
|
-
* hold only if all the corresponding control directives are enabled.
|
|
408
|
-
*
|
|
409
|
-
* If the bit at position BRIG_CONTROL_REQUIREDWORKGROUPSIZE is set in @a
|
|
410
|
-
* control_directives_mask, the three dimension values must be greater than 0.
|
|
411
|
-
*/
|
|
412
|
-
hsa_dim3_t required_workgroup_size;
|
|
413
|
-
/**
|
|
414
|
-
* Number of dimensions that will be used by the application to launch the
|
|
415
|
-
* kernel. If the kernel contains a requireddim control directive, the two
|
|
416
|
-
* values should match.
|
|
417
|
-
*
|
|
418
|
-
* The specified dimensions must be consistent with @a required_grid_size and
|
|
419
|
-
* @a required_workgroup_size. This invariant must hold only if all the
|
|
420
|
-
* corresponding control directives are enabled.
|
|
421
|
-
*
|
|
422
|
-
* If the bit at position BRIG_CONTROL_REQUIREDDIM is set in @a
|
|
423
|
-
* control_directives_mask, this field must be 1, 2, or 3.
|
|
424
|
-
*/
|
|
425
|
-
uint8_t required_dim;
|
|
426
|
-
/**
|
|
427
|
-
* Reserved. Must be 0.
|
|
428
|
-
*/
|
|
429
|
-
uint8_t reserved2[75];
|
|
430
|
-
} hsa_ext_control_directives_t;
|
|
431
|
-
|
|
432
|
-
/**
|
|
433
|
-
* @brief Finalize an HSAIL program for a given instruction set architecture.
|
|
434
|
-
*
|
|
435
|
-
* @details Finalize all of the kernels and indirect functions that belong to
|
|
436
|
-
* the same HSAIL program for a specific instruction set architecture (ISA). The
|
|
437
|
-
* transitive closure of all functions specified by call or scall must be
|
|
438
|
-
* defined. Kernels and indirect functions that are being finalized must be
|
|
439
|
-
* defined. Kernels and indirect functions that are referenced in kernels and
|
|
440
|
-
* indirect functions being finalized may or may not be defined, but must be
|
|
441
|
-
* declared. All the global/readonly segment variables that are referenced in
|
|
442
|
-
* kernels and indirect functions being finalized may or may not be defined, but
|
|
443
|
-
* must be declared.
|
|
444
|
-
*
|
|
445
|
-
* @param[in] program HSAIL program.
|
|
446
|
-
*
|
|
447
|
-
* @param[in] isa Instruction set architecture to finalize for.
|
|
448
|
-
*
|
|
449
|
-
* @param[in] call_convention A call convention used in a finalization. Must
|
|
450
|
-
* have a value between ::HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO (inclusive)
|
|
451
|
-
* and the value of the attribute ::HSA_ISA_INFO_CALL_CONVENTION_COUNT in @p
|
|
452
|
-
* isa (not inclusive).
|
|
453
|
-
*
|
|
454
|
-
* @param[in] control_directives Low-level control directives that influence
|
|
455
|
-
* the finalization process.
|
|
456
|
-
*
|
|
457
|
-
* @param[in] options Vendor-specific options. May be NULL.
|
|
458
|
-
*
|
|
459
|
-
* @param[in] code_object_type Type of code object to produce.
|
|
460
|
-
*
|
|
461
|
-
* @param[out] code_object Code object generated by the Finalizer, which
|
|
462
|
-
* contains the machine code for the kernels and indirect functions in the HSAIL
|
|
463
|
-
* program. The code object is independent of the HSAIL module that was used to
|
|
464
|
-
* generate it.
|
|
465
|
-
*
|
|
466
|
-
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
|
467
|
-
*
|
|
468
|
-
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
|
469
|
-
* initialized.
|
|
470
|
-
*
|
|
471
|
-
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
|
|
472
|
-
* resources required for the operation.
|
|
473
|
-
*
|
|
474
|
-
* @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is
|
|
475
|
-
* invalid.
|
|
476
|
-
*
|
|
477
|
-
* @retval ::HSA_STATUS_ERROR_INVALID_ISA @p isa is invalid.
|
|
478
|
-
*
|
|
479
|
-
* @retval ::HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH The directive in
|
|
480
|
-
* the control directive structure and in the HSAIL kernel mismatch, or if the
|
|
481
|
-
* same directive is used with a different value in one of the functions used by
|
|
482
|
-
* this kernel.
|
|
483
|
-
*
|
|
484
|
-
* @retval ::HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED The Finalizer
|
|
485
|
-
* encountered an error while compiling a kernel or an indirect function.
|
|
486
|
-
*/
|
|
487
|
-
hsa_status_t HSA_API hsa_ext_program_finalize(
|
|
488
|
-
hsa_ext_program_t program,
|
|
489
|
-
hsa_isa_t isa,
|
|
490
|
-
int32_t call_convention,
|
|
491
|
-
hsa_ext_control_directives_t control_directives,
|
|
492
|
-
const char *options,
|
|
493
|
-
hsa_code_object_type_t code_object_type,
|
|
494
|
-
hsa_code_object_t *code_object);
|
|
495
|
-
|
|
496
|
-
/** @} */
|
|
497
|
-
|
|
498
|
-
#define hsa_ext_finalizer_1_00
|
|
499
|
-
|
|
500
|
-
typedef struct hsa_ext_finalizer_1_00_pfn_s {
|
|
501
|
-
hsa_status_t (*hsa_ext_program_create)(
|
|
502
|
-
hsa_machine_model_t machine_model, hsa_profile_t profile,
|
|
503
|
-
hsa_default_float_rounding_mode_t default_float_rounding_mode,
|
|
504
|
-
const char *options, hsa_ext_program_t *program);
|
|
505
|
-
|
|
506
|
-
hsa_status_t (*hsa_ext_program_destroy)(hsa_ext_program_t program);
|
|
507
|
-
|
|
508
|
-
hsa_status_t (*hsa_ext_program_add_module)(hsa_ext_program_t program,
|
|
509
|
-
hsa_ext_module_t module);
|
|
510
|
-
|
|
511
|
-
hsa_status_t (*hsa_ext_program_iterate_modules)(
|
|
512
|
-
hsa_ext_program_t program,
|
|
513
|
-
hsa_status_t (*callback)(hsa_ext_program_t program,
|
|
514
|
-
hsa_ext_module_t module, void *data),
|
|
515
|
-
void *data);
|
|
516
|
-
|
|
517
|
-
hsa_status_t (*hsa_ext_program_get_info)(
|
|
518
|
-
hsa_ext_program_t program, hsa_ext_program_info_t attribute,
|
|
519
|
-
void *value);
|
|
520
|
-
|
|
521
|
-
hsa_status_t (*hsa_ext_program_finalize)(
|
|
522
|
-
hsa_ext_program_t program, hsa_isa_t isa, int32_t call_convention,
|
|
523
|
-
hsa_ext_control_directives_t control_directives, const char *options,
|
|
524
|
-
hsa_code_object_type_t code_object_type, hsa_code_object_t *code_object);
|
|
525
|
-
} hsa_ext_finalizer_1_00_pfn_t;
|
|
526
|
-
|
|
527
|
-
#ifdef __cplusplus
|
|
528
|
-
} // extern "C" block
|
|
529
|
-
#endif // __cplusplus
|
|
530
|
-
|
|
531
|
-
#endif // HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
|