triton-windows 3.3.0.post19__cp39-cp39-win_amd64.whl → 3.3.1.post21__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of triton-windows might be problematic.

Files changed (116)
  1. triton/_C/libtriton.pyd +0 -0
  2. triton/__init__.py +1 -1
  3. triton/backends/amd/driver.py +6 -1
  4. triton/backends/nvidia/compiler.py +1 -3
  5. triton/backends/nvidia/driver.py +7 -3
  6. triton/runtime/autotuner.py +2 -2
  7. triton/runtime/build.py +5 -5
  8. triton/runtime/tcc/lib/python310.def +1610 -0
  9. triton/runtime/tcc/lib/python311.def +1633 -0
  10. triton/runtime/tcc/lib/python312.def +1703 -0
  11. triton/runtime/tcc/lib/python313.def +1651 -0
  12. triton/runtime/tcc/lib/python313t.def +1656 -0
  13. triton/runtime/tcc/lib/python39.def +1644 -0
  14. triton/runtime/tcc/lib/python3t.def +905 -0
  15. triton/windows_utils.py +11 -4
  16. {triton_windows-3.3.0.post19.dist-info → triton_windows-3.3.1.post21.dist-info}/METADATA +1 -1
  17. {triton_windows-3.3.0.post19.dist-info → triton_windows-3.3.1.post21.dist-info}/RECORD +19 -109
  18. {triton_windows-3.3.0.post19.dist-info → triton_windows-3.3.1.post21.dist-info}/WHEEL +1 -1
  19. triton/backends/amd/include/hip/amd_detail/amd_channel_descriptor.h +0 -358
  20. triton/backends/amd/include/hip/amd_detail/amd_device_functions.h +0 -1010
  21. triton/backends/amd/include/hip/amd_detail/amd_hip_atomic.h +0 -1638
  22. triton/backends/amd/include/hip/amd_detail/amd_hip_bf16.h +0 -1814
  23. triton/backends/amd/include/hip/amd_detail/amd_hip_bfloat16.h +0 -293
  24. triton/backends/amd/include/hip/amd_detail/amd_hip_common.h +0 -32
  25. triton/backends/amd/include/hip/amd_detail/amd_hip_complex.h +0 -174
  26. triton/backends/amd/include/hip/amd_detail/amd_hip_cooperative_groups.h +0 -835
  27. triton/backends/amd/include/hip/amd_detail/amd_hip_fp16.h +0 -1809
  28. triton/backends/amd/include/hip/amd_detail/amd_hip_fp8.h +0 -1391
  29. triton/backends/amd/include/hip/amd_detail/amd_hip_gl_interop.h +0 -108
  30. triton/backends/amd/include/hip/amd_detail/amd_hip_math_constants.h +0 -124
  31. triton/backends/amd/include/hip/amd_detail/amd_hip_runtime.h +0 -405
  32. triton/backends/amd/include/hip/amd_detail/amd_hip_runtime_pt_api.h +0 -196
  33. triton/backends/amd/include/hip/amd_detail/amd_hip_unsafe_atomics.h +0 -565
  34. triton/backends/amd/include/hip/amd_detail/amd_hip_vector_types.h +0 -2226
  35. triton/backends/amd/include/hip/amd_detail/amd_math_functions.h +0 -104
  36. triton/backends/amd/include/hip/amd_detail/amd_surface_functions.h +0 -244
  37. triton/backends/amd/include/hip/amd_detail/amd_warp_functions.h +0 -538
  38. triton/backends/amd/include/hip/amd_detail/amd_warp_sync_functions.h +0 -288
  39. triton/backends/amd/include/hip/amd_detail/concepts.hpp +0 -30
  40. triton/backends/amd/include/hip/amd_detail/device_library_decls.h +0 -133
  41. triton/backends/amd/include/hip/amd_detail/functional_grid_launch.hpp +0 -218
  42. triton/backends/amd/include/hip/amd_detail/grid_launch.h +0 -67
  43. triton/backends/amd/include/hip/amd_detail/grid_launch.hpp +0 -50
  44. triton/backends/amd/include/hip/amd_detail/grid_launch_GGL.hpp +0 -26
  45. triton/backends/amd/include/hip/amd_detail/helpers.hpp +0 -137
  46. triton/backends/amd/include/hip/amd_detail/hip_api_trace.hpp +0 -1446
  47. triton/backends/amd/include/hip/amd_detail/hip_assert.h +0 -101
  48. triton/backends/amd/include/hip/amd_detail/hip_cooperative_groups_helper.h +0 -242
  49. triton/backends/amd/include/hip/amd_detail/hip_fp16_gcc.h +0 -254
  50. triton/backends/amd/include/hip/amd_detail/hip_fp16_math_fwd.h +0 -96
  51. triton/backends/amd/include/hip/amd_detail/hip_ldg.h +0 -100
  52. triton/backends/amd/include/hip/amd_detail/hip_prof_str.h +0 -10570
  53. triton/backends/amd/include/hip/amd_detail/hip_runtime_prof.h +0 -78
  54. triton/backends/amd/include/hip/amd_detail/host_defines.h +0 -184
  55. triton/backends/amd/include/hip/amd_detail/hsa_helpers.hpp +0 -102
  56. triton/backends/amd/include/hip/amd_detail/macro_based_grid_launch.hpp +0 -798
  57. triton/backends/amd/include/hip/amd_detail/math_fwd.h +0 -698
  58. triton/backends/amd/include/hip/amd_detail/ockl_image.h +0 -177
  59. triton/backends/amd/include/hip/amd_detail/program_state.hpp +0 -107
  60. triton/backends/amd/include/hip/amd_detail/texture_fetch_functions.h +0 -491
  61. triton/backends/amd/include/hip/amd_detail/texture_indirect_functions.h +0 -478
  62. triton/backends/amd/include/hip/channel_descriptor.h +0 -39
  63. triton/backends/amd/include/hip/device_functions.h +0 -38
  64. triton/backends/amd/include/hip/driver_types.h +0 -468
  65. triton/backends/amd/include/hip/hip_bf16.h +0 -36
  66. triton/backends/amd/include/hip/hip_bfloat16.h +0 -44
  67. triton/backends/amd/include/hip/hip_common.h +0 -100
  68. triton/backends/amd/include/hip/hip_complex.h +0 -38
  69. triton/backends/amd/include/hip/hip_cooperative_groups.h +0 -46
  70. triton/backends/amd/include/hip/hip_deprecated.h +0 -95
  71. triton/backends/amd/include/hip/hip_ext.h +0 -161
  72. triton/backends/amd/include/hip/hip_fp16.h +0 -36
  73. triton/backends/amd/include/hip/hip_fp8.h +0 -33
  74. triton/backends/amd/include/hip/hip_gl_interop.h +0 -32
  75. triton/backends/amd/include/hip/hip_hcc.h +0 -24
  76. triton/backends/amd/include/hip/hip_math_constants.h +0 -36
  77. triton/backends/amd/include/hip/hip_profile.h +0 -27
  78. triton/backends/amd/include/hip/hip_runtime.h +0 -75
  79. triton/backends/amd/include/hip/hip_runtime_api.h +0 -9261
  80. triton/backends/amd/include/hip/hip_texture_types.h +0 -29
  81. triton/backends/amd/include/hip/hip_vector_types.h +0 -41
  82. triton/backends/amd/include/hip/hip_version.h +0 -17
  83. triton/backends/amd/include/hip/hiprtc.h +0 -421
  84. triton/backends/amd/include/hip/library_types.h +0 -78
  85. triton/backends/amd/include/hip/math_functions.h +0 -42
  86. triton/backends/amd/include/hip/surface_types.h +0 -63
  87. triton/backends/amd/include/hip/texture_types.h +0 -194
  88. triton/backends/amd/include/hsa/Brig.h +0 -1131
  89. triton/backends/amd/include/hsa/amd_hsa_common.h +0 -91
  90. triton/backends/amd/include/hsa/amd_hsa_elf.h +0 -462
  91. triton/backends/amd/include/hsa/amd_hsa_kernel_code.h +0 -269
  92. triton/backends/amd/include/hsa/amd_hsa_queue.h +0 -109
  93. triton/backends/amd/include/hsa/amd_hsa_signal.h +0 -80
  94. triton/backends/amd/include/hsa/hsa.h +0 -5738
  95. triton/backends/amd/include/hsa/hsa_amd_tool.h +0 -91
  96. triton/backends/amd/include/hsa/hsa_api_trace.h +0 -579
  97. triton/backends/amd/include/hsa/hsa_api_trace_version.h +0 -68
  98. triton/backends/amd/include/hsa/hsa_ext_amd.h +0 -3146
  99. triton/backends/amd/include/hsa/hsa_ext_finalize.h +0 -531
  100. triton/backends/amd/include/hsa/hsa_ext_image.h +0 -1454
  101. triton/backends/amd/include/hsa/hsa_ven_amd_aqlprofile.h +0 -488
  102. triton/backends/amd/include/hsa/hsa_ven_amd_loader.h +0 -667
  103. triton/backends/amd/include/hsa/hsa_ven_amd_pc_sampling.h +0 -416
  104. triton/backends/amd/include/roctracer/ext/prof_protocol.h +0 -107
  105. triton/backends/amd/include/roctracer/hip_ostream_ops.h +0 -4515
  106. triton/backends/amd/include/roctracer/hsa_ostream_ops.h +0 -1727
  107. triton/backends/amd/include/roctracer/hsa_prof_str.h +0 -3059
  108. triton/backends/amd/include/roctracer/roctracer.h +0 -779
  109. triton/backends/amd/include/roctracer/roctracer_ext.h +0 -81
  110. triton/backends/amd/include/roctracer/roctracer_hcc.h +0 -24
  111. triton/backends/amd/include/roctracer/roctracer_hip.h +0 -37
  112. triton/backends/amd/include/roctracer/roctracer_hsa.h +0 -112
  113. triton/backends/amd/include/roctracer/roctracer_plugin.h +0 -137
  114. triton/backends/amd/include/roctracer/roctracer_roctx.h +0 -67
  115. triton/backends/amd/include/roctracer/roctx.h +0 -229
  116. {triton_windows-3.3.0.post19.dist-info → triton_windows-3.3.1.post21.dist-info}/top_level.txt +0 -0
@@ -1,416 +0,0 @@
1
- ////////////////////////////////////////////////////////////////////////////////
2
- //
3
- // The University of Illinois/NCSA
4
- // Open Source License (NCSA)
5
- //
6
- // Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.
7
- //
8
- // Developed by:
9
- //
10
- // AMD Research and AMD HSA Software Development
11
- //
12
- // Advanced Micro Devices, Inc.
13
- //
14
- // www.amd.com
15
- //
16
- // Permission is hereby granted, free of charge, to any person obtaining a copy
17
- // of this software and associated documentation files (the "Software"), to
18
- // deal with the Software without restriction, including without limitation
19
- // the rights to use, copy, modify, merge, publish, distribute, sublicense,
20
- // and/or sell copies of the Software, and to permit persons to whom the
21
- // Software is furnished to do so, subject to the following conditions:
22
- //
23
- // - Redistributions of source code must retain the above copyright notice,
24
- // this list of conditions and the following disclaimers.
25
- // - Redistributions in binary form must reproduce the above copyright
26
- // notice, this list of conditions and the following disclaimers in
27
- // the documentation and/or other materials provided with the distribution.
28
- // - Neither the names of Advanced Micro Devices, Inc,
29
- // nor the names of its contributors may be used to endorse or promote
30
- // products derived from this Software without specific prior written
31
- // permission.
32
- //
33
- // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
34
- // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
35
- // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
36
- // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
37
- // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
38
- // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
39
- // DEALINGS WITH THE SOFTWARE.
40
- //
41
- ////////////////////////////////////////////////////////////////////////////////
42
-
43
- #ifndef HSA_VEN_AMD_PC_SAMPLING_H
44
- #define HSA_VEN_AMD_PC_SAMPLING_H
45
-
46
- #include "hsa.h"
47
-
48
- #ifdef __cplusplus
49
- extern "C" {
50
- #endif /*__cplusplus*/
51
-
52
-
53
- /**
54
- * @brief HSA AMD Vendor PC Sampling APIs
55
- * EXPERIMENTAL: All PC Sampling APIs are currently in an experimental phase and the APIs may be
56
- * modified extensively in the future
57
- */
58
-
59
- /**
60
- * @brief PC Sampling sample data for hosttrap sampling method
61
- */
62
- typedef struct {
63
- uint64_t pc;
64
- uint64_t exec_mask;
65
- uint32_t workgroup_id_x;
66
- uint32_t workgroup_id_y;
67
- uint32_t workgroup_id_z;
68
- uint32_t wave_in_wg : 6;
69
- uint32_t chiplet : 3; // Currently not used
70
- uint32_t reserved : 23;
71
- uint32_t hw_id;
72
- uint32_t reserved0;
73
- uint64_t reserved1;
74
- uint64_t timestamp;
75
- uint64_t correlation_id;
76
- } perf_sample_hosttrap_v1_t;
77
-
78
- /**
79
- * @brief PC Sampling sample data for stochastic sampling method
80
- */
81
- typedef struct {
82
- uint64_t pc;
83
- uint64_t exec_mask;
84
- uint32_t workgroup_id_x;
85
- uint32_t workgroup_id_y;
86
- uint32_t workgroup_id_z;
87
- uint32_t wave_in_wg : 6;
88
- uint32_t chiplet : 3; // Currently not used
89
- uint32_t reserved : 23;
90
- uint32_t hw_id;
91
- uint32_t perf_snapshot_data;
92
- uint32_t perf_snapshot_data1;
93
- uint32_t perf_snapshot_data2;
94
- uint64_t timestamp;
95
- uint64_t correlation_id;
96
- } perf_sample_snapshot_v1_t;
97
-
98
- /**
99
- * @brief PC Sampling method kinds
100
- */
101
- typedef enum {
102
- HSA_VEN_AMD_PCS_METHOD_HOSTTRAP_V1,
103
- HSA_VEN_AMD_PCS_METHOD_STOCHASTIC_V1
104
- } hsa_ven_amd_pcs_method_kind_t;
105
-
106
- /**
107
- * @brief PC Sampling interval unit type
108
- */
109
- typedef enum {
110
- HSA_VEN_AMD_PCS_INTERVAL_UNITS_MICRO_SECONDS,
111
- HSA_VEN_AMD_PCS_INTERVAL_UNITS_CLOCK_CYCLES,
112
- HSA_VEN_AMD_PCS_INTERVAL_UNITS_INSTRUCTIONS
113
- } hsa_ven_amd_pcs_units_t;
114
-
115
- /**
116
- * @brief HSA callback function to perform the copy onto a destination buffer
117
- *
118
- * If data_size is 0, HSA will stop current copy operation and keep remaining data in internal
119
- * buffers. Remaining contents of HSA internal buffers will be included in next
120
- * hsa_ven_amd_pcs_data_ready_callback_t. HSA internal buffers can also be drained by calling
121
- * hsa_ven_amd_pcs_flush.
122
- *
123
- * @param[in] hsa_callback_data private data to pass back to HSA. Provided in
124
- * hsa_ven_amd_pcs_data_ready_callback_t
125
- *
126
- * @param[in] data_size size of destination buffer in bytes.
127
- * @param[in] destination destination buffer
128
- * @retval TBD: but could be used to indicate that there is no more data to be read.
129
- * Or indicate an error and abort of current copy operations
130
- */
131
- typedef hsa_status_t (*hsa_ven_amd_pcs_data_copy_callback_t)(void* hsa_callback_data,
132
- size_t data_size, void* destination);
133
-
134
- /**
135
- * @brief HSA callback function to to indicate that there is data ready to be copied
136
- *
137
- * When the client receives this callback, the client should call back @p data_copy_callback for HSA
138
- * to perform the copy operation into an available buffer. @p data_copy_callback can be called back
139
- * multiple times with smaller @p data_size to split the copy operation.
140
- *
141
- * This callback must not call ::hsa_ven_amd_pcs_flush.
142
- *
143
- * @param[in] client_callback_data client private data passed in via
144
- * hsa_ven_amd_pcs_create/hsa_ven_amd_pcs_create_from_id
145
- * @param[in] data_size size of data available to be copied
146
- * @param[in] lost_sample_count number of lost samples since last call to
147
- * hsa_ven_amd_pcs_data_ready_callback_t.
148
- * @param[in] data_copy_callback callback function for HSA to perform the actual copy
149
- * @param[in] hsa_callback_data private data to pass back to HSA
150
- */
151
- typedef void (*hsa_ven_amd_pcs_data_ready_callback_t)(
152
- void* client_callback_data, size_t data_size, size_t lost_sample_count,
153
- hsa_ven_amd_pcs_data_copy_callback_t data_copy_callback, void* hsa_callback_data);
154
-
155
- /**
156
- * @brief Opaque handle representing a sampling session.
157
- * Two sessions having same handle value represent the same session
158
- */
159
- typedef struct {
160
- uint64_t handle;
161
- } hsa_ven_amd_pcs_t;
162
-
163
- /**
164
- * @brief PC Sampling configuration flag options
165
- */
166
- typedef enum {
167
- /* The interval for this sampling method have to be a power of 2 */
168
- HSA_VEN_AMD_PCS_CONFIGURATION_FLAGS_INTERVAL_POWER_OF_2 = (1 << 0)
169
- } hsa_ven_amd_pcs_configuration_flags_t;
170
-
171
- /**
172
- * @brief PC Sampling method information
173
- * Used to provide client with list of supported PC Sampling methods
174
- */
175
- typedef struct {
176
- hsa_ven_amd_pcs_method_kind_t method;
177
- hsa_ven_amd_pcs_units_t units;
178
- size_t min_interval;
179
- size_t max_interval;
180
- uint64_t flags;
181
- } hsa_ven_amd_pcs_configuration_t;
182
-
183
- /**
184
- * @brief Callback function to iterate through list of supported PC Sampling configurations
185
- *
186
- * @param[in] configuration one entry for supported PC Sampling method and configuration options
187
- * @param[in] callback_data client private callback data that was passed in when calling
188
- * hsa_ven_amd_pcs_iterate_configuration
189
- */
190
- typedef hsa_status_t (*hsa_ven_amd_pcs_iterate_configuration_callback_t)(
191
- const hsa_ven_amd_pcs_configuration_t* configuration, void* callback_data);
192
-
193
- /**
194
- * @brief Iterate through list of current supported PC Sampling configurations for this @p agent
195
- *
196
- * HSA will callback @p configuration_callback for each currently available PC Sampling
197
- * configuration. The list of currently available configurations may not be the complete list of
198
- * configurations supported on the @p agent. The list of currently available configurations may be
199
- * reduced if the @p agent is currently handling other PC sampling sessions.
200
- *
201
- * @param[in] agent target agent
202
- * @param[in] configuration_callback callback function to iterate through list of configurations
203
- * @param[in] callback_data client private callback data
204
- **/
205
- hsa_status_t hsa_ven_amd_pcs_iterate_configuration(
206
- hsa_agent_t agent, hsa_ven_amd_pcs_iterate_configuration_callback_t configuration_callback,
207
- void* callback_data);
208
-
209
- /**
210
- * @brief Create a PC Sampling session on @p agent
211
- *
212
- * Allocate the resources required for a PC Sampling session. The @p method, @p units, @p interval
213
- * parameters must be a legal configuration value, as described by the
214
- * hsa_ven_amd_pcs_configuration_t configurations passed to the callbacks of
215
- * hsa_ven_amd_pcs_iterate_configuration for this @p agent.
216
- * A successfull call may restrict the list of possible PC sampling methods available to subsequent
217
- * calls to hsa_ven_amd_pcs_iterate_configuration on the same agent as agents have limitations
218
- * on what types of PC sampling they can perform concurrently.
219
- * For all successful calls, hsa_ven_amd_pcs_destroy should be called to free this session.
220
- * The session will be in a stopped/inactive state after this call
221
- *
222
- * @param[in] agent target agent
223
- * @param[in] method method to use
224
- * @param[in] units sampling units
225
- * @param[in] interval sampling interval in @p units
226
- * @param[in] latency expected latency in microseconds for client to provide a buffer for the data
227
- * copy callback once HSA calls @p data_ready_callback. This is a performance hint to avoid the
228
- * buffer filling up before the client is notified that data is ready. HSA-runtime will estimate
229
- * how many samples are received within @p latency and call @p data_ready_callback ahead of time so
230
- * that the client has @p latency time to allocate the buffer before the HSA-runtime internal
231
- * buffers are full. The value of latency can be 0.
232
- * @param[in] buffer_size size of client buffer in bytes. @p data_ready_callback will be called once
233
- * HSA-runtime has enough samples to fill @p buffer_size. This needs to be a multiple of size of
234
- * perf_sample_hosttrap_v1_t or size of perf_sample_snapshot_v1_t.
235
- * @param[in] data_ready_callback client callback function that will be called when:
236
- * 1. There is enough samples fill a buffer with @p buffer_size - estimated samples received
237
- * within @p latency period.
238
- * OR
239
- * 2. When hsa_ven_amd_pcs_flush is called.
240
- * @param[in] client_callback_data client private data to be provided back when data_ready_callback
241
- * is called.
242
- * @param[out] pc_sampling PC sampling session handle used to reference this session when calling
243
- * hsa_ven_amd_pcs_start, hsa_ven_amd_pcs_stop, hsa_ven_amd_pcs_destroy
244
- *
245
- * @retval ::HSA_STATUS_SUCCESS session created successfully
246
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT invalid parameters
247
- * @retval ::HSA_STATUS_ERROR_RESOURCE_BUSY agent currently handling another PC Sampling session and
248
- * cannot handle the type requested.
249
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES Failed to allocate resources
250
- * @retval ::HSA_STATUS_ERROR Unexpected error
251
- **/
252
- hsa_status_t hsa_ven_amd_pcs_create(hsa_agent_t agent, hsa_ven_amd_pcs_method_kind_t method,
253
- hsa_ven_amd_pcs_units_t units, size_t interval, size_t latency,
254
- size_t buffer_size,
255
- hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback,
256
- void* client_callback_data, hsa_ven_amd_pcs_t* pc_sampling);
257
-
258
-
259
- /**
260
- * @brief Creates a PC Sampling session on @p agent. Assumes that the caller provides the
261
- * @p pcs_id generated by the previous call to the underlying driver that reserved PC sampling
262
- * on the @p agent.
263
- *
264
- * Similar to the @ref hsa_ven_amd_pcs_create with the difference that it inherits an existing
265
- * PC sampling session that was previously created in the underlying driver.
266
- *
267
- * Allocate the resources required for a PC Sampling session. The @p method, @p units, @p interval
268
- * parameters must be a legal configuration value, and match the parameters that we used to create
269
- * the underlying PC Sampling session in the underlying driver.
270
- * A successfull call may restrict the list of possible PC sampling methods available to subsequent
271
- * calls to hsa_ven_amd_pcs_iterate_configuration on the same agent as agents have limitations
272
- * on what types of PC sampling they can perform concurrently.
273
- * For all successful calls, hsa_ven_amd_pcs_destroy should be called to free this session.
274
- * The session will be in a stopped/inactive state after this call
275
- *
276
- * @param[in] pcs_id ID that uniquely identifies the PC sampling session within underlying driver
277
- * @param[in] agent target agent
278
- * @param[in] method method to use
279
- * @param[in] units sampling units
280
- * @param[in] interval sampling interval in @p units
281
- * @param[in] latency expected latency in microseconds for client to provide a buffer for the data
282
- * copy callback once HSA calls @p data_ready_callback. This is a performance hint to avoid the
283
- * buffer filling up before the client is notified that data is ready. HSA-runtime will estimate
284
- * how many samples are received within @p latency and call @p data_ready_callback ahead of time so
285
- * that the client has @p latency time to allocate the buffer before the HSA-runtime internal
286
- * buffers are full. The value of latency can be 0.
287
- * @param[in] buffer_size size of client buffer in bytes. @p data_ready_callback will be called once
288
- * HSA-runtime has enough samples to fill @p buffer_size. This needs to be a multiple of size of
289
- * perf_sample_hosttrap_v1_t or size of perf_sample_snapshot_v1_t.
290
- * @param[in] data_ready_callback client callback function that will be called when:
291
- * 1. There is enough samples fill a buffer with @p buffer_size - estimated samples received
292
- * within @p latency period.
293
- * OR
294
- * 2. When hsa_ven_amd_pcs_flush is called.
295
- * @param[in] client_callback_data client private data to be provided back when data_ready_callback
296
- * is called.
297
- * @param[out] pc_sampling PC sampling session handle used to reference this session when calling
298
- * hsa_ven_amd_pcs_start, hsa_ven_amd_pcs_stop, hsa_ven_amd_pcs_destroy
299
- *
300
- * @retval ::HSA_STATUS_SUCCESS session created successfully
301
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT invalid parameters
302
- * @retval ::HSA_STATUS_ERROR_RESOURCE_BUSY agent currently handling another PC Sampling session and
303
- * cannot handle the type requested.
304
- * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES Failed to allocate resources
305
- * @retval ::HSA_STATUS_ERROR Unexpected error
306
- **/
307
- hsa_status_t hsa_ven_amd_pcs_create_from_id(
308
- uint32_t pcs_id, hsa_agent_t agent, hsa_ven_amd_pcs_method_kind_t method,
309
- hsa_ven_amd_pcs_units_t units, size_t interval, size_t latency, size_t buffer_size,
310
- hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback, void* client_callback_data,
311
- hsa_ven_amd_pcs_t* pc_sampling);
312
-
313
- /**
314
- * @brief Free a PC Sampling session on @p agent
315
- *
316
- * Free all the resources allocated for a PC Sampling session on @p agent
317
- * Internal buffers for this session will be lost.
318
- * If the session was active, the session will be stopped before it is destroyed.
319
- *
320
- * @param[in] pc_sampling PC sampling session handle
321
- *
322
- * @retval ::HSA_STATUS_SUCCESS Session destroyed successfully
323
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT Invalid PC sampling handle
324
- * @retval ::HSA_STATUS_ERROR unexpected error
325
- */
326
- hsa_status_t hsa_ven_amd_pcs_destroy(hsa_ven_amd_pcs_t pc_sampling);
327
-
328
- /**
329
- * @brief Start a PC Sampling session
330
- *
331
- * Activate a PC Sampling session that was previous created.
332
- * The session with be in a active state after this call
333
- * If the session was already active, this will result in a no-op and will return HSA_STATUS_SUCCESS
334
- *
335
- * @param[in] pc_sampling PC sampling session handle
336
- *
337
- * @retval ::HSA_STATUS_SUCCESS Session started successfully
338
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT Invalid PC sampling handle
339
- * @retval ::HSA_STATUS_ERROR unexpected error
340
- */
341
- hsa_status_t hsa_ven_amd_pcs_start(hsa_ven_amd_pcs_t pc_sampling);
342
-
343
- /**
344
- * @brief Stop a PC Sampling session
345
- *
346
- * Stop a session that is currently active
347
- * After a session is stopped HSA may still have some PC Sampling data in its internal buffers.
348
- * The internal buffers can be drained using hsa_ven_amd_pcs_flush. If the internal
349
- * buffers are not drained and the session is started again, the internal buffers will be available
350
- * on the next data_ready_callback.
351
- * If the session was already inactive, this will result in a no-op and will return
352
- * HSA_STATUS_SUCCESS
353
- *
354
- * @param[in] pc_sampling PC sampling session handle
355
- *
356
- * @retval ::HSA_STATUS_SUCCESS Session stopped successfully
357
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT Invalid PC sampling handle
358
- */
359
- hsa_status_t hsa_ven_amd_pcs_stop(hsa_ven_amd_pcs_t pc_sampling);
360
-
361
- /**
362
- * @brief Flush internal buffers for a PC Sampling session
363
- *
364
- * Drain internal buffers for a PC Sampling session. If internal buffers have available data,
365
- * this trigger a data_ready_callback.
366
- *
367
- * The function blocks until all PC samples associated with the @p pc_sampling session
368
- * generated prior to the function call have been communicated by invocations of
369
- * @p data_ready_callback having completed execution.
370
- *
371
- * @param[in] pc_sampling PC sampling session handle
372
- *
373
- * @retval ::HSA_STATUS_SUCCESS Session flushed successfully
374
- * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT Invalid PC sampling handle
375
- */
376
- hsa_status_t hsa_ven_amd_pcs_flush(hsa_ven_amd_pcs_t pc_sampling);
377
-
378
- #define hsa_ven_amd_pc_sampling_1_00
379
-
380
- /**
381
- * @brief The function pointer table for the PC Sampling v1.00 extension. Can be returned by
382
- * ::hsa_system_get_extension_table or ::hsa_system_get_major_extension_table.
383
- */
384
- typedef struct hsa_ven_amd_pc_sampling_1_00_pfn_t {
385
- hsa_status_t (*hsa_ven_amd_pcs_iterate_configuration)(
386
- hsa_agent_t agent, hsa_ven_amd_pcs_iterate_configuration_callback_t configuration_callback,
387
- void* callback_data);
388
-
389
- hsa_status_t (*hsa_ven_amd_pcs_create)(hsa_agent_t agent, hsa_ven_amd_pcs_method_kind_t method,
390
- hsa_ven_amd_pcs_units_t units, size_t interval,
391
- size_t latency, size_t buffer_size,
392
- hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback,
393
- void* client_callback_data,
394
- hsa_ven_amd_pcs_t* pc_sampling);
395
-
396
- hsa_status_t (*hsa_ven_amd_pcs_create_from_id)(
397
- uint32_t pcs_id, hsa_agent_t agent, hsa_ven_amd_pcs_method_kind_t method,
398
- hsa_ven_amd_pcs_units_t units, size_t interval, size_t latency, size_t buffer_size,
399
- hsa_ven_amd_pcs_data_ready_callback_t data_ready_callback, void* client_callback_data,
400
- hsa_ven_amd_pcs_t* pc_sampling);
401
-
402
- hsa_status_t (*hsa_ven_amd_pcs_destroy)(hsa_ven_amd_pcs_t pc_sampling);
403
-
404
- hsa_status_t (*hsa_ven_amd_pcs_start)(hsa_ven_amd_pcs_t pc_sampling);
405
-
406
- hsa_status_t (*hsa_ven_amd_pcs_stop)(hsa_ven_amd_pcs_t pc_sampling);
407
-
408
- hsa_status_t (*hsa_ven_amd_pcs_flush)(hsa_ven_amd_pcs_t pc_sampling);
409
-
410
- } hsa_ven_amd_pc_sampling_1_00_pfn_t;
411
-
412
- #ifdef __cplusplus
413
- } // end extern "C" block
414
- #endif /*__cplusplus*/
415
-
416
- #endif /* HSA_VEN_AMD_PC_SAMPLING_H */
@@ -1,107 +0,0 @@
1
- /* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
2
-
3
- Permission is hereby granted, free of charge, to any person obtaining a copy
4
- of this software and associated documentation files (the "Software"), to deal
5
- in the Software without restriction, including without limitation the rights
6
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
- copies of the Software, and to permit persons to whom the Software is
8
- furnished to do so, subject to the following conditions:
9
-
10
- The above copyright notice and this permission notice shall be included in
11
- all copies or substantial portions of the Software.
12
-
13
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
- THE SOFTWARE. */
20
-
21
- #ifndef EXT_PROF_PROTOCOL_H_
22
- #define EXT_PROF_PROTOCOL_H_
23
-
24
- #include <stdint.h>
25
- #include <stdlib.h>
26
-
27
- /* Traced API domains */
28
- typedef enum {
29
- ACTIVITY_DOMAIN_HSA_API = 0, /* HSA API domain */
30
- ACTIVITY_DOMAIN_HSA_OPS = 1, /* HSA async activity domain */
31
- ACTIVITY_DOMAIN_HIP_OPS = 2, /* HIP async activity domain */
32
- ACTIVITY_DOMAIN_HCC_OPS =
33
- ACTIVITY_DOMAIN_HIP_OPS, /* HCC async activity domain */
34
- ACTIVITY_DOMAIN_HIP_VDI =
35
- ACTIVITY_DOMAIN_HIP_OPS, /* HIP VDI async activity domain */
36
- ACTIVITY_DOMAIN_HIP_API = 3, /* HIP API domain */
37
- ACTIVITY_DOMAIN_KFD_API = 4, /* KFD API domain */
38
- ACTIVITY_DOMAIN_EXT_API = 5, /* External ID domain */
39
- ACTIVITY_DOMAIN_ROCTX = 6, /* ROCTX domain */
40
- ACTIVITY_DOMAIN_HSA_EVT = 7, /* HSA events */
41
- ACTIVITY_DOMAIN_NUMBER
42
- } activity_domain_t;
43
-
44
- /* API callback type */
45
- typedef void (*activity_rtapi_callback_t)(uint32_t domain, uint32_t cid,
46
- const void* data, void* arg);
47
- typedef uint32_t activity_kind_t;
48
- typedef uint32_t activity_op_t;
49
-
50
- /* API callback phase */
51
- typedef enum {
52
- ACTIVITY_API_PHASE_ENTER = 0,
53
- ACTIVITY_API_PHASE_EXIT = 1
54
- } activity_api_phase_t;
55
-
56
- /* Trace record types */
57
-
58
- /* Correlation id */
59
- typedef uint64_t activity_correlation_id_t;
60
-
61
- /* Timestamp in nanoseconds */
62
- typedef uint64_t roctracer_timestamp_t;
63
-
64
- /* Activity record type */
65
- typedef struct activity_record_s {
66
- uint32_t domain; /* activity domain id */
67
- activity_kind_t kind; /* activity kind */
68
- activity_op_t op; /* activity op */
69
- union {
70
- struct {
71
- activity_correlation_id_t correlation_id; /* activity ID */
72
- roctracer_timestamp_t begin_ns; /* host begin timestamp */
73
- roctracer_timestamp_t end_ns; /* host end timestamp */
74
- };
75
- struct {
76
- uint32_t se; /* sampled SE */
77
- uint64_t cycle; /* sample cycle */
78
- uint64_t pc; /* sample PC */
79
- } pc_sample;
80
- };
81
- union {
82
- struct {
83
- int device_id; /* device id */
84
- uint64_t queue_id; /* queue id */
85
- };
86
- struct {
87
- uint32_t process_id; /* device id */
88
- uint32_t thread_id; /* thread id */
89
- };
90
- struct {
91
- activity_correlation_id_t external_id; /* external correlation id */
92
- };
93
- };
94
- union {
95
- size_t bytes; /* data size bytes */
96
- const char* kernel_name; /* kernel name */
97
- const char* mark_message;
98
- };
99
- } activity_record_t;
100
-
101
- /* Activity sync callback type */
102
- typedef void (*activity_sync_callback_t)(uint32_t cid, activity_record_t* record, const void* data,
103
- void* arg);
104
- /* Activity async callback type */
105
- typedef void (*activity_async_callback_t)(uint32_t op, void* record, void* arg);
106
-
107
- #endif /* EXT_PROF_PROTOCOL_H_ */