triton-windows 3.3.1.post19__cp313-cp313-win_amd64.whl → 3.4.0.post20__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of triton-windows might be problematic. Click here for more details.
- triton/_C/libtriton.pyd +0 -0
- triton/__init__.py +4 -1
- triton/_filecheck.py +87 -0
- triton/_internal_testing.py +26 -15
- triton/_utils.py +110 -21
- triton/backends/__init__.py +20 -23
- triton/backends/amd/__init__.py +0 -0
- triton/backends/amd/compiler.py +112 -78
- triton/backends/amd/driver.c +5 -2
- triton/backends/amd/driver.py +149 -47
- triton/backends/compiler.py +7 -21
- triton/backends/nvidia/bin/ptxas.exe +0 -0
- triton/backends/nvidia/compiler.py +92 -93
- triton/backends/nvidia/driver.c +90 -98
- triton/backends/nvidia/driver.py +303 -128
- triton/compiler/code_generator.py +212 -111
- triton/compiler/compiler.py +110 -25
- triton/experimental/__init__.py +0 -0
- triton/experimental/gluon/__init__.py +4 -0
- triton/experimental/gluon/_compiler.py +0 -0
- triton/experimental/gluon/_runtime.py +99 -0
- triton/experimental/gluon/language/__init__.py +18 -0
- triton/experimental/gluon/language/_core.py +312 -0
- triton/experimental/gluon/language/_layouts.py +230 -0
- triton/experimental/gluon/language/_math.py +12 -0
- triton/experimental/gluon/language/_semantic.py +287 -0
- triton/experimental/gluon/language/_standard.py +47 -0
- triton/experimental/gluon/language/nvidia/__init__.py +4 -0
- triton/experimental/gluon/language/nvidia/blackwell/__init__.py +202 -0
- triton/experimental/gluon/language/nvidia/blackwell/tma.py +32 -0
- triton/experimental/gluon/language/nvidia/hopper/__init__.py +11 -0
- triton/experimental/gluon/language/nvidia/hopper/mbarrier.py +51 -0
- triton/experimental/gluon/language/nvidia/hopper/tma.py +96 -0
- triton/experimental/gluon/nvidia/__init__.py +4 -0
- triton/experimental/gluon/nvidia/blackwell.py +3 -0
- triton/experimental/gluon/nvidia/hopper.py +40 -0
- triton/knobs.py +481 -0
- triton/language/__init__.py +39 -14
- triton/language/core.py +794 -537
- triton/language/extra/cuda/__init__.py +10 -7
- triton/language/extra/cuda/gdc.py +42 -0
- triton/language/extra/cuda/libdevice.py +394 -394
- triton/language/extra/cuda/utils.py +21 -21
- triton/language/extra/hip/libdevice.py +113 -104
- triton/language/math.py +65 -66
- triton/language/random.py +12 -2
- triton/language/semantic.py +1706 -1770
- triton/language/standard.py +116 -51
- triton/runtime/autotuner.py +117 -59
- triton/runtime/build.py +76 -12
- triton/runtime/cache.py +18 -47
- triton/runtime/driver.py +32 -29
- triton/runtime/interpreter.py +72 -35
- triton/runtime/jit.py +146 -110
- triton/testing.py +16 -12
- triton/tools/disasm.py +3 -4
- triton/tools/tensor_descriptor.py +36 -0
- triton/windows_utils.py +14 -6
- {triton_windows-3.3.1.post19.dist-info → triton_windows-3.4.0.post20.dist-info}/METADATA +7 -2
- triton_windows-3.4.0.post20.dist-info/RECORD +186 -0
- triton_windows-3.4.0.post20.dist-info/entry_points.txt +3 -0
- triton_windows-3.4.0.post20.dist-info/licenses/LICENSE +23 -0
- triton_windows-3.4.0.post20.dist-info/top_level.txt +1 -0
- triton/backends/amd/include/hip/amd_detail/amd_channel_descriptor.h +0 -358
- triton/backends/amd/include/hip/amd_detail/amd_device_functions.h +0 -1010
- triton/backends/amd/include/hip/amd_detail/amd_hip_atomic.h +0 -1638
- triton/backends/amd/include/hip/amd_detail/amd_hip_bf16.h +0 -1814
- triton/backends/amd/include/hip/amd_detail/amd_hip_bfloat16.h +0 -293
- triton/backends/amd/include/hip/amd_detail/amd_hip_common.h +0 -32
- triton/backends/amd/include/hip/amd_detail/amd_hip_complex.h +0 -174
- triton/backends/amd/include/hip/amd_detail/amd_hip_cooperative_groups.h +0 -835
- triton/backends/amd/include/hip/amd_detail/amd_hip_fp16.h +0 -1809
- triton/backends/amd/include/hip/amd_detail/amd_hip_fp8.h +0 -1391
- triton/backends/amd/include/hip/amd_detail/amd_hip_gl_interop.h +0 -108
- triton/backends/amd/include/hip/amd_detail/amd_hip_math_constants.h +0 -124
- triton/backends/amd/include/hip/amd_detail/amd_hip_runtime.h +0 -405
- triton/backends/amd/include/hip/amd_detail/amd_hip_runtime_pt_api.h +0 -196
- triton/backends/amd/include/hip/amd_detail/amd_hip_unsafe_atomics.h +0 -565
- triton/backends/amd/include/hip/amd_detail/amd_hip_vector_types.h +0 -2226
- triton/backends/amd/include/hip/amd_detail/amd_math_functions.h +0 -104
- triton/backends/amd/include/hip/amd_detail/amd_surface_functions.h +0 -244
- triton/backends/amd/include/hip/amd_detail/amd_warp_functions.h +0 -538
- triton/backends/amd/include/hip/amd_detail/amd_warp_sync_functions.h +0 -288
- triton/backends/amd/include/hip/amd_detail/concepts.hpp +0 -30
- triton/backends/amd/include/hip/amd_detail/device_library_decls.h +0 -133
- triton/backends/amd/include/hip/amd_detail/functional_grid_launch.hpp +0 -218
- triton/backends/amd/include/hip/amd_detail/grid_launch.h +0 -67
- triton/backends/amd/include/hip/amd_detail/grid_launch.hpp +0 -50
- triton/backends/amd/include/hip/amd_detail/grid_launch_GGL.hpp +0 -26
- triton/backends/amd/include/hip/amd_detail/helpers.hpp +0 -137
- triton/backends/amd/include/hip/amd_detail/hip_api_trace.hpp +0 -1446
- triton/backends/amd/include/hip/amd_detail/hip_assert.h +0 -101
- triton/backends/amd/include/hip/amd_detail/hip_cooperative_groups_helper.h +0 -242
- triton/backends/amd/include/hip/amd_detail/hip_fp16_gcc.h +0 -254
- triton/backends/amd/include/hip/amd_detail/hip_fp16_math_fwd.h +0 -96
- triton/backends/amd/include/hip/amd_detail/hip_ldg.h +0 -100
- triton/backends/amd/include/hip/amd_detail/hip_prof_str.h +0 -10570
- triton/backends/amd/include/hip/amd_detail/hip_runtime_prof.h +0 -78
- triton/backends/amd/include/hip/amd_detail/host_defines.h +0 -184
- triton/backends/amd/include/hip/amd_detail/hsa_helpers.hpp +0 -102
- triton/backends/amd/include/hip/amd_detail/macro_based_grid_launch.hpp +0 -798
- triton/backends/amd/include/hip/amd_detail/math_fwd.h +0 -698
- triton/backends/amd/include/hip/amd_detail/ockl_image.h +0 -177
- triton/backends/amd/include/hip/amd_detail/program_state.hpp +0 -107
- triton/backends/amd/include/hip/amd_detail/texture_fetch_functions.h +0 -491
- triton/backends/amd/include/hip/amd_detail/texture_indirect_functions.h +0 -478
- triton/backends/amd/include/hip/channel_descriptor.h +0 -39
- triton/backends/amd/include/hip/device_functions.h +0 -38
- triton/backends/amd/include/hip/driver_types.h +0 -468
- triton/backends/amd/include/hip/hip_bf16.h +0 -36
- triton/backends/amd/include/hip/hip_bfloat16.h +0 -44
- triton/backends/amd/include/hip/hip_common.h +0 -100
- triton/backends/amd/include/hip/hip_complex.h +0 -38
- triton/backends/amd/include/hip/hip_cooperative_groups.h +0 -46
- triton/backends/amd/include/hip/hip_deprecated.h +0 -95
- triton/backends/amd/include/hip/hip_ext.h +0 -161
- triton/backends/amd/include/hip/hip_fp16.h +0 -36
- triton/backends/amd/include/hip/hip_fp8.h +0 -33
- triton/backends/amd/include/hip/hip_gl_interop.h +0 -32
- triton/backends/amd/include/hip/hip_hcc.h +0 -24
- triton/backends/amd/include/hip/hip_math_constants.h +0 -36
- triton/backends/amd/include/hip/hip_profile.h +0 -27
- triton/backends/amd/include/hip/hip_runtime.h +0 -75
- triton/backends/amd/include/hip/hip_runtime_api.h +0 -9261
- triton/backends/amd/include/hip/hip_texture_types.h +0 -29
- triton/backends/amd/include/hip/hip_vector_types.h +0 -41
- triton/backends/amd/include/hip/hip_version.h +0 -17
- triton/backends/amd/include/hip/hiprtc.h +0 -421
- triton/backends/amd/include/hip/library_types.h +0 -78
- triton/backends/amd/include/hip/math_functions.h +0 -42
- triton/backends/amd/include/hip/surface_types.h +0 -63
- triton/backends/amd/include/hip/texture_types.h +0 -194
- triton/backends/amd/include/hsa/Brig.h +0 -1131
- triton/backends/amd/include/hsa/amd_hsa_common.h +0 -91
- triton/backends/amd/include/hsa/amd_hsa_elf.h +0 -462
- triton/backends/amd/include/hsa/amd_hsa_kernel_code.h +0 -269
- triton/backends/amd/include/hsa/amd_hsa_queue.h +0 -109
- triton/backends/amd/include/hsa/amd_hsa_signal.h +0 -80
- triton/backends/amd/include/hsa/hsa.h +0 -5738
- triton/backends/amd/include/hsa/hsa_amd_tool.h +0 -91
- triton/backends/amd/include/hsa/hsa_api_trace.h +0 -579
- triton/backends/amd/include/hsa/hsa_api_trace_version.h +0 -68
- triton/backends/amd/include/hsa/hsa_ext_amd.h +0 -3146
- triton/backends/amd/include/hsa/hsa_ext_finalize.h +0 -531
- triton/backends/amd/include/hsa/hsa_ext_image.h +0 -1454
- triton/backends/amd/include/hsa/hsa_ven_amd_aqlprofile.h +0 -488
- triton/backends/amd/include/hsa/hsa_ven_amd_loader.h +0 -667
- triton/backends/amd/include/hsa/hsa_ven_amd_pc_sampling.h +0 -416
- triton/backends/amd/include/roctracer/ext/prof_protocol.h +0 -107
- triton/backends/amd/include/roctracer/hip_ostream_ops.h +0 -4515
- triton/backends/amd/include/roctracer/hsa_ostream_ops.h +0 -1727
- triton/backends/amd/include/roctracer/hsa_prof_str.h +0 -3059
- triton/backends/amd/include/roctracer/roctracer.h +0 -779
- triton/backends/amd/include/roctracer/roctracer_ext.h +0 -81
- triton/backends/amd/include/roctracer/roctracer_hcc.h +0 -24
- triton/backends/amd/include/roctracer/roctracer_hip.h +0 -37
- triton/backends/amd/include/roctracer/roctracer_hsa.h +0 -112
- triton/backends/amd/include/roctracer/roctracer_plugin.h +0 -137
- triton/backends/amd/include/roctracer/roctracer_roctx.h +0 -67
- triton/backends/amd/include/roctracer/roctx.h +0 -229
- triton/language/_utils.py +0 -21
- triton/language/extra/cuda/_experimental_tma.py +0 -106
- triton/tools/experimental_descriptor.py +0 -32
- triton_windows-3.3.1.post19.dist-info/RECORD +0 -260
- triton_windows-3.3.1.post19.dist-info/top_level.txt +0 -14
- {triton_windows-3.3.1.post19.dist-info → triton_windows-3.4.0.post20.dist-info}/WHEEL +0 -0
|
@@ -1,488 +0,0 @@
|
|
|
1
|
-
////////////////////////////////////////////////////////////////////////////////
|
|
2
|
-
//
|
|
3
|
-
// The University of Illinois/NCSA
|
|
4
|
-
// Open Source License (NCSA)
|
|
5
|
-
//
|
|
6
|
-
// Copyright (c) 2017-2020, Advanced Micro Devices, Inc. All rights reserved.
|
|
7
|
-
//
|
|
8
|
-
// Developed by:
|
|
9
|
-
//
|
|
10
|
-
// AMD Research and AMD HSA Software Development
|
|
11
|
-
//
|
|
12
|
-
// Advanced Micro Devices, Inc.
|
|
13
|
-
//
|
|
14
|
-
// www.amd.com
|
|
15
|
-
//
|
|
16
|
-
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
17
|
-
// of this software and associated documentation files (the "Software"), to
|
|
18
|
-
// deal with the Software without restriction, including without limitation
|
|
19
|
-
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
20
|
-
// and/or sell copies of the Software, and to permit persons to whom the
|
|
21
|
-
// Software is furnished to do so, subject to the following conditions:
|
|
22
|
-
//
|
|
23
|
-
// - Redistributions of source code must retain the above copyright notice,
|
|
24
|
-
// this list of conditions and the following disclaimers.
|
|
25
|
-
// - Redistributions in binary form must reproduce the above copyright
|
|
26
|
-
// notice, this list of conditions and the following disclaimers in
|
|
27
|
-
// the documentation and/or other materials provided with the distribution.
|
|
28
|
-
// - Neither the names of Advanced Micro Devices, Inc,
|
|
29
|
-
// nor the names of its contributors may be used to endorse or promote
|
|
30
|
-
// products derived from this Software without specific prior written
|
|
31
|
-
// permission.
|
|
32
|
-
//
|
|
33
|
-
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
34
|
-
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
35
|
-
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
36
|
-
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
37
|
-
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
38
|
-
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
39
|
-
// DEALINGS WITH THE SOFTWARE.
|
|
40
|
-
//
|
|
41
|
-
////////////////////////////////////////////////////////////////////////////////
|
|
42
|
-
|
|
43
|
-
#ifndef OPENSRC_HSA_RUNTIME_INC_HSA_VEN_AMD_AQLPROFILE_H_
|
|
44
|
-
#define OPENSRC_HSA_RUNTIME_INC_HSA_VEN_AMD_AQLPROFILE_H_
|
|
45
|
-
|
|
46
|
-
#include <stdint.h>
|
|
47
|
-
#include "hsa.h"
|
|
48
|
-
|
|
49
|
-
#define HSA_AQLPROFILE_VERSION_MAJOR 2
|
|
50
|
-
#define HSA_AQLPROFILE_VERSION_MINOR 0
|
|
51
|
-
|
|
52
|
-
#ifdef __cplusplus
|
|
53
|
-
extern "C" {
|
|
54
|
-
#endif // __cplusplus
|
|
55
|
-
|
|
56
|
-
////////////////////////////////////////////////////////////////////////////////
|
|
57
|
-
// Library version
|
|
58
|
-
uint32_t hsa_ven_amd_aqlprofile_version_major();
|
|
59
|
-
uint32_t hsa_ven_amd_aqlprofile_version_minor();
|
|
60
|
-
|
|
61
|
-
///////////////////////////////////////////////////////////////////////
|
|
62
|
-
// Library API:
|
|
63
|
-
// The library provides helper methods for instantiation of
|
|
64
|
-
// the profile context object and for populating of the start
|
|
65
|
-
// and stop AQL packets. The profile object contains a profiling
|
|
66
|
-
// events list and the buffer descriptors needed for profiling:
|
|
67
|
-
// a command buffer and an output data buffer. To check if there
|
|
68
|
-
// was an error the library methods return a status code. Also
|
|
69
|
-
// the library provides methods for querying required buffers
|
|
70
|
-
// attributes, to validate the event attributes and to get profiling
|
|
71
|
-
// output data.
|
|
72
|
-
//
|
|
73
|
-
// Returned status:
|
|
74
|
-
// hsa_status_t – HSA status codes are used from hsa.h header
|
|
75
|
-
//
|
|
76
|
-
// Supported profiling features:
|
|
77
|
-
//
|
|
78
|
-
// Supported profiling events
|
|
79
|
-
typedef enum {
|
|
80
|
-
HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC = 0,
|
|
81
|
-
HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_TRACE = 1,
|
|
82
|
-
} hsa_ven_amd_aqlprofile_event_type_t;
|
|
83
|
-
|
|
84
|
-
// Supported performance counters (PMC) blocks
|
|
85
|
-
// The block ID is the same for a block instances set, for example
|
|
86
|
-
// each block instance from the TCC block set, TCC0, TCC1, …, TCCN
|
|
87
|
-
// will have the same block ID HSA_VEN_AMD_AQLPROFILE_BLOCKS_TCC.
|
|
88
|
-
typedef enum {
|
|
89
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPC = 0,
|
|
90
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPF = 1,
|
|
91
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GDS = 2,
|
|
92
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBM = 3,
|
|
93
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBMSE = 4,
|
|
94
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SPI = 5,
|
|
95
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ = 6,
|
|
96
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQCS = 7,
|
|
97
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SRBM = 8,
|
|
98
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SX = 9,
|
|
99
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TA = 10,
|
|
100
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCA = 11,
|
|
101
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC = 12,
|
|
102
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCP = 13,
|
|
103
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TD = 14,
|
|
104
|
-
// Memory related blocks
|
|
105
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCARB = 15,
|
|
106
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCHUB = 16,
|
|
107
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCMCBVM = 17,
|
|
108
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCSEQ = 18,
|
|
109
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCVML2 = 19,
|
|
110
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MCXBAR = 20,
|
|
111
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_ATC = 21,
|
|
112
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_ATCL2 = 22,
|
|
113
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GCEA = 23,
|
|
114
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_RPB = 24,
|
|
115
|
-
// System blocks
|
|
116
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SDMA = 25,
|
|
117
|
-
// GFX10 added blocks
|
|
118
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL1A = 26,
|
|
119
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL1C = 27,
|
|
120
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL2A = 28,
|
|
121
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GL2C = 29,
|
|
122
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GCR = 30,
|
|
123
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GUS = 31,
|
|
124
|
-
|
|
125
|
-
// UMC & MMEA System Blocks
|
|
126
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_UMC = 32,
|
|
127
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_MMEA = 33,
|
|
128
|
-
|
|
129
|
-
HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER
|
|
130
|
-
} hsa_ven_amd_aqlprofile_block_name_t;
|
|
131
|
-
|
|
132
|
-
// PMC event object structure
|
|
133
|
-
// ‘counter_id’ value is specified in GFXIPs perfcounter user guides
|
|
134
|
-
// which is the counters select value, “Performance Counters Selection”
|
|
135
|
-
// chapter.
|
|
136
|
-
typedef struct {
|
|
137
|
-
hsa_ven_amd_aqlprofile_block_name_t block_name;
|
|
138
|
-
uint32_t block_index;
|
|
139
|
-
uint32_t counter_id;
|
|
140
|
-
} hsa_ven_amd_aqlprofile_event_t;
|
|
141
|
-
|
|
142
|
-
// Check if event is valid for the specific GPU
|
|
143
|
-
hsa_status_t hsa_ven_amd_aqlprofile_validate_event(
|
|
144
|
-
hsa_agent_t agent, // HSA handle for the profiling GPU
|
|
145
|
-
const hsa_ven_amd_aqlprofile_event_t* event, // [in] Pointer on validated event
|
|
146
|
-
bool* result); // [out] True if the event valid, False otherwise
|
|
147
|
-
|
|
148
|
-
// Profiling parameters
|
|
149
|
-
// All parameters are generic and if not applicable for a specific
|
|
150
|
-
// profile configuration then error status will be returned.
|
|
151
|
-
typedef enum {
|
|
152
|
-
/**
|
|
153
|
-
* Select the target compute unit (wgp) for profiling.
|
|
154
|
-
*/
|
|
155
|
-
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET = 0,
|
|
156
|
-
/**
|
|
157
|
-
* VMID Mask
|
|
158
|
-
*/
|
|
159
|
-
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK = 1,
|
|
160
|
-
/**
|
|
161
|
-
* Legacy. Deprecated.
|
|
162
|
-
*/
|
|
163
|
-
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK = 2,
|
|
164
|
-
/**
|
|
165
|
-
* Legacy. Deprecated.
|
|
166
|
-
*/
|
|
167
|
-
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK = 3,
|
|
168
|
-
/**
|
|
169
|
-
* Legacy. Deprecated.
|
|
170
|
-
*/
|
|
171
|
-
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2 = 4,
|
|
172
|
-
/**
|
|
173
|
-
* Shader engine mask for selection.
|
|
174
|
-
*/
|
|
175
|
-
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SE_MASK = 5,
|
|
176
|
-
/**
|
|
177
|
-
* Legacy. Deprecated.
|
|
178
|
-
*/
|
|
179
|
-
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SAMPLE_RATE = 6,
|
|
180
|
-
/**
|
|
181
|
-
* Legacy. Deprecated.
|
|
182
|
-
*/
|
|
183
|
-
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_K_CONCURRENT = 7,
|
|
184
|
-
/**
|
|
185
|
-
* Set SIMD Mask (GFX9) or SIMD ID for collection (Navi)
|
|
186
|
-
*/
|
|
187
|
-
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SIMD_SELECTION = 8,
|
|
188
|
-
/**
|
|
189
|
-
* Set true for occupancy collection only.
|
|
190
|
-
*/
|
|
191
|
-
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_OCCUPANCY_MODE = 9,
|
|
192
|
-
/**
|
|
193
|
-
* ATT collection max data size, in MB. Shared among shader engines.
|
|
194
|
-
*/
|
|
195
|
-
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_ATT_BUFFER_SIZE = 10,
|
|
196
|
-
/**
|
|
197
|
-
* Mask of which compute units to generate perfcounters. GFX9 only.
|
|
198
|
-
*/
|
|
199
|
-
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_MASK = 240,
|
|
200
|
-
/**
|
|
201
|
-
* Select collection period for perfcounters. GFX9 only.
|
|
202
|
-
*/
|
|
203
|
-
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_CTRL = 241,
|
|
204
|
-
/**
|
|
205
|
-
* Select perfcounter ID (SQ block) for collection. GFX9 only.
|
|
206
|
-
*/
|
|
207
|
-
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_NAME = 242,
|
|
208
|
-
} hsa_ven_amd_aqlprofile_parameter_name_t;
|
|
209
|
-
|
|
210
|
-
// Profile parameter object
|
|
211
|
-
typedef struct {
|
|
212
|
-
hsa_ven_amd_aqlprofile_parameter_name_t parameter_name;
|
|
213
|
-
uint32_t value;
|
|
214
|
-
} hsa_ven_amd_aqlprofile_parameter_t;
|
|
215
|
-
|
|
216
|
-
typedef enum {
|
|
217
|
-
HSA_VEN_AMD_AQLPROFILE_ATT_CHANNEL_0 = 0,
|
|
218
|
-
HSA_VEN_AMD_AQLPROFILE_ATT_CHANNEL_1,
|
|
219
|
-
HSA_VEN_AMD_AQLPROFILE_ATT_CHANNEL_2,
|
|
220
|
-
HSA_VEN_AMD_AQLPROFILE_ATT_CHANNEL_3
|
|
221
|
-
} hsa_ven_amd_aqlprofile_att_marker_channel_t;
|
|
222
|
-
|
|
223
|
-
//
|
|
224
|
-
// Profile context object:
|
|
225
|
-
// The library provides a profile object structure which contains
|
|
226
|
-
// the events array, a buffer for the profiling start/stop commands
|
|
227
|
-
// and a buffer for the output data.
|
|
228
|
-
// The buffers are specified by the buffer descriptors and allocated
|
|
229
|
-
// by the application. The buffers allocation attributes, the command
|
|
230
|
-
// buffer size, the PMC output buffer size as well as profiling output
|
|
231
|
-
// data can be obtained using the generic get profile info helper _get_info.
|
|
232
|
-
//
|
|
233
|
-
// Buffer descriptor
|
|
234
|
-
typedef struct {
|
|
235
|
-
void* ptr;
|
|
236
|
-
uint32_t size;
|
|
237
|
-
} hsa_ven_amd_aqlprofile_descriptor_t;
|
|
238
|
-
|
|
239
|
-
// Profile context object structure, contains profiling events list and
|
|
240
|
-
// the buffer descriptors needed for profiling: a command buffer and
|
|
241
|
-
// an output data buffer
|
|
242
|
-
typedef struct {
|
|
243
|
-
hsa_agent_t agent; // GFXIP handle
|
|
244
|
-
hsa_ven_amd_aqlprofile_event_type_t type; // Events type
|
|
245
|
-
const hsa_ven_amd_aqlprofile_event_t* events; // Events array
|
|
246
|
-
uint32_t event_count; // Events count
|
|
247
|
-
const hsa_ven_amd_aqlprofile_parameter_t* parameters; // Parameters array
|
|
248
|
-
uint32_t parameter_count; // Parameters count
|
|
249
|
-
hsa_ven_amd_aqlprofile_descriptor_t output_buffer; // Output buffer
|
|
250
|
-
hsa_ven_amd_aqlprofile_descriptor_t command_buffer; // PM4 commands
|
|
251
|
-
} hsa_ven_amd_aqlprofile_profile_t;
|
|
252
|
-
|
|
253
|
-
//
|
|
254
|
-
// AQL packets populating methods:
|
|
255
|
-
// Helper methods that populate the application-provided START and
|
|
256
|
-
// STOP AQL packets which the application is required to submit before and
|
|
257
|
-
// after profiled GPU task packets respectively.
|
|
258
|
-
//
|
|
259
|
-
// AQL Vendor Specific packet which carries a PM4 command
|
|
260
|
-
typedef struct {
|
|
261
|
-
uint16_t header;
|
|
262
|
-
uint16_t pm4_command[27];
|
|
263
|
-
hsa_signal_t completion_signal;
|
|
264
|
-
} hsa_ext_amd_aql_pm4_packet_t;
|
|
265
|
-
|
|
266
|
-
// Method to populate the provided AQL packet with profiling start commands
|
|
267
|
-
// Only 'pm4_command' fields of the packet are set and the application
|
|
268
|
-
// is responsible to set Vendor Specific header type and a completion signal
|
|
269
|
-
hsa_status_t hsa_ven_amd_aqlprofile_start(
|
|
270
|
-
hsa_ven_amd_aqlprofile_profile_t* profile, // [in/out] profile context object
|
|
271
|
-
hsa_ext_amd_aql_pm4_packet_t* aql_start_packet); // [out] profile start AQL packet
|
|
272
|
-
|
|
273
|
-
// Method to populate the provided AQL packet with profiling stop commands
|
|
274
|
-
// Only 'pm4_command' fields of the packet are set and the application
|
|
275
|
-
// is responsible to set Vendor Specific header type and a completion signal
|
|
276
|
-
hsa_status_t hsa_ven_amd_aqlprofile_stop(
|
|
277
|
-
const hsa_ven_amd_aqlprofile_profile_t* profile, // [in] profile context object
|
|
278
|
-
hsa_ext_amd_aql_pm4_packet_t* aql_stop_packet); // [out] profile stop AQL packet
|
|
279
|
-
|
|
280
|
-
// Method to populate the provided AQL packet with profiling read commands
|
|
281
|
-
// Only 'pm4_command' fields of the packet are set and the application
|
|
282
|
-
// is responsible to set Vendor Specific header type and a completion signal
|
|
283
|
-
hsa_status_t hsa_ven_amd_aqlprofile_read(
|
|
284
|
-
const hsa_ven_amd_aqlprofile_profile_t* profile, // [in] profile context object
|
|
285
|
-
hsa_ext_amd_aql_pm4_packet_t* aql_read_packet); // [out] profile read AQL packet
|
|
286
|
-
|
|
287
|
-
// Legacy devices, PM4 profiling packet size
|
|
288
|
-
const unsigned HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE = 192;
|
|
289
|
-
// Legacy devices, converting the profiling AQL packet to PM4 packet blob
|
|
290
|
-
hsa_status_t hsa_ven_amd_aqlprofile_legacy_get_pm4(
|
|
291
|
-
const hsa_ext_amd_aql_pm4_packet_t* aql_packet, // [in] AQL packet
|
|
292
|
-
void* data); // [out] PM4 packet blob
|
|
293
|
-
|
|
294
|
-
// Method to add a marker (correlation ID) into the ATT buffer.
|
|
295
|
-
hsa_status_t hsa_ven_amd_aqlprofile_att_marker(
|
|
296
|
-
hsa_ven_amd_aqlprofile_profile_t* profile, // [in/out] profile context object
|
|
297
|
-
hsa_ext_amd_aql_pm4_packet_t* aql_marker_packet, // [out] profile marker AQL packet
|
|
298
|
-
uint32_t data, // [in] Data to be inserted
|
|
299
|
-
hsa_ven_amd_aqlprofile_att_marker_channel_t channel); // [in] Comm channel
|
|
300
|
-
|
|
301
|
-
//
|
|
302
|
-
// Get profile info:
|
|
303
|
-
// Generic method for getting various profile info including profile buffers
|
|
304
|
-
// attributes like the command buffer size and the profiling PMC results.
|
|
305
|
-
// It’s implied that all counters are 64bit values.
|
|
306
|
-
//
|
|
307
|
-
// Profile generic output data:
|
|
308
|
-
typedef struct {
|
|
309
|
-
uint32_t sample_id; // PMC sample or trace buffer index
|
|
310
|
-
union {
|
|
311
|
-
struct {
|
|
312
|
-
hsa_ven_amd_aqlprofile_event_t event; // PMC event
|
|
313
|
-
uint64_t result; // PMC result
|
|
314
|
-
} pmc_data;
|
|
315
|
-
hsa_ven_amd_aqlprofile_descriptor_t trace_data; // Trace output data descriptor
|
|
316
|
-
};
|
|
317
|
-
} hsa_ven_amd_aqlprofile_info_data_t;
|
|
318
|
-
|
|
319
|
-
// ID query type
|
|
320
|
-
typedef struct {
|
|
321
|
-
const char* name;
|
|
322
|
-
uint32_t id;
|
|
323
|
-
uint32_t instance_count;
|
|
324
|
-
} hsa_ven_amd_aqlprofile_id_query_t;
|
|
325
|
-
|
|
326
|
-
// Profile attributes
|
|
327
|
-
typedef enum {
|
|
328
|
-
HSA_VEN_AMD_AQLPROFILE_INFO_COMMAND_BUFFER_SIZE = 0, // get_info returns uint32_t value
|
|
329
|
-
HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA_SIZE = 1, // get_info returns uint32_t value
|
|
330
|
-
HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA = 2, // get_info returns PMC uint64_t value
|
|
331
|
-
// in info_data object
|
|
332
|
-
HSA_VEN_AMD_AQLPROFILE_INFO_TRACE_DATA = 3, // get_info returns trace buffer ptr/size
|
|
333
|
-
// in info_data object
|
|
334
|
-
HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_COUNTERS = 4, // get_info returns number of block counter
|
|
335
|
-
HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_ID = 5, // get_info returns block id, instances
|
|
336
|
-
// by name string using _id_query_t
|
|
337
|
-
HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD = 6, // get_info returns size/pointer for
|
|
338
|
-
// counters enable command buffer
|
|
339
|
-
HSA_VEN_AMD_AQLPROFILE_INFO_DISABLE_CMD = 7, // get_info returns size/pointer for
|
|
340
|
-
// counters disable command buffer
|
|
341
|
-
} hsa_ven_amd_aqlprofile_info_type_t;
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
// Definition of output data iterator callback
|
|
345
|
-
typedef hsa_status_t (*hsa_ven_amd_aqlprofile_data_callback_t)(
|
|
346
|
-
hsa_ven_amd_aqlprofile_info_type_t info_type, // [in] data type, PMC or trace data
|
|
347
|
-
hsa_ven_amd_aqlprofile_info_data_t* info_data, // [in] info_data object
|
|
348
|
-
void* callback_data); // [in/out] data passed to the callback
|
|
349
|
-
|
|
350
|
-
// Method for getting the profile info
|
|
351
|
-
hsa_status_t hsa_ven_amd_aqlprofile_get_info(
|
|
352
|
-
const hsa_ven_amd_aqlprofile_profile_t* profile, // [in] profile context object
|
|
353
|
-
hsa_ven_amd_aqlprofile_info_type_t attribute, // [in] requested profile attribute
|
|
354
|
-
void* value); // [in/out] returned value
|
|
355
|
-
|
|
356
|
-
// Method for iterating the events output data
|
|
357
|
-
hsa_status_t hsa_ven_amd_aqlprofile_iterate_data(
|
|
358
|
-
const hsa_ven_amd_aqlprofile_profile_t* profile, // [in] profile context object
|
|
359
|
-
hsa_ven_amd_aqlprofile_data_callback_t callback, // [in] callback to iterate the output data
|
|
360
|
-
void* data); // [in/out] data passed to the callback
|
|
361
|
-
|
|
362
|
-
// Return error string
|
|
363
|
-
hsa_status_t hsa_ven_amd_aqlprofile_error_string(
|
|
364
|
-
const char** str); // [out] pointer on the error string
|
|
365
|
-
|
|
366
|
-
/**
|
|
367
|
-
* @brief Callback for iteration of all possible event coordinate IDs and coordinate names.
|
|
368
|
-
*/
|
|
369
|
-
typedef hsa_status_t(*hsa_ven_amd_aqlprofile_eventname_callback_t)(int id, const char* name);
|
|
370
|
-
/**
|
|
371
|
-
* @brief Iterate over all possible event coordinate IDs and their names.
|
|
372
|
-
*/
|
|
373
|
-
hsa_status_t hsa_ven_amd_aqlprofile_iterate_event_ids(hsa_ven_amd_aqlprofile_eventname_callback_t);
|
|
374
|
-
|
|
375
|
-
/**
|
|
376
|
-
* @brief Callback invoked for each event coordinate of a given agent_t and event_t.
|
|
377
|
-
* @param position A counting sequence indicating callback number.
|
|
378
|
-
* @param id Coordinate ID as in _iterate_event_ids.
|
|
379
|
-
* @param extent Coordinate extent indicating maximum allowed instances.
|
|
380
|
-
* @param coordinate The coordinate, in the range [0,extent-1].
|
|
381
|
-
* @param name Coordinate name as in _iterate_event_ids.
|
|
382
|
-
* @param userdata Userdata returned from _iterate_event_coord function.
|
|
383
|
-
*/
|
|
384
|
-
typedef hsa_status_t(*hsa_ven_amd_aqlprofile_coordinate_callback_t)(
|
|
385
|
-
int position,
|
|
386
|
-
int id,
|
|
387
|
-
int extent,
|
|
388
|
-
int coordinate,
|
|
389
|
-
const char* name,
|
|
390
|
-
void* userdata
|
|
391
|
-
);
|
|
392
|
-
|
|
393
|
-
/**
|
|
394
|
-
* @brief Iterate over all event coordinates for a given agent_t and event_t.
|
|
395
|
-
* @param[in] agent HSA agent.
|
|
396
|
-
* @param[in] event The event ID and block ID to iterate for.
|
|
397
|
-
* @param[in] sample_id aqlprofile_info_data_t.sample_id returned from _aqlprofile_iterate_data.
|
|
398
|
-
* @param[in] callback Callback function to return the coordinates.
|
|
399
|
-
* @param[in] userdata Arbitrary data pointer to be sent back to the user via callback.
|
|
400
|
-
*/
|
|
401
|
-
hsa_status_t hsa_ven_amd_aqlprofile_iterate_event_coord(
|
|
402
|
-
hsa_agent_t agent,
|
|
403
|
-
hsa_ven_amd_aqlprofile_event_t event,
|
|
404
|
-
uint32_t sample_id,
|
|
405
|
-
hsa_ven_amd_aqlprofile_coordinate_callback_t callback,
|
|
406
|
-
void* userdata
|
|
407
|
-
);
|
|
408
|
-
|
|
409
|
-
/**
|
|
410
|
-
* @brief Extension version.
|
|
411
|
-
*/
|
|
412
|
-
#define hsa_ven_amd_aqlprofile_VERSION_MAJOR 1
|
|
413
|
-
#define hsa_ven_amd_aqlprofile_LIB(suff) "libhsa-amd-aqlprofile" suff ".so"
|
|
414
|
-
|
|
415
|
-
#ifdef HSA_LARGE_MODEL
|
|
416
|
-
static const char kAqlProfileLib[] = hsa_ven_amd_aqlprofile_LIB("64");
|
|
417
|
-
#else
|
|
418
|
-
static const char kAqlProfileLib[] = hsa_ven_amd_aqlprofile_LIB("");
|
|
419
|
-
#endif
|
|
420
|
-
|
|
421
|
-
/**
|
|
422
|
-
* @brief Extension function table.
|
|
423
|
-
*/
|
|
424
|
-
typedef struct hsa_ven_amd_aqlprofile_1_00_pfn_s {
|
|
425
|
-
uint32_t (*hsa_ven_amd_aqlprofile_version_major)();
|
|
426
|
-
uint32_t (*hsa_ven_amd_aqlprofile_version_minor)();
|
|
427
|
-
|
|
428
|
-
hsa_status_t (*hsa_ven_amd_aqlprofile_error_string)(
|
|
429
|
-
const char** str);
|
|
430
|
-
|
|
431
|
-
hsa_status_t (*hsa_ven_amd_aqlprofile_validate_event)(
|
|
432
|
-
hsa_agent_t agent,
|
|
433
|
-
const hsa_ven_amd_aqlprofile_event_t* event,
|
|
434
|
-
bool* result);
|
|
435
|
-
|
|
436
|
-
hsa_status_t (*hsa_ven_amd_aqlprofile_start)(
|
|
437
|
-
hsa_ven_amd_aqlprofile_profile_t* profile,
|
|
438
|
-
hsa_ext_amd_aql_pm4_packet_t* aql_start_packet);
|
|
439
|
-
|
|
440
|
-
hsa_status_t (*hsa_ven_amd_aqlprofile_stop)(
|
|
441
|
-
const hsa_ven_amd_aqlprofile_profile_t* profile,
|
|
442
|
-
hsa_ext_amd_aql_pm4_packet_t* aql_stop_packet);
|
|
443
|
-
|
|
444
|
-
hsa_status_t (*hsa_ven_amd_aqlprofile_read)(
|
|
445
|
-
const hsa_ven_amd_aqlprofile_profile_t* profile,
|
|
446
|
-
hsa_ext_amd_aql_pm4_packet_t* aql_read_packet);
|
|
447
|
-
|
|
448
|
-
hsa_status_t (*hsa_ven_amd_aqlprofile_legacy_get_pm4)(
|
|
449
|
-
const hsa_ext_amd_aql_pm4_packet_t* aql_packet,
|
|
450
|
-
void* data);
|
|
451
|
-
|
|
452
|
-
hsa_status_t (*hsa_ven_amd_aqlprofile_get_info)(
|
|
453
|
-
const hsa_ven_amd_aqlprofile_profile_t* profile,
|
|
454
|
-
hsa_ven_amd_aqlprofile_info_type_t attribute,
|
|
455
|
-
void* value);
|
|
456
|
-
|
|
457
|
-
hsa_status_t (*hsa_ven_amd_aqlprofile_iterate_data)(
|
|
458
|
-
const hsa_ven_amd_aqlprofile_profile_t* profile,
|
|
459
|
-
hsa_ven_amd_aqlprofile_data_callback_t callback,
|
|
460
|
-
void* data);
|
|
461
|
-
|
|
462
|
-
hsa_status_t (*hsa_ven_amd_aqlprofile_iterate_event_ids)(
|
|
463
|
-
hsa_ven_amd_aqlprofile_eventname_callback_t
|
|
464
|
-
);
|
|
465
|
-
|
|
466
|
-
hsa_status_t (*hsa_ven_amd_aqlprofile_iterate_event_coord)(
|
|
467
|
-
hsa_agent_t agent,
|
|
468
|
-
hsa_ven_amd_aqlprofile_event_t event,
|
|
469
|
-
uint32_t sample_id,
|
|
470
|
-
hsa_ven_amd_aqlprofile_coordinate_callback_t callback,
|
|
471
|
-
void* userdata
|
|
472
|
-
);
|
|
473
|
-
|
|
474
|
-
hsa_status_t (*hsa_ven_amd_aqlprofile_att_marker)(
|
|
475
|
-
hsa_ven_amd_aqlprofile_profile_t* profile,
|
|
476
|
-
hsa_ext_amd_aql_pm4_packet_t* aql_packet,
|
|
477
|
-
uint32_t data,
|
|
478
|
-
hsa_ven_amd_aqlprofile_att_marker_channel_t channel
|
|
479
|
-
);
|
|
480
|
-
} hsa_ven_amd_aqlprofile_1_00_pfn_t;
|
|
481
|
-
|
|
482
|
-
typedef hsa_ven_amd_aqlprofile_1_00_pfn_t hsa_ven_amd_aqlprofile_pfn_t;
|
|
483
|
-
|
|
484
|
-
#ifdef __cplusplus
|
|
485
|
-
}
|
|
486
|
-
#endif // __cplusplus
|
|
487
|
-
|
|
488
|
-
#endif // OPENSRC_HSA_RUNTIME_INC_HSA_VEN_AMD_AQLPROFILE_H_
|