triton-windows 3.3.1.post19__cp310-cp310-win_amd64.whl → 3.5.0.post21__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of triton-windows might be problematic. Click here for more details.
- triton/_C/libtriton.pyd +0 -0
- triton/__init__.py +11 -2
- triton/_filecheck.py +97 -0
- triton/_internal_testing.py +95 -18
- triton/_utils.py +112 -21
- triton/backends/__init__.py +20 -23
- triton/backends/amd/__init__.py +0 -0
- triton/backends/amd/compiler.py +161 -119
- triton/backends/amd/driver.c +118 -46
- triton/backends/amd/driver.py +274 -96
- triton/backends/compiler.py +7 -21
- triton/backends/driver.py +13 -0
- triton/backends/nvidia/bin/ptxas.exe +0 -0
- triton/backends/nvidia/compiler.py +163 -106
- triton/backends/nvidia/driver.c +166 -101
- triton/backends/nvidia/driver.py +384 -202
- triton/compiler/__init__.py +5 -2
- triton/compiler/code_generator.py +439 -231
- triton/compiler/compiler.py +152 -84
- triton/experimental/__init__.py +0 -0
- triton/experimental/gluon/__init__.py +5 -0
- triton/experimental/gluon/_compiler.py +0 -0
- triton/experimental/gluon/_runtime.py +102 -0
- triton/experimental/gluon/language/__init__.py +119 -0
- triton/experimental/gluon/language/_core.py +490 -0
- triton/experimental/gluon/language/_layouts.py +583 -0
- triton/experimental/gluon/language/_math.py +20 -0
- triton/experimental/gluon/language/_semantic.py +380 -0
- triton/experimental/gluon/language/_standard.py +80 -0
- triton/experimental/gluon/language/amd/__init__.py +4 -0
- triton/experimental/gluon/language/amd/_layouts.py +96 -0
- triton/experimental/gluon/language/amd/cdna3/__init__.py +100 -0
- triton/experimental/gluon/language/amd/cdna4/__init__.py +48 -0
- triton/experimental/gluon/language/amd/cdna4/async_copy.py +151 -0
- triton/experimental/gluon/language/extra/__init__.py +3 -0
- triton/experimental/gluon/language/nvidia/__init__.py +4 -0
- triton/experimental/gluon/language/nvidia/ampere/__init__.py +3 -0
- triton/experimental/gluon/language/nvidia/ampere/async_copy.py +74 -0
- triton/experimental/gluon/language/nvidia/ampere/mbarrier.py +80 -0
- triton/experimental/gluon/language/nvidia/blackwell/__init__.py +387 -0
- triton/experimental/gluon/language/nvidia/blackwell/tma.py +52 -0
- triton/experimental/gluon/language/nvidia/hopper/__init__.py +132 -0
- triton/experimental/gluon/language/nvidia/hopper/mbarrier.py +34 -0
- triton/experimental/gluon/language/nvidia/hopper/tma.py +97 -0
- triton/experimental/gluon/nvidia/__init__.py +4 -0
- triton/experimental/gluon/nvidia/blackwell.py +3 -0
- triton/experimental/gluon/nvidia/hopper.py +45 -0
- triton/knobs.py +546 -0
- triton/language/__init__.py +50 -19
- triton/language/core.py +909 -572
- triton/language/extra/cuda/__init__.py +10 -7
- triton/language/extra/cuda/gdc.py +42 -0
- triton/language/extra/cuda/libdevice.py +394 -394
- triton/language/extra/cuda/utils.py +21 -21
- triton/language/extra/hip/__init__.py +3 -1
- triton/language/extra/hip/libdevice.py +120 -104
- triton/language/extra/hip/utils.py +35 -0
- triton/language/extra/libdevice.py +4 -0
- triton/language/math.py +65 -66
- triton/language/random.py +12 -2
- triton/language/semantic.py +1757 -1768
- triton/language/standard.py +127 -62
- triton/language/target_info.py +54 -0
- triton/runtime/_allocation.py +15 -3
- triton/runtime/_async_compile.py +55 -0
- triton/runtime/autotuner.py +117 -60
- triton/runtime/build.py +83 -17
- triton/runtime/cache.py +61 -47
- triton/runtime/driver.py +25 -47
- triton/runtime/interpreter.py +95 -50
- triton/runtime/jit.py +445 -248
- triton/runtime/tcc/include/_mingw.h +8 -10
- triton/runtime/tcc/include/assert.h +5 -0
- triton/runtime/tcc/include/errno.h +1 -1
- triton/runtime/tcc/include/float.h +21 -3
- triton/runtime/tcc/include/iso646.h +36 -0
- triton/runtime/tcc/include/limits.h +5 -0
- triton/runtime/tcc/include/malloc.h +2 -2
- triton/runtime/tcc/include/math.h +21 -261
- triton/runtime/tcc/include/stdalign.h +16 -0
- triton/runtime/tcc/include/stdarg.h +5 -70
- triton/runtime/tcc/include/stdatomic.h +171 -0
- triton/runtime/tcc/include/stddef.h +7 -19
- triton/runtime/tcc/include/stdlib.h +15 -4
- triton/runtime/tcc/include/stdnoreturn.h +7 -0
- triton/runtime/tcc/include/sys/stat.h +2 -2
- triton/runtime/tcc/include/sys/types.h +5 -0
- triton/runtime/tcc/include/tcc/tcc_libm.h +444 -27
- triton/runtime/tcc/include/tccdefs.h +342 -0
- triton/runtime/tcc/include/tgmath.h +89 -0
- triton/runtime/tcc/include/uchar.h +33 -0
- triton/runtime/tcc/include/unistd.h +1 -0
- triton/runtime/tcc/include/winapi/qos.h +72 -0
- triton/runtime/tcc/include/winapi/shellapi.h +59 -0
- triton/runtime/tcc/include/winapi/winbase.h +9 -2
- triton/runtime/tcc/include/winapi/wincon.h +8 -0
- triton/runtime/tcc/include/winapi/windows.h +1 -1
- triton/runtime/tcc/include/winapi/winnls.h +778 -0
- triton/runtime/tcc/include/winapi/winnt.h +9 -7
- triton/runtime/tcc/include/winapi/winsock2.h +1474 -0
- triton/runtime/tcc/include/winapi/ws2ipdef.h +21 -0
- triton/runtime/tcc/include/winapi/ws2tcpip.h +391 -0
- triton/runtime/tcc/lib/libtcc1.a +0 -0
- triton/runtime/tcc/lib/python314.def +1800 -0
- triton/runtime/tcc/lib/python314t.def +1809 -0
- triton/runtime/tcc/libtcc.dll +0 -0
- triton/runtime/tcc/tcc.exe +0 -0
- triton/testing.py +16 -12
- triton/tools/compile.py +62 -14
- triton/tools/disasm.py +3 -4
- triton/tools/extra/cuda/compile.c +1 -0
- triton/tools/extra/hip/compile.cpp +66 -0
- triton/tools/extra/hip/compile.h +13 -0
- triton/tools/ragged_tma.py +92 -0
- triton/tools/tensor_descriptor.py +34 -0
- triton/windows_utils.py +52 -81
- {triton_windows-3.3.1.post19.dist-info → triton_windows-3.5.0.post21.dist-info}/METADATA +8 -4
- triton_windows-3.5.0.post21.dist-info/RECORD +217 -0
- triton_windows-3.5.0.post21.dist-info/entry_points.txt +3 -0
- triton_windows-3.5.0.post21.dist-info/licenses/LICENSE +23 -0
- triton_windows-3.5.0.post21.dist-info/top_level.txt +1 -0
- triton/backends/amd/include/hip/amd_detail/amd_channel_descriptor.h +0 -358
- triton/backends/amd/include/hip/amd_detail/amd_device_functions.h +0 -1010
- triton/backends/amd/include/hip/amd_detail/amd_hip_atomic.h +0 -1638
- triton/backends/amd/include/hip/amd_detail/amd_hip_bf16.h +0 -1814
- triton/backends/amd/include/hip/amd_detail/amd_hip_bfloat16.h +0 -293
- triton/backends/amd/include/hip/amd_detail/amd_hip_common.h +0 -32
- triton/backends/amd/include/hip/amd_detail/amd_hip_complex.h +0 -174
- triton/backends/amd/include/hip/amd_detail/amd_hip_cooperative_groups.h +0 -835
- triton/backends/amd/include/hip/amd_detail/amd_hip_fp16.h +0 -1809
- triton/backends/amd/include/hip/amd_detail/amd_hip_fp8.h +0 -1391
- triton/backends/amd/include/hip/amd_detail/amd_hip_gl_interop.h +0 -108
- triton/backends/amd/include/hip/amd_detail/amd_hip_math_constants.h +0 -124
- triton/backends/amd/include/hip/amd_detail/amd_hip_runtime.h +0 -405
- triton/backends/amd/include/hip/amd_detail/amd_hip_runtime_pt_api.h +0 -196
- triton/backends/amd/include/hip/amd_detail/amd_hip_unsafe_atomics.h +0 -565
- triton/backends/amd/include/hip/amd_detail/amd_hip_vector_types.h +0 -2226
- triton/backends/amd/include/hip/amd_detail/amd_math_functions.h +0 -104
- triton/backends/amd/include/hip/amd_detail/amd_surface_functions.h +0 -244
- triton/backends/amd/include/hip/amd_detail/amd_warp_functions.h +0 -538
- triton/backends/amd/include/hip/amd_detail/amd_warp_sync_functions.h +0 -288
- triton/backends/amd/include/hip/amd_detail/concepts.hpp +0 -30
- triton/backends/amd/include/hip/amd_detail/device_library_decls.h +0 -133
- triton/backends/amd/include/hip/amd_detail/functional_grid_launch.hpp +0 -218
- triton/backends/amd/include/hip/amd_detail/grid_launch.h +0 -67
- triton/backends/amd/include/hip/amd_detail/grid_launch.hpp +0 -50
- triton/backends/amd/include/hip/amd_detail/grid_launch_GGL.hpp +0 -26
- triton/backends/amd/include/hip/amd_detail/helpers.hpp +0 -137
- triton/backends/amd/include/hip/amd_detail/hip_api_trace.hpp +0 -1446
- triton/backends/amd/include/hip/amd_detail/hip_assert.h +0 -101
- triton/backends/amd/include/hip/amd_detail/hip_cooperative_groups_helper.h +0 -242
- triton/backends/amd/include/hip/amd_detail/hip_fp16_gcc.h +0 -254
- triton/backends/amd/include/hip/amd_detail/hip_fp16_math_fwd.h +0 -96
- triton/backends/amd/include/hip/amd_detail/hip_ldg.h +0 -100
- triton/backends/amd/include/hip/amd_detail/hip_prof_str.h +0 -10570
- triton/backends/amd/include/hip/amd_detail/hip_runtime_prof.h +0 -78
- triton/backends/amd/include/hip/amd_detail/host_defines.h +0 -184
- triton/backends/amd/include/hip/amd_detail/hsa_helpers.hpp +0 -102
- triton/backends/amd/include/hip/amd_detail/macro_based_grid_launch.hpp +0 -798
- triton/backends/amd/include/hip/amd_detail/math_fwd.h +0 -698
- triton/backends/amd/include/hip/amd_detail/ockl_image.h +0 -177
- triton/backends/amd/include/hip/amd_detail/program_state.hpp +0 -107
- triton/backends/amd/include/hip/amd_detail/texture_fetch_functions.h +0 -491
- triton/backends/amd/include/hip/amd_detail/texture_indirect_functions.h +0 -478
- triton/backends/amd/include/hip/channel_descriptor.h +0 -39
- triton/backends/amd/include/hip/device_functions.h +0 -38
- triton/backends/amd/include/hip/driver_types.h +0 -468
- triton/backends/amd/include/hip/hip_bf16.h +0 -36
- triton/backends/amd/include/hip/hip_bfloat16.h +0 -44
- triton/backends/amd/include/hip/hip_common.h +0 -100
- triton/backends/amd/include/hip/hip_complex.h +0 -38
- triton/backends/amd/include/hip/hip_cooperative_groups.h +0 -46
- triton/backends/amd/include/hip/hip_deprecated.h +0 -95
- triton/backends/amd/include/hip/hip_ext.h +0 -161
- triton/backends/amd/include/hip/hip_fp16.h +0 -36
- triton/backends/amd/include/hip/hip_fp8.h +0 -33
- triton/backends/amd/include/hip/hip_gl_interop.h +0 -32
- triton/backends/amd/include/hip/hip_hcc.h +0 -24
- triton/backends/amd/include/hip/hip_math_constants.h +0 -36
- triton/backends/amd/include/hip/hip_profile.h +0 -27
- triton/backends/amd/include/hip/hip_runtime.h +0 -75
- triton/backends/amd/include/hip/hip_runtime_api.h +0 -9261
- triton/backends/amd/include/hip/hip_texture_types.h +0 -29
- triton/backends/amd/include/hip/hip_vector_types.h +0 -41
- triton/backends/amd/include/hip/hip_version.h +0 -17
- triton/backends/amd/include/hip/hiprtc.h +0 -421
- triton/backends/amd/include/hip/library_types.h +0 -78
- triton/backends/amd/include/hip/math_functions.h +0 -42
- triton/backends/amd/include/hip/surface_types.h +0 -63
- triton/backends/amd/include/hip/texture_types.h +0 -194
- triton/backends/amd/include/hsa/Brig.h +0 -1131
- triton/backends/amd/include/hsa/amd_hsa_common.h +0 -91
- triton/backends/amd/include/hsa/amd_hsa_elf.h +0 -462
- triton/backends/amd/include/hsa/amd_hsa_kernel_code.h +0 -269
- triton/backends/amd/include/hsa/amd_hsa_queue.h +0 -109
- triton/backends/amd/include/hsa/amd_hsa_signal.h +0 -80
- triton/backends/amd/include/hsa/hsa.h +0 -5738
- triton/backends/amd/include/hsa/hsa_amd_tool.h +0 -91
- triton/backends/amd/include/hsa/hsa_api_trace.h +0 -579
- triton/backends/amd/include/hsa/hsa_api_trace_version.h +0 -68
- triton/backends/amd/include/hsa/hsa_ext_amd.h +0 -3146
- triton/backends/amd/include/hsa/hsa_ext_finalize.h +0 -531
- triton/backends/amd/include/hsa/hsa_ext_image.h +0 -1454
- triton/backends/amd/include/hsa/hsa_ven_amd_aqlprofile.h +0 -488
- triton/backends/amd/include/hsa/hsa_ven_amd_loader.h +0 -667
- triton/backends/amd/include/hsa/hsa_ven_amd_pc_sampling.h +0 -416
- triton/backends/amd/include/roctracer/ext/prof_protocol.h +0 -107
- triton/backends/amd/include/roctracer/hip_ostream_ops.h +0 -4515
- triton/backends/amd/include/roctracer/hsa_ostream_ops.h +0 -1727
- triton/backends/amd/include/roctracer/hsa_prof_str.h +0 -3059
- triton/backends/amd/include/roctracer/roctracer.h +0 -779
- triton/backends/amd/include/roctracer/roctracer_ext.h +0 -81
- triton/backends/amd/include/roctracer/roctracer_hcc.h +0 -24
- triton/backends/amd/include/roctracer/roctracer_hip.h +0 -37
- triton/backends/amd/include/roctracer/roctracer_hsa.h +0 -112
- triton/backends/amd/include/roctracer/roctracer_plugin.h +0 -137
- triton/backends/amd/include/roctracer/roctracer_roctx.h +0 -67
- triton/backends/amd/include/roctracer/roctx.h +0 -229
- triton/language/_utils.py +0 -21
- triton/language/extra/cuda/_experimental_tma.py +0 -106
- triton/runtime/tcc/lib/libtcc1-64.a +0 -0
- triton/tools/experimental_descriptor.py +0 -32
- triton_windows-3.3.1.post19.dist-info/RECORD +0 -260
- triton_windows-3.3.1.post19.dist-info/top_level.txt +0 -14
- {triton_windows-3.3.1.post19.dist-info → triton_windows-3.5.0.post21.dist-info}/WHEEL +0 -0
|
@@ -1,667 +0,0 @@
|
|
|
1
|
-
////////////////////////////////////////////////////////////////////////////////
|
|
2
|
-
//
|
|
3
|
-
// The University of Illinois/NCSA
|
|
4
|
-
// Open Source License (NCSA)
|
|
5
|
-
//
|
|
6
|
-
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
|
|
7
|
-
//
|
|
8
|
-
// Developed by:
|
|
9
|
-
//
|
|
10
|
-
// AMD Research and AMD HSA Software Development
|
|
11
|
-
//
|
|
12
|
-
// Advanced Micro Devices, Inc.
|
|
13
|
-
//
|
|
14
|
-
// www.amd.com
|
|
15
|
-
//
|
|
16
|
-
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
17
|
-
// of this software and associated documentation files (the "Software"), to
|
|
18
|
-
// deal with the Software without restriction, including without limitation
|
|
19
|
-
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
20
|
-
// and/or sell copies of the Software, and to permit persons to whom the
|
|
21
|
-
// Software is furnished to do so, subject to the following conditions:
|
|
22
|
-
//
|
|
23
|
-
// - Redistributions of source code must retain the above copyright notice,
|
|
24
|
-
// this list of conditions and the following disclaimers.
|
|
25
|
-
// - Redistributions in binary form must reproduce the above copyright
|
|
26
|
-
// notice, this list of conditions and the following disclaimers in
|
|
27
|
-
// the documentation and/or other materials provided with the distribution.
|
|
28
|
-
// - Neither the names of Advanced Micro Devices, Inc,
|
|
29
|
-
// nor the names of its contributors may be used to endorse or promote
|
|
30
|
-
// products derived from this Software without specific prior written
|
|
31
|
-
// permission.
|
|
32
|
-
//
|
|
33
|
-
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
34
|
-
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
35
|
-
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
36
|
-
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
37
|
-
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
38
|
-
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
39
|
-
// DEALINGS WITH THE SOFTWARE.
|
|
40
|
-
//
|
|
41
|
-
////////////////////////////////////////////////////////////////////////////////
|
|
42
|
-
|
|
43
|
-
// HSA AMD extension for additional loader functionality.
|
|
44
|
-
|
|
45
|
-
#ifndef HSA_VEN_AMD_LOADER_H
|
|
46
|
-
#define HSA_VEN_AMD_LOADER_H
|
|
47
|
-
|
|
48
|
-
#include "hsa.h"
|
|
49
|
-
|
|
50
|
-
#ifdef __cplusplus
|
|
51
|
-
extern "C" {
|
|
52
|
-
#endif /* __cplusplus */
|
|
53
|
-
|
|
54
|
-
/**
|
|
55
|
-
* @brief Queries equivalent host address for given @p device_address, and
|
|
56
|
-
* records it in @p host_address.
|
|
57
|
-
*
|
|
58
|
-
*
|
|
59
|
-
* @details Contents of memory pointed to by @p host_address would be identical
|
|
60
|
-
* to contents of memory pointed to by @p device_address. Only difference
|
|
61
|
-
* between the two is host accessibility: @p host_address is always accessible
|
|
62
|
-
* from host, @p device_address might not be accessible from host.
|
|
63
|
-
*
|
|
64
|
-
* If @p device_address already points to host accessible memory, then the value
|
|
65
|
-
* of @p device_address is simply copied into @p host_address.
|
|
66
|
-
*
|
|
67
|
-
* The lifetime of @p host_address is the same as the lifetime of @p
|
|
68
|
-
* device_address, and both lifetimes are limited by the lifetime of the
|
|
69
|
-
* executable that is managing these addresses.
|
|
70
|
-
*
|
|
71
|
-
*
|
|
72
|
-
* @param[in] device_address Device address to query equivalent host address
|
|
73
|
-
* for.
|
|
74
|
-
*
|
|
75
|
-
* @param[out] host_address Pointer to application-allocated buffer to record
|
|
76
|
-
* queried equivalent host address in.
|
|
77
|
-
*
|
|
78
|
-
*
|
|
79
|
-
* @retval HSA_STATUS_SUCCESS Function is executed successfully.
|
|
80
|
-
*
|
|
81
|
-
* @retval HSA_STATUS_ERROR_NOT_INITIALIZED Runtime is not initialized.
|
|
82
|
-
*
|
|
83
|
-
* @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p device_address is invalid or
|
|
84
|
-
* null, or @p host_address is null.
|
|
85
|
-
*/
|
|
86
|
-
hsa_status_t hsa_ven_amd_loader_query_host_address(
|
|
87
|
-
const void *device_address,
|
|
88
|
-
const void **host_address);
|
|
89
|
-
|
|
90
|
-
/**
|
|
91
|
-
* @brief The storage type of the code object that is backing loaded memory
|
|
92
|
-
* segment.
|
|
93
|
-
*/
|
|
94
|
-
typedef enum {
|
|
95
|
-
/**
|
|
96
|
-
* Loaded memory segment is not backed by any code object (anonymous), as the
|
|
97
|
-
* case would be with BSS (uninitialized data).
|
|
98
|
-
*/
|
|
99
|
-
HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE = 0,
|
|
100
|
-
/**
|
|
101
|
-
* Loaded memory segment is backed by the code object that is stored in the
|
|
102
|
-
* file.
|
|
103
|
-
*/
|
|
104
|
-
HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE = 1,
|
|
105
|
-
/**
|
|
106
|
-
* Loaded memory segment is backed by the code object that is stored in the
|
|
107
|
-
* memory.
|
|
108
|
-
*/
|
|
109
|
-
HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY = 2
|
|
110
|
-
} hsa_ven_amd_loader_code_object_storage_type_t;
|
|
111
|
-
|
|
112
|
-
/**
|
|
113
|
-
* @brief Loaded memory segment descriptor.
|
|
114
|
-
*
|
|
115
|
-
*
|
|
116
|
-
* @details Loaded memory segment descriptor describes underlying loaded memory
|
|
117
|
-
* segment. Loaded memory segment is created/allocated by the executable during
|
|
118
|
-
* the loading of the code object that is backing underlying memory segment.
|
|
119
|
-
*
|
|
120
|
-
* The lifetime of underlying memory segment is limited by the lifetime of the
|
|
121
|
-
* executable that is managing underlying memory segment.
|
|
122
|
-
*/
|
|
123
|
-
typedef struct hsa_ven_amd_loader_segment_descriptor_s {
|
|
124
|
-
/**
|
|
125
|
-
* Agent underlying memory segment is allocated on. If the code object that is
|
|
126
|
-
* backing underlying memory segment is program code object, then 0.
|
|
127
|
-
*/
|
|
128
|
-
hsa_agent_t agent;
|
|
129
|
-
/**
|
|
130
|
-
* Executable that is managing this underlying memory segment.
|
|
131
|
-
*/
|
|
132
|
-
hsa_executable_t executable;
|
|
133
|
-
/**
|
|
134
|
-
* Storage type of the code object that is backing underlying memory segment.
|
|
135
|
-
*/
|
|
136
|
-
hsa_ven_amd_loader_code_object_storage_type_t code_object_storage_type;
|
|
137
|
-
/**
|
|
138
|
-
* If the storage type of the code object that is backing underlying memory
|
|
139
|
-
* segment is:
|
|
140
|
-
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE, then null;
|
|
141
|
-
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE, then null-terminated
|
|
142
|
-
* filepath to the code object;
|
|
143
|
-
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY, then host
|
|
144
|
-
* accessible pointer to the first byte of the code object.
|
|
145
|
-
*/
|
|
146
|
-
const void *code_object_storage_base;
|
|
147
|
-
/**
|
|
148
|
-
* If the storage type of the code object that is backing underlying memory
|
|
149
|
-
* segment is:
|
|
150
|
-
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE, then 0;
|
|
151
|
-
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE, then the length of
|
|
152
|
-
* the filepath to the code object (including null-terminating character);
|
|
153
|
-
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY, then the size, in
|
|
154
|
-
* bytes, of the memory occupied by the code object.
|
|
155
|
-
*/
|
|
156
|
-
size_t code_object_storage_size;
|
|
157
|
-
/**
|
|
158
|
-
* If the storage type of the code object that is backing underlying memory
|
|
159
|
-
* segment is:
|
|
160
|
-
* - HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE, then 0;
|
|
161
|
-
* - other, then offset, in bytes, from the beginning of the code object to
|
|
162
|
-
* the first byte in the code object data is copied from.
|
|
163
|
-
*/
|
|
164
|
-
size_t code_object_storage_offset;
|
|
165
|
-
/**
|
|
166
|
-
* Starting address of the underlying memory segment.
|
|
167
|
-
*/
|
|
168
|
-
const void *segment_base;
|
|
169
|
-
/**
|
|
170
|
-
* Size, in bytes, of the underlying memory segment.
|
|
171
|
-
*/
|
|
172
|
-
size_t segment_size;
|
|
173
|
-
} hsa_ven_amd_loader_segment_descriptor_t;
|
|
174
|
-
|
|
175
|
-
/**
|
|
176
|
-
* @brief Either queries loaded memory segment descriptors, or total number of
|
|
177
|
-
* loaded memory segment descriptors.
|
|
178
|
-
*
|
|
179
|
-
*
|
|
180
|
-
* @details If @p segment_descriptors is not null and @p num_segment_descriptors
|
|
181
|
-
* points to number that exactly matches total number of loaded memory segment
|
|
182
|
-
* descriptors, then queries loaded memory segment descriptors, and records them
|
|
183
|
-
* in @p segment_descriptors. If @p segment_descriptors is null and @p
|
|
184
|
-
* num_segment_descriptors points to zero, then queries total number of loaded
|
|
185
|
-
* memory segment descriptors, and records it in @p num_segment_descriptors. In
|
|
186
|
-
* all other cases returns appropriate error code (see below).
|
|
187
|
-
*
|
|
188
|
-
* The caller of this function is responsible for the allocation/deallocation
|
|
189
|
-
* and the lifetime of @p segment_descriptors and @p num_segment_descriptors.
|
|
190
|
-
*
|
|
191
|
-
* The lifetime of loaded memory segments that are described by queried loaded
|
|
192
|
-
* memory segment descriptors is limited by the lifetime of the executable that
|
|
193
|
-
* is managing loaded memory segments.
|
|
194
|
-
*
|
|
195
|
-
* Queried loaded memory segment descriptors are always self-consistent: they
|
|
196
|
-
* describe a complete set of loaded memory segments that are being backed by
|
|
197
|
-
* fully loaded code objects that are present at the time (i.e. this function
|
|
198
|
-
* is blocked until all executable manipulations are fully complete).
|
|
199
|
-
*
|
|
200
|
-
*
|
|
201
|
-
* @param[out] segment_descriptors Pointer to application-allocated buffer to
|
|
202
|
-
* record queried loaded memory segment descriptors in. Can be null if @p
|
|
203
|
-
* num_segment_descriptors points to zero.
|
|
204
|
-
*
|
|
205
|
-
* @param[in,out] num_segment_descriptors Pointer to application-allocated
|
|
206
|
-
* buffer that contains either total number of loaded memory segment descriptors
|
|
207
|
-
* or zero.
|
|
208
|
-
*
|
|
209
|
-
*
|
|
210
|
-
* @retval HSA_STATUS_SUCCESS Function is executed successfully.
|
|
211
|
-
*
|
|
212
|
-
* @retval HSA_STATUS_ERROR_NOT_INITIALIZED Runtime is not initialized.
|
|
213
|
-
*
|
|
214
|
-
* @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p segment_descriptors is null
|
|
215
|
-
* while @p num_segment_descriptors points to non-zero number, @p
|
|
216
|
-
* segment_descriptors is not null while @p num_segment_descriptors points to
|
|
217
|
-
* zero, or @p num_segment_descriptors is null.
|
|
218
|
-
*
|
|
219
|
-
* @retval HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS @p num_segment_descriptors
|
|
220
|
-
* does not point to number that exactly matches total number of loaded memory
|
|
221
|
-
* segment descriptors.
|
|
222
|
-
*/
|
|
223
|
-
hsa_status_t hsa_ven_amd_loader_query_segment_descriptors(
|
|
224
|
-
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
|
|
225
|
-
size_t *num_segment_descriptors);
|
|
226
|
-
|
|
227
|
-
/**
|
|
228
|
-
* @brief Obtains the handle of executable to which the device address belongs.
|
|
229
|
-
*
|
|
230
|
-
* @details This method should not be used to obtain executable handle by using
|
|
231
|
-
* a host address. The executable returned is expected to be alive until its
|
|
232
|
-
* destroyed by the user.
|
|
233
|
-
*
|
|
234
|
-
* @retval HSA_STATUS_SUCCESS Function is executed successfully.
|
|
235
|
-
*
|
|
236
|
-
* @retval HSA_STATUS_ERROR_NOT_INITIALIZED Runtime is not initialized.
|
|
237
|
-
*
|
|
238
|
-
* @retval HSA_STATUS_ERROR_INVALID_ARGUMENT The input is invalid or there
|
|
239
|
-
* is no exectuable found for this kernel code object.
|
|
240
|
-
*/
|
|
241
|
-
hsa_status_t hsa_ven_amd_loader_query_executable(
|
|
242
|
-
const void *device_address,
|
|
243
|
-
hsa_executable_t *executable);
|
|
244
|
-
|
|
245
|
-
//===----------------------------------------------------------------------===//
|
|
246
|
-
|
|
247
|
-
/**
|
|
248
|
-
* @brief Iterate over the loaded code objects in an executable, and invoke
|
|
249
|
-
* an application-defined callback on every iteration.
|
|
250
|
-
*
|
|
251
|
-
* @param[in] executable Executable.
|
|
252
|
-
*
|
|
253
|
-
* @param[in] callback Callback to be invoked once per loaded code object. The
|
|
254
|
-
* HSA runtime passes three arguments to the callback: the executable, a
|
|
255
|
-
* loaded code object, and the application data. If @p callback returns a
|
|
256
|
-
* status other than ::HSA_STATUS_SUCCESS for a particular iteration, the
|
|
257
|
-
* traversal stops and
|
|
258
|
-
* ::hsa_ven_amd_loader_executable_iterate_loaded_code_objects returns that
|
|
259
|
-
* status value.
|
|
260
|
-
*
|
|
261
|
-
* @param[in] data Application data that is passed to @p callback on every
|
|
262
|
-
* iteration. May be NULL.
|
|
263
|
-
*
|
|
264
|
-
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
|
265
|
-
*
|
|
266
|
-
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
|
267
|
-
* initialized.
|
|
268
|
-
*
|
|
269
|
-
* @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
|
|
270
|
-
*
|
|
271
|
-
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
|
|
272
|
-
*/
|
|
273
|
-
hsa_status_t hsa_ven_amd_loader_executable_iterate_loaded_code_objects(
|
|
274
|
-
hsa_executable_t executable,
|
|
275
|
-
hsa_status_t (*callback)(
|
|
276
|
-
hsa_executable_t executable,
|
|
277
|
-
hsa_loaded_code_object_t loaded_code_object,
|
|
278
|
-
void *data),
|
|
279
|
-
void *data);
|
|
280
|
-
|
|
281
|
-
/**
|
|
282
|
-
* @brief Loaded code object kind.
|
|
283
|
-
*/
|
|
284
|
-
typedef enum {
|
|
285
|
-
/**
|
|
286
|
-
* Program code object.
|
|
287
|
-
*/
|
|
288
|
-
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_KIND_PROGRAM = 1,
|
|
289
|
-
/**
|
|
290
|
-
* Agent code object.
|
|
291
|
-
*/
|
|
292
|
-
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_KIND_AGENT = 2
|
|
293
|
-
} hsa_ven_amd_loader_loaded_code_object_kind_t;
|
|
294
|
-
|
|
295
|
-
/**
|
|
296
|
-
* @brief Loaded code object attributes.
|
|
297
|
-
*/
|
|
298
|
-
typedef enum hsa_ven_amd_loader_loaded_code_object_info_e {
|
|
299
|
-
/**
|
|
300
|
-
* The executable in which this loaded code object is loaded. The
|
|
301
|
-
* type of this attribute is ::hsa_executable_t.
|
|
302
|
-
*/
|
|
303
|
-
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_EXECUTABLE = 1,
|
|
304
|
-
/**
|
|
305
|
-
* The kind of this loaded code object. The type of this attribute is
|
|
306
|
-
* ::uint32_t interpreted as ::hsa_ven_amd_loader_loaded_code_object_kind_t.
|
|
307
|
-
*/
|
|
308
|
-
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_KIND = 2,
|
|
309
|
-
/**
|
|
310
|
-
* The agent on which this loaded code object is loaded. The
|
|
311
|
-
* value of this attribute is only defined if
|
|
312
|
-
* ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_KIND is
|
|
313
|
-
* ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_KIND_AGENT. The type of this
|
|
314
|
-
* attribute is ::hsa_agent_t.
|
|
315
|
-
*/
|
|
316
|
-
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_AGENT = 3,
|
|
317
|
-
/**
|
|
318
|
-
* The storage type of the code object reader used to load the loaded code object.
|
|
319
|
-
* The type of this attribute is ::uint32_t interpreted as a
|
|
320
|
-
* ::hsa_ven_amd_loader_code_object_storage_type_t.
|
|
321
|
-
*/
|
|
322
|
-
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE = 4,
|
|
323
|
-
/**
|
|
324
|
-
* The memory address of the first byte of the code object that was loaaded.
|
|
325
|
-
* The value of this attribute is only defined if
|
|
326
|
-
* ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE is
|
|
327
|
-
* ::HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY. The type of this
|
|
328
|
-
* attribute is ::uint64_t.
|
|
329
|
-
*/
|
|
330
|
-
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_BASE = 5,
|
|
331
|
-
/**
|
|
332
|
-
* The memory size in bytes of the code object that was loaaded.
|
|
333
|
-
* The value of this attribute is only defined if
|
|
334
|
-
* ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE is
|
|
335
|
-
* ::HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY. The type of this
|
|
336
|
-
* attribute is ::uint64_t.
|
|
337
|
-
*/
|
|
338
|
-
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_SIZE = 6,
|
|
339
|
-
/**
|
|
340
|
-
* The file descriptor of the code object that was loaaded.
|
|
341
|
-
* The value of this attribute is only defined if
|
|
342
|
-
* ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE is
|
|
343
|
-
* ::HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE. The type of this
|
|
344
|
-
* attribute is ::int.
|
|
345
|
-
*/
|
|
346
|
-
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_FILE = 7,
|
|
347
|
-
/**
|
|
348
|
-
* The signed byte address difference of the memory address at which the code
|
|
349
|
-
* object is loaded minus the virtual address specified in the code object
|
|
350
|
-
* that is loaded. The value of this attribute is only defined if the
|
|
351
|
-
* executable in which the code object is loaded is froozen. The type of this
|
|
352
|
-
* attribute is ::int64_t.
|
|
353
|
-
*/
|
|
354
|
-
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA = 8,
|
|
355
|
-
/**
|
|
356
|
-
* The base memory address at which the code object is loaded. This is the
|
|
357
|
-
* base address of the allocation for the lowest addressed segment of the code
|
|
358
|
-
* object that is loaded. Note that any non-loaded segments before the first
|
|
359
|
-
* loaded segment are ignored. The value of this attribute is only defined if
|
|
360
|
-
* the executable in which the code object is loaded is froozen. The type of
|
|
361
|
-
* this attribute is ::uint64_t.
|
|
362
|
-
*/
|
|
363
|
-
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_BASE = 9,
|
|
364
|
-
/**
|
|
365
|
-
* The byte size of the loaded code objects contiguous memory allocation. The
|
|
366
|
-
* value of this attribute is only defined if the executable in which the code
|
|
367
|
-
* object is loaded is froozen. The type of this attribute is ::uint64_t.
|
|
368
|
-
*/
|
|
369
|
-
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE = 10,
|
|
370
|
-
/**
|
|
371
|
-
* The length of the URI in bytes, not including the NUL terminator. The type
|
|
372
|
-
* of this attribute is uint32_t.
|
|
373
|
-
*/
|
|
374
|
-
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH = 11,
|
|
375
|
-
/**
|
|
376
|
-
* The URI name from which the code object was loaded. The type of this
|
|
377
|
-
* attribute is a NUL terminated \p char* with the length equal to the value
|
|
378
|
-
* of ::HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH attribute.
|
|
379
|
-
* The URI name syntax is defined by the following BNF syntax:
|
|
380
|
-
*
|
|
381
|
-
* code_object_uri ::== file_uri | memory_uri
|
|
382
|
-
* file_uri ::== "file://" file_path [ range_specifier ]
|
|
383
|
-
* memory_uri ::== "memory://" process_id range_specifier
|
|
384
|
-
* range_specifier ::== [ "#" | "?" ] "offset=" number "&" "size=" number
|
|
385
|
-
* file_path ::== URI_ENCODED_OS_FILE_PATH
|
|
386
|
-
* process_id ::== DECIMAL_NUMBER
|
|
387
|
-
* number ::== HEX_NUMBER | DECIMAL_NUMBER | OCTAL_NUMBER
|
|
388
|
-
*
|
|
389
|
-
* ``number`` is a C integral literal where hexadecimal values are prefixed by
|
|
390
|
-
* "0x" or "0X", and octal values by "0".
|
|
391
|
-
*
|
|
392
|
-
* ``file_path`` is the file's path specified as a URI encoded UTF-8 string.
|
|
393
|
-
* In URI encoding, every character that is not in the regular expression
|
|
394
|
-
* ``[a-zA-Z0-9/_.~-]`` is encoded as two uppercase hexidecimal digits
|
|
395
|
-
* proceeded by "%". Directories in the path are separated by "/".
|
|
396
|
-
*
|
|
397
|
-
* ``offset`` is a 0-based byte offset to the start of the code object. For a
|
|
398
|
-
* file URI, it is from the start of the file specified by the ``file_path``,
|
|
399
|
-
* and if omitted defaults to 0. For a memory URI, it is the memory address
|
|
400
|
-
* and is required.
|
|
401
|
-
*
|
|
402
|
-
* ``size`` is the number of bytes in the code object. For a file URI, if
|
|
403
|
-
* omitted it defaults to the size of the file. It is required for a memory
|
|
404
|
-
* URI.
|
|
405
|
-
*
|
|
406
|
-
* ``process_id`` is the identity of the process owning the memory. For Linux
|
|
407
|
-
* it is the C unsigned integral decimal literal for the process ID (PID).
|
|
408
|
-
*
|
|
409
|
-
* For example:
|
|
410
|
-
*
|
|
411
|
-
* file:///dir1/dir2/file1
|
|
412
|
-
* file:///dir3/dir4/file2#offset=0x2000&size=3000
|
|
413
|
-
* memory://1234#offset=0x20000&size=3000
|
|
414
|
-
*/
|
|
415
|
-
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI = 12,
|
|
416
|
-
} hsa_ven_amd_loader_loaded_code_object_info_t;
|
|
417
|
-
|
|
418
|
-
/**
|
|
419
|
-
* @brief Get the current value of an attribute for a given loaded code
|
|
420
|
-
* object.
|
|
421
|
-
*
|
|
422
|
-
* @param[in] loaded_code_object Loaded code object.
|
|
423
|
-
*
|
|
424
|
-
* @param[in] attribute Attribute to query.
|
|
425
|
-
*
|
|
426
|
-
* @param[out] value Pointer to an application-allocated buffer where to store
|
|
427
|
-
* the value of the attribute. If the buffer passed by the application is not
|
|
428
|
-
* large enough to hold the value of @p attribute, the behavior is undefined.
|
|
429
|
-
*
|
|
430
|
-
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
|
431
|
-
*
|
|
432
|
-
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
|
433
|
-
* initialized.
|
|
434
|
-
*
|
|
435
|
-
* @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT The loaded code object is
|
|
436
|
-
* invalid.
|
|
437
|
-
*
|
|
438
|
-
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
|
|
439
|
-
* loaded code object attribute, or @p value is NULL.
|
|
440
|
-
*/
|
|
441
|
-
hsa_status_t hsa_ven_amd_loader_loaded_code_object_get_info(
|
|
442
|
-
hsa_loaded_code_object_t loaded_code_object,
|
|
443
|
-
hsa_ven_amd_loader_loaded_code_object_info_t attribute,
|
|
444
|
-
void *value);
|
|
445
|
-
|
|
446
|
-
//===----------------------------------------------------------------------===//
|
|
447
|
-
|
|
448
|
-
/**
|
|
449
|
-
* @brief Create a code object reader to operate on a file with size and offset.
|
|
450
|
-
*
|
|
451
|
-
* @param[in] file File descriptor. The file must have been opened by
|
|
452
|
-
* application with at least read permissions prior to calling this function. The
|
|
453
|
-
* file must contain a vendor-specific code object.
|
|
454
|
-
*
|
|
455
|
-
* The file is owned and managed by the application; the lifetime of the file
|
|
456
|
-
* descriptor must exceed that of any associated code object reader.
|
|
457
|
-
*
|
|
458
|
-
* @param[in] size Size of the code object embedded in @p file.
|
|
459
|
-
*
|
|
460
|
-
* @param[in] offset 0-based offset relative to the beginning of the @p file
|
|
461
|
-
* that denotes the beginning of the code object embedded within the @p file.
|
|
462
|
-
*
|
|
463
|
-
* @param[out] code_object_reader Memory location to store the newly created
|
|
464
|
-
* code object reader handle. Must not be NULL.
|
|
465
|
-
*
|
|
466
|
-
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
|
467
|
-
*
|
|
468
|
-
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
|
469
|
-
* initialized.
|
|
470
|
-
*
|
|
471
|
-
* @retval ::HSA_STATUS_ERROR_INVALID_FILE @p file is not opened with at least
|
|
472
|
-
* read permissions. This condition may also be reported as
|
|
473
|
-
* ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT_READER by the
|
|
474
|
-
* ::hsa_executable_load_agent_code_object function.
|
|
475
|
-
*
|
|
476
|
-
* @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT The bytes starting at offset
|
|
477
|
-
* do not form a valid code object, the file size is 0, or offset > file size.
|
|
478
|
-
* This condition may also be reported as
|
|
479
|
-
* ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT by the
|
|
480
|
-
* ::hsa_executable_load_agent_code_object function.
|
|
481
|
-
*
|
|
482
|
-
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
|
|
483
|
-
* allocate the required resources.
|
|
484
|
-
*
|
|
485
|
-
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p code_object_reader is NULL.
|
|
486
|
-
*/
|
|
487
|
-
hsa_status_t
|
|
488
|
-
hsa_ven_amd_loader_code_object_reader_create_from_file_with_offset_size(
|
|
489
|
-
hsa_file_t file,
|
|
490
|
-
size_t offset,
|
|
491
|
-
size_t size,
|
|
492
|
-
hsa_code_object_reader_t *code_object_reader);
|
|
493
|
-
|
|
494
|
-
//===----------------------------------------------------------------------===//
|
|
495
|
-
|
|
496
|
-
/**
|
|
497
|
-
* @brief Iterate over the available executables, and invoke an
|
|
498
|
-
* application-defined callback on every iteration. While
|
|
499
|
-
* ::hsa_ven_amd_loader_iterate_executables is executing, any calls to
|
|
500
|
-
* ::hsa_executable_create, ::hsa_executable_create_alt, or
|
|
501
|
-
* ::hsa_executable_destroy will be blocked.
|
|
502
|
-
*
|
|
503
|
-
* @param[in] callback Callback to be invoked once per executable. The HSA
|
|
504
|
-
* runtime passes two arguments to the callback: the executable and the
|
|
505
|
-
* application data. If @p callback returns a status other than
|
|
506
|
-
* ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and
|
|
507
|
-
* ::hsa_ven_amd_loader_iterate_executables returns that status value. If
|
|
508
|
-
* @p callback invokes ::hsa_executable_create, ::hsa_executable_create_alt, or
|
|
509
|
-
* ::hsa_executable_destroy then the behavior is undefined.
|
|
510
|
-
*
|
|
511
|
-
* @param[in] data Application data that is passed to @p callback on every
|
|
512
|
-
* iteration. May be NULL.
|
|
513
|
-
*
|
|
514
|
-
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
|
515
|
-
*
|
|
516
|
-
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
|
517
|
-
* initialized.
|
|
518
|
-
*
|
|
519
|
-
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
|
|
520
|
-
*/
|
|
521
|
-
hsa_status_t
|
|
522
|
-
hsa_ven_amd_loader_iterate_executables(
|
|
523
|
-
hsa_status_t (*callback)(
|
|
524
|
-
hsa_executable_t executable,
|
|
525
|
-
void *data),
|
|
526
|
-
void *data);
|
|
527
|
-
|
|
528
|
-
//===----------------------------------------------------------------------===//
|
|
529
|
-
|
|
530
|
-
/**
|
|
531
|
-
* @brief Extension version.
|
|
532
|
-
*/
|
|
533
|
-
#define hsa_ven_amd_loader 001003
|
|
534
|
-
|
|
535
|
-
/**
|
|
536
|
-
* @brief Extension function table version 1.00.
|
|
537
|
-
*/
|
|
538
|
-
typedef struct hsa_ven_amd_loader_1_00_pfn_s {
|
|
539
|
-
hsa_status_t (*hsa_ven_amd_loader_query_host_address)(
|
|
540
|
-
const void *device_address,
|
|
541
|
-
const void **host_address);
|
|
542
|
-
|
|
543
|
-
hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
|
|
544
|
-
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
|
|
545
|
-
size_t *num_segment_descriptors);
|
|
546
|
-
|
|
547
|
-
hsa_status_t (*hsa_ven_amd_loader_query_executable)(
|
|
548
|
-
const void *device_address,
|
|
549
|
-
hsa_executable_t *executable);
|
|
550
|
-
} hsa_ven_amd_loader_1_00_pfn_t;
|
|
551
|
-
|
|
552
|
-
/**
|
|
553
|
-
* @brief Extension function table version 1.01.
|
|
554
|
-
*/
|
|
555
|
-
typedef struct hsa_ven_amd_loader_1_01_pfn_s {
|
|
556
|
-
hsa_status_t (*hsa_ven_amd_loader_query_host_address)(
|
|
557
|
-
const void *device_address,
|
|
558
|
-
const void **host_address);
|
|
559
|
-
|
|
560
|
-
hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
|
|
561
|
-
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
|
|
562
|
-
size_t *num_segment_descriptors);
|
|
563
|
-
|
|
564
|
-
hsa_status_t (*hsa_ven_amd_loader_query_executable)(
|
|
565
|
-
const void *device_address,
|
|
566
|
-
hsa_executable_t *executable);
|
|
567
|
-
|
|
568
|
-
hsa_status_t (*hsa_ven_amd_loader_executable_iterate_loaded_code_objects)(
|
|
569
|
-
hsa_executable_t executable,
|
|
570
|
-
hsa_status_t (*callback)(
|
|
571
|
-
hsa_executable_t executable,
|
|
572
|
-
hsa_loaded_code_object_t loaded_code_object,
|
|
573
|
-
void *data),
|
|
574
|
-
void *data);
|
|
575
|
-
|
|
576
|
-
hsa_status_t (*hsa_ven_amd_loader_loaded_code_object_get_info)(
|
|
577
|
-
hsa_loaded_code_object_t loaded_code_object,
|
|
578
|
-
hsa_ven_amd_loader_loaded_code_object_info_t attribute,
|
|
579
|
-
void *value);
|
|
580
|
-
} hsa_ven_amd_loader_1_01_pfn_t;
|
|
581
|
-
|
|
582
|
-
/**
|
|
583
|
-
* @brief Extension function table version 1.02.
|
|
584
|
-
*/
|
|
585
|
-
typedef struct hsa_ven_amd_loader_1_02_pfn_s {
|
|
586
|
-
hsa_status_t (*hsa_ven_amd_loader_query_host_address)(
|
|
587
|
-
const void *device_address,
|
|
588
|
-
const void **host_address);
|
|
589
|
-
|
|
590
|
-
hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
|
|
591
|
-
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
|
|
592
|
-
size_t *num_segment_descriptors);
|
|
593
|
-
|
|
594
|
-
hsa_status_t (*hsa_ven_amd_loader_query_executable)(
|
|
595
|
-
const void *device_address,
|
|
596
|
-
hsa_executable_t *executable);
|
|
597
|
-
|
|
598
|
-
hsa_status_t (*hsa_ven_amd_loader_executable_iterate_loaded_code_objects)(
|
|
599
|
-
hsa_executable_t executable,
|
|
600
|
-
hsa_status_t (*callback)(
|
|
601
|
-
hsa_executable_t executable,
|
|
602
|
-
hsa_loaded_code_object_t loaded_code_object,
|
|
603
|
-
void *data),
|
|
604
|
-
void *data);
|
|
605
|
-
|
|
606
|
-
hsa_status_t (*hsa_ven_amd_loader_loaded_code_object_get_info)(
|
|
607
|
-
hsa_loaded_code_object_t loaded_code_object,
|
|
608
|
-
hsa_ven_amd_loader_loaded_code_object_info_t attribute,
|
|
609
|
-
void *value);
|
|
610
|
-
|
|
611
|
-
hsa_status_t
|
|
612
|
-
(*hsa_ven_amd_loader_code_object_reader_create_from_file_with_offset_size)(
|
|
613
|
-
hsa_file_t file,
|
|
614
|
-
size_t offset,
|
|
615
|
-
size_t size,
|
|
616
|
-
hsa_code_object_reader_t *code_object_reader);
|
|
617
|
-
} hsa_ven_amd_loader_1_02_pfn_t;
|
|
618
|
-
|
|
619
|
-
/**
|
|
620
|
-
* @brief Extension function table version 1.03.
|
|
621
|
-
*/
|
|
622
|
-
typedef struct hsa_ven_amd_loader_1_03_pfn_s {
|
|
623
|
-
hsa_status_t (*hsa_ven_amd_loader_query_host_address)(
|
|
624
|
-
const void *device_address,
|
|
625
|
-
const void **host_address);
|
|
626
|
-
|
|
627
|
-
hsa_status_t (*hsa_ven_amd_loader_query_segment_descriptors)(
|
|
628
|
-
hsa_ven_amd_loader_segment_descriptor_t *segment_descriptors,
|
|
629
|
-
size_t *num_segment_descriptors);
|
|
630
|
-
|
|
631
|
-
hsa_status_t (*hsa_ven_amd_loader_query_executable)(
|
|
632
|
-
const void *device_address,
|
|
633
|
-
hsa_executable_t *executable);
|
|
634
|
-
|
|
635
|
-
hsa_status_t (*hsa_ven_amd_loader_executable_iterate_loaded_code_objects)(
|
|
636
|
-
hsa_executable_t executable,
|
|
637
|
-
hsa_status_t (*callback)(
|
|
638
|
-
hsa_executable_t executable,
|
|
639
|
-
hsa_loaded_code_object_t loaded_code_object,
|
|
640
|
-
void *data),
|
|
641
|
-
void *data);
|
|
642
|
-
|
|
643
|
-
hsa_status_t (*hsa_ven_amd_loader_loaded_code_object_get_info)(
|
|
644
|
-
hsa_loaded_code_object_t loaded_code_object,
|
|
645
|
-
hsa_ven_amd_loader_loaded_code_object_info_t attribute,
|
|
646
|
-
void *value);
|
|
647
|
-
|
|
648
|
-
hsa_status_t
|
|
649
|
-
(*hsa_ven_amd_loader_code_object_reader_create_from_file_with_offset_size)(
|
|
650
|
-
hsa_file_t file,
|
|
651
|
-
size_t offset,
|
|
652
|
-
size_t size,
|
|
653
|
-
hsa_code_object_reader_t *code_object_reader);
|
|
654
|
-
|
|
655
|
-
hsa_status_t
|
|
656
|
-
(*hsa_ven_amd_loader_iterate_executables)(
|
|
657
|
-
hsa_status_t (*callback)(
|
|
658
|
-
hsa_executable_t executable,
|
|
659
|
-
void *data),
|
|
660
|
-
void *data);
|
|
661
|
-
} hsa_ven_amd_loader_1_03_pfn_t;
|
|
662
|
-
|
|
663
|
-
#ifdef __cplusplus
|
|
664
|
-
}
|
|
665
|
-
#endif /* __cplusplus */
|
|
666
|
-
|
|
667
|
-
#endif /* HSA_VEN_AMD_LOADER_H */
|