triton-windows 3.2.0.post11__cp39-cp39-win_amd64.whl → 3.3.0a0.post11__cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of triton-windows might be problematic. Click here for more details.
- triton/_C/libtriton.pyd +0 -0
- triton/__init__.py +3 -3
- triton/_internal_testing.py +59 -4
- triton/_utils.py +35 -0
- triton/backends/amd/compiler.py +121 -74
- triton/backends/amd/driver.py +77 -43
- triton/backends/amd/include/hip/amd_detail/amd_device_functions.h +28 -49
- triton/backends/amd/include/hip/amd_detail/amd_hip_atomic.h +35 -9
- triton/backends/amd/include/hip/amd_detail/amd_hip_bf16.h +761 -284
- triton/backends/amd/include/hip/amd_detail/amd_hip_cooperative_groups.h +9 -3
- triton/backends/amd/include/hip/amd_detail/amd_hip_fp8.h +1391 -0
- triton/backends/amd/include/hip/amd_detail/amd_hip_gl_interop.h +3 -3
- triton/backends/amd/include/hip/amd_detail/amd_warp_functions.h +44 -0
- triton/backends/amd/include/hip/amd_detail/amd_warp_sync_functions.h +288 -0
- triton/backends/amd/include/hip/amd_detail/hip_api_trace.hpp +110 -14
- triton/backends/amd/include/hip/amd_detail/hip_prof_str.h +504 -103
- triton/backends/amd/include/hip/amd_detail/hip_runtime_prof.h +2 -1
- triton/backends/amd/include/hip/amd_detail/host_defines.h +4 -0
- triton/backends/amd/include/hip/hip_ext.h +4 -2
- triton/backends/amd/include/hip/hip_fp8.h +33 -0
- triton/backends/amd/include/hip/hip_runtime_api.h +375 -33
- triton/backends/amd/include/hip/hip_version.h +3 -3
- triton/backends/amd/include/hip/hiprtc.h +25 -25
- triton/backends/amd/include/hsa/amd_hsa_elf.h +40 -14
- triton/backends/amd/include/hsa/hsa.h +11 -2
- triton/backends/amd/include/hsa/hsa_api_trace.h +30 -17
- triton/backends/amd/include/hsa/hsa_api_trace_version.h +68 -0
- triton/backends/amd/include/hsa/hsa_ext_amd.h +83 -27
- triton/backends/amd/include/hsa/hsa_ven_amd_aqlprofile.h +46 -46
- triton/backends/amd/include/hsa/hsa_ven_amd_pc_sampling.h +416 -0
- triton/backends/amd/include/roctracer/hip_ostream_ops.h +84 -4
- triton/backends/amd/include/roctracer/hsa_ostream_ops.h +260 -0
- triton/backends/amd/include/roctracer/hsa_prof_str.h +51 -19
- triton/backends/amd/lib/asanrtl.bc +0 -0
- triton/backends/compiler.py +25 -225
- triton/backends/driver.py +7 -2
- triton/backends/nvidia/bin/ptxas.exe +0 -0
- triton/backends/nvidia/compiler.py +135 -90
- triton/backends/nvidia/driver.c +0 -1
- triton/backends/nvidia/driver.py +135 -49
- triton/backends/nvidia/include/cuda.h +2162 -241
- triton/backends/nvidia/lib/x64/cuda.lib +0 -0
- triton/compiler/__init__.py +2 -2
- triton/compiler/code_generator.py +334 -231
- triton/compiler/compiler.py +77 -66
- triton/language/__init__.py +22 -5
- triton/language/core.py +448 -74
- triton/language/extra/cuda/_experimental_tma.py +3 -5
- triton/language/math.py +1 -1
- triton/language/random.py +2 -1
- triton/language/semantic.py +206 -52
- triton/language/standard.py +35 -18
- triton/runtime/_allocation.py +32 -0
- triton/runtime/autotuner.py +27 -32
- triton/runtime/build.py +1 -48
- triton/runtime/cache.py +6 -6
- triton/runtime/errors.py +10 -0
- triton/runtime/interpreter.py +179 -45
- triton/runtime/jit.py +149 -190
- triton/testing.py +39 -11
- triton/tools/compile.py +27 -20
- triton/tools/{compile.c → extra/cuda/compile.c} +1 -0
- triton/tools/mxfp.py +301 -0
- {triton_windows-3.2.0.post11.dist-info → triton_windows-3.3.0a0.post11.dist-info}/METADATA +5 -2
- {triton_windows-3.2.0.post11.dist-info → triton_windows-3.3.0a0.post11.dist-info}/RECORD +68 -59
- {triton_windows-3.2.0.post11.dist-info → triton_windows-3.3.0a0.post11.dist-info}/top_level.txt +2 -0
- /triton/tools/{compile.h → extra/cuda/compile.h} +0 -0
- {triton_windows-3.2.0.post11.dist-info → triton_windows-3.3.0a0.post11.dist-info}/WHEEL +0 -0
|
@@ -102,7 +102,7 @@ typedef struct hipDeviceProp_t {
|
|
|
102
102
|
char luid[8]; ///< 8-byte unique identifier. Only valid on windows
|
|
103
103
|
unsigned int luidDeviceNodeMask; ///< LUID node mask
|
|
104
104
|
size_t totalGlobalMem; ///< Size of global memory region (in bytes).
|
|
105
|
-
size_t sharedMemPerBlock; ///< Size of shared memory
|
|
105
|
+
size_t sharedMemPerBlock; ///< Size of shared memory per block (in bytes).
|
|
106
106
|
int regsPerBlock; ///< Registers per block.
|
|
107
107
|
int warpSize; ///< Warp size.
|
|
108
108
|
size_t memPitch; ///< Maximum pitch in bytes allowed by memory copies
|
|
@@ -111,7 +111,8 @@ typedef struct hipDeviceProp_t {
|
|
|
111
111
|
int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block.
|
|
112
112
|
int maxGridSize[3]; ///< Max grid dimensions (XYZ).
|
|
113
113
|
int clockRate; ///< Max clock frequency of the multiProcessors in khz.
|
|
114
|
-
size_t totalConstMem; ///< Size of shared memory region
|
|
114
|
+
size_t totalConstMem; ///< Size of shared constant memory region on the device
|
|
115
|
+
///< (in bytes).
|
|
115
116
|
int major; ///< Major compute capability. On HCC, this is an approximation and features may
|
|
116
117
|
///< differ from CUDA CC. See the arch feature flags for portable ways to query
|
|
117
118
|
///< feature caps.
|
|
@@ -538,6 +539,12 @@ typedef enum hipDeviceAttribute_t {
|
|
|
538
539
|
// Extended attributes for vendors
|
|
539
540
|
} hipDeviceAttribute_t;
|
|
540
541
|
|
|
542
|
+
typedef enum hipDriverProcAddressQueryResult {
|
|
543
|
+
HIP_GET_PROC_ADDRESS_SUCCESS = 0,
|
|
544
|
+
HIP_GET_PROC_ADDRESS_SYMBOL_NOT_FOUND = 1,
|
|
545
|
+
HIP_GET_PROC_ADDRESS_VERSION_NOT_SUFFICIENT = 2
|
|
546
|
+
} hipDriverProcAddressQueryResult;
|
|
547
|
+
|
|
541
548
|
enum hipComputeMode {
|
|
542
549
|
hipComputeModeDefault = 0,
|
|
543
550
|
hipComputeModeExclusive = 1,
|
|
@@ -740,6 +747,9 @@ enum hipLimit_t {
|
|
|
740
747
|
/** Memory allocated will be uncached. */
|
|
741
748
|
#define hipDeviceMallocUncached 0x3
|
|
742
749
|
|
|
750
|
+
/** Memory allocated will be contiguous. */
|
|
751
|
+
#define hipDeviceMallocContiguous 0x4
|
|
752
|
+
|
|
743
753
|
//Flags that can be used with hipHostRegister.
|
|
744
754
|
/** Memory is Mapped and Portable.*/
|
|
745
755
|
#define hipHostRegisterDefault 0x0
|
|
@@ -798,6 +808,8 @@ enum hipLimit_t {
|
|
|
798
808
|
/** Implicit stream per application thread.*/
|
|
799
809
|
#define hipStreamPerThread ((hipStream_t)2)
|
|
800
810
|
|
|
811
|
+
#define hipStreamLegacy ((hipStream_t)1)
|
|
812
|
+
|
|
801
813
|
// Indicates that the external memory object is a dedicated resource
|
|
802
814
|
#define hipExternalMemoryDedicated 0x1
|
|
803
815
|
/**
|
|
@@ -973,7 +985,8 @@ typedef struct hipMemPoolProps {
|
|
|
973
985
|
* Windows-specific LPSECURITYATTRIBUTES required when @p hipMemHandleTypeWin32 is specified
|
|
974
986
|
*/
|
|
975
987
|
void* win32SecurityAttributes;
|
|
976
|
-
|
|
988
|
+
size_t maxSize; ///< Maximum pool size. When set to 0, defaults to a system dependent value
|
|
989
|
+
unsigned char reserved[56]; ///< Reserved for future use, must be 0
|
|
977
990
|
} hipMemPoolProps;
|
|
978
991
|
/**
|
|
979
992
|
* Opaque data structure for exporting a pool allocation
|
|
@@ -1269,13 +1282,7 @@ typedef struct hipMemAllocNodeParams {
|
|
|
1269
1282
|
void* dptr; ///< Returned device address of the allocation
|
|
1270
1283
|
} hipMemAllocNodeParams;
|
|
1271
1284
|
|
|
1272
|
-
|
|
1273
|
-
* Kernel node attributeID
|
|
1274
|
-
*/
|
|
1275
|
-
typedef enum hipKernelNodeAttrID {
|
|
1276
|
-
hipKernelNodeAttributeAccessPolicyWindow = 1,
|
|
1277
|
-
hipKernelNodeAttributeCooperative = 2,
|
|
1278
|
-
} hipKernelNodeAttrID;
|
|
1285
|
+
|
|
1279
1286
|
typedef enum hipAccessProperty {
|
|
1280
1287
|
hipAccessPropertyNormal = 0,
|
|
1281
1288
|
hipAccessPropertyStreaming = 1,
|
|
@@ -1288,10 +1295,39 @@ typedef struct hipAccessPolicyWindow {
|
|
|
1288
1295
|
hipAccessProperty missProp;
|
|
1289
1296
|
size_t num_bytes;
|
|
1290
1297
|
} hipAccessPolicyWindow;
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
|
|
1294
|
-
|
|
1298
|
+
|
|
1299
|
+
/**
|
|
1300
|
+
* Launch Attribute ID
|
|
1301
|
+
*/
|
|
1302
|
+
typedef enum hipLaunchAttributeID {
|
|
1303
|
+
hipLaunchAttributeAccessPolicyWindow = 1, /**< Valid for Streams, graph nodes, launches*/
|
|
1304
|
+
hipLaunchAttributeCooperative = 2, /**< Valid for graph nodes, launches */
|
|
1305
|
+
hipLaunchAttributePriority = 8, /**< Valid for graph node, streams, launches */
|
|
1306
|
+
} hipLaunchAttributeID;
|
|
1307
|
+
|
|
1308
|
+
/**
|
|
1309
|
+
* Launch Attribute Value
|
|
1310
|
+
*/
|
|
1311
|
+
typedef union hipLaunchAttributeValue {
|
|
1312
|
+
hipAccessPolicyWindow accessPolicyWindow; /**< Value of launch attribute::
|
|
1313
|
+
hipLaunchAttributeAccessPolicyWindow. */
|
|
1314
|
+
int cooperative; /**< Value of launch attribute ::hipLaunchAttributeCooperative */
|
|
1315
|
+
int priority; /**< Value of launch attribute :: hipLaunchAttributePriority. Execution
|
|
1316
|
+
priority of kernel. */
|
|
1317
|
+
} hipLaunchAttributeValue;
|
|
1318
|
+
|
|
1319
|
+
/**
|
|
1320
|
+
* Kernel node attributeID
|
|
1321
|
+
*/
|
|
1322
|
+
#define hipKernelNodeAttrID hipLaunchAttributeID
|
|
1323
|
+
#define hipKernelNodeAttributeAccessPolicyWindow hipLaunchAttributeAccessPolicyWindow
|
|
1324
|
+
#define hipKernelNodeAttributeCooperative hipLaunchAttributeCooperative
|
|
1325
|
+
#define hipKernelNodeAttributePriority hipLaunchAttributePriority
|
|
1326
|
+
|
|
1327
|
+
/**
|
|
1328
|
+
* Kernel node attribute value
|
|
1329
|
+
*/
|
|
1330
|
+
#define hipKernelNodeAttrValue hipLaunchAttributeValue
|
|
1295
1331
|
|
|
1296
1332
|
/**
|
|
1297
1333
|
* Memset node params
|
|
@@ -1383,6 +1419,34 @@ enum hipGraphDebugDotFlags {
|
|
|
1383
1419
|
hipGraphDebugDotFlagsHandles = 1
|
|
1384
1420
|
<< 10 /**< Adds node handles and every kernel function handle to output */
|
|
1385
1421
|
};
|
|
1422
|
+
|
|
1423
|
+
/**
|
|
1424
|
+
* hipGraphInstantiateWithParams results
|
|
1425
|
+
*/
|
|
1426
|
+
typedef enum hipGraphInstantiateResult {
|
|
1427
|
+
hipGraphInstantiateSuccess = 0, /**< Instantiation Success */
|
|
1428
|
+
hipGraphInstantiateError = 1, /**< Instantiation failed for an
|
|
1429
|
+
unexpected reason which is described in the return value of the function */
|
|
1430
|
+
hipGraphInstantiateInvalidStructure = 2, /**< Instantiation failed due
|
|
1431
|
+
to invalid structure, such as cycles */
|
|
1432
|
+
hipGraphInstantiateNodeOperationNotSupported = 3, /**< Instantiation for device launch failed
|
|
1433
|
+
because the graph contained an unsupported operation */
|
|
1434
|
+
hipGraphInstantiateMultipleDevicesNotSupported = 4, /**< Instantiation for device launch failed
|
|
1435
|
+
due to the nodes belonging to different contexts */
|
|
1436
|
+
}hipGraphInstantiateResult;
|
|
1437
|
+
|
|
1438
|
+
/**
|
|
1439
|
+
* Graph Instantiation parameters
|
|
1440
|
+
*/
|
|
1441
|
+
typedef struct hipGraphInstantiateParams {
|
|
1442
|
+
hipGraphNode_t errNode_out; /**< The node which caused instantiation to fail, if any*/
|
|
1443
|
+
unsigned long long flags; /**< Instantiation flags */
|
|
1444
|
+
hipGraphInstantiateResult result_out; /**< Whether instantiation was successful.
|
|
1445
|
+
If it failed, the reason why */
|
|
1446
|
+
hipStream_t uploadStream; /**< Upload stream */
|
|
1447
|
+
} hipGraphInstantiateParams;
|
|
1448
|
+
|
|
1449
|
+
|
|
1386
1450
|
/**
|
|
1387
1451
|
* Memory allocation properties
|
|
1388
1452
|
*/
|
|
@@ -1557,6 +1621,44 @@ typedef struct hipGraphNodeParams {
|
|
|
1557
1621
|
|
|
1558
1622
|
long long reserved2;
|
|
1559
1623
|
} hipGraphNodeParams;
|
|
1624
|
+
|
|
1625
|
+
/**
|
|
1626
|
+
* This port activates when the kernel has finished executing.
|
|
1627
|
+
*/
|
|
1628
|
+
#define hipGraphKernelNodePortDefault 0
|
|
1629
|
+
|
|
1630
|
+
/**
|
|
1631
|
+
* This port activates when all blocks of the kernel have begun execution.
|
|
1632
|
+
*/
|
|
1633
|
+
#define hipGraphKernelNodePortLaunchCompletion 2
|
|
1634
|
+
|
|
1635
|
+
/**
|
|
1636
|
+
* This port activates when all blocks of the kernel have performed
|
|
1637
|
+
* hipTriggerProgrammaticLaunchCompletion() or have terminated.
|
|
1638
|
+
* It must be used with edge type hipGraphDependencyTypeProgrammatic.
|
|
1639
|
+
*/
|
|
1640
|
+
#define hipGraphKernelNodePortProgrammatic 1
|
|
1641
|
+
|
|
1642
|
+
typedef enum hipGraphDependencyType {
|
|
1643
|
+
hipGraphDependencyTypeDefault = 0,
|
|
1644
|
+
hipGraphDependencyTypeProgrammatic = 1
|
|
1645
|
+
}hipGraphDependencyType;
|
|
1646
|
+
|
|
1647
|
+
typedef struct hipGraphEdgeData {
|
|
1648
|
+
unsigned char
|
|
1649
|
+
from_port; ///< This indicates when the dependency is triggered from the upstream node on the
|
|
1650
|
+
///< edge. The meaning is specific to the node type. A value of 0 in all cases
|
|
1651
|
+
///< means full completion of the upstream node, with memory visibility to the
|
|
1652
|
+
///< downstream node or portion thereof (indicated by to_port). Only kernel nodes
|
|
1653
|
+
///< define non-zero ports. A kernel node can use the following output port types:
|
|
1654
|
+
///< hipGraphKernelNodePortDefault, hipGraphKernelNodePortProgrammatic, or
|
|
1655
|
+
///< hipGraphKernelNodePortLaunchCompletion.
|
|
1656
|
+
unsigned char reserved[5]; ///< These bytes are unused and must be zeroed
|
|
1657
|
+
unsigned char
|
|
1658
|
+
to_port; ///< Currently no node types define non-zero ports. This field must be set to zero.
|
|
1659
|
+
unsigned char type; ///< This should be populated with a value from hipGraphDependencyType
|
|
1660
|
+
} hipGraphEdgeData;
|
|
1661
|
+
|
|
1560
1662
|
// Doxygen end group GlobalDefs
|
|
1561
1663
|
/**
|
|
1562
1664
|
* @}
|
|
@@ -1585,6 +1687,7 @@ typedef struct hipGraphNodeParams {
|
|
|
1585
1687
|
*/
|
|
1586
1688
|
// TODO-ctx - more description on error codes.
|
|
1587
1689
|
hipError_t hipInit(unsigned int flags);
|
|
1690
|
+
|
|
1588
1691
|
/**
|
|
1589
1692
|
* @brief Returns the approximate HIP driver version.
|
|
1590
1693
|
*
|
|
@@ -1755,6 +1858,18 @@ hipError_t hipDeviceReset(void);
|
|
|
1755
1858
|
* @see #hipGetDevice, #hipGetDeviceCount
|
|
1756
1859
|
*/
|
|
1757
1860
|
hipError_t hipSetDevice(int deviceId);
|
|
1861
|
+
/**
|
|
1862
|
+
* @brief Set a list of devices that can be used.
|
|
1863
|
+
*
|
|
1864
|
+
* @param[in] device_arr List of devices to try
|
|
1865
|
+
* @param[in] len Number of devices in specified list
|
|
1866
|
+
*
|
|
1867
|
+
* @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue
|
|
1868
|
+
*
|
|
1869
|
+
* @see #hipGetDevice, #hipGetDeviceCount, #hipSetDevice, #hipGetDeviceProperties, #hipSetDeviceFlags, #hipChooseDevice
|
|
1870
|
+
*
|
|
1871
|
+
* */
|
|
1872
|
+
hipError_t hipSetValidDevices(int* device_arr, int len);
|
|
1758
1873
|
/**
|
|
1759
1874
|
* @brief Return the default device id for the calling host thread.
|
|
1760
1875
|
*
|
|
@@ -2100,7 +2215,7 @@ hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event);
|
|
|
2100
2215
|
/**
|
|
2101
2216
|
* @brief Opens an interprocess event handle.
|
|
2102
2217
|
*
|
|
2103
|
-
* Opens an interprocess event handle exported from another process with
|
|
2218
|
+
* Opens an interprocess event handle exported from another process with hipIpcGetEventHandle. The returned
|
|
2104
2219
|
* hipEvent_t behaves like a locally created event with the hipEventDisableTiming flag specified. This event
|
|
2105
2220
|
* needs to be freed with hipEventDestroy. Operations on the imported event after the exported event has been freed
|
|
2106
2221
|
* with hipEventDestroy will result in undefined behavior. If the function is called within the same process where
|
|
@@ -2276,7 +2391,7 @@ hipError_t hipDrvGetErrorString(hipError_t hipError, const char** errorString);
|
|
|
2276
2391
|
* Create a new asynchronous stream. @p stream returns an opaque handle that can be used to
|
|
2277
2392
|
* reference the newly created stream in subsequent hipStream* commands. The stream is allocated on
|
|
2278
2393
|
* the heap and will remain allocated even if the handle goes out-of-scope. To release the memory
|
|
2279
|
-
* used by the stream,
|
|
2394
|
+
* used by the stream, application must call hipStreamDestroy.
|
|
2280
2395
|
*
|
|
2281
2396
|
* @return #hipSuccess, #hipErrorInvalidValue
|
|
2282
2397
|
*
|
|
@@ -2293,7 +2408,7 @@ hipError_t hipStreamCreate(hipStream_t* stream);
|
|
|
2293
2408
|
* Create a new asynchronous stream. @p stream returns an opaque handle that can be used to
|
|
2294
2409
|
* reference the newly created stream in subsequent hipStream* commands. The stream is allocated on
|
|
2295
2410
|
* the heap and will remain allocated even if the handle goes out-of-scope. To release the memory
|
|
2296
|
-
* used by the stream,
|
|
2411
|
+
* used by the stream, application must call hipStreamDestroy. Flags controls behavior of the
|
|
2297
2412
|
* stream. See #hipStreamDefault, #hipStreamNonBlocking.
|
|
2298
2413
|
*
|
|
2299
2414
|
*
|
|
@@ -2311,7 +2426,7 @@ hipError_t hipStreamCreateWithFlags(hipStream_t* stream, unsigned int flags);
|
|
|
2311
2426
|
* Create a new asynchronous stream with the specified priority. @p stream returns an opaque handle
|
|
2312
2427
|
* that can be used to reference the newly created stream in subsequent hipStream* commands. The
|
|
2313
2428
|
* stream is allocated on the heap and will remain allocated even if the handle goes out-of-scope.
|
|
2314
|
-
* To release the memory used by the stream,
|
|
2429
|
+
* To release the memory used by the stream, application must call hipStreamDestroy. Flags controls
|
|
2315
2430
|
* behavior of the stream. See #hipStreamDefault, #hipStreamNonBlocking.
|
|
2316
2431
|
*
|
|
2317
2432
|
*
|
|
@@ -2329,7 +2444,7 @@ hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags,
|
|
|
2329
2444
|
* and greatest stream priority respectively. Stream priorities follow a convention where lower numbers
|
|
2330
2445
|
* imply greater priorities. The range of meaningful stream priorities is given by
|
|
2331
2446
|
* [*greatestPriority, *leastPriority]. If the user attempts to create a stream with a priority value
|
|
2332
|
-
* that is outside the
|
|
2447
|
+
* that is outside the meaningful range as specified by this API, the priority is automatically
|
|
2333
2448
|
* clamped to within the valid range.
|
|
2334
2449
|
*/
|
|
2335
2450
|
hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority);
|
|
@@ -2401,8 +2516,8 @@ hipError_t hipStreamSynchronize(hipStream_t stream);
|
|
|
2401
2516
|
* All future work submitted to @p stream will wait until @p event reports completion before
|
|
2402
2517
|
* beginning execution.
|
|
2403
2518
|
*
|
|
2404
|
-
* This function only waits for commands in the current stream to complete. Notably
|
|
2405
|
-
* does not
|
|
2519
|
+
* This function only waits for commands in the current stream to complete. Notably, this function
|
|
2520
|
+
* does not implicitly wait for commands in the default stream to complete, even if the specified
|
|
2406
2521
|
* stream is created with hipStreamNonBlocking = 0.
|
|
2407
2522
|
*
|
|
2408
2523
|
* @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamSynchronize, hipStreamDestroy
|
|
@@ -2688,7 +2803,7 @@ hipError_t hipEventCreate(hipEvent_t* event);
|
|
|
2688
2803
|
*
|
|
2689
2804
|
* If hipEventRecord() has been previously called on this event, then this call will overwrite any
|
|
2690
2805
|
* existing state in event.
|
|
2691
|
-
*
|
|
2806
|
+
*
|
|
2692
2807
|
* If this function is called on an event that is currently being recorded, results are undefined
|
|
2693
2808
|
* - either outstanding recording may save state into the event, and the order is not guaranteed.
|
|
2694
2809
|
*
|
|
@@ -2730,7 +2845,6 @@ hipError_t hipEventDestroy(hipEvent_t event);
|
|
|
2730
2845
|
* If hipEventRecord() has not been called on @p event, this function returns #hipSuccess when no
|
|
2731
2846
|
* event is captured.
|
|
2732
2847
|
*
|
|
2733
|
-
* This function needs to support hipEventBlockingSync parameter.
|
|
2734
2848
|
*
|
|
2735
2849
|
* @param[in] event Event on which to wait.
|
|
2736
2850
|
*
|
|
@@ -3252,7 +3366,7 @@ hipError_t hipStreamAttachMemAsync(hipStream_t stream,
|
|
|
3252
3366
|
*
|
|
3253
3367
|
* Inserts a memory allocation operation into @p stream.
|
|
3254
3368
|
* A pointer to the allocated memory is returned immediately in *dptr.
|
|
3255
|
-
* The allocation must not be accessed until the
|
|
3369
|
+
* The allocation must not be accessed until the allocation operation completes.
|
|
3256
3370
|
* The allocation comes from the memory pool associated with the stream's device.
|
|
3257
3371
|
*
|
|
3258
3372
|
* @note The default memory pool of a device contains device memory from that device.
|
|
@@ -3504,7 +3618,7 @@ hipError_t hipMemPoolDestroy(hipMemPool_t mem_pool);
|
|
|
3504
3618
|
*
|
|
3505
3619
|
* Inserts an allocation operation into @p stream.
|
|
3506
3620
|
* A pointer to the allocated memory is returned immediately in @p dev_ptr.
|
|
3507
|
-
* The allocation must not be accessed until the
|
|
3621
|
+
* The allocation must not be accessed until the allocation operation completes.
|
|
3508
3622
|
* The allocation comes from the specified memory pool.
|
|
3509
3623
|
*
|
|
3510
3624
|
* @note The specified memory pool may be from a device different than that of the specified @p stream.
|
|
@@ -3915,6 +4029,68 @@ hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes);
|
|
|
3915
4029
|
* hipMemHostAlloc, hipMemHostGetDevicePointer
|
|
3916
4030
|
*/
|
|
3917
4031
|
hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes);
|
|
4032
|
+
/**
|
|
4033
|
+
* @brief Copies from one 1D array to device memory.
|
|
4034
|
+
*
|
|
4035
|
+
* @param[out] dstDevice Destination device pointer
|
|
4036
|
+
* @param[in] srcArray Source array
|
|
4037
|
+
* @param[in] srcOffset Offset in bytes of source array
|
|
4038
|
+
* @param[in] ByteCount Size of memory copy in bytes
|
|
4039
|
+
*
|
|
4040
|
+
* @return #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext,
|
|
4041
|
+
* #hipErrorInvalidValue
|
|
4042
|
+
*
|
|
4043
|
+
* @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost,
|
|
4044
|
+
* hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA,
|
|
4045
|
+
* hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD,
|
|
4046
|
+
* hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync,
|
|
4047
|
+
* hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo,
|
|
4048
|
+
* hipMemHostAlloc, hipMemHostGetDevicePointer
|
|
4049
|
+
*/
|
|
4050
|
+
hipError_t hipMemcpyAtoD(hipDeviceptr_t dstDevice, hipArray_t srcArray, size_t srcOffset,
|
|
4051
|
+
size_t ByteCount);
|
|
4052
|
+
/**
|
|
4053
|
+
* @brief Copies from device memory to a 1D array.
|
|
4054
|
+
*
|
|
4055
|
+
* @param[out] dstArray Destination array
|
|
4056
|
+
* @param[in] dstOffset Offset in bytes of destination array
|
|
4057
|
+
* @param[in] srcDevice Source device pointer
|
|
4058
|
+
* @param[in] ByteCount Size of memory copy in bytes
|
|
4059
|
+
*
|
|
4060
|
+
* @return #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext,
|
|
4061
|
+
* #hipErrorInvalidValue
|
|
4062
|
+
*
|
|
4063
|
+
* @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost,
|
|
4064
|
+
* hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA,
|
|
4065
|
+
* hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD,
|
|
4066
|
+
* hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync,
|
|
4067
|
+
* hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo,
|
|
4068
|
+
* hipMemHostAlloc, hipMemHostGetDevicePointer
|
|
4069
|
+
*/
|
|
4070
|
+
hipError_t hipMemcpyDtoA(hipArray_t dstArray, size_t dstOffset, hipDeviceptr_t srcDevice,
|
|
4071
|
+
size_t ByteCount);
|
|
4072
|
+
|
|
4073
|
+
/**
|
|
4074
|
+
* @brief Copies from one 1D array to another.
|
|
4075
|
+
*
|
|
4076
|
+
* @param[out] dstArray Destination array
|
|
4077
|
+
* @param[in] dstOffset Offset in bytes of destination array
|
|
4078
|
+
* @param[in] srcArray Source array
|
|
4079
|
+
* @param[in] srcOffset Offset in bytes of source array
|
|
4080
|
+
* @param[in] ByteCount Size of memory copy in bytes
|
|
4081
|
+
*
|
|
4082
|
+
* @return #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext,
|
|
4083
|
+
* #hipErrorInvalidValue
|
|
4084
|
+
*
|
|
4085
|
+
* @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost,
|
|
4086
|
+
* hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA,
|
|
4087
|
+
* hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD,
|
|
4088
|
+
* hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync,
|
|
4089
|
+
* hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo,
|
|
4090
|
+
* hipMemHostAlloc, hipMemHostGetDevicePointer
|
|
4091
|
+
*/
|
|
4092
|
+
hipError_t hipMemcpyAtoA(hipArray_t dstArray, size_t dstOffset, hipArray_t srcArray,
|
|
4093
|
+
size_t srcOffset, size_t ByteCount);
|
|
3918
4094
|
/**
|
|
3919
4095
|
* @brief Copy data from Host to Device asynchronously
|
|
3920
4096
|
*
|
|
@@ -3973,7 +4149,48 @@ hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, h
|
|
|
3973
4149
|
*/
|
|
3974
4150
|
hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes,
|
|
3975
4151
|
hipStream_t stream);
|
|
3976
|
-
|
|
4152
|
+
/**
|
|
4153
|
+
* @brief Copies from one 1D array to host memory.
|
|
4154
|
+
*
|
|
4155
|
+
* @param[out] dstHost Destination pointer
|
|
4156
|
+
* @param[in] srcArray Source array
|
|
4157
|
+
* @param[in] srcOffset Offset in bytes of source array
|
|
4158
|
+
* @param[in] ByteCount Size of memory copy in bytes
|
|
4159
|
+
* @param[in] stream Stream identifier
|
|
4160
|
+
*
|
|
4161
|
+
* @return #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext,
|
|
4162
|
+
* #hipErrorInvalidValue
|
|
4163
|
+
*
|
|
4164
|
+
* @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost,
|
|
4165
|
+
* hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA,
|
|
4166
|
+
* hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD,
|
|
4167
|
+
* hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync,
|
|
4168
|
+
* hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo,
|
|
4169
|
+
* hipMemHostAlloc, hipMemHostGetDevicePointer
|
|
4170
|
+
*/
|
|
4171
|
+
hipError_t hipMemcpyAtoHAsync(void* dstHost, hipArray_t srcArray, size_t srcOffset,
|
|
4172
|
+
size_t ByteCount, hipStream_t stream);
|
|
4173
|
+
/**
|
|
4174
|
+
* @brief Copies from host memory to a 1D array.
|
|
4175
|
+
*
|
|
4176
|
+
* @param[out] dstArray Destination array
|
|
4177
|
+
* @param[in] dstOffset Offset in bytes of destination array
|
|
4178
|
+
* @param[in] srcHost Source host pointer
|
|
4179
|
+
* @param[in] ByteCount Size of memory copy in bytes
|
|
4180
|
+
* @param[in] stream Stream identifier
|
|
4181
|
+
*
|
|
4182
|
+
* @return #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext,
|
|
4183
|
+
* #hipErrorInvalidValue
|
|
4184
|
+
*
|
|
4185
|
+
* @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost,
|
|
4186
|
+
* hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA,
|
|
4187
|
+
* hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD,
|
|
4188
|
+
* hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync,
|
|
4189
|
+
* hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo,
|
|
4190
|
+
* hipMemHostAlloc, hipMemHostGetDevicePointer
|
|
4191
|
+
*/
|
|
4192
|
+
hipError_t hipMemcpyHtoAAsync(hipArray_t dstArray, size_t dstOffset, const void* srcHost,
|
|
4193
|
+
size_t ByteCount, hipStream_t stream);
|
|
3977
4194
|
/**
|
|
3978
4195
|
* @brief Returns a global pointer from a module.
|
|
3979
4196
|
* Returns in *dptr and *bytes the pointer and size of the global of name name located in module hmod.
|
|
@@ -4002,6 +4219,8 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes,
|
|
|
4002
4219
|
*/
|
|
4003
4220
|
hipError_t hipGetSymbolAddress(void** devPtr, const void* symbol);
|
|
4004
4221
|
|
|
4222
|
+
|
|
4223
|
+
|
|
4005
4224
|
/**
|
|
4006
4225
|
* @brief Gets the size of the given symbol on the device.
|
|
4007
4226
|
*
|
|
@@ -4013,14 +4232,38 @@ hipError_t hipGetSymbolAddress(void** devPtr, const void* symbol);
|
|
|
4013
4232
|
*/
|
|
4014
4233
|
hipError_t hipGetSymbolSize(size_t* size, const void* symbol);
|
|
4015
4234
|
|
|
4235
|
+
/**
|
|
4236
|
+
* @brief Gets the pointer of requested HIP driver function.
|
|
4237
|
+
*
|
|
4238
|
+
* @param[in] symbol The Symbol name of the driver function to request.
|
|
4239
|
+
* @param[out] pfn Output pointer to the requested driver function.
|
|
4240
|
+
* @param[in] hipVersion The HIP version for the requested driver function symbol.
|
|
4241
|
+
* HIP version is defined as 100*version_major + version_minor. For example, in HIP 6.1, the
|
|
4242
|
+
* hipversion is 601, for the symbol function "hipGetDeviceProperties", the specified hipVersion 601
|
|
4243
|
+
* is greater or equal to the version 600, the symbol function will be handled properly as backend
|
|
4244
|
+
* compatible function.
|
|
4245
|
+
*
|
|
4246
|
+
* @param[in] flags Currently only default flag is supported.
|
|
4247
|
+
* @param[out] symbolStatus Optional enumeration for returned status of searching for symbol driver
|
|
4248
|
+
* function based on the input hipVersion.
|
|
4249
|
+
*
|
|
4250
|
+
* Returns hipSuccess if the returned pfn is addressed to the pointer of found driver function.
|
|
4251
|
+
*
|
|
4252
|
+
* @return #hipSuccess, #hipErrorInvalidValue.
|
|
4253
|
+
*/
|
|
4254
|
+
hipError_t hipGetProcAddress(const char* symbol, void** pfn, int hipVersion, uint64_t flags,
|
|
4255
|
+
hipDriverProcAddressQueryResult* symbolStatus);
|
|
4256
|
+
|
|
4016
4257
|
/**
|
|
4017
4258
|
* @brief Copies data to the given symbol on the device.
|
|
4018
4259
|
* Symbol HIP APIs allow a kernel to define a device-side data symbol which can be accessed on
|
|
4019
4260
|
* the host side. The symbol can be in __constant or device space.
|
|
4020
4261
|
* Note that the symbol name needs to be encased in the HIP_SYMBOL macro.
|
|
4021
4262
|
* This also applies to hipMemcpyFromSymbol, hipGetSymbolAddress, and hipGetSymbolSize.
|
|
4022
|
-
* For
|
|
4023
|
-
* https://
|
|
4263
|
+
* For detailed usage, see the
|
|
4264
|
+
* <a href="https://rocm.docs.amd.com/projects/HIP/en/latest/how-to/hip_porting_guide.html#memcpytosymbol">memcpyToSymbol example</a>
|
|
4265
|
+
* in the HIP Porting Guide.
|
|
4266
|
+
*
|
|
4024
4267
|
*
|
|
4025
4268
|
* @param[out] symbol pointer to the device symbol
|
|
4026
4269
|
* @param[in] src pointer to the source address
|
|
@@ -4520,6 +4763,27 @@ hipError_t hipMemcpy2DToArray(hipArray_t dst, size_t wOffset, size_t hOffset, co
|
|
|
4520
4763
|
hipError_t hipMemcpy2DToArrayAsync(hipArray_t dst, size_t wOffset, size_t hOffset, const void* src,
|
|
4521
4764
|
size_t spitch, size_t width, size_t height, hipMemcpyKind kind,
|
|
4522
4765
|
hipStream_t stream __dparm(0));
|
|
4766
|
+
/**
|
|
4767
|
+
* @brief Copies data between host and device.
|
|
4768
|
+
*
|
|
4769
|
+
* @param[in] dst Destination memory address
|
|
4770
|
+
* @param[in] wOffsetDst Destination starting X offset
|
|
4771
|
+
* @param[in] hOffsetDst Destination starting Y offset
|
|
4772
|
+
* @param[in] src Source memory address
|
|
4773
|
+
* @param[in] wOffsetSrc Source starting X offset
|
|
4774
|
+
* @param[in] hOffsetSrc Source starting Y offset (rows)
|
|
4775
|
+
* @param[in] width Width of matrix transfer (columns in bytes)
|
|
4776
|
+
* @param[in] height Height of matrix transfer (rows)
|
|
4777
|
+
* @param[in] kind Type of transfer
|
|
4778
|
+
*
|
|
4779
|
+
* @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidMemcpyDirection
|
|
4780
|
+
*
|
|
4781
|
+
* @see hipMemcpy, hipMemcpyToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,
|
|
4782
|
+
* hipMemcpyAsync
|
|
4783
|
+
*/
|
|
4784
|
+
hipError_t hipMemcpy2DArrayToArray(hipArray_t dst, size_t wOffsetDst, size_t hOffsetDst,
|
|
4785
|
+
hipArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc,
|
|
4786
|
+
size_t width, size_t height, hipMemcpyKind kind);
|
|
4523
4787
|
/**
|
|
4524
4788
|
* @brief Copies data between host and device.
|
|
4525
4789
|
*
|
|
@@ -4734,7 +4998,7 @@ hipError_t hipDeviceDisablePeerAccess(int peerDeviceId);
|
|
|
4734
4998
|
* @param [out] psize - Size of allocation
|
|
4735
4999
|
* @param [in] dptr- Device Pointer
|
|
4736
5000
|
*
|
|
4737
|
-
* @returns #hipSuccess, #
|
|
5001
|
+
* @returns #hipSuccess, #hipErrorNotFound
|
|
4738
5002
|
*
|
|
4739
5003
|
* @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,
|
|
4740
5004
|
* hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice
|
|
@@ -5225,6 +5489,16 @@ hipError_t hipFuncGetAttributes(struct hipFuncAttributes* attr, const void* func
|
|
|
5225
5489
|
* @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDeviceFunction
|
|
5226
5490
|
*/
|
|
5227
5491
|
hipError_t hipFuncGetAttribute(int* value, hipFunction_attribute attrib, hipFunction_t hfunc);
|
|
5492
|
+
/**
|
|
5493
|
+
* @brief Gets pointer to device entry function that matches entry function symbolPtr.
|
|
5494
|
+
*
|
|
5495
|
+
* @param [out] functionPtr Device entry function
|
|
5496
|
+
* @param [in] symbolPtr Pointer to device entry function to search for
|
|
5497
|
+
*
|
|
5498
|
+
* @returns #hipSuccess, #hipErrorInvalidDeviceFunction
|
|
5499
|
+
*
|
|
5500
|
+
*/
|
|
5501
|
+
hipError_t hipGetFuncBySymbol(hipFunction_t* functionPtr, const void* symbolPtr);
|
|
5228
5502
|
/**
|
|
5229
5503
|
* @brief returns the handle of the texture reference with the name from the module.
|
|
5230
5504
|
*
|
|
@@ -5646,12 +5920,26 @@ hipError_t hipLaunchKernel(const void* function_address,
|
|
|
5646
5920
|
/**
|
|
5647
5921
|
* @brief Enqueues a host function call in a stream.
|
|
5648
5922
|
*
|
|
5649
|
-
* @param [in] stream - stream to enqueue work
|
|
5650
|
-
* @param [in] fn - function to call once
|
|
5923
|
+
* @param [in] stream - The stream to enqueue work in.
|
|
5924
|
+
* @param [in] fn - The function to call once enqueued preceding operations are complete.
|
|
5651
5925
|
* @param [in] userData - User-specified data to be passed to the function.
|
|
5926
|
+
*
|
|
5652
5927
|
* @returns #hipSuccess, #hipErrorInvalidResourceHandle, #hipErrorInvalidValue,
|
|
5653
5928
|
* #hipErrorNotSupported
|
|
5654
|
-
*
|
|
5929
|
+
*
|
|
5930
|
+
* The host function to call in this API will be executed after the preceding operations in
|
|
5931
|
+
* the stream are complete. The function is a blocking operation that blocks operations in the
|
|
5932
|
+
* stream that follow it, until the function returns.
|
|
5933
|
+
* Event synchronization and internal callback functions make sure enqueued operations will
|
|
5934
|
+
* execute in order, in the stream.
|
|
5935
|
+
*
|
|
5936
|
+
* The host function must not make any HIP API calls. The host function is non-reentrant. It must
|
|
5937
|
+
* not perform synchronization with any operation that may depend on other processing execution
|
|
5938
|
+
* but is not enqueued to run earlier in the stream.
|
|
5939
|
+
*
|
|
5940
|
+
* Host functions that are enqueued respectively in different non-blocking streams can run concurrently.
|
|
5941
|
+
*
|
|
5942
|
+
* @warning This API is marked as beta, meaning, while this is feature complete,
|
|
5655
5943
|
* it is still open to changes and may have outstanding issues.
|
|
5656
5944
|
*/
|
|
5657
5945
|
hipError_t hipLaunchHostFunc(hipStream_t stream, hipHostFn_t fn, void* userData);
|
|
@@ -6181,7 +6469,7 @@ hipError_t hipGetTextureAlignmentOffset(
|
|
|
6181
6469
|
DEPRECATED(DEPRECATED_MSG)
|
|
6182
6470
|
hipError_t hipUnbindTexture(const textureReference* tex);
|
|
6183
6471
|
/**
|
|
6184
|
-
* @brief Gets the
|
|
6472
|
+
* @brief Gets the address for a texture reference.
|
|
6185
6473
|
*
|
|
6186
6474
|
* @param [out] dev_ptr Pointer of device address.
|
|
6187
6475
|
* @param [in] texRef Pointer of texture reference.
|
|
@@ -6564,6 +6852,30 @@ int hipGetStreamDeviceId(hipStream_t stream);
|
|
|
6564
6852
|
*/
|
|
6565
6853
|
hipError_t hipStreamBeginCapture(hipStream_t stream, hipStreamCaptureMode mode);
|
|
6566
6854
|
|
|
6855
|
+
/**
|
|
6856
|
+
* @brief Begins graph capture on a stream to an existing graph.
|
|
6857
|
+
*
|
|
6858
|
+
* @param [in] stream - Stream to initiate capture.
|
|
6859
|
+
* @param [in] graph - Graph to capture into.
|
|
6860
|
+
* @param [in] dependencies - Dependencies of the first node captured in the stream. Can be NULL if
|
|
6861
|
+
* numDependencies is 0.
|
|
6862
|
+
* @param [in] dependencyData - Optional array of data associated with each dependency.
|
|
6863
|
+
* @param [in] numDependencies - Number of dependencies.
|
|
6864
|
+
* @param [in] mode - Controls the interaction of this capture sequence with other API calls that
|
|
6865
|
+
are not safe.
|
|
6866
|
+
*
|
|
6867
|
+
* @returns #hipSuccess, #hipErrorInvalidValue
|
|
6868
|
+
*
|
|
6869
|
+
* @warning : param "const hipGraphEdgeData* dependencyData" is currently not supported and has to
|
|
6870
|
+
be passed as nullptr. This API is marked as beta, meaning, while this is feature complete, it is still
|
|
6871
|
+
open to changes and may have outstanding issues.
|
|
6872
|
+
*
|
|
6873
|
+
*/
|
|
6874
|
+
hipError_t hipStreamBeginCaptureToGraph(hipStream_t stream, hipGraph_t graph,
|
|
6875
|
+
const hipGraphNode_t* dependencies,
|
|
6876
|
+
const hipGraphEdgeData* dependencyData,
|
|
6877
|
+
size_t numDependencies, hipStreamCaptureMode mode);
|
|
6878
|
+
|
|
6567
6879
|
/**
|
|
6568
6880
|
* @brief Ends capture on a stream, returning the captured graph.
|
|
6569
6881
|
*
|
|
@@ -6902,6 +7214,19 @@ hipError_t hipGraphInstantiate(hipGraphExec_t* pGraphExec, hipGraph_t graph,
|
|
|
6902
7214
|
hipError_t hipGraphInstantiateWithFlags(hipGraphExec_t* pGraphExec, hipGraph_t graph,
|
|
6903
7215
|
unsigned long long flags);
|
|
6904
7216
|
|
|
7217
|
+
/**
|
|
7218
|
+
* @brief Creates an executable graph from a graph.
|
|
7219
|
+
*
|
|
7220
|
+
* @param [out] pGraphExec - pointer to instantiated executable graph that is created.
|
|
7221
|
+
* @param [in] graph - instance of graph to instantiate.
|
|
7222
|
+
* @param [in] instantiateParams - Graph Instantiate Params
|
|
7223
|
+
* @returns #hipSuccess, #hipErrorInvalidValue
|
|
7224
|
+
*
|
|
7225
|
+
* @warning : This API is marked as beta, meaning, while this is feature complete,
|
|
7226
|
+
* it is still open to changes and may have outstanding issues.
|
|
7227
|
+
*/
|
|
7228
|
+
hipError_t hipGraphInstantiateWithParams(hipGraphExec_t* pGraphExec, hipGraph_t graph,
|
|
7229
|
+
hipGraphInstantiateParams *instantiateParams);
|
|
6905
7230
|
/**
|
|
6906
7231
|
* @brief launches an executable graph in a stream
|
|
6907
7232
|
*
|
|
@@ -6926,6 +7251,22 @@ hipError_t hipGraphLaunch(hipGraphExec_t graphExec, hipStream_t stream);
|
|
|
6926
7251
|
*/
|
|
6927
7252
|
hipError_t hipGraphUpload(hipGraphExec_t graphExec, hipStream_t stream);
|
|
6928
7253
|
|
|
7254
|
+
/**
|
|
7255
|
+
* @brief Creates a kernel execution node and adds it to a graph.
|
|
7256
|
+
*
|
|
7257
|
+
* @param [out] pGraphNode - pointer to graph node to create.
|
|
7258
|
+
* @param [in] graph - instance of graph to add the created node.
|
|
7259
|
+
* @param [in] pDependencies - pointer to the dependencies on the kernel execution node.
|
|
7260
|
+
* @param [in] numDependencies - the number of the dependencies.
|
|
7261
|
+
* @param [in] nodeParams - pointer to the parameters for the node.
|
|
7262
|
+
* @returns #hipSuccess, #hipErrorInvalidValue.
|
|
7263
|
+
* @warning : This API is marked as beta, meaning, while this is feature complete,
|
|
7264
|
+
* it is still open to changes and may have outstanding issues.
|
|
7265
|
+
*/
|
|
7266
|
+
hipError_t hipGraphAddNode(hipGraphNode_t *pGraphNode, hipGraph_t graph,
|
|
7267
|
+
const hipGraphNode_t *pDependencies, size_t numDependencies,
|
|
7268
|
+
hipGraphNodeParams *nodeParams);
|
|
7269
|
+
|
|
6929
7270
|
/**
|
|
6930
7271
|
* @brief Destroys an executable graph
|
|
6931
7272
|
*
|
|
@@ -8906,6 +9247,7 @@ static inline hipError_t hipMallocManaged(T** devPtr, size_t size,
|
|
|
8906
9247
|
return hipMallocManaged((void**)devPtr, size, flags);
|
|
8907
9248
|
}
|
|
8908
9249
|
|
|
9250
|
+
|
|
8909
9251
|
#endif
|
|
8910
9252
|
#endif
|
|
8911
9253
|
// doxygen end HIP API
|
|
@@ -4,9 +4,9 @@
|
|
|
4
4
|
#define HIP_VERSION_H
|
|
5
5
|
|
|
6
6
|
#define HIP_VERSION_MAJOR 6
|
|
7
|
-
#define HIP_VERSION_MINOR
|
|
8
|
-
#define HIP_VERSION_PATCH
|
|
9
|
-
#define HIP_VERSION_GITHASH "
|
|
7
|
+
#define HIP_VERSION_MINOR 2
|
|
8
|
+
#define HIP_VERSION_PATCH 41134
|
|
9
|
+
#define HIP_VERSION_GITHASH "65d174c3e"
|
|
10
10
|
#define HIP_VERSION_BUILD_ID 0
|
|
11
11
|
#define HIP_VERSION_BUILD_NAME ""
|
|
12
12
|
#define HIP_VERSION (HIP_VERSION_MAJOR * 10000000 + HIP_VERSION_MINOR * 100000 + HIP_VERSION_PATCH)
|