triton-windows 3.2.0.post12__cp39-cp39-win_amd64.whl → 3.3.0a0.post12__cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of triton-windows might be problematic. Click here for more details.
- triton/_C/libtriton.pyd +0 -0
- triton/__init__.py +3 -3
- triton/_internal_testing.py +59 -4
- triton/_utils.py +35 -0
- triton/backends/amd/compiler.py +121 -74
- triton/backends/amd/driver.py +77 -43
- triton/backends/amd/include/hip/amd_detail/amd_device_functions.h +28 -49
- triton/backends/amd/include/hip/amd_detail/amd_hip_atomic.h +35 -9
- triton/backends/amd/include/hip/amd_detail/amd_hip_bf16.h +761 -284
- triton/backends/amd/include/hip/amd_detail/amd_hip_cooperative_groups.h +9 -3
- triton/backends/amd/include/hip/amd_detail/amd_hip_fp8.h +1391 -0
- triton/backends/amd/include/hip/amd_detail/amd_hip_gl_interop.h +3 -3
- triton/backends/amd/include/hip/amd_detail/amd_warp_functions.h +44 -0
- triton/backends/amd/include/hip/amd_detail/amd_warp_sync_functions.h +288 -0
- triton/backends/amd/include/hip/amd_detail/hip_api_trace.hpp +110 -14
- triton/backends/amd/include/hip/amd_detail/hip_prof_str.h +504 -103
- triton/backends/amd/include/hip/amd_detail/hip_runtime_prof.h +2 -1
- triton/backends/amd/include/hip/amd_detail/host_defines.h +4 -0
- triton/backends/amd/include/hip/hip_ext.h +4 -2
- triton/backends/amd/include/hip/hip_fp8.h +33 -0
- triton/backends/amd/include/hip/hip_runtime_api.h +375 -33
- triton/backends/amd/include/hip/hip_version.h +3 -3
- triton/backends/amd/include/hip/hiprtc.h +25 -25
- triton/backends/amd/include/hsa/amd_hsa_elf.h +40 -14
- triton/backends/amd/include/hsa/hsa.h +11 -2
- triton/backends/amd/include/hsa/hsa_api_trace.h +30 -17
- triton/backends/amd/include/hsa/hsa_api_trace_version.h +68 -0
- triton/backends/amd/include/hsa/hsa_ext_amd.h +83 -27
- triton/backends/amd/include/hsa/hsa_ven_amd_aqlprofile.h +46 -46
- triton/backends/amd/include/hsa/hsa_ven_amd_pc_sampling.h +416 -0
- triton/backends/amd/include/roctracer/hip_ostream_ops.h +84 -4
- triton/backends/amd/include/roctracer/hsa_ostream_ops.h +260 -0
- triton/backends/amd/include/roctracer/hsa_prof_str.h +51 -19
- triton/backends/amd/lib/asanrtl.bc +0 -0
- triton/backends/compiler.py +25 -225
- triton/backends/driver.py +7 -2
- triton/backends/nvidia/bin/ptxas.exe +0 -0
- triton/backends/nvidia/compiler.py +135 -90
- triton/backends/nvidia/driver.c +0 -1
- triton/backends/nvidia/driver.py +135 -49
- triton/backends/nvidia/include/cuda.h +2162 -241
- triton/backends/nvidia/lib/x64/cuda.lib +0 -0
- triton/compiler/__init__.py +2 -2
- triton/compiler/code_generator.py +334 -231
- triton/compiler/compiler.py +77 -66
- triton/language/__init__.py +22 -5
- triton/language/core.py +448 -74
- triton/language/extra/cuda/_experimental_tma.py +3 -5
- triton/language/math.py +1 -1
- triton/language/random.py +2 -1
- triton/language/semantic.py +206 -52
- triton/language/standard.py +35 -18
- triton/runtime/_allocation.py +32 -0
- triton/runtime/autotuner.py +27 -32
- triton/runtime/build.py +1 -48
- triton/runtime/cache.py +6 -6
- triton/runtime/errors.py +10 -0
- triton/runtime/interpreter.py +179 -45
- triton/runtime/jit.py +149 -190
- triton/testing.py +39 -11
- triton/tools/compile.py +27 -20
- triton/tools/{compile.c → extra/cuda/compile.c} +1 -0
- triton/tools/mxfp.py +301 -0
- {triton_windows-3.2.0.post12.dist-info → triton_windows-3.3.0a0.post12.dist-info}/METADATA +5 -2
- {triton_windows-3.2.0.post12.dist-info → triton_windows-3.3.0a0.post12.dist-info}/RECORD +68 -59
- {triton_windows-3.2.0.post12.dist-info → triton_windows-3.3.0a0.post12.dist-info}/top_level.txt +2 -0
- /triton/tools/{compile.h → extra/cuda/compile.h} +0 -0
- {triton_windows-3.2.0.post12.dist-info → triton_windows-3.3.0a0.post12.dist-info}/WHEEL +0 -0
|
@@ -2795,6 +2795,11 @@ inline static std::ostream& operator<<(std::ostream& out, const hipMemPoolProps&
|
|
|
2795
2795
|
roctracer::hip_support::detail::operator<<(out, 0);
|
|
2796
2796
|
std::operator<<(out, ", ");
|
|
2797
2797
|
}
|
|
2798
|
+
if (std::string("hipMemPoolProps::maxSize").find(HIP_structs_regex) != std::string::npos) {
|
|
2799
|
+
std::operator<<(out, "maxSize=");
|
|
2800
|
+
roctracer::hip_support::detail::operator<<(out, v.maxSize);
|
|
2801
|
+
std::operator<<(out, ", ");
|
|
2802
|
+
}
|
|
2798
2803
|
if (std::string("hipMemPoolProps::location").find(HIP_structs_regex) != std::string::npos) {
|
|
2799
2804
|
std::operator<<(out, "location=");
|
|
2800
2805
|
roctracer::hip_support::detail::operator<<(out, v.location);
|
|
@@ -3229,17 +3234,22 @@ inline static std::ostream& operator<<(std::ostream& out, const hipAccessPolicyW
|
|
|
3229
3234
|
std::operator<<(out, '}');
|
|
3230
3235
|
return out;
|
|
3231
3236
|
}
|
|
3232
|
-
inline static std::ostream& operator<<(std::ostream& out, const
|
|
3237
|
+
inline static std::ostream& operator<<(std::ostream& out, const hipLaunchAttributeValue& v)
|
|
3233
3238
|
{
|
|
3234
3239
|
std::operator<<(out, '{');
|
|
3235
3240
|
HIP_depth_max_cnt++;
|
|
3236
3241
|
if (HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) {
|
|
3237
|
-
if (std::string("
|
|
3242
|
+
if (std::string("hipLaunchAttributeValue::priority").find(HIP_structs_regex) != std::string::npos) {
|
|
3243
|
+
std::operator<<(out, "priority=");
|
|
3244
|
+
roctracer::hip_support::detail::operator<<(out, v.priority);
|
|
3245
|
+
std::operator<<(out, ", ");
|
|
3246
|
+
}
|
|
3247
|
+
if (std::string("hipLaunchAttributeValue::cooperative").find(HIP_structs_regex) != std::string::npos) {
|
|
3238
3248
|
std::operator<<(out, "cooperative=");
|
|
3239
3249
|
roctracer::hip_support::detail::operator<<(out, v.cooperative);
|
|
3240
3250
|
std::operator<<(out, ", ");
|
|
3241
3251
|
}
|
|
3242
|
-
if (std::string("
|
|
3252
|
+
if (std::string("hipLaunchAttributeValue::accessPolicyWindow").find(HIP_structs_regex) != std::string::npos) {
|
|
3243
3253
|
std::operator<<(out, "accessPolicyWindow=");
|
|
3244
3254
|
roctracer::hip_support::detail::operator<<(out, v.accessPolicyWindow);
|
|
3245
3255
|
}
|
|
@@ -3287,6 +3297,35 @@ inline static std::ostream& operator<<(std::ostream& out, const HIP_MEMSET_NODE_
|
|
|
3287
3297
|
std::operator<<(out, '}');
|
|
3288
3298
|
return out;
|
|
3289
3299
|
}
|
|
3300
|
+
inline static std::ostream& operator<<(std::ostream& out, const hipGraphInstantiateParams& v)
|
|
3301
|
+
{
|
|
3302
|
+
std::operator<<(out, '{');
|
|
3303
|
+
HIP_depth_max_cnt++;
|
|
3304
|
+
if (HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) {
|
|
3305
|
+
if (std::string("hipGraphInstantiateParams::uploadStream").find(HIP_structs_regex) != std::string::npos) {
|
|
3306
|
+
std::operator<<(out, "uploadStream=");
|
|
3307
|
+
roctracer::hip_support::detail::operator<<(out, v.uploadStream);
|
|
3308
|
+
std::operator<<(out, ", ");
|
|
3309
|
+
}
|
|
3310
|
+
if (std::string("hipGraphInstantiateParams::result_out").find(HIP_structs_regex) != std::string::npos) {
|
|
3311
|
+
std::operator<<(out, "result_out=");
|
|
3312
|
+
roctracer::hip_support::detail::operator<<(out, v.result_out);
|
|
3313
|
+
std::operator<<(out, ", ");
|
|
3314
|
+
}
|
|
3315
|
+
if (std::string("hipGraphInstantiateParams::flags").find(HIP_structs_regex) != std::string::npos) {
|
|
3316
|
+
std::operator<<(out, "flags=");
|
|
3317
|
+
roctracer::hip_support::detail::operator<<(out, v.flags);
|
|
3318
|
+
std::operator<<(out, ", ");
|
|
3319
|
+
}
|
|
3320
|
+
if (std::string("hipGraphInstantiateParams::errNode_out").find(HIP_structs_regex) != std::string::npos) {
|
|
3321
|
+
std::operator<<(out, "errNode_out=");
|
|
3322
|
+
roctracer::hip_support::detail::operator<<(out, v.errNode_out);
|
|
3323
|
+
}
|
|
3324
|
+
};
|
|
3325
|
+
HIP_depth_max_cnt--;
|
|
3326
|
+
std::operator<<(out, '}');
|
|
3327
|
+
return out;
|
|
3328
|
+
}
|
|
3290
3329
|
inline static std::ostream& operator<<(std::ostream& out, const hipMemAllocationProp& v)
|
|
3291
3330
|
{
|
|
3292
3331
|
std::operator<<(out, '{');
|
|
@@ -3513,6 +3552,35 @@ inline static std::ostream& operator<<(std::ostream& out, const hipGraphNodePara
|
|
|
3513
3552
|
std::operator<<(out, '}');
|
|
3514
3553
|
return out;
|
|
3515
3554
|
}
|
|
3555
|
+
inline static std::ostream& operator<<(std::ostream& out, const hipGraphEdgeData& v)
|
|
3556
|
+
{
|
|
3557
|
+
std::operator<<(out, '{');
|
|
3558
|
+
HIP_depth_max_cnt++;
|
|
3559
|
+
if (HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) {
|
|
3560
|
+
if (std::string("hipGraphEdgeData::type").find(HIP_structs_regex) != std::string::npos) {
|
|
3561
|
+
std::operator<<(out, "type=");
|
|
3562
|
+
roctracer::hip_support::detail::operator<<(out, v.type);
|
|
3563
|
+
std::operator<<(out, ", ");
|
|
3564
|
+
}
|
|
3565
|
+
if (std::string("hipGraphEdgeData::to_port").find(HIP_structs_regex) != std::string::npos) {
|
|
3566
|
+
std::operator<<(out, "to_port=");
|
|
3567
|
+
roctracer::hip_support::detail::operator<<(out, v.to_port);
|
|
3568
|
+
std::operator<<(out, ", ");
|
|
3569
|
+
}
|
|
3570
|
+
if (std::string("hipGraphEdgeData::reserved").find(HIP_structs_regex) != std::string::npos) {
|
|
3571
|
+
std::operator<<(out, "reserved=");
|
|
3572
|
+
roctracer::hip_support::detail::operator<<(out, 0);
|
|
3573
|
+
std::operator<<(out, ", ");
|
|
3574
|
+
}
|
|
3575
|
+
if (std::string("hipGraphEdgeData::from_port").find(HIP_structs_regex) != std::string::npos) {
|
|
3576
|
+
std::operator<<(out, "from_port=");
|
|
3577
|
+
roctracer::hip_support::detail::operator<<(out, v.from_port);
|
|
3578
|
+
}
|
|
3579
|
+
};
|
|
3580
|
+
HIP_depth_max_cnt--;
|
|
3581
|
+
std::operator<<(out, '}');
|
|
3582
|
+
return out;
|
|
3583
|
+
}
|
|
3516
3584
|
inline static std::ostream& operator<<(std::ostream& out, const hipDeviceProp_tR0000& v)
|
|
3517
3585
|
{
|
|
3518
3586
|
std::operator<<(out, '{');
|
|
@@ -4352,7 +4420,7 @@ inline static std::ostream& operator<<(std::ostream& out, const hipAccessPolicyW
|
|
|
4352
4420
|
return out;
|
|
4353
4421
|
}
|
|
4354
4422
|
|
|
4355
|
-
inline static std::ostream& operator<<(std::ostream& out, const
|
|
4423
|
+
inline static std::ostream& operator<<(std::ostream& out, const hipLaunchAttributeValue& v)
|
|
4356
4424
|
{
|
|
4357
4425
|
roctracer::hip_support::detail::operator<<(out, v);
|
|
4358
4426
|
return out;
|
|
@@ -4364,6 +4432,12 @@ inline static std::ostream& operator<<(std::ostream& out, const HIP_MEMSET_NODE_
|
|
|
4364
4432
|
return out;
|
|
4365
4433
|
}
|
|
4366
4434
|
|
|
4435
|
+
inline static std::ostream& operator<<(std::ostream& out, const hipGraphInstantiateParams& v)
|
|
4436
|
+
{
|
|
4437
|
+
roctracer::hip_support::detail::operator<<(out, v);
|
|
4438
|
+
return out;
|
|
4439
|
+
}
|
|
4440
|
+
|
|
4367
4441
|
inline static std::ostream& operator<<(std::ostream& out, const hipMemAllocationProp& v)
|
|
4368
4442
|
{
|
|
4369
4443
|
roctracer::hip_support::detail::operator<<(out, v);
|
|
@@ -4424,6 +4498,12 @@ inline static std::ostream& operator<<(std::ostream& out, const hipGraphNodePara
|
|
|
4424
4498
|
return out;
|
|
4425
4499
|
}
|
|
4426
4500
|
|
|
4501
|
+
inline static std::ostream& operator<<(std::ostream& out, const hipGraphEdgeData& v)
|
|
4502
|
+
{
|
|
4503
|
+
roctracer::hip_support::detail::operator<<(out, v);
|
|
4504
|
+
return out;
|
|
4505
|
+
}
|
|
4506
|
+
|
|
4427
4507
|
inline static std::ostream& operator<<(std::ostream& out, const hipDeviceProp_tR0000& v)
|
|
4428
4508
|
{
|
|
4429
4509
|
roctracer::hip_support::detail::operator<<(out, v);
|
|
@@ -785,6 +785,236 @@ inline static std::ostream& operator<<(std::ostream& out, const hsa_ext_images_1
|
|
|
785
785
|
std::operator<<(out, '}');
|
|
786
786
|
return out;
|
|
787
787
|
}
|
|
788
|
+
inline static std::ostream& operator<<(std::ostream& out, const perf_sample_hosttrap_v1_t& v)
|
|
789
|
+
{
|
|
790
|
+
std::operator<<(out, '{');
|
|
791
|
+
HSA_depth_max_cnt++;
|
|
792
|
+
if (HSA_depth_max == -1 || HSA_depth_max_cnt <= HSA_depth_max) {
|
|
793
|
+
if (std::string("perf_sample_hosttrap_v1_t::correlation_id").find(HSA_structs_regex) != std::string::npos) {
|
|
794
|
+
std::operator<<(out, "correlation_id=");
|
|
795
|
+
roctracer::hsa_support::detail::operator<<(out, v.correlation_id);
|
|
796
|
+
std::operator<<(out, ", ");
|
|
797
|
+
}
|
|
798
|
+
if (std::string("perf_sample_hosttrap_v1_t::timestamp").find(HSA_structs_regex) != std::string::npos) {
|
|
799
|
+
std::operator<<(out, "timestamp=");
|
|
800
|
+
roctracer::hsa_support::detail::operator<<(out, v.timestamp);
|
|
801
|
+
std::operator<<(out, ", ");
|
|
802
|
+
}
|
|
803
|
+
if (std::string("perf_sample_hosttrap_v1_t::reserved1").find(HSA_structs_regex) != std::string::npos) {
|
|
804
|
+
std::operator<<(out, "reserved1=");
|
|
805
|
+
roctracer::hsa_support::detail::operator<<(out, v.reserved1);
|
|
806
|
+
std::operator<<(out, ", ");
|
|
807
|
+
}
|
|
808
|
+
if (std::string("perf_sample_hosttrap_v1_t::reserved0").find(HSA_structs_regex) != std::string::npos) {
|
|
809
|
+
std::operator<<(out, "reserved0=");
|
|
810
|
+
roctracer::hsa_support::detail::operator<<(out, v.reserved0);
|
|
811
|
+
std::operator<<(out, ", ");
|
|
812
|
+
}
|
|
813
|
+
if (std::string("perf_sample_hosttrap_v1_t::hw_id").find(HSA_structs_regex) != std::string::npos) {
|
|
814
|
+
std::operator<<(out, "hw_id=");
|
|
815
|
+
roctracer::hsa_support::detail::operator<<(out, v.hw_id);
|
|
816
|
+
std::operator<<(out, ", ");
|
|
817
|
+
}
|
|
818
|
+
if (std::string("perf_sample_hosttrap_v1_t::reserved").find(HSA_structs_regex) != std::string::npos) {
|
|
819
|
+
std::operator<<(out, "reserved=");
|
|
820
|
+
roctracer::hsa_support::detail::operator<<(out, v.reserved);
|
|
821
|
+
std::operator<<(out, ", ");
|
|
822
|
+
}
|
|
823
|
+
if (std::string("perf_sample_hosttrap_v1_t::chiplet").find(HSA_structs_regex) != std::string::npos) {
|
|
824
|
+
std::operator<<(out, "chiplet=");
|
|
825
|
+
roctracer::hsa_support::detail::operator<<(out, v.chiplet);
|
|
826
|
+
std::operator<<(out, ", ");
|
|
827
|
+
}
|
|
828
|
+
if (std::string("perf_sample_hosttrap_v1_t::wave_in_wg").find(HSA_structs_regex) != std::string::npos) {
|
|
829
|
+
std::operator<<(out, "wave_in_wg=");
|
|
830
|
+
roctracer::hsa_support::detail::operator<<(out, v.wave_in_wg);
|
|
831
|
+
std::operator<<(out, ", ");
|
|
832
|
+
}
|
|
833
|
+
if (std::string("perf_sample_hosttrap_v1_t::workgroup_id_z").find(HSA_structs_regex) != std::string::npos) {
|
|
834
|
+
std::operator<<(out, "workgroup_id_z=");
|
|
835
|
+
roctracer::hsa_support::detail::operator<<(out, v.workgroup_id_z);
|
|
836
|
+
std::operator<<(out, ", ");
|
|
837
|
+
}
|
|
838
|
+
if (std::string("perf_sample_hosttrap_v1_t::workgroup_id_y").find(HSA_structs_regex) != std::string::npos) {
|
|
839
|
+
std::operator<<(out, "workgroup_id_y=");
|
|
840
|
+
roctracer::hsa_support::detail::operator<<(out, v.workgroup_id_y);
|
|
841
|
+
std::operator<<(out, ", ");
|
|
842
|
+
}
|
|
843
|
+
if (std::string("perf_sample_hosttrap_v1_t::workgroup_id_x").find(HSA_structs_regex) != std::string::npos) {
|
|
844
|
+
std::operator<<(out, "workgroup_id_x=");
|
|
845
|
+
roctracer::hsa_support::detail::operator<<(out, v.workgroup_id_x);
|
|
846
|
+
std::operator<<(out, ", ");
|
|
847
|
+
}
|
|
848
|
+
if (std::string("perf_sample_hosttrap_v1_t::exec_mask").find(HSA_structs_regex) != std::string::npos) {
|
|
849
|
+
std::operator<<(out, "exec_mask=");
|
|
850
|
+
roctracer::hsa_support::detail::operator<<(out, v.exec_mask);
|
|
851
|
+
std::operator<<(out, ", ");
|
|
852
|
+
}
|
|
853
|
+
if (std::string("perf_sample_hosttrap_v1_t::pc").find(HSA_structs_regex) != std::string::npos) {
|
|
854
|
+
std::operator<<(out, "pc=");
|
|
855
|
+
roctracer::hsa_support::detail::operator<<(out, v.pc);
|
|
856
|
+
}
|
|
857
|
+
};
|
|
858
|
+
HSA_depth_max_cnt--;
|
|
859
|
+
std::operator<<(out, '}');
|
|
860
|
+
return out;
|
|
861
|
+
}
|
|
862
|
+
inline static std::ostream& operator<<(std::ostream& out, const perf_sample_snapshot_v1_t& v)
|
|
863
|
+
{
|
|
864
|
+
std::operator<<(out, '{');
|
|
865
|
+
HSA_depth_max_cnt++;
|
|
866
|
+
if (HSA_depth_max == -1 || HSA_depth_max_cnt <= HSA_depth_max) {
|
|
867
|
+
if (std::string("perf_sample_snapshot_v1_t::correlation_id").find(HSA_structs_regex) != std::string::npos) {
|
|
868
|
+
std::operator<<(out, "correlation_id=");
|
|
869
|
+
roctracer::hsa_support::detail::operator<<(out, v.correlation_id);
|
|
870
|
+
std::operator<<(out, ", ");
|
|
871
|
+
}
|
|
872
|
+
if (std::string("perf_sample_snapshot_v1_t::timestamp").find(HSA_structs_regex) != std::string::npos) {
|
|
873
|
+
std::operator<<(out, "timestamp=");
|
|
874
|
+
roctracer::hsa_support::detail::operator<<(out, v.timestamp);
|
|
875
|
+
std::operator<<(out, ", ");
|
|
876
|
+
}
|
|
877
|
+
if (std::string("perf_sample_snapshot_v1_t::perf_snapshot_data2").find(HSA_structs_regex) != std::string::npos) {
|
|
878
|
+
std::operator<<(out, "perf_snapshot_data2=");
|
|
879
|
+
roctracer::hsa_support::detail::operator<<(out, v.perf_snapshot_data2);
|
|
880
|
+
std::operator<<(out, ", ");
|
|
881
|
+
}
|
|
882
|
+
if (std::string("perf_sample_snapshot_v1_t::perf_snapshot_data1").find(HSA_structs_regex) != std::string::npos) {
|
|
883
|
+
std::operator<<(out, "perf_snapshot_data1=");
|
|
884
|
+
roctracer::hsa_support::detail::operator<<(out, v.perf_snapshot_data1);
|
|
885
|
+
std::operator<<(out, ", ");
|
|
886
|
+
}
|
|
887
|
+
if (std::string("perf_sample_snapshot_v1_t::perf_snapshot_data").find(HSA_structs_regex) != std::string::npos) {
|
|
888
|
+
std::operator<<(out, "perf_snapshot_data=");
|
|
889
|
+
roctracer::hsa_support::detail::operator<<(out, v.perf_snapshot_data);
|
|
890
|
+
std::operator<<(out, ", ");
|
|
891
|
+
}
|
|
892
|
+
if (std::string("perf_sample_snapshot_v1_t::hw_id").find(HSA_structs_regex) != std::string::npos) {
|
|
893
|
+
std::operator<<(out, "hw_id=");
|
|
894
|
+
roctracer::hsa_support::detail::operator<<(out, v.hw_id);
|
|
895
|
+
std::operator<<(out, ", ");
|
|
896
|
+
}
|
|
897
|
+
if (std::string("perf_sample_snapshot_v1_t::reserved").find(HSA_structs_regex) != std::string::npos) {
|
|
898
|
+
std::operator<<(out, "reserved=");
|
|
899
|
+
roctracer::hsa_support::detail::operator<<(out, v.reserved);
|
|
900
|
+
std::operator<<(out, ", ");
|
|
901
|
+
}
|
|
902
|
+
if (std::string("perf_sample_snapshot_v1_t::chiplet").find(HSA_structs_regex) != std::string::npos) {
|
|
903
|
+
std::operator<<(out, "chiplet=");
|
|
904
|
+
roctracer::hsa_support::detail::operator<<(out, v.chiplet);
|
|
905
|
+
std::operator<<(out, ", ");
|
|
906
|
+
}
|
|
907
|
+
if (std::string("perf_sample_snapshot_v1_t::wave_in_wg").find(HSA_structs_regex) != std::string::npos) {
|
|
908
|
+
std::operator<<(out, "wave_in_wg=");
|
|
909
|
+
roctracer::hsa_support::detail::operator<<(out, v.wave_in_wg);
|
|
910
|
+
std::operator<<(out, ", ");
|
|
911
|
+
}
|
|
912
|
+
if (std::string("perf_sample_snapshot_v1_t::workgroup_id_z").find(HSA_structs_regex) != std::string::npos) {
|
|
913
|
+
std::operator<<(out, "workgroup_id_z=");
|
|
914
|
+
roctracer::hsa_support::detail::operator<<(out, v.workgroup_id_z);
|
|
915
|
+
std::operator<<(out, ", ");
|
|
916
|
+
}
|
|
917
|
+
if (std::string("perf_sample_snapshot_v1_t::workgroup_id_y").find(HSA_structs_regex) != std::string::npos) {
|
|
918
|
+
std::operator<<(out, "workgroup_id_y=");
|
|
919
|
+
roctracer::hsa_support::detail::operator<<(out, v.workgroup_id_y);
|
|
920
|
+
std::operator<<(out, ", ");
|
|
921
|
+
}
|
|
922
|
+
if (std::string("perf_sample_snapshot_v1_t::workgroup_id_x").find(HSA_structs_regex) != std::string::npos) {
|
|
923
|
+
std::operator<<(out, "workgroup_id_x=");
|
|
924
|
+
roctracer::hsa_support::detail::operator<<(out, v.workgroup_id_x);
|
|
925
|
+
std::operator<<(out, ", ");
|
|
926
|
+
}
|
|
927
|
+
if (std::string("perf_sample_snapshot_v1_t::exec_mask").find(HSA_structs_regex) != std::string::npos) {
|
|
928
|
+
std::operator<<(out, "exec_mask=");
|
|
929
|
+
roctracer::hsa_support::detail::operator<<(out, v.exec_mask);
|
|
930
|
+
std::operator<<(out, ", ");
|
|
931
|
+
}
|
|
932
|
+
if (std::string("perf_sample_snapshot_v1_t::pc").find(HSA_structs_regex) != std::string::npos) {
|
|
933
|
+
std::operator<<(out, "pc=");
|
|
934
|
+
roctracer::hsa_support::detail::operator<<(out, v.pc);
|
|
935
|
+
}
|
|
936
|
+
};
|
|
937
|
+
HSA_depth_max_cnt--;
|
|
938
|
+
std::operator<<(out, '}');
|
|
939
|
+
return out;
|
|
940
|
+
}
|
|
941
|
+
inline static std::ostream& operator<<(std::ostream& out, const hsa_ven_amd_pcs_t& v)
|
|
942
|
+
{
|
|
943
|
+
std::operator<<(out, '{');
|
|
944
|
+
HSA_depth_max_cnt++;
|
|
945
|
+
if (HSA_depth_max == -1 || HSA_depth_max_cnt <= HSA_depth_max) {
|
|
946
|
+
if (std::string("hsa_ven_amd_pcs_t::handle").find(HSA_structs_regex) != std::string::npos) {
|
|
947
|
+
std::operator<<(out, "handle=");
|
|
948
|
+
roctracer::hsa_support::detail::operator<<(out, v.handle);
|
|
949
|
+
}
|
|
950
|
+
};
|
|
951
|
+
HSA_depth_max_cnt--;
|
|
952
|
+
std::operator<<(out, '}');
|
|
953
|
+
return out;
|
|
954
|
+
}
|
|
955
|
+
inline static std::ostream& operator<<(std::ostream& out, const hsa_ven_amd_pcs_configuration_t& v)
|
|
956
|
+
{
|
|
957
|
+
std::operator<<(out, '{');
|
|
958
|
+
HSA_depth_max_cnt++;
|
|
959
|
+
if (HSA_depth_max == -1 || HSA_depth_max_cnt <= HSA_depth_max) {
|
|
960
|
+
if (std::string("hsa_ven_amd_pcs_configuration_t::flags").find(HSA_structs_regex) != std::string::npos) {
|
|
961
|
+
std::operator<<(out, "flags=");
|
|
962
|
+
roctracer::hsa_support::detail::operator<<(out, v.flags);
|
|
963
|
+
std::operator<<(out, ", ");
|
|
964
|
+
}
|
|
965
|
+
if (std::string("hsa_ven_amd_pcs_configuration_t::max_interval").find(HSA_structs_regex) != std::string::npos) {
|
|
966
|
+
std::operator<<(out, "max_interval=");
|
|
967
|
+
roctracer::hsa_support::detail::operator<<(out, v.max_interval);
|
|
968
|
+
std::operator<<(out, ", ");
|
|
969
|
+
}
|
|
970
|
+
if (std::string("hsa_ven_amd_pcs_configuration_t::min_interval").find(HSA_structs_regex) != std::string::npos) {
|
|
971
|
+
std::operator<<(out, "min_interval=");
|
|
972
|
+
roctracer::hsa_support::detail::operator<<(out, v.min_interval);
|
|
973
|
+
std::operator<<(out, ", ");
|
|
974
|
+
}
|
|
975
|
+
if (std::string("hsa_ven_amd_pcs_configuration_t::units").find(HSA_structs_regex) != std::string::npos) {
|
|
976
|
+
std::operator<<(out, "units=");
|
|
977
|
+
roctracer::hsa_support::detail::operator<<(out, v.units);
|
|
978
|
+
std::operator<<(out, ", ");
|
|
979
|
+
}
|
|
980
|
+
if (std::string("hsa_ven_amd_pcs_configuration_t::method").find(HSA_structs_regex) != std::string::npos) {
|
|
981
|
+
std::operator<<(out, "method=");
|
|
982
|
+
roctracer::hsa_support::detail::operator<<(out, v.method);
|
|
983
|
+
}
|
|
984
|
+
};
|
|
985
|
+
HSA_depth_max_cnt--;
|
|
986
|
+
std::operator<<(out, '}');
|
|
987
|
+
return out;
|
|
988
|
+
}
|
|
989
|
+
inline static std::ostream& operator<<(std::ostream& out, const hsa_ven_amd_pc_sampling_1_00_pfn_t& v)
|
|
990
|
+
{
|
|
991
|
+
std::operator<<(out, '{');
|
|
992
|
+
HSA_depth_max_cnt++;
|
|
993
|
+
if (HSA_depth_max == -1 || HSA_depth_max_cnt <= HSA_depth_max) {
|
|
994
|
+
if (std::string("hsa_ven_amd_pc_sampling_1_00_pfn_t::hsa_ven_amd_pcs_flush").find(HSA_structs_regex) != std::string::npos) {
|
|
995
|
+
std::operator<<(out, "hsa_ven_amd_pcs_flush=");
|
|
996
|
+
roctracer::hsa_support::detail::operator<<(out, v.hsa_ven_amd_pcs_flush);
|
|
997
|
+
std::operator<<(out, ", ");
|
|
998
|
+
}
|
|
999
|
+
if (std::string("hsa_ven_amd_pc_sampling_1_00_pfn_t::hsa_ven_amd_pcs_stop").find(HSA_structs_regex) != std::string::npos) {
|
|
1000
|
+
std::operator<<(out, "hsa_ven_amd_pcs_stop=");
|
|
1001
|
+
roctracer::hsa_support::detail::operator<<(out, v.hsa_ven_amd_pcs_stop);
|
|
1002
|
+
std::operator<<(out, ", ");
|
|
1003
|
+
}
|
|
1004
|
+
if (std::string("hsa_ven_amd_pc_sampling_1_00_pfn_t::hsa_ven_amd_pcs_start").find(HSA_structs_regex) != std::string::npos) {
|
|
1005
|
+
std::operator<<(out, "hsa_ven_amd_pcs_start=");
|
|
1006
|
+
roctracer::hsa_support::detail::operator<<(out, v.hsa_ven_amd_pcs_start);
|
|
1007
|
+
std::operator<<(out, ", ");
|
|
1008
|
+
}
|
|
1009
|
+
if (std::string("hsa_ven_amd_pc_sampling_1_00_pfn_t::hsa_ven_amd_pcs_destroy").find(HSA_structs_regex) != std::string::npos) {
|
|
1010
|
+
std::operator<<(out, "hsa_ven_amd_pcs_destroy=");
|
|
1011
|
+
roctracer::hsa_support::detail::operator<<(out, v.hsa_ven_amd_pcs_destroy);
|
|
1012
|
+
}
|
|
1013
|
+
};
|
|
1014
|
+
HSA_depth_max_cnt--;
|
|
1015
|
+
std::operator<<(out, '}');
|
|
1016
|
+
return out;
|
|
1017
|
+
}
|
|
788
1018
|
inline static std::ostream& operator<<(std::ostream& out, const hsa_amd_vendor_packet_header_t& v)
|
|
789
1019
|
{
|
|
790
1020
|
std::operator<<(out, '{');
|
|
@@ -1360,6 +1590,36 @@ inline static std::ostream& operator<<(std::ostream& out, const hsa_ext_images_1
|
|
|
1360
1590
|
return out;
|
|
1361
1591
|
}
|
|
1362
1592
|
|
|
1593
|
+
inline static std::ostream& operator<<(std::ostream& out, const perf_sample_hosttrap_v1_t& v)
|
|
1594
|
+
{
|
|
1595
|
+
roctracer::hsa_support::detail::operator<<(out, v);
|
|
1596
|
+
return out;
|
|
1597
|
+
}
|
|
1598
|
+
|
|
1599
|
+
inline static std::ostream& operator<<(std::ostream& out, const perf_sample_snapshot_v1_t& v)
|
|
1600
|
+
{
|
|
1601
|
+
roctracer::hsa_support::detail::operator<<(out, v);
|
|
1602
|
+
return out;
|
|
1603
|
+
}
|
|
1604
|
+
|
|
1605
|
+
inline static std::ostream& operator<<(std::ostream& out, const hsa_ven_amd_pcs_t& v)
|
|
1606
|
+
{
|
|
1607
|
+
roctracer::hsa_support::detail::operator<<(out, v);
|
|
1608
|
+
return out;
|
|
1609
|
+
}
|
|
1610
|
+
|
|
1611
|
+
inline static std::ostream& operator<<(std::ostream& out, const hsa_ven_amd_pcs_configuration_t& v)
|
|
1612
|
+
{
|
|
1613
|
+
roctracer::hsa_support::detail::operator<<(out, v);
|
|
1614
|
+
return out;
|
|
1615
|
+
}
|
|
1616
|
+
|
|
1617
|
+
inline static std::ostream& operator<<(std::ostream& out, const hsa_ven_amd_pc_sampling_1_00_pfn_t& v)
|
|
1618
|
+
{
|
|
1619
|
+
roctracer::hsa_support::detail::operator<<(out, v);
|
|
1620
|
+
return out;
|
|
1621
|
+
}
|
|
1622
|
+
|
|
1363
1623
|
inline static std::ostream& operator<<(std::ostream& out, const hsa_amd_vendor_packet_header_t& v)
|
|
1364
1624
|
{
|
|
1365
1625
|
roctracer::hsa_support::detail::operator<<(out, v);
|
|
@@ -22,9 +22,9 @@
|
|
|
22
22
|
|
|
23
23
|
/* HSA API tracing primitives
|
|
24
24
|
'CoreApi', header 'hsa.h', 125 funcs
|
|
25
|
-
'AmdExt', header 'hsa_ext_amd.h',
|
|
25
|
+
'AmdExt', header 'hsa_ext_amd.h', 70 funcs
|
|
26
26
|
'ImageExt', header 'hsa_ext_image.h', 13 funcs
|
|
27
|
-
'AmdExt', header 'hsa_api_trace.h',
|
|
27
|
+
'AmdExt', header 'hsa_api_trace.h', 70 funcs
|
|
28
28
|
*/
|
|
29
29
|
|
|
30
30
|
#ifndef HSA_PROF_STR_H_
|
|
@@ -229,24 +229,26 @@ enum hsa_api_id_t {
|
|
|
229
229
|
HSA_API_ID_hsa_amd_vmem_retain_alloc_handle = 190,
|
|
230
230
|
HSA_API_ID_hsa_amd_vmem_get_alloc_properties_from_handle = 191,
|
|
231
231
|
HSA_API_ID_hsa_amd_agent_set_async_scratch_limit = 192,
|
|
232
|
+
HSA_API_ID_hsa_amd_queue_get_info = 193,
|
|
233
|
+
HSA_API_ID_hsa_amd_vmem_address_reserve_align = 194,
|
|
232
234
|
|
|
233
235
|
/* block: ImageExt API */
|
|
234
|
-
HSA_API_ID_hsa_ext_image_get_capability =
|
|
235
|
-
HSA_API_ID_hsa_ext_image_data_get_info =
|
|
236
|
-
HSA_API_ID_hsa_ext_image_create =
|
|
237
|
-
HSA_API_ID_hsa_ext_image_import =
|
|
238
|
-
HSA_API_ID_hsa_ext_image_export =
|
|
239
|
-
HSA_API_ID_hsa_ext_image_copy =
|
|
240
|
-
HSA_API_ID_hsa_ext_image_clear =
|
|
241
|
-
HSA_API_ID_hsa_ext_image_destroy =
|
|
242
|
-
HSA_API_ID_hsa_ext_sampler_create =
|
|
243
|
-
HSA_API_ID_hsa_ext_sampler_destroy =
|
|
244
|
-
HSA_API_ID_hsa_ext_image_get_capability_with_layout =
|
|
245
|
-
HSA_API_ID_hsa_ext_image_data_get_info_with_layout =
|
|
246
|
-
HSA_API_ID_hsa_ext_image_create_with_layout =
|
|
236
|
+
HSA_API_ID_hsa_ext_image_get_capability = 195,
|
|
237
|
+
HSA_API_ID_hsa_ext_image_data_get_info = 196,
|
|
238
|
+
HSA_API_ID_hsa_ext_image_create = 197,
|
|
239
|
+
HSA_API_ID_hsa_ext_image_import = 198,
|
|
240
|
+
HSA_API_ID_hsa_ext_image_export = 199,
|
|
241
|
+
HSA_API_ID_hsa_ext_image_copy = 200,
|
|
242
|
+
HSA_API_ID_hsa_ext_image_clear = 201,
|
|
243
|
+
HSA_API_ID_hsa_ext_image_destroy = 202,
|
|
244
|
+
HSA_API_ID_hsa_ext_sampler_create = 203,
|
|
245
|
+
HSA_API_ID_hsa_ext_sampler_destroy = 204,
|
|
246
|
+
HSA_API_ID_hsa_ext_image_get_capability_with_layout = 205,
|
|
247
|
+
HSA_API_ID_hsa_ext_image_data_get_info_with_layout = 206,
|
|
248
|
+
HSA_API_ID_hsa_ext_image_create_with_layout = 207,
|
|
247
249
|
|
|
248
|
-
HSA_API_ID_DISPATCH =
|
|
249
|
-
HSA_API_ID_NUMBER =
|
|
250
|
+
HSA_API_ID_DISPATCH = 208,
|
|
251
|
+
HSA_API_ID_NUMBER = 209,
|
|
250
252
|
};
|
|
251
253
|
/* Declarations of APIs intended for use only by tools. */
|
|
252
254
|
typedef void (*hsa_amd_queue_intercept_packet_writer)(const void*, uint64_t);
|
|
@@ -261,9 +263,9 @@ struct hsa_api_data_t {
|
|
|
261
263
|
uint32_t phase;
|
|
262
264
|
union {
|
|
263
265
|
uint64_t uint64_t_retval;
|
|
264
|
-
uint32_t uint32_t_retval;
|
|
265
|
-
hsa_signal_value_t hsa_signal_value_t_retval;
|
|
266
266
|
hsa_status_t hsa_status_t_retval;
|
|
267
|
+
hsa_signal_value_t hsa_signal_value_t_retval;
|
|
268
|
+
uint32_t uint32_t_retval;
|
|
267
269
|
};
|
|
268
270
|
union {
|
|
269
271
|
/* block: CoreApi API */
|
|
@@ -1236,6 +1238,18 @@ struct hsa_api_data_t {
|
|
|
1236
1238
|
hsa_agent_t agent;
|
|
1237
1239
|
size_t threshold;
|
|
1238
1240
|
} hsa_amd_agent_set_async_scratch_limit;
|
|
1241
|
+
struct {
|
|
1242
|
+
hsa_queue_t* queue;
|
|
1243
|
+
hsa_queue_info_attribute_t attribute;
|
|
1244
|
+
void* value;
|
|
1245
|
+
} hsa_amd_queue_get_info;
|
|
1246
|
+
struct {
|
|
1247
|
+
void** va;
|
|
1248
|
+
size_t size;
|
|
1249
|
+
uint64_t address;
|
|
1250
|
+
uint64_t alignment;
|
|
1251
|
+
uint64_t flags;
|
|
1252
|
+
} hsa_amd_vmem_address_reserve_align;
|
|
1239
1253
|
|
|
1240
1254
|
/* block: ImageExt API */
|
|
1241
1255
|
struct {
|
|
@@ -2888,6 +2902,24 @@ inline std::ostream& operator<< (std::ostream& out, const hsa_api_data_pair_t& d
|
|
|
2888
2902
|
out << ") = " << api_data.hsa_status_t_retval;
|
|
2889
2903
|
break;
|
|
2890
2904
|
}
|
|
2905
|
+
case HSA_API_ID_hsa_amd_queue_get_info: {
|
|
2906
|
+
out << "hsa_amd_queue_get_info(";
|
|
2907
|
+
out << api_data.args.hsa_amd_queue_get_info.queue << ", ";
|
|
2908
|
+
out << api_data.args.hsa_amd_queue_get_info.attribute << ", ";
|
|
2909
|
+
out << api_data.args.hsa_amd_queue_get_info.value;
|
|
2910
|
+
out << ") = " << api_data.hsa_status_t_retval;
|
|
2911
|
+
break;
|
|
2912
|
+
}
|
|
2913
|
+
case HSA_API_ID_hsa_amd_vmem_address_reserve_align: {
|
|
2914
|
+
out << "hsa_amd_vmem_address_reserve_align(";
|
|
2915
|
+
out << api_data.args.hsa_amd_vmem_address_reserve_align.va << ", ";
|
|
2916
|
+
out << api_data.args.hsa_amd_vmem_address_reserve_align.size << ", ";
|
|
2917
|
+
out << api_data.args.hsa_amd_vmem_address_reserve_align.address << ", ";
|
|
2918
|
+
out << api_data.args.hsa_amd_vmem_address_reserve_align.alignment << ", ";
|
|
2919
|
+
out << api_data.args.hsa_amd_vmem_address_reserve_align.flags;
|
|
2920
|
+
out << ") = " << api_data.hsa_status_t_retval;
|
|
2921
|
+
break;
|
|
2922
|
+
}
|
|
2891
2923
|
|
|
2892
2924
|
/* block: ImageExt API */
|
|
2893
2925
|
case HSA_API_ID_hsa_ext_image_get_capability: {
|
|
Binary file
|