triton-windows 3.2.0.post12__cp313-cp313-win_amd64.whl → 3.3.0a0.post12__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of triton-windows might be problematic. Click here for more details.

Files changed (68)
  1. triton/_C/libtriton.pyd +0 -0
  2. triton/__init__.py +3 -3
  3. triton/_internal_testing.py +59 -4
  4. triton/_utils.py +35 -0
  5. triton/backends/amd/compiler.py +121 -74
  6. triton/backends/amd/driver.py +77 -43
  7. triton/backends/amd/include/hip/amd_detail/amd_device_functions.h +28 -49
  8. triton/backends/amd/include/hip/amd_detail/amd_hip_atomic.h +35 -9
  9. triton/backends/amd/include/hip/amd_detail/amd_hip_bf16.h +761 -284
  10. triton/backends/amd/include/hip/amd_detail/amd_hip_cooperative_groups.h +9 -3
  11. triton/backends/amd/include/hip/amd_detail/amd_hip_fp8.h +1391 -0
  12. triton/backends/amd/include/hip/amd_detail/amd_hip_gl_interop.h +3 -3
  13. triton/backends/amd/include/hip/amd_detail/amd_warp_functions.h +44 -0
  14. triton/backends/amd/include/hip/amd_detail/amd_warp_sync_functions.h +288 -0
  15. triton/backends/amd/include/hip/amd_detail/hip_api_trace.hpp +110 -14
  16. triton/backends/amd/include/hip/amd_detail/hip_prof_str.h +504 -103
  17. triton/backends/amd/include/hip/amd_detail/hip_runtime_prof.h +2 -1
  18. triton/backends/amd/include/hip/amd_detail/host_defines.h +4 -0
  19. triton/backends/amd/include/hip/hip_ext.h +4 -2
  20. triton/backends/amd/include/hip/hip_fp8.h +33 -0
  21. triton/backends/amd/include/hip/hip_runtime_api.h +375 -33
  22. triton/backends/amd/include/hip/hip_version.h +3 -3
  23. triton/backends/amd/include/hip/hiprtc.h +25 -25
  24. triton/backends/amd/include/hsa/amd_hsa_elf.h +40 -14
  25. triton/backends/amd/include/hsa/hsa.h +11 -2
  26. triton/backends/amd/include/hsa/hsa_api_trace.h +30 -17
  27. triton/backends/amd/include/hsa/hsa_api_trace_version.h +68 -0
  28. triton/backends/amd/include/hsa/hsa_ext_amd.h +83 -27
  29. triton/backends/amd/include/hsa/hsa_ven_amd_aqlprofile.h +46 -46
  30. triton/backends/amd/include/hsa/hsa_ven_amd_pc_sampling.h +416 -0
  31. triton/backends/amd/include/roctracer/hip_ostream_ops.h +84 -4
  32. triton/backends/amd/include/roctracer/hsa_ostream_ops.h +260 -0
  33. triton/backends/amd/include/roctracer/hsa_prof_str.h +51 -19
  34. triton/backends/amd/lib/asanrtl.bc +0 -0
  35. triton/backends/compiler.py +25 -225
  36. triton/backends/driver.py +7 -2
  37. triton/backends/nvidia/bin/ptxas.exe +0 -0
  38. triton/backends/nvidia/compiler.py +135 -90
  39. triton/backends/nvidia/driver.c +0 -1
  40. triton/backends/nvidia/driver.py +135 -49
  41. triton/backends/nvidia/include/cuda.h +2162 -241
  42. triton/backends/nvidia/lib/x64/cuda.lib +0 -0
  43. triton/compiler/__init__.py +2 -2
  44. triton/compiler/code_generator.py +334 -231
  45. triton/compiler/compiler.py +77 -66
  46. triton/language/__init__.py +22 -5
  47. triton/language/core.py +448 -74
  48. triton/language/extra/cuda/_experimental_tma.py +3 -5
  49. triton/language/math.py +1 -1
  50. triton/language/random.py +2 -1
  51. triton/language/semantic.py +206 -52
  52. triton/language/standard.py +35 -18
  53. triton/runtime/_allocation.py +32 -0
  54. triton/runtime/autotuner.py +27 -32
  55. triton/runtime/build.py +1 -48
  56. triton/runtime/cache.py +6 -6
  57. triton/runtime/errors.py +10 -0
  58. triton/runtime/interpreter.py +179 -45
  59. triton/runtime/jit.py +149 -190
  60. triton/testing.py +39 -11
  61. triton/tools/compile.py +27 -20
  62. triton/tools/{compile.c → extra/cuda/compile.c} +1 -0
  63. triton/tools/mxfp.py +301 -0
  64. {triton_windows-3.2.0.post12.dist-info → triton_windows-3.3.0a0.post12.dist-info}/METADATA +5 -2
  65. {triton_windows-3.2.0.post12.dist-info → triton_windows-3.3.0a0.post12.dist-info}/RECORD +68 -59
  66. {triton_windows-3.2.0.post12.dist-info → triton_windows-3.3.0a0.post12.dist-info}/top_level.txt +2 -0
  67. triton/tools/{compile.h → extra/cuda/compile.h} +0 -0
  68. {triton_windows-3.2.0.post12.dist-info → triton_windows-3.3.0a0.post12.dist-info}/WHEEL +0 -0
@@ -2795,6 +2795,11 @@ inline static std::ostream& operator<<(std::ostream& out, const hipMemPoolProps&
2795
2795
  roctracer::hip_support::detail::operator<<(out, 0);
2796
2796
  std::operator<<(out, ", ");
2797
2797
  }
2798
+ if (std::string("hipMemPoolProps::maxSize").find(HIP_structs_regex) != std::string::npos) {
2799
+ std::operator<<(out, "maxSize=");
2800
+ roctracer::hip_support::detail::operator<<(out, v.maxSize);
2801
+ std::operator<<(out, ", ");
2802
+ }
2798
2803
  if (std::string("hipMemPoolProps::location").find(HIP_structs_regex) != std::string::npos) {
2799
2804
  std::operator<<(out, "location=");
2800
2805
  roctracer::hip_support::detail::operator<<(out, v.location);
@@ -3229,17 +3234,22 @@ inline static std::ostream& operator<<(std::ostream& out, const hipAccessPolicyW
3229
3234
  std::operator<<(out, '}');
3230
3235
  return out;
3231
3236
  }
3232
- inline static std::ostream& operator<<(std::ostream& out, const hipKernelNodeAttrValue& v)
3237
+ inline static std::ostream& operator<<(std::ostream& out, const hipLaunchAttributeValue& v)
3233
3238
  {
3234
3239
  std::operator<<(out, '{');
3235
3240
  HIP_depth_max_cnt++;
3236
3241
  if (HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) {
3237
- if (std::string("hipKernelNodeAttrValue::cooperative").find(HIP_structs_regex) != std::string::npos) {
3242
+ if (std::string("hipLaunchAttributeValue::priority").find(HIP_structs_regex) != std::string::npos) {
3243
+ std::operator<<(out, "priority=");
3244
+ roctracer::hip_support::detail::operator<<(out, v.priority);
3245
+ std::operator<<(out, ", ");
3246
+ }
3247
+ if (std::string("hipLaunchAttributeValue::cooperative").find(HIP_structs_regex) != std::string::npos) {
3238
3248
  std::operator<<(out, "cooperative=");
3239
3249
  roctracer::hip_support::detail::operator<<(out, v.cooperative);
3240
3250
  std::operator<<(out, ", ");
3241
3251
  }
3242
- if (std::string("hipKernelNodeAttrValue::accessPolicyWindow").find(HIP_structs_regex) != std::string::npos) {
3252
+ if (std::string("hipLaunchAttributeValue::accessPolicyWindow").find(HIP_structs_regex) != std::string::npos) {
3243
3253
  std::operator<<(out, "accessPolicyWindow=");
3244
3254
  roctracer::hip_support::detail::operator<<(out, v.accessPolicyWindow);
3245
3255
  }
@@ -3287,6 +3297,35 @@ inline static std::ostream& operator<<(std::ostream& out, const HIP_MEMSET_NODE_
3287
3297
  std::operator<<(out, '}');
3288
3298
  return out;
3289
3299
  }
3300
+ inline static std::ostream& operator<<(std::ostream& out, const hipGraphInstantiateParams& v)
3301
+ {
3302
+ std::operator<<(out, '{');
3303
+ HIP_depth_max_cnt++;
3304
+ if (HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) {
3305
+ if (std::string("hipGraphInstantiateParams::uploadStream").find(HIP_structs_regex) != std::string::npos) {
3306
+ std::operator<<(out, "uploadStream=");
3307
+ roctracer::hip_support::detail::operator<<(out, v.uploadStream);
3308
+ std::operator<<(out, ", ");
3309
+ }
3310
+ if (std::string("hipGraphInstantiateParams::result_out").find(HIP_structs_regex) != std::string::npos) {
3311
+ std::operator<<(out, "result_out=");
3312
+ roctracer::hip_support::detail::operator<<(out, v.result_out);
3313
+ std::operator<<(out, ", ");
3314
+ }
3315
+ if (std::string("hipGraphInstantiateParams::flags").find(HIP_structs_regex) != std::string::npos) {
3316
+ std::operator<<(out, "flags=");
3317
+ roctracer::hip_support::detail::operator<<(out, v.flags);
3318
+ std::operator<<(out, ", ");
3319
+ }
3320
+ if (std::string("hipGraphInstantiateParams::errNode_out").find(HIP_structs_regex) != std::string::npos) {
3321
+ std::operator<<(out, "errNode_out=");
3322
+ roctracer::hip_support::detail::operator<<(out, v.errNode_out);
3323
+ }
3324
+ };
3325
+ HIP_depth_max_cnt--;
3326
+ std::operator<<(out, '}');
3327
+ return out;
3328
+ }
3290
3329
  inline static std::ostream& operator<<(std::ostream& out, const hipMemAllocationProp& v)
3291
3330
  {
3292
3331
  std::operator<<(out, '{');
@@ -3513,6 +3552,35 @@ inline static std::ostream& operator<<(std::ostream& out, const hipGraphNodePara
3513
3552
  std::operator<<(out, '}');
3514
3553
  return out;
3515
3554
  }
3555
+ inline static std::ostream& operator<<(std::ostream& out, const hipGraphEdgeData& v)
3556
+ {
3557
+ std::operator<<(out, '{');
3558
+ HIP_depth_max_cnt++;
3559
+ if (HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) {
3560
+ if (std::string("hipGraphEdgeData::type").find(HIP_structs_regex) != std::string::npos) {
3561
+ std::operator<<(out, "type=");
3562
+ roctracer::hip_support::detail::operator<<(out, v.type);
3563
+ std::operator<<(out, ", ");
3564
+ }
3565
+ if (std::string("hipGraphEdgeData::to_port").find(HIP_structs_regex) != std::string::npos) {
3566
+ std::operator<<(out, "to_port=");
3567
+ roctracer::hip_support::detail::operator<<(out, v.to_port);
3568
+ std::operator<<(out, ", ");
3569
+ }
3570
+ if (std::string("hipGraphEdgeData::reserved").find(HIP_structs_regex) != std::string::npos) {
3571
+ std::operator<<(out, "reserved=");
3572
+ roctracer::hip_support::detail::operator<<(out, 0);
3573
+ std::operator<<(out, ", ");
3574
+ }
3575
+ if (std::string("hipGraphEdgeData::from_port").find(HIP_structs_regex) != std::string::npos) {
3576
+ std::operator<<(out, "from_port=");
3577
+ roctracer::hip_support::detail::operator<<(out, v.from_port);
3578
+ }
3579
+ };
3580
+ HIP_depth_max_cnt--;
3581
+ std::operator<<(out, '}');
3582
+ return out;
3583
+ }
3516
3584
  inline static std::ostream& operator<<(std::ostream& out, const hipDeviceProp_tR0000& v)
3517
3585
  {
3518
3586
  std::operator<<(out, '{');
@@ -4352,7 +4420,7 @@ inline static std::ostream& operator<<(std::ostream& out, const hipAccessPolicyW
4352
4420
  return out;
4353
4421
  }
4354
4422
 
4355
- inline static std::ostream& operator<<(std::ostream& out, const hipKernelNodeAttrValue& v)
4423
+ inline static std::ostream& operator<<(std::ostream& out, const hipLaunchAttributeValue& v)
4356
4424
  {
4357
4425
  roctracer::hip_support::detail::operator<<(out, v);
4358
4426
  return out;
@@ -4364,6 +4432,12 @@ inline static std::ostream& operator<<(std::ostream& out, const HIP_MEMSET_NODE_
4364
4432
  return out;
4365
4433
  }
4366
4434
 
4435
+ inline static std::ostream& operator<<(std::ostream& out, const hipGraphInstantiateParams& v)
4436
+ {
4437
+ roctracer::hip_support::detail::operator<<(out, v);
4438
+ return out;
4439
+ }
4440
+
4367
4441
  inline static std::ostream& operator<<(std::ostream& out, const hipMemAllocationProp& v)
4368
4442
  {
4369
4443
  roctracer::hip_support::detail::operator<<(out, v);
@@ -4424,6 +4498,12 @@ inline static std::ostream& operator<<(std::ostream& out, const hipGraphNodePara
4424
4498
  return out;
4425
4499
  }
4426
4500
 
4501
+ inline static std::ostream& operator<<(std::ostream& out, const hipGraphEdgeData& v)
4502
+ {
4503
+ roctracer::hip_support::detail::operator<<(out, v);
4504
+ return out;
4505
+ }
4506
+
4427
4507
  inline static std::ostream& operator<<(std::ostream& out, const hipDeviceProp_tR0000& v)
4428
4508
  {
4429
4509
  roctracer::hip_support::detail::operator<<(out, v);
@@ -785,6 +785,236 @@ inline static std::ostream& operator<<(std::ostream& out, const hsa_ext_images_1
785
785
  std::operator<<(out, '}');
786
786
  return out;
787
787
  }
788
+ inline static std::ostream& operator<<(std::ostream& out, const perf_sample_hosttrap_v1_t& v)
789
+ {
790
+ std::operator<<(out, '{');
791
+ HSA_depth_max_cnt++;
792
+ if (HSA_depth_max == -1 || HSA_depth_max_cnt <= HSA_depth_max) {
793
+ if (std::string("perf_sample_hosttrap_v1_t::correlation_id").find(HSA_structs_regex) != std::string::npos) {
794
+ std::operator<<(out, "correlation_id=");
795
+ roctracer::hsa_support::detail::operator<<(out, v.correlation_id);
796
+ std::operator<<(out, ", ");
797
+ }
798
+ if (std::string("perf_sample_hosttrap_v1_t::timestamp").find(HSA_structs_regex) != std::string::npos) {
799
+ std::operator<<(out, "timestamp=");
800
+ roctracer::hsa_support::detail::operator<<(out, v.timestamp);
801
+ std::operator<<(out, ", ");
802
+ }
803
+ if (std::string("perf_sample_hosttrap_v1_t::reserved1").find(HSA_structs_regex) != std::string::npos) {
804
+ std::operator<<(out, "reserved1=");
805
+ roctracer::hsa_support::detail::operator<<(out, v.reserved1);
806
+ std::operator<<(out, ", ");
807
+ }
808
+ if (std::string("perf_sample_hosttrap_v1_t::reserved0").find(HSA_structs_regex) != std::string::npos) {
809
+ std::operator<<(out, "reserved0=");
810
+ roctracer::hsa_support::detail::operator<<(out, v.reserved0);
811
+ std::operator<<(out, ", ");
812
+ }
813
+ if (std::string("perf_sample_hosttrap_v1_t::hw_id").find(HSA_structs_regex) != std::string::npos) {
814
+ std::operator<<(out, "hw_id=");
815
+ roctracer::hsa_support::detail::operator<<(out, v.hw_id);
816
+ std::operator<<(out, ", ");
817
+ }
818
+ if (std::string("perf_sample_hosttrap_v1_t::reserved").find(HSA_structs_regex) != std::string::npos) {
819
+ std::operator<<(out, "reserved=");
820
+ roctracer::hsa_support::detail::operator<<(out, v.reserved);
821
+ std::operator<<(out, ", ");
822
+ }
823
+ if (std::string("perf_sample_hosttrap_v1_t::chiplet").find(HSA_structs_regex) != std::string::npos) {
824
+ std::operator<<(out, "chiplet=");
825
+ roctracer::hsa_support::detail::operator<<(out, v.chiplet);
826
+ std::operator<<(out, ", ");
827
+ }
828
+ if (std::string("perf_sample_hosttrap_v1_t::wave_in_wg").find(HSA_structs_regex) != std::string::npos) {
829
+ std::operator<<(out, "wave_in_wg=");
830
+ roctracer::hsa_support::detail::operator<<(out, v.wave_in_wg);
831
+ std::operator<<(out, ", ");
832
+ }
833
+ if (std::string("perf_sample_hosttrap_v1_t::workgroup_id_z").find(HSA_structs_regex) != std::string::npos) {
834
+ std::operator<<(out, "workgroup_id_z=");
835
+ roctracer::hsa_support::detail::operator<<(out, v.workgroup_id_z);
836
+ std::operator<<(out, ", ");
837
+ }
838
+ if (std::string("perf_sample_hosttrap_v1_t::workgroup_id_y").find(HSA_structs_regex) != std::string::npos) {
839
+ std::operator<<(out, "workgroup_id_y=");
840
+ roctracer::hsa_support::detail::operator<<(out, v.workgroup_id_y);
841
+ std::operator<<(out, ", ");
842
+ }
843
+ if (std::string("perf_sample_hosttrap_v1_t::workgroup_id_x").find(HSA_structs_regex) != std::string::npos) {
844
+ std::operator<<(out, "workgroup_id_x=");
845
+ roctracer::hsa_support::detail::operator<<(out, v.workgroup_id_x);
846
+ std::operator<<(out, ", ");
847
+ }
848
+ if (std::string("perf_sample_hosttrap_v1_t::exec_mask").find(HSA_structs_regex) != std::string::npos) {
849
+ std::operator<<(out, "exec_mask=");
850
+ roctracer::hsa_support::detail::operator<<(out, v.exec_mask);
851
+ std::operator<<(out, ", ");
852
+ }
853
+ if (std::string("perf_sample_hosttrap_v1_t::pc").find(HSA_structs_regex) != std::string::npos) {
854
+ std::operator<<(out, "pc=");
855
+ roctracer::hsa_support::detail::operator<<(out, v.pc);
856
+ }
857
+ };
858
+ HSA_depth_max_cnt--;
859
+ std::operator<<(out, '}');
860
+ return out;
861
+ }
862
+ inline static std::ostream& operator<<(std::ostream& out, const perf_sample_snapshot_v1_t& v)
863
+ {
864
+ std::operator<<(out, '{');
865
+ HSA_depth_max_cnt++;
866
+ if (HSA_depth_max == -1 || HSA_depth_max_cnt <= HSA_depth_max) {
867
+ if (std::string("perf_sample_snapshot_v1_t::correlation_id").find(HSA_structs_regex) != std::string::npos) {
868
+ std::operator<<(out, "correlation_id=");
869
+ roctracer::hsa_support::detail::operator<<(out, v.correlation_id);
870
+ std::operator<<(out, ", ");
871
+ }
872
+ if (std::string("perf_sample_snapshot_v1_t::timestamp").find(HSA_structs_regex) != std::string::npos) {
873
+ std::operator<<(out, "timestamp=");
874
+ roctracer::hsa_support::detail::operator<<(out, v.timestamp);
875
+ std::operator<<(out, ", ");
876
+ }
877
+ if (std::string("perf_sample_snapshot_v1_t::perf_snapshot_data2").find(HSA_structs_regex) != std::string::npos) {
878
+ std::operator<<(out, "perf_snapshot_data2=");
879
+ roctracer::hsa_support::detail::operator<<(out, v.perf_snapshot_data2);
880
+ std::operator<<(out, ", ");
881
+ }
882
+ if (std::string("perf_sample_snapshot_v1_t::perf_snapshot_data1").find(HSA_structs_regex) != std::string::npos) {
883
+ std::operator<<(out, "perf_snapshot_data1=");
884
+ roctracer::hsa_support::detail::operator<<(out, v.perf_snapshot_data1);
885
+ std::operator<<(out, ", ");
886
+ }
887
+ if (std::string("perf_sample_snapshot_v1_t::perf_snapshot_data").find(HSA_structs_regex) != std::string::npos) {
888
+ std::operator<<(out, "perf_snapshot_data=");
889
+ roctracer::hsa_support::detail::operator<<(out, v.perf_snapshot_data);
890
+ std::operator<<(out, ", ");
891
+ }
892
+ if (std::string("perf_sample_snapshot_v1_t::hw_id").find(HSA_structs_regex) != std::string::npos) {
893
+ std::operator<<(out, "hw_id=");
894
+ roctracer::hsa_support::detail::operator<<(out, v.hw_id);
895
+ std::operator<<(out, ", ");
896
+ }
897
+ if (std::string("perf_sample_snapshot_v1_t::reserved").find(HSA_structs_regex) != std::string::npos) {
898
+ std::operator<<(out, "reserved=");
899
+ roctracer::hsa_support::detail::operator<<(out, v.reserved);
900
+ std::operator<<(out, ", ");
901
+ }
902
+ if (std::string("perf_sample_snapshot_v1_t::chiplet").find(HSA_structs_regex) != std::string::npos) {
903
+ std::operator<<(out, "chiplet=");
904
+ roctracer::hsa_support::detail::operator<<(out, v.chiplet);
905
+ std::operator<<(out, ", ");
906
+ }
907
+ if (std::string("perf_sample_snapshot_v1_t::wave_in_wg").find(HSA_structs_regex) != std::string::npos) {
908
+ std::operator<<(out, "wave_in_wg=");
909
+ roctracer::hsa_support::detail::operator<<(out, v.wave_in_wg);
910
+ std::operator<<(out, ", ");
911
+ }
912
+ if (std::string("perf_sample_snapshot_v1_t::workgroup_id_z").find(HSA_structs_regex) != std::string::npos) {
913
+ std::operator<<(out, "workgroup_id_z=");
914
+ roctracer::hsa_support::detail::operator<<(out, v.workgroup_id_z);
915
+ std::operator<<(out, ", ");
916
+ }
917
+ if (std::string("perf_sample_snapshot_v1_t::workgroup_id_y").find(HSA_structs_regex) != std::string::npos) {
918
+ std::operator<<(out, "workgroup_id_y=");
919
+ roctracer::hsa_support::detail::operator<<(out, v.workgroup_id_y);
920
+ std::operator<<(out, ", ");
921
+ }
922
+ if (std::string("perf_sample_snapshot_v1_t::workgroup_id_x").find(HSA_structs_regex) != std::string::npos) {
923
+ std::operator<<(out, "workgroup_id_x=");
924
+ roctracer::hsa_support::detail::operator<<(out, v.workgroup_id_x);
925
+ std::operator<<(out, ", ");
926
+ }
927
+ if (std::string("perf_sample_snapshot_v1_t::exec_mask").find(HSA_structs_regex) != std::string::npos) {
928
+ std::operator<<(out, "exec_mask=");
929
+ roctracer::hsa_support::detail::operator<<(out, v.exec_mask);
930
+ std::operator<<(out, ", ");
931
+ }
932
+ if (std::string("perf_sample_snapshot_v1_t::pc").find(HSA_structs_regex) != std::string::npos) {
933
+ std::operator<<(out, "pc=");
934
+ roctracer::hsa_support::detail::operator<<(out, v.pc);
935
+ }
936
+ };
937
+ HSA_depth_max_cnt--;
938
+ std::operator<<(out, '}');
939
+ return out;
940
+ }
941
+ inline static std::ostream& operator<<(std::ostream& out, const hsa_ven_amd_pcs_t& v)
942
+ {
943
+ std::operator<<(out, '{');
944
+ HSA_depth_max_cnt++;
945
+ if (HSA_depth_max == -1 || HSA_depth_max_cnt <= HSA_depth_max) {
946
+ if (std::string("hsa_ven_amd_pcs_t::handle").find(HSA_structs_regex) != std::string::npos) {
947
+ std::operator<<(out, "handle=");
948
+ roctracer::hsa_support::detail::operator<<(out, v.handle);
949
+ }
950
+ };
951
+ HSA_depth_max_cnt--;
952
+ std::operator<<(out, '}');
953
+ return out;
954
+ }
955
+ inline static std::ostream& operator<<(std::ostream& out, const hsa_ven_amd_pcs_configuration_t& v)
956
+ {
957
+ std::operator<<(out, '{');
958
+ HSA_depth_max_cnt++;
959
+ if (HSA_depth_max == -1 || HSA_depth_max_cnt <= HSA_depth_max) {
960
+ if (std::string("hsa_ven_amd_pcs_configuration_t::flags").find(HSA_structs_regex) != std::string::npos) {
961
+ std::operator<<(out, "flags=");
962
+ roctracer::hsa_support::detail::operator<<(out, v.flags);
963
+ std::operator<<(out, ", ");
964
+ }
965
+ if (std::string("hsa_ven_amd_pcs_configuration_t::max_interval").find(HSA_structs_regex) != std::string::npos) {
966
+ std::operator<<(out, "max_interval=");
967
+ roctracer::hsa_support::detail::operator<<(out, v.max_interval);
968
+ std::operator<<(out, ", ");
969
+ }
970
+ if (std::string("hsa_ven_amd_pcs_configuration_t::min_interval").find(HSA_structs_regex) != std::string::npos) {
971
+ std::operator<<(out, "min_interval=");
972
+ roctracer::hsa_support::detail::operator<<(out, v.min_interval);
973
+ std::operator<<(out, ", ");
974
+ }
975
+ if (std::string("hsa_ven_amd_pcs_configuration_t::units").find(HSA_structs_regex) != std::string::npos) {
976
+ std::operator<<(out, "units=");
977
+ roctracer::hsa_support::detail::operator<<(out, v.units);
978
+ std::operator<<(out, ", ");
979
+ }
980
+ if (std::string("hsa_ven_amd_pcs_configuration_t::method").find(HSA_structs_regex) != std::string::npos) {
981
+ std::operator<<(out, "method=");
982
+ roctracer::hsa_support::detail::operator<<(out, v.method);
983
+ }
984
+ };
985
+ HSA_depth_max_cnt--;
986
+ std::operator<<(out, '}');
987
+ return out;
988
+ }
989
+ inline static std::ostream& operator<<(std::ostream& out, const hsa_ven_amd_pc_sampling_1_00_pfn_t& v)
990
+ {
991
+ std::operator<<(out, '{');
992
+ HSA_depth_max_cnt++;
993
+ if (HSA_depth_max == -1 || HSA_depth_max_cnt <= HSA_depth_max) {
994
+ if (std::string("hsa_ven_amd_pc_sampling_1_00_pfn_t::hsa_ven_amd_pcs_flush").find(HSA_structs_regex) != std::string::npos) {
995
+ std::operator<<(out, "hsa_ven_amd_pcs_flush=");
996
+ roctracer::hsa_support::detail::operator<<(out, v.hsa_ven_amd_pcs_flush);
997
+ std::operator<<(out, ", ");
998
+ }
999
+ if (std::string("hsa_ven_amd_pc_sampling_1_00_pfn_t::hsa_ven_amd_pcs_stop").find(HSA_structs_regex) != std::string::npos) {
1000
+ std::operator<<(out, "hsa_ven_amd_pcs_stop=");
1001
+ roctracer::hsa_support::detail::operator<<(out, v.hsa_ven_amd_pcs_stop);
1002
+ std::operator<<(out, ", ");
1003
+ }
1004
+ if (std::string("hsa_ven_amd_pc_sampling_1_00_pfn_t::hsa_ven_amd_pcs_start").find(HSA_structs_regex) != std::string::npos) {
1005
+ std::operator<<(out, "hsa_ven_amd_pcs_start=");
1006
+ roctracer::hsa_support::detail::operator<<(out, v.hsa_ven_amd_pcs_start);
1007
+ std::operator<<(out, ", ");
1008
+ }
1009
+ if (std::string("hsa_ven_amd_pc_sampling_1_00_pfn_t::hsa_ven_amd_pcs_destroy").find(HSA_structs_regex) != std::string::npos) {
1010
+ std::operator<<(out, "hsa_ven_amd_pcs_destroy=");
1011
+ roctracer::hsa_support::detail::operator<<(out, v.hsa_ven_amd_pcs_destroy);
1012
+ }
1013
+ };
1014
+ HSA_depth_max_cnt--;
1015
+ std::operator<<(out, '}');
1016
+ return out;
1017
+ }
788
1018
  inline static std::ostream& operator<<(std::ostream& out, const hsa_amd_vendor_packet_header_t& v)
789
1019
  {
790
1020
  std::operator<<(out, '{');
@@ -1360,6 +1590,36 @@ inline static std::ostream& operator<<(std::ostream& out, const hsa_ext_images_1
1360
1590
  return out;
1361
1591
  }
1362
1592
 
1593
+ inline static std::ostream& operator<<(std::ostream& out, const perf_sample_hosttrap_v1_t& v)
1594
+ {
1595
+ roctracer::hsa_support::detail::operator<<(out, v);
1596
+ return out;
1597
+ }
1598
+
1599
+ inline static std::ostream& operator<<(std::ostream& out, const perf_sample_snapshot_v1_t& v)
1600
+ {
1601
+ roctracer::hsa_support::detail::operator<<(out, v);
1602
+ return out;
1603
+ }
1604
+
1605
+ inline static std::ostream& operator<<(std::ostream& out, const hsa_ven_amd_pcs_t& v)
1606
+ {
1607
+ roctracer::hsa_support::detail::operator<<(out, v);
1608
+ return out;
1609
+ }
1610
+
1611
+ inline static std::ostream& operator<<(std::ostream& out, const hsa_ven_amd_pcs_configuration_t& v)
1612
+ {
1613
+ roctracer::hsa_support::detail::operator<<(out, v);
1614
+ return out;
1615
+ }
1616
+
1617
+ inline static std::ostream& operator<<(std::ostream& out, const hsa_ven_amd_pc_sampling_1_00_pfn_t& v)
1618
+ {
1619
+ roctracer::hsa_support::detail::operator<<(out, v);
1620
+ return out;
1621
+ }
1622
+
1363
1623
  inline static std::ostream& operator<<(std::ostream& out, const hsa_amd_vendor_packet_header_t& v)
1364
1624
  {
1365
1625
  roctracer::hsa_support::detail::operator<<(out, v);
@@ -22,9 +22,9 @@
22
22
 
23
23
  /* HSA API tracing primitives
24
24
  'CoreApi', header 'hsa.h', 125 funcs
25
- 'AmdExt', header 'hsa_ext_amd.h', 68 funcs
25
+ 'AmdExt', header 'hsa_ext_amd.h', 70 funcs
26
26
  'ImageExt', header 'hsa_ext_image.h', 13 funcs
27
- 'AmdExt', header 'hsa_api_trace.h', 68 funcs
27
+ 'AmdExt', header 'hsa_api_trace.h', 70 funcs
28
28
  */
29
29
 
30
30
  #ifndef HSA_PROF_STR_H_
@@ -229,24 +229,26 @@ enum hsa_api_id_t {
229
229
  HSA_API_ID_hsa_amd_vmem_retain_alloc_handle = 190,
230
230
  HSA_API_ID_hsa_amd_vmem_get_alloc_properties_from_handle = 191,
231
231
  HSA_API_ID_hsa_amd_agent_set_async_scratch_limit = 192,
232
+ HSA_API_ID_hsa_amd_queue_get_info = 193,
233
+ HSA_API_ID_hsa_amd_vmem_address_reserve_align = 194,
232
234
 
233
235
  /* block: ImageExt API */
234
- HSA_API_ID_hsa_ext_image_get_capability = 193,
235
- HSA_API_ID_hsa_ext_image_data_get_info = 194,
236
- HSA_API_ID_hsa_ext_image_create = 195,
237
- HSA_API_ID_hsa_ext_image_import = 196,
238
- HSA_API_ID_hsa_ext_image_export = 197,
239
- HSA_API_ID_hsa_ext_image_copy = 198,
240
- HSA_API_ID_hsa_ext_image_clear = 199,
241
- HSA_API_ID_hsa_ext_image_destroy = 200,
242
- HSA_API_ID_hsa_ext_sampler_create = 201,
243
- HSA_API_ID_hsa_ext_sampler_destroy = 202,
244
- HSA_API_ID_hsa_ext_image_get_capability_with_layout = 203,
245
- HSA_API_ID_hsa_ext_image_data_get_info_with_layout = 204,
246
- HSA_API_ID_hsa_ext_image_create_with_layout = 205,
236
+ HSA_API_ID_hsa_ext_image_get_capability = 195,
237
+ HSA_API_ID_hsa_ext_image_data_get_info = 196,
238
+ HSA_API_ID_hsa_ext_image_create = 197,
239
+ HSA_API_ID_hsa_ext_image_import = 198,
240
+ HSA_API_ID_hsa_ext_image_export = 199,
241
+ HSA_API_ID_hsa_ext_image_copy = 200,
242
+ HSA_API_ID_hsa_ext_image_clear = 201,
243
+ HSA_API_ID_hsa_ext_image_destroy = 202,
244
+ HSA_API_ID_hsa_ext_sampler_create = 203,
245
+ HSA_API_ID_hsa_ext_sampler_destroy = 204,
246
+ HSA_API_ID_hsa_ext_image_get_capability_with_layout = 205,
247
+ HSA_API_ID_hsa_ext_image_data_get_info_with_layout = 206,
248
+ HSA_API_ID_hsa_ext_image_create_with_layout = 207,
247
249
 
248
- HSA_API_ID_DISPATCH = 206,
249
- HSA_API_ID_NUMBER = 207,
250
+ HSA_API_ID_DISPATCH = 208,
251
+ HSA_API_ID_NUMBER = 209,
250
252
  };
251
253
  /* Declarations of APIs intended for use only by tools. */
252
254
  typedef void (*hsa_amd_queue_intercept_packet_writer)(const void*, uint64_t);
@@ -261,9 +263,9 @@ struct hsa_api_data_t {
261
263
  uint32_t phase;
262
264
  union {
263
265
  uint64_t uint64_t_retval;
264
- uint32_t uint32_t_retval;
265
- hsa_signal_value_t hsa_signal_value_t_retval;
266
266
  hsa_status_t hsa_status_t_retval;
267
+ hsa_signal_value_t hsa_signal_value_t_retval;
268
+ uint32_t uint32_t_retval;
267
269
  };
268
270
  union {
269
271
  /* block: CoreApi API */
@@ -1236,6 +1238,18 @@ struct hsa_api_data_t {
1236
1238
  hsa_agent_t agent;
1237
1239
  size_t threshold;
1238
1240
  } hsa_amd_agent_set_async_scratch_limit;
1241
+ struct {
1242
+ hsa_queue_t* queue;
1243
+ hsa_queue_info_attribute_t attribute;
1244
+ void* value;
1245
+ } hsa_amd_queue_get_info;
1246
+ struct {
1247
+ void** va;
1248
+ size_t size;
1249
+ uint64_t address;
1250
+ uint64_t alignment;
1251
+ uint64_t flags;
1252
+ } hsa_amd_vmem_address_reserve_align;
1239
1253
 
1240
1254
  /* block: ImageExt API */
1241
1255
  struct {
@@ -2888,6 +2902,24 @@ inline std::ostream& operator<< (std::ostream& out, const hsa_api_data_pair_t& d
2888
2902
  out << ") = " << api_data.hsa_status_t_retval;
2889
2903
  break;
2890
2904
  }
2905
+ case HSA_API_ID_hsa_amd_queue_get_info: {
2906
+ out << "hsa_amd_queue_get_info(";
2907
+ out << api_data.args.hsa_amd_queue_get_info.queue << ", ";
2908
+ out << api_data.args.hsa_amd_queue_get_info.attribute << ", ";
2909
+ out << api_data.args.hsa_amd_queue_get_info.value;
2910
+ out << ") = " << api_data.hsa_status_t_retval;
2911
+ break;
2912
+ }
2913
+ case HSA_API_ID_hsa_amd_vmem_address_reserve_align: {
2914
+ out << "hsa_amd_vmem_address_reserve_align(";
2915
+ out << api_data.args.hsa_amd_vmem_address_reserve_align.va << ", ";
2916
+ out << api_data.args.hsa_amd_vmem_address_reserve_align.size << ", ";
2917
+ out << api_data.args.hsa_amd_vmem_address_reserve_align.address << ", ";
2918
+ out << api_data.args.hsa_amd_vmem_address_reserve_align.alignment << ", ";
2919
+ out << api_data.args.hsa_amd_vmem_address_reserve_align.flags;
2920
+ out << ") = " << api_data.hsa_status_t_retval;
2921
+ break;
2922
+ }
2891
2923
 
2892
2924
  /* block: ImageExt API */
2893
2925
  case HSA_API_ID_hsa_ext_image_get_capability: {
Binary file