tt-perf-report 1.0.4__py3-none-any.whl → 1.0.6__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tt-perf-report might be problematic. Click here for more details.

@@ -742,8 +742,28 @@ def merge_device_rows(df):
742
742
  device_ids = sorted(block_by_device.keys())
743
743
  merged_blocks = []
744
744
 
745
- for blocks in zip(*[block_by_device[device_id] for device_id in device_ids]):
746
- op_name = blocks[0][0]
745
+ global_index = 0
746
+ while max(len(block_by_device[device_id]) for device_id in device_ids) > 0:
747
+ blocks = []
748
+ op_name = None
749
+ missing_devices = []
750
+ for device_id in device_ids:
751
+ if not len(block_by_device[device_id]):
752
+ print(colored(f"Warning: Device {device_id} is missing operation {op_name} at index {global_index}", "yellow"))
753
+ continue
754
+ if op_name is None:
755
+ op_name = block_by_device[device_id][0][0]
756
+ elif op_name != block_by_device[device_id][0][0]:
757
+ missing_devices.append(device_id)
758
+ continue
759
+
760
+ blocks.append(block_by_device[device_id].pop(0))
761
+
762
+ if missing_devices:
763
+ print(colored(f"Warning: {op_name} at index {global_index} not present in CSV for {len(missing_devices)} devices {missing_devices} - do not trust data for this op or directly subsequent ops with the same name", "yellow"))
764
+
765
+ if not blocks:
766
+ break
747
767
 
748
768
  if "AllGather" in op_name or "ReduceScatter" in op_name:
749
769
  # For collective ops, take the row with minimum duration
@@ -754,6 +774,8 @@ def merge_device_rows(df):
754
774
  max_duration_block = max(blocks, key=lambda x: x[1]["DEVICE KERNEL DURATION [ns]"])
755
775
  merged_blocks.append(max_duration_block[1])
756
776
 
777
+ global_index += 1
778
+
757
779
  return pd.DataFrame(merged_blocks)
758
780
 
759
781
 
@@ -792,11 +814,15 @@ def filter_by_id_range(rows, id_range):
792
814
  return rows
793
815
 
794
816
 
817
+ def filter_host_ops(rows):
818
+ return [row for row in rows if not is_host_op(row)]
819
+
820
+
795
821
  def main():
796
822
  args, id_range = parse_args()
797
823
  generate_perf_report(
798
- args.csv_file, args.signpost, args.ignore_signposts, args.min_percentage, id_range, args.csv, args.no_advice, args.tracing_mode, args.raw_op_codes,
799
- )
824
+ args.csv_file, args.signpost, args.ignore_signposts, args.min_percentage, id_range, args.csv, args.no_advice,
825
+ args.tracing_mode, args.raw_op_codes, args.no_host_ops)
800
826
 
801
827
 
802
828
  def parse_args():
@@ -818,6 +844,8 @@ def parse_args():
818
844
  parser.add_argument("--no-advice", action="store_true", help="Only show the table section of the report")
819
845
  parser.add_argument("--tracing-mode", action="store_true", help="Do not sort when in tracing mode")
820
846
  parser.add_argument("--raw-op-codes", action="store_true", help="Include raw op codes in output")
847
+ parser.add_argument("--no-host-ops", action="store_true", help="Do not include host ops in output")
848
+
821
849
  args = parser.parse_args()
822
850
 
823
851
  # Set the global color_output variable
@@ -833,7 +861,9 @@ def parse_args():
833
861
  return args, id_range
834
862
 
835
863
 
836
- def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage, id_range, csv_output_file, no_advice, tracing_mode, raw_op_codes):
864
+ def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage,
865
+ id_range, csv_output_file, no_advice, tracing_mode,
866
+ raw_op_codes, no_host_ops):
837
867
  df = pd.read_csv(csv_file, low_memory=False)
838
868
 
839
869
  # Add a column for original row numbers
@@ -867,7 +897,7 @@ def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage, i
867
897
  prev_row = row
868
898
 
869
899
  # Count device and host ops
870
- if "(torch)" in op_data["OP Code"].raw_value:
900
+ if is_host_op(op_data):
871
901
  host_ops += 1
872
902
  else:
873
903
  device_ops += 1
@@ -878,6 +908,9 @@ def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage, i
878
908
  # Filter rows based on id_range
879
909
  rows = filter_by_id_range(rows, id_range)
880
910
 
911
+ if no_host_ops:
912
+ rows = filter_host_ops(rows)
913
+
881
914
  # Recalculate derived columns after filtering
882
915
  add_derived_columns(rows)
883
916
 
@@ -933,5 +966,9 @@ def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage, i
933
966
  print_advice_section(rows, visible_headers, col_widths)
934
967
 
935
968
 
969
+ def is_host_op(op_data):
970
+ return "(torch)" in op_data["OP Code"].raw_value
971
+
972
+
936
973
  if __name__ == "__main__":
937
974
  main()
@@ -0,0 +1,3 @@
1
+ For the avoidance of doubt, this software assists in programming Tenstorrent products.
2
+
3
+ However, making, using, or selling hardware, models, or IP may require the license of rights (such as patent rights) from Tenstorrent or others.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: tt-perf-report
3
- Version: 1.0.4
3
+ Version: 1.0.6
4
4
  Summary: This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities.
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -216,6 +216,7 @@ Project-URL: Repository, https://github.com/tenstorrent/tt-perf-report
216
216
  Keywords: tenstorrent,tt-metal
217
217
  Description-Content-Type: text/markdown
218
218
  License-File: LICENSE
219
+ License-File: LICENSE_understanding.txt
219
220
  Requires-Dist: pandas
220
221
 
221
222
  # Performance Report Analysis Tool
@@ -0,0 +1,9 @@
1
+ tt_perf_report/__init__.py,sha256=-j4iFYebIwgdS8uphk8-M6zasRqGBL3CQGnJH9keRuI,92
2
+ tt_perf_report/perf_report.py,sha256=OAHGjdxnq5BOYgodTR1v4Ec3GksoKuBXAGA6eeJi4zY,38265
3
+ tt_perf_report-1.0.6.dist-info/LICENSE,sha256=6dZGjPECz_ULS-sf40FLlt6OmQFcrRvmzG5mJRZCQ5I,11825
4
+ tt_perf_report-1.0.6.dist-info/LICENSE_understanding.txt,sha256=pymi-yb_RvYM9p2ZA4iSNsImcvhDBBxlGuJCY9dTq7M,233
5
+ tt_perf_report-1.0.6.dist-info/METADATA,sha256=NJ8q_4zz2URY9Tey2zcpv_YccFRuCGtWM-0BTnXLxVw,18345
6
+ tt_perf_report-1.0.6.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
7
+ tt_perf_report-1.0.6.dist-info/entry_points.txt,sha256=ReAziglcjbAkPbklqheUISkfoEVI5ptlFrBAJTIk5dI,67
8
+ tt_perf_report-1.0.6.dist-info/top_level.txt,sha256=mEQ-BK3rRbmz9QyWitTCLy2xwmC5rmJno_TY_H9s9CE,15
9
+ tt_perf_report-1.0.6.dist-info/RECORD,,
@@ -1,8 +0,0 @@
1
- tt_perf_report/__init__.py,sha256=-j4iFYebIwgdS8uphk8-M6zasRqGBL3CQGnJH9keRuI,92
2
- tt_perf_report/perf_report.py,sha256=MJHgosbw7kD7MrJ6-ZtLhdny_iX9bR_UB-ZaHoyjTsE,36952
3
- tt_perf_report-1.0.4.dist-info/LICENSE,sha256=6dZGjPECz_ULS-sf40FLlt6OmQFcrRvmzG5mJRZCQ5I,11825
4
- tt_perf_report-1.0.4.dist-info/METADATA,sha256=fZKE2o9edCU0N6v-_tkUqSZ39j8mimeGBU_SIuuFi84,18305
5
- tt_perf_report-1.0.4.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
6
- tt_perf_report-1.0.4.dist-info/entry_points.txt,sha256=ReAziglcjbAkPbklqheUISkfoEVI5ptlFrBAJTIk5dI,67
7
- tt_perf_report-1.0.4.dist-info/top_level.txt,sha256=mEQ-BK3rRbmz9QyWitTCLy2xwmC5rmJno_TY_H9s9CE,15
8
- tt_perf_report-1.0.4.dist-info/RECORD,,