tt-perf-report 1.1.5__py3-none-any.whl → 1.1.7__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of tt-perf-report might be problematic. Click here for more details.

@@ -695,7 +695,7 @@ def color_row(op_data, percentage, min_percentage):
695
695
  return op_data
696
696
 
697
697
 
698
- def print_performance_table(rows, headers, col_widths, device_ops, host_ops):
698
+ def print_performance_table(rows, headers, col_widths, device_ops, host_ops, signpost_count):
699
699
  print("\n🚀 Performance Report 🚀\n========================\n")
700
700
 
701
701
  print(" ".join(pad_string(header, col_widths[i], align="left") for i, header in enumerate(headers)))
@@ -716,7 +716,7 @@ def print_performance_table(rows, headers, col_widths, device_ops, host_ops):
716
716
  "ID": Cell(""),
717
717
  "Total %": Cell(100.0, unit="%", decimals=1),
718
718
  "Bound": Cell(""),
719
- "OP Code": Cell(f"{device_ops} device ops, {host_ops} host ops"),
719
+ "OP Code": Cell(f"{device_ops} device ops, {host_ops} host ops, {signpost_count} signposts"),
720
720
  "Device Time": Cell(total_device_time, unit="us", decimals=0),
721
721
  "Op-to-Op Gap": Cell(total_visible_gap, unit="us", decimals=0),
722
722
  }
@@ -866,11 +866,14 @@ def generate_matmul_advice(op_data):
866
866
 
867
867
 
868
868
  def generate_stacked_report(rows, visible_headers, stack_by_input0_layout:bool = False):
869
+ # Ensure we filter out signpost rows before processing because they aren't useful in the stacked report
870
+ filtered_rows = filter_signposts(rows)
871
+
869
872
  if stack_by_input0_layout:
870
873
  visible_headers.append("Input 0 Memory")
871
874
 
872
875
  # Create a pandas DataFrame from rows and headers
873
- data = {header: [row[header].raw_value for row in rows] for header in visible_headers}
876
+ data = {header: [row[header].raw_value for row in filtered_rows] for header in visible_headers}
874
877
  df = pd.DataFrame(data)
875
878
 
876
879
  if (stack_by_input0_layout):
@@ -882,7 +885,7 @@ def generate_stacked_report(rows, visible_headers, stack_by_input0_layout:bool =
882
885
  # Group by the joined OP Code and aggregate the data
883
886
  stacked_df = df.groupby("OP Code Joined").agg(
884
887
  Device_Time_Sum_us=("Device Time", "sum"),
885
- Ops_Count=("Device Time", "count"),
888
+ Ops_Count=("Device Time", "size"),
886
889
  Flops_min=("FLOPs %", "min"),
887
890
  Flops_max=("FLOPs %", "max"),
888
891
  Flops_mean=("FLOPs %", "mean"),
@@ -897,7 +900,6 @@ def generate_stacked_report(rows, visible_headers, stack_by_input0_layout:bool =
897
900
  else:
898
901
  stacked_df["%"] = 0
899
902
 
900
- stacked_df["%"] = (stacked_df["Device_Time_Sum_us"] / total_device_time) * 100
901
903
  # Reorder columns to move Device_Time_Percentage to be the 3rd column
902
904
  cols = stacked_df.columns.tolist()
903
905
  cols.insert(0, cols.pop(cols.index("%")))
@@ -1054,6 +1056,8 @@ def filter_by_id_range(rows, id_range):
1054
1056
  def filter_host_ops(rows):
1055
1057
  return [row for row in rows if not is_host_op(row)]
1056
1058
 
1059
+ def filter_signposts(rows):
1060
+ return [row for row in rows if not is_signpost_op(row)]
1057
1061
 
1058
1062
  def main():
1059
1063
  args, id_range = parse_args()
@@ -1137,18 +1141,26 @@ def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage,
1137
1141
  prev_row = None
1138
1142
  device_ops = 0
1139
1143
  host_ops = 0
1144
+ signpost_count = 0
1140
1145
  for _, row in df.iterrows():
1141
1146
  op_data, current_gap = analyze_op(row, prev_row, csv_format)
1142
1147
  op_data["ID"] = Cell(row["ORIGINAL_ROW"]) # Use the original row number
1143
1148
  op_data["Global Call Count"] = Cell(row["GLOBAL CALL COUNT"])
1144
1149
  if raw_op_codes:
1145
1150
  op_data["Raw OP Code"] = Cell(row["OP CODE"])
1151
+
1152
+ # OP TYPE column is only present in raw format/df and is not part of the op_data/rows dictionary used later
1153
+ # append " (signpost)" to the OP Code if this row is a signpost to distinguish it
1154
+ if "signpost" in row["OP TYPE"]:
1155
+ op_data["OP Code"].raw_value = f"{row['OP CODE']} (signpost)"
1146
1156
  rows.append(op_data)
1147
1157
  prev_row = row
1148
1158
 
1149
- # Count device and host ops
1159
+ # Count device and host ops, ignore signposts
1150
1160
  if is_host_op(op_data):
1151
1161
  host_ops += 1
1162
+ elif is_signpost_op(op_data):
1163
+ signpost_count += 1
1152
1164
  else:
1153
1165
  device_ops += 1
1154
1166
 
@@ -1216,7 +1228,7 @@ def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage,
1216
1228
  max(max(visible_length(str(row[header])) for row in rows), visible_length(header))
1217
1229
  for header in visible_headers
1218
1230
  ]
1219
- print_performance_table(rows, visible_headers, col_widths, device_ops, host_ops)
1231
+ print_performance_table(rows, visible_headers, col_widths, device_ops, host_ops, signpost_count)
1220
1232
  if not no_advice:
1221
1233
  print_advice_section(rows, visible_headers, col_widths)
1222
1234
 
@@ -1240,6 +1252,8 @@ def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage,
1240
1252
  def is_host_op(op_data):
1241
1253
  return "(torch)" in op_data["OP Code"].raw_value
1242
1254
 
1255
+ def is_signpost_op(op_data):
1256
+ return "signpost" in op_data["OP Code"].raw_value
1243
1257
 
1244
1258
  if __name__ == "__main__":
1245
1259
  main()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tt-perf-report
3
- Version: 1.1.5
3
+ Version: 1.1.7
4
4
  Summary: This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities.
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -0,0 +1,9 @@
1
+ tt_perf_report/__init__.py,sha256=-j4iFYebIwgdS8uphk8-M6zasRqGBL3CQGnJH9keRuI,92
2
+ tt_perf_report/perf_report.py,sha256=V1oJ_cyJyFm4UGMzwMBn3XcDgvE1YoYnvdGvMldjLQE,50697
3
+ tt_perf_report-1.1.7.dist-info/licenses/LICENSE,sha256=6dZGjPECz_ULS-sf40FLlt6OmQFcrRvmzG5mJRZCQ5I,11825
4
+ tt_perf_report-1.1.7.dist-info/licenses/LICENSE_understanding.txt,sha256=pymi-yb_RvYM9p2ZA4iSNsImcvhDBBxlGuJCY9dTq7M,233
5
+ tt_perf_report-1.1.7.dist-info/METADATA,sha256=VP1UPkbAKmOXhj6h4KBU1BofDAZ17JKJRLnT3qHI2zY,18393
6
+ tt_perf_report-1.1.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
7
+ tt_perf_report-1.1.7.dist-info/entry_points.txt,sha256=ReAziglcjbAkPbklqheUISkfoEVI5ptlFrBAJTIk5dI,67
8
+ tt_perf_report-1.1.7.dist-info/top_level.txt,sha256=mEQ-BK3rRbmz9QyWitTCLy2xwmC5rmJno_TY_H9s9CE,15
9
+ tt_perf_report-1.1.7.dist-info/RECORD,,
@@ -1,9 +0,0 @@
1
- tt_perf_report/__init__.py,sha256=-j4iFYebIwgdS8uphk8-M6zasRqGBL3CQGnJH9keRuI,92
2
- tt_perf_report/perf_report.py,sha256=Ziln0oeY7zQVUcVLaHwkh17taz6CGyR9r32Yd3PhKko,49951
3
- tt_perf_report-1.1.5.dist-info/licenses/LICENSE,sha256=6dZGjPECz_ULS-sf40FLlt6OmQFcrRvmzG5mJRZCQ5I,11825
4
- tt_perf_report-1.1.5.dist-info/licenses/LICENSE_understanding.txt,sha256=pymi-yb_RvYM9p2ZA4iSNsImcvhDBBxlGuJCY9dTq7M,233
5
- tt_perf_report-1.1.5.dist-info/METADATA,sha256=iGf6JsPN_wjHpDlSLC99Pso9J9Ez2x7wZHGK0JY-yR0,18393
6
- tt_perf_report-1.1.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
7
- tt_perf_report-1.1.5.dist-info/entry_points.txt,sha256=ReAziglcjbAkPbklqheUISkfoEVI5ptlFrBAJTIk5dI,67
8
- tt_perf_report-1.1.5.dist-info/top_level.txt,sha256=mEQ-BK3rRbmz9QyWitTCLy2xwmC5rmJno_TY_H9s9CE,15
9
- tt_perf_report-1.1.5.dist-info/RECORD,,