tt-perf-report 1.1.5__py3-none-any.whl → 1.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tt-perf-report might be problematic. Click here for more details.
- tt_perf_report/perf_report.py +21 -7
- {tt_perf_report-1.1.5.dist-info → tt_perf_report-1.1.7.dist-info}/METADATA +1 -1
- tt_perf_report-1.1.7.dist-info/RECORD +9 -0
- tt_perf_report-1.1.5.dist-info/RECORD +0 -9
- {tt_perf_report-1.1.5.dist-info → tt_perf_report-1.1.7.dist-info}/WHEEL +0 -0
- {tt_perf_report-1.1.5.dist-info → tt_perf_report-1.1.7.dist-info}/entry_points.txt +0 -0
- {tt_perf_report-1.1.5.dist-info → tt_perf_report-1.1.7.dist-info}/licenses/LICENSE +0 -0
- {tt_perf_report-1.1.5.dist-info → tt_perf_report-1.1.7.dist-info}/licenses/LICENSE_understanding.txt +0 -0
- {tt_perf_report-1.1.5.dist-info → tt_perf_report-1.1.7.dist-info}/top_level.txt +0 -0
tt_perf_report/perf_report.py
CHANGED
|
@@ -695,7 +695,7 @@ def color_row(op_data, percentage, min_percentage):
|
|
|
695
695
|
return op_data
|
|
696
696
|
|
|
697
697
|
|
|
698
|
-
def print_performance_table(rows, headers, col_widths, device_ops, host_ops):
|
|
698
|
+
def print_performance_table(rows, headers, col_widths, device_ops, host_ops, signpost_count):
|
|
699
699
|
print("\n🚀 Performance Report 🚀\n========================\n")
|
|
700
700
|
|
|
701
701
|
print(" ".join(pad_string(header, col_widths[i], align="left") for i, header in enumerate(headers)))
|
|
@@ -716,7 +716,7 @@ def print_performance_table(rows, headers, col_widths, device_ops, host_ops):
|
|
|
716
716
|
"ID": Cell(""),
|
|
717
717
|
"Total %": Cell(100.0, unit="%", decimals=1),
|
|
718
718
|
"Bound": Cell(""),
|
|
719
|
-
"OP Code": Cell(f"{device_ops} device ops, {host_ops} host ops"),
|
|
719
|
+
"OP Code": Cell(f"{device_ops} device ops, {host_ops} host ops, {signpost_count} signposts"),
|
|
720
720
|
"Device Time": Cell(total_device_time, unit="us", decimals=0),
|
|
721
721
|
"Op-to-Op Gap": Cell(total_visible_gap, unit="us", decimals=0),
|
|
722
722
|
}
|
|
@@ -866,11 +866,14 @@ def generate_matmul_advice(op_data):
|
|
|
866
866
|
|
|
867
867
|
|
|
868
868
|
def generate_stacked_report(rows, visible_headers, stack_by_input0_layout:bool = False):
|
|
869
|
+
# Ensure we filter out signpost rows before processing because they aren't useful in the stacked report
|
|
870
|
+
filtered_rows = filter_signposts(rows)
|
|
871
|
+
|
|
869
872
|
if stack_by_input0_layout:
|
|
870
873
|
visible_headers.append("Input 0 Memory")
|
|
871
874
|
|
|
872
875
|
# Create a pandas DataFrame from rows and headers
|
|
873
|
-
data = {header: [row[header].raw_value for row in rows] for header in visible_headers}
|
|
876
|
+
data = {header: [row[header].raw_value for row in filtered_rows] for header in visible_headers}
|
|
874
877
|
df = pd.DataFrame(data)
|
|
875
878
|
|
|
876
879
|
if (stack_by_input0_layout):
|
|
@@ -882,7 +885,7 @@ def generate_stacked_report(rows, visible_headers, stack_by_input0_layout:bool =
|
|
|
882
885
|
# Group by the joined OP Code and aggregate the data
|
|
883
886
|
stacked_df = df.groupby("OP Code Joined").agg(
|
|
884
887
|
Device_Time_Sum_us=("Device Time", "sum"),
|
|
885
|
-
Ops_Count=("Device Time", "count"),
|
|
888
|
+
Ops_Count=("Device Time", "size"),
|
|
886
889
|
Flops_min=("FLOPs %", "min"),
|
|
887
890
|
Flops_max=("FLOPs %", "max"),
|
|
888
891
|
Flops_mean=("FLOPs %", "mean"),
|
|
@@ -897,7 +900,6 @@ def generate_stacked_report(rows, visible_headers, stack_by_input0_layout:bool =
|
|
|
897
900
|
else:
|
|
898
901
|
stacked_df["%"] = 0
|
|
899
902
|
|
|
900
|
-
stacked_df["%"] = (stacked_df["Device_Time_Sum_us"] / total_device_time) * 100
|
|
901
903
|
# Reorder columns to move Device_Time_Percentage to be the 3rd column
|
|
902
904
|
cols = stacked_df.columns.tolist()
|
|
903
905
|
cols.insert(0, cols.pop(cols.index("%")))
|
|
@@ -1054,6 +1056,8 @@ def filter_by_id_range(rows, id_range):
|
|
|
1054
1056
|
def filter_host_ops(rows):
|
|
1055
1057
|
return [row for row in rows if not is_host_op(row)]
|
|
1056
1058
|
|
|
1059
|
+
def filter_signposts(rows):
|
|
1060
|
+
return [row for row in rows if not is_signpost_op(row)]
|
|
1057
1061
|
|
|
1058
1062
|
def main():
|
|
1059
1063
|
args, id_range = parse_args()
|
|
@@ -1137,18 +1141,26 @@ def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage,
|
|
|
1137
1141
|
prev_row = None
|
|
1138
1142
|
device_ops = 0
|
|
1139
1143
|
host_ops = 0
|
|
1144
|
+
signpost_count = 0
|
|
1140
1145
|
for _, row in df.iterrows():
|
|
1141
1146
|
op_data, current_gap = analyze_op(row, prev_row, csv_format)
|
|
1142
1147
|
op_data["ID"] = Cell(row["ORIGINAL_ROW"]) # Use the original row number
|
|
1143
1148
|
op_data["Global Call Count"] = Cell(row["GLOBAL CALL COUNT"])
|
|
1144
1149
|
if raw_op_codes:
|
|
1145
1150
|
op_data["Raw OP Code"] = Cell(row["OP CODE"])
|
|
1151
|
+
|
|
1152
|
+
# OP TYPE column is only present in raw format/df and is not part of the op_data/rows dictionary used later
|
|
1153
|
+
# append " (signpost)" to the OP Code if this row is a signpost to distinguish it
|
|
1154
|
+
if "signpost" in row["OP TYPE"]:
|
|
1155
|
+
op_data["OP Code"].raw_value = f"{row['OP CODE']} (signpost)"
|
|
1146
1156
|
rows.append(op_data)
|
|
1147
1157
|
prev_row = row
|
|
1148
1158
|
|
|
1149
|
-
# Count device and host ops
|
|
1159
|
+
# Count device and host ops, ignore signposts
|
|
1150
1160
|
if is_host_op(op_data):
|
|
1151
1161
|
host_ops += 1
|
|
1162
|
+
elif is_signpost_op(op_data):
|
|
1163
|
+
signpost_count += 1
|
|
1152
1164
|
else:
|
|
1153
1165
|
device_ops += 1
|
|
1154
1166
|
|
|
@@ -1216,7 +1228,7 @@ def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage,
|
|
|
1216
1228
|
max(max(visible_length(str(row[header])) for row in rows), visible_length(header))
|
|
1217
1229
|
for header in visible_headers
|
|
1218
1230
|
]
|
|
1219
|
-
print_performance_table(rows, visible_headers, col_widths, device_ops, host_ops)
|
|
1231
|
+
print_performance_table(rows, visible_headers, col_widths, device_ops, host_ops, signpost_count)
|
|
1220
1232
|
if not no_advice:
|
|
1221
1233
|
print_advice_section(rows, visible_headers, col_widths)
|
|
1222
1234
|
|
|
@@ -1240,6 +1252,8 @@ def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage,
|
|
|
1240
1252
|
def is_host_op(op_data):
|
|
1241
1253
|
return "(torch)" in op_data["OP Code"].raw_value
|
|
1242
1254
|
|
|
1255
|
+
def is_signpost_op(op_data):
|
|
1256
|
+
return "signpost" in op_data["OP Code"].raw_value
|
|
1243
1257
|
|
|
1244
1258
|
if __name__ == "__main__":
|
|
1245
1259
|
main()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tt-perf-report
|
|
3
|
-
Version: 1.1.5
|
|
3
|
+
Version: 1.1.7
|
|
4
4
|
Summary: This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities.
|
|
5
5
|
License: Apache License
|
|
6
6
|
Version 2.0, January 2004
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
tt_perf_report/__init__.py,sha256=-j4iFYebIwgdS8uphk8-M6zasRqGBL3CQGnJH9keRuI,92
|
|
2
|
+
tt_perf_report/perf_report.py,sha256=V1oJ_cyJyFm4UGMzwMBn3XcDgvE1YoYnvdGvMldjLQE,50697
|
|
3
|
+
tt_perf_report-1.1.7.dist-info/licenses/LICENSE,sha256=6dZGjPECz_ULS-sf40FLlt6OmQFcrRvmzG5mJRZCQ5I,11825
|
|
4
|
+
tt_perf_report-1.1.7.dist-info/licenses/LICENSE_understanding.txt,sha256=pymi-yb_RvYM9p2ZA4iSNsImcvhDBBxlGuJCY9dTq7M,233
|
|
5
|
+
tt_perf_report-1.1.7.dist-info/METADATA,sha256=VP1UPkbAKmOXhj6h4KBU1BofDAZ17JKJRLnT3qHI2zY,18393
|
|
6
|
+
tt_perf_report-1.1.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
7
|
+
tt_perf_report-1.1.7.dist-info/entry_points.txt,sha256=ReAziglcjbAkPbklqheUISkfoEVI5ptlFrBAJTIk5dI,67
|
|
8
|
+
tt_perf_report-1.1.7.dist-info/top_level.txt,sha256=mEQ-BK3rRbmz9QyWitTCLy2xwmC5rmJno_TY_H9s9CE,15
|
|
9
|
+
tt_perf_report-1.1.7.dist-info/RECORD,,
|
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
tt_perf_report/__init__.py,sha256=-j4iFYebIwgdS8uphk8-M6zasRqGBL3CQGnJH9keRuI,92
|
|
2
|
-
tt_perf_report/perf_report.py,sha256=Ziln0oeY7zQVUcVLaHwkh17taz6CGyR9r32Yd3PhKko,49951
|
|
3
|
-
tt_perf_report-1.1.5.dist-info/licenses/LICENSE,sha256=6dZGjPECz_ULS-sf40FLlt6OmQFcrRvmzG5mJRZCQ5I,11825
|
|
4
|
-
tt_perf_report-1.1.5.dist-info/licenses/LICENSE_understanding.txt,sha256=pymi-yb_RvYM9p2ZA4iSNsImcvhDBBxlGuJCY9dTq7M,233
|
|
5
|
-
tt_perf_report-1.1.5.dist-info/METADATA,sha256=iGf6JsPN_wjHpDlSLC99Pso9J9Ez2x7wZHGK0JY-yR0,18393
|
|
6
|
-
tt_perf_report-1.1.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
7
|
-
tt_perf_report-1.1.5.dist-info/entry_points.txt,sha256=ReAziglcjbAkPbklqheUISkfoEVI5ptlFrBAJTIk5dI,67
|
|
8
|
-
tt_perf_report-1.1.5.dist-info/top_level.txt,sha256=mEQ-BK3rRbmz9QyWitTCLy2xwmC5rmJno_TY_H9s9CE,15
|
|
9
|
-
tt_perf_report-1.1.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tt_perf_report-1.1.5.dist-info → tt_perf_report-1.1.7.dist-info}/licenses/LICENSE_understanding.txt
RENAMED
|
File without changes
|
|
File without changes
|