tt-perf-report 1.1.6__tar.gz → 1.1.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tt-perf-report might be problematic. Click here for more details.
- {tt_perf_report-1.1.6/src/tt_perf_report.egg-info → tt_perf_report-1.1.7}/PKG-INFO +1 -1
- {tt_perf_report-1.1.6 → tt_perf_report-1.1.7}/pyproject.toml +1 -1
- {tt_perf_report-1.1.6 → tt_perf_report-1.1.7}/src/tt_perf_report/perf_report.py +17 -5
- {tt_perf_report-1.1.6 → tt_perf_report-1.1.7/src/tt_perf_report.egg-info}/PKG-INFO +1 -1
- {tt_perf_report-1.1.6 → tt_perf_report-1.1.7}/LICENSE +0 -0
- {tt_perf_report-1.1.6 → tt_perf_report-1.1.7}/LICENSE_understanding.txt +0 -0
- {tt_perf_report-1.1.6 → tt_perf_report-1.1.7}/README.md +0 -0
- {tt_perf_report-1.1.6 → tt_perf_report-1.1.7}/setup.cfg +0 -0
- {tt_perf_report-1.1.6 → tt_perf_report-1.1.7}/src/tt_perf_report/__init__.py +0 -0
- {tt_perf_report-1.1.6 → tt_perf_report-1.1.7}/src/tt_perf_report.egg-info/SOURCES.txt +0 -0
- {tt_perf_report-1.1.6 → tt_perf_report-1.1.7}/src/tt_perf_report.egg-info/dependency_links.txt +0 -0
- {tt_perf_report-1.1.6 → tt_perf_report-1.1.7}/src/tt_perf_report.egg-info/entry_points.txt +0 -0
- {tt_perf_report-1.1.6 → tt_perf_report-1.1.7}/src/tt_perf_report.egg-info/requires.txt +0 -0
- {tt_perf_report-1.1.6 → tt_perf_report-1.1.7}/src/tt_perf_report.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tt-perf-report
|
|
3
|
-
Version: 1.1.6
|
|
3
|
+
Version: 1.1.7
|
|
4
4
|
Summary: This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities.
|
|
5
5
|
License: Apache License
|
|
6
6
|
Version 2.0, January 2004
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "tt-perf-report"
|
|
7
|
-
version = "1.1.6"
|
|
7
|
+
version = "1.1.7"
|
|
8
8
|
description = "This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities."
|
|
9
9
|
license = {file = "LICENSE"}
|
|
10
10
|
readme = "README.md"
|
|
@@ -695,7 +695,7 @@ def color_row(op_data, percentage, min_percentage):
|
|
|
695
695
|
return op_data
|
|
696
696
|
|
|
697
697
|
|
|
698
|
-
def print_performance_table(rows, headers, col_widths, device_ops, host_ops):
|
|
698
|
+
def print_performance_table(rows, headers, col_widths, device_ops, host_ops, signpost_count):
|
|
699
699
|
print("\n🚀 Performance Report 🚀\n========================\n")
|
|
700
700
|
|
|
701
701
|
print(" ".join(pad_string(header, col_widths[i], align="left") for i, header in enumerate(headers)))
|
|
@@ -716,7 +716,7 @@ def print_performance_table(rows, headers, col_widths, device_ops, host_ops):
|
|
|
716
716
|
"ID": Cell(""),
|
|
717
717
|
"Total %": Cell(100.0, unit="%", decimals=1),
|
|
718
718
|
"Bound": Cell(""),
|
|
719
|
-
"OP Code": Cell(f"{device_ops} device ops, {host_ops} host ops"),
|
|
719
|
+
"OP Code": Cell(f"{device_ops} device ops, {host_ops} host ops, {signpost_count} signposts"),
|
|
720
720
|
"Device Time": Cell(total_device_time, unit="us", decimals=0),
|
|
721
721
|
"Op-to-Op Gap": Cell(total_visible_gap, unit="us", decimals=0),
|
|
722
722
|
}
|
|
@@ -867,7 +867,7 @@ def generate_matmul_advice(op_data):
|
|
|
867
867
|
|
|
868
868
|
def generate_stacked_report(rows, visible_headers, stack_by_input0_layout:bool = False):
|
|
869
869
|
# Ensure we filter out signpost rows before processing because they aren't useful in the stacked report
|
|
870
|
-
filtered_rows =
|
|
870
|
+
filtered_rows = filter_signposts(rows)
|
|
871
871
|
|
|
872
872
|
if stack_by_input0_layout:
|
|
873
873
|
visible_headers.append("Input 0 Memory")
|
|
@@ -1056,6 +1056,8 @@ def filter_by_id_range(rows, id_range):
|
|
|
1056
1056
|
def filter_host_ops(rows):
|
|
1057
1057
|
return [row for row in rows if not is_host_op(row)]
|
|
1058
1058
|
|
|
1059
|
+
def filter_signposts(rows):
|
|
1060
|
+
return [row for row in rows if not is_signpost_op(row)]
|
|
1059
1061
|
|
|
1060
1062
|
def main():
|
|
1061
1063
|
args, id_range = parse_args()
|
|
@@ -1139,18 +1141,26 @@ def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage,
|
|
|
1139
1141
|
prev_row = None
|
|
1140
1142
|
device_ops = 0
|
|
1141
1143
|
host_ops = 0
|
|
1144
|
+
signpost_count = 0
|
|
1142
1145
|
for _, row in df.iterrows():
|
|
1143
1146
|
op_data, current_gap = analyze_op(row, prev_row, csv_format)
|
|
1144
1147
|
op_data["ID"] = Cell(row["ORIGINAL_ROW"]) # Use the original row number
|
|
1145
1148
|
op_data["Global Call Count"] = Cell(row["GLOBAL CALL COUNT"])
|
|
1146
1149
|
if raw_op_codes:
|
|
1147
1150
|
op_data["Raw OP Code"] = Cell(row["OP CODE"])
|
|
1151
|
+
|
|
1152
|
+
# OP TYPE column is only present in raw format/df and is not part of the op_data/rows dictionary used later
|
|
1153
|
+
# append " (signpost)" to the OP Code if this row is a signpost to distinguish it
|
|
1154
|
+
if "signpost" in row["OP TYPE"]:
|
|
1155
|
+
op_data["OP Code"].raw_value = f"{row['OP CODE']} (signpost)"
|
|
1148
1156
|
rows.append(op_data)
|
|
1149
1157
|
prev_row = row
|
|
1150
1158
|
|
|
1151
|
-
# Count device and host ops
|
|
1159
|
+
# Count device and host ops, ignore signposts
|
|
1152
1160
|
if is_host_op(op_data):
|
|
1153
1161
|
host_ops += 1
|
|
1162
|
+
elif is_signpost_op(op_data):
|
|
1163
|
+
signpost_count += 1
|
|
1154
1164
|
else:
|
|
1155
1165
|
device_ops += 1
|
|
1156
1166
|
|
|
@@ -1218,7 +1228,7 @@ def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage,
|
|
|
1218
1228
|
max(max(visible_length(str(row[header])) for row in rows), visible_length(header))
|
|
1219
1229
|
for header in visible_headers
|
|
1220
1230
|
]
|
|
1221
|
-
print_performance_table(rows, visible_headers, col_widths, device_ops, host_ops)
|
|
1231
|
+
print_performance_table(rows, visible_headers, col_widths, device_ops, host_ops, signpost_count)
|
|
1222
1232
|
if not no_advice:
|
|
1223
1233
|
print_advice_section(rows, visible_headers, col_widths)
|
|
1224
1234
|
|
|
@@ -1242,6 +1252,8 @@ def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage,
|
|
|
1242
1252
|
def is_host_op(op_data):
|
|
1243
1253
|
return "(torch)" in op_data["OP Code"].raw_value
|
|
1244
1254
|
|
|
1255
|
+
def is_signpost_op(op_data):
|
|
1256
|
+
return "signpost" in op_data["OP Code"].raw_value
|
|
1245
1257
|
|
|
1246
1258
|
if __name__ == "__main__":
|
|
1247
1259
|
main()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tt-perf-report
|
|
3
|
-
Version: 1.1.6
|
|
3
|
+
Version: 1.1.7
|
|
4
4
|
Summary: This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities.
|
|
5
5
|
License: Apache License
|
|
6
6
|
Version 2.0, January 2004
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tt_perf_report-1.1.6 → tt_perf_report-1.1.7}/src/tt_perf_report.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|