tt-perf-report 1.1.4__tar.gz → 1.1.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tt-perf-report might be problematic. Click here for more details.
- {tt_perf_report-1.1.4/src/tt_perf_report.egg-info → tt_perf_report-1.1.6}/PKG-INFO +1 -1
- {tt_perf_report-1.1.4 → tt_perf_report-1.1.6}/pyproject.toml +1 -1
- {tt_perf_report-1.1.4 → tt_perf_report-1.1.6}/src/tt_perf_report/perf_report.py +11 -3
- {tt_perf_report-1.1.4 → tt_perf_report-1.1.6/src/tt_perf_report.egg-info}/PKG-INFO +1 -1
- {tt_perf_report-1.1.4 → tt_perf_report-1.1.6}/LICENSE +0 -0
- {tt_perf_report-1.1.4 → tt_perf_report-1.1.6}/LICENSE_understanding.txt +0 -0
- {tt_perf_report-1.1.4 → tt_perf_report-1.1.6}/README.md +0 -0
- {tt_perf_report-1.1.4 → tt_perf_report-1.1.6}/setup.cfg +0 -0
- {tt_perf_report-1.1.4 → tt_perf_report-1.1.6}/src/tt_perf_report/__init__.py +0 -0
- {tt_perf_report-1.1.4 → tt_perf_report-1.1.6}/src/tt_perf_report.egg-info/SOURCES.txt +0 -0
- {tt_perf_report-1.1.4 → tt_perf_report-1.1.6}/src/tt_perf_report.egg-info/dependency_links.txt +0 -0
- {tt_perf_report-1.1.4 → tt_perf_report-1.1.6}/src/tt_perf_report.egg-info/entry_points.txt +0 -0
- {tt_perf_report-1.1.4 → tt_perf_report-1.1.6}/src/tt_perf_report.egg-info/requires.txt +0 -0
- {tt_perf_report-1.1.4 → tt_perf_report-1.1.6}/src/tt_perf_report.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tt-perf-report
|
|
3
|
-
Version: 1.1.4
|
|
3
|
+
Version: 1.1.6
|
|
4
4
|
Summary: This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities.
|
|
5
5
|
License: Apache License
|
|
6
6
|
Version 2.0, January 2004
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "tt-perf-report"
|
|
7
|
-
version = "1.1.4"
|
|
7
|
+
version = "1.1.6"
|
|
8
8
|
description = "This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities."
|
|
9
9
|
license = {file = "LICENSE"}
|
|
10
10
|
readme = "README.md"
|
|
@@ -866,11 +866,14 @@ def generate_matmul_advice(op_data):
|
|
|
866
866
|
|
|
867
867
|
|
|
868
868
|
def generate_stacked_report(rows, visible_headers, stack_by_input0_layout:bool = False):
|
|
869
|
+
# Ensure we filter out signpost rows before processing because they aren't useful in the stacked report
|
|
870
|
+
filtered_rows = [row for row in rows if row["OP TYPE"].raw_value != "signpost"]
|
|
871
|
+
|
|
869
872
|
if stack_by_input0_layout:
|
|
870
873
|
visible_headers.append("Input 0 Memory")
|
|
871
874
|
|
|
872
875
|
# Create a pandas DataFrame from rows and headers
|
|
873
|
-
data = {header: [row[header].raw_value for row in rows] for header in visible_headers}
|
|
876
|
+
data = {header: [row[header].raw_value for row in filtered_rows] for header in visible_headers}
|
|
874
877
|
df = pd.DataFrame(data)
|
|
875
878
|
|
|
876
879
|
if (stack_by_input0_layout):
|
|
@@ -882,7 +885,7 @@ def generate_stacked_report(rows, visible_headers, stack_by_input0_layout:bool =
|
|
|
882
885
|
# Group by the joined OP Code and aggregate the data
|
|
883
886
|
stacked_df = df.groupby("OP Code Joined").agg(
|
|
884
887
|
Device_Time_Sum_us=("Device Time", "sum"),
|
|
885
|
-
Ops_Count=("Device Time", "count"),
|
|
888
|
+
Ops_Count=("Device Time", "size"),
|
|
886
889
|
Flops_min=("FLOPs %", "min"),
|
|
887
890
|
Flops_max=("FLOPs %", "max"),
|
|
888
891
|
Flops_mean=("FLOPs %", "mean"),
|
|
@@ -891,7 +894,12 @@ def generate_stacked_report(rows, visible_headers, stack_by_input0_layout:bool =
|
|
|
891
894
|
|
|
892
895
|
# Calculate the percentage of device time
|
|
893
896
|
total_device_time = stacked_df["Device_Time_Sum_us"].sum()
|
|
894
|
-
stacked_df["%"] = (stacked_df["Device_Time_Sum_us"] / total_device_time) * 100
|
|
897
|
+
|
|
898
|
+
if total_device_time != 0:
|
|
899
|
+
stacked_df["%"] = (stacked_df["Device_Time_Sum_us"] / total_device_time) * 100
|
|
900
|
+
else:
|
|
901
|
+
stacked_df["%"] = 0
|
|
902
|
+
|
|
895
903
|
# Reorder columns to move Device_Time_Percentage to be the 3rd column
|
|
896
904
|
cols = stacked_df.columns.tolist()
|
|
897
905
|
cols.insert(0, cols.pop(cols.index("%")))
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tt-perf-report
|
|
3
|
-
Version: 1.1.4
|
|
3
|
+
Version: 1.1.6
|
|
4
4
|
Summary: This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities.
|
|
5
5
|
License: Apache License
|
|
6
6
|
Version 2.0, January 2004
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tt_perf_report-1.1.4 → tt_perf_report-1.1.6}/src/tt_perf_report.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|