tt-perf-report 1.0.0__tar.gz → 1.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tt-perf-report might be problematic. Click here for more details.
- {tt_perf_report-1.0.0 → tt_perf_report-1.0.1}/PKG-INFO +1 -1
- {tt_perf_report-1.0.0 → tt_perf_report-1.0.1}/pyproject.toml +1 -1
- {tt_perf_report-1.0.0 → tt_perf_report-1.0.1}/src/tt_perf_report/perf_report.py +15 -10
- {tt_perf_report-1.0.0 → tt_perf_report-1.0.1}/src/tt_perf_report.egg-info/PKG-INFO +1 -1
- {tt_perf_report-1.0.0 → tt_perf_report-1.0.1}/LICENSE +0 -0
- {tt_perf_report-1.0.0 → tt_perf_report-1.0.1}/README.md +0 -0
- {tt_perf_report-1.0.0 → tt_perf_report-1.0.1}/setup.cfg +0 -0
- {tt_perf_report-1.0.0 → tt_perf_report-1.0.1}/src/tt_perf_report/__init__.py +0 -0
- {tt_perf_report-1.0.0 → tt_perf_report-1.0.1}/src/tt_perf_report.egg-info/SOURCES.txt +0 -0
- {tt_perf_report-1.0.0 → tt_perf_report-1.0.1}/src/tt_perf_report.egg-info/dependency_links.txt +0 -0
- {tt_perf_report-1.0.0 → tt_perf_report-1.0.1}/src/tt_perf_report.egg-info/entry_points.txt +0 -0
- {tt_perf_report-1.0.0 → tt_perf_report-1.0.1}/src/tt_perf_report.egg-info/requires.txt +0 -0
- {tt_perf_report-1.0.0 → tt_perf_report-1.0.1}/src/tt_perf_report.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: tt-perf-report
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 1.0.1
|
|
4
4
|
Summary: This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities.
|
|
5
5
|
License: Apache License
|
|
6
6
|
Version 2.0, January 2004
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "tt-perf-report"
|
|
7
|
-
version = "1.0.0"
|
|
7
|
+
version = "1.0.1"
|
|
8
8
|
description = "This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities."
|
|
9
9
|
license = {file = "LICENSE"}
|
|
10
10
|
readme = "README.md"
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
import sys
|
|
7
7
|
import argparse
|
|
8
8
|
import re
|
|
9
|
-
from typing import Any, Optional
|
|
9
|
+
from typing import Any, Optional, Union
|
|
10
10
|
from collections import defaultdict
|
|
11
11
|
import pandas as pd
|
|
12
12
|
|
|
@@ -280,7 +280,7 @@ def analyze_op(row, prev_row):
|
|
|
280
280
|
op_code = Cell(row["OP CODE"])
|
|
281
281
|
cores = Cell(int(row["CORE COUNT"]) if pd.notna(row["CORE COUNT"]) else None)
|
|
282
282
|
device_time = Cell(
|
|
283
|
-
row["DEVICE FW DURATION [ns]"] / 1000 if pd.notna(row["DEVICE FW DURATION [ns]"]) else None,
|
|
283
|
+
row["DEVICE KERNEL DURATION [ns]"] / 1000 if pd.notna(row["DEVICE KERNEL DURATION [ns]"]) else None,
|
|
284
284
|
unit="us",
|
|
285
285
|
decimals=0,
|
|
286
286
|
)
|
|
@@ -294,9 +294,12 @@ def analyze_op(row, prev_row):
|
|
|
294
294
|
else:
|
|
295
295
|
op_to_op_gap = Cell(None, unit="us", decimals=0)
|
|
296
296
|
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
297
|
+
def get_entry(k: str) -> Union[str, None]:
|
|
298
|
+
return row[k] if k in row else None
|
|
299
|
+
|
|
300
|
+
output_datatype = get_entry("OUTPUT_0_DATATYPE")
|
|
301
|
+
input_0_datatype = get_entry("INPUT_0_DATATYPE")
|
|
302
|
+
input_1_datatype = get_entry("INPUT_1_DATATYPE")
|
|
300
303
|
output_datatype_cell = Cell(output_datatype)
|
|
301
304
|
input_0_datatype_cell = Cell(input_0_datatype)
|
|
302
305
|
input_1_datatype_cell = Cell(input_1_datatype)
|
|
@@ -682,11 +685,11 @@ def merge_device_rows(df):
|
|
|
682
685
|
|
|
683
686
|
if "AllGather" in op_name or "ReduceScatter" in op_name:
|
|
684
687
|
# For collective ops, take the row with minimum duration
|
|
685
|
-
min_duration_block = min(blocks, key=lambda x: x[1]["DEVICE FW DURATION [ns]"])
|
|
688
|
+
min_duration_block = min(blocks, key=lambda x: x[1]["DEVICE KERNEL DURATION [ns]"])
|
|
686
689
|
merged_blocks.append(min_duration_block[1])
|
|
687
690
|
else:
|
|
688
691
|
# For non-collective ops, take the row with maximum duration
|
|
689
|
-
max_duration_block = max(blocks, key=lambda x: x[1]["DEVICE FW DURATION [ns]"])
|
|
692
|
+
max_duration_block = max(blocks, key=lambda x: x[1]["DEVICE KERNEL DURATION [ns]"])
|
|
690
693
|
merged_blocks.append(max_duration_block[1])
|
|
691
694
|
|
|
692
695
|
return pd.DataFrame(merged_blocks)
|
|
@@ -730,7 +733,7 @@ def filter_by_id_range(rows, id_range):
|
|
|
730
733
|
def main():
|
|
731
734
|
args, id_range = parse_args()
|
|
732
735
|
generate_perf_report(
|
|
733
|
-
args.csv_file, args.signpost, args.ignore_signposts, args.min_percentage, id_range, args.csv, args.no_advice
|
|
736
|
+
args.csv_file, args.signpost, args.ignore_signposts, args.min_percentage, id_range, args.csv, args.no_advice, args.tracing_mode
|
|
734
737
|
)
|
|
735
738
|
|
|
736
739
|
|
|
@@ -751,6 +754,7 @@ def parse_args():
|
|
|
751
754
|
parser.add_argument("--no-color", action="store_true", help="Force output without color")
|
|
752
755
|
parser.add_argument("--csv", type=str, help="Output filename for CSV format", metavar="OUTPUT_FILE")
|
|
753
756
|
parser.add_argument("--no-advice", action="store_true", help="Only show the table section of the report")
|
|
757
|
+
parser.add_argument("--tracing-mode", action="store_true", help="Do not sort when in tracing mode")
|
|
754
758
|
args = parser.parse_args()
|
|
755
759
|
|
|
756
760
|
# Set the global color_output variable
|
|
@@ -766,14 +770,15 @@ def parse_args():
|
|
|
766
770
|
return args, id_range
|
|
767
771
|
|
|
768
772
|
|
|
769
|
-
def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage, id_range, csv_output_file, no_advice):
|
|
773
|
+
def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage, id_range, csv_output_file, no_advice, tracing_mode):
|
|
770
774
|
df = pd.read_csv(csv_file, low_memory=False)
|
|
771
775
|
|
|
772
776
|
# Add a column for original row numbers
|
|
773
777
|
df["ORIGINAL_ROW"] = df.index + 2 # +2 to match Excel row numbers (1-based + header)
|
|
774
778
|
|
|
775
779
|
# Sort the DataFrame by "HOST START TS" column
|
|
776
|
-
|
|
780
|
+
# Sorting by HOST START TS is incorrect when using tracing mode since the tracing ops timestamps are the ones when captured and not executed
|
|
781
|
+
if "HOST START TS" in df.columns and not tracing_mode:
|
|
777
782
|
print(colored("Sorting CSV by 'HOST START TS' column...", "cyan"))
|
|
778
783
|
df = df.sort_values(by="HOST START TS")
|
|
779
784
|
else:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: tt-perf-report
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 1.0.1
|
|
4
4
|
Summary: This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities.
|
|
5
5
|
License: Apache License
|
|
6
6
|
Version 2.0, January 2004
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tt_perf_report-1.0.0 → tt_perf_report-1.0.1}/src/tt_perf_report.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|