PyPI - tt-perf-report - Versions diffs - 1.0.0__tar.gz → 1.0.1__tar.gz - Mend

tt-perf-report 1.0.0 (tar.gz) → 1.0.1 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of tt-perf-report might be problematic. Click here for more details.

Files changed (13)

{tt_perf_report-1.0.0 → tt_perf_report-1.0.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: tt-perf-report
-Version: 1.0.0
+Version: 1.0.1
 Summary: This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities.
 License:                                  Apache License
                                    Version 2.0, January 2004

{tt_perf_report-1.0.0 → tt_perf_report-1.0.1}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "tt-perf-report"
-version = "1.0.0"
+version = "1.0.1"
 description = "This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities."
 license = {file = "LICENSE"}
 readme = "README.md"

{tt_perf_report-1.0.0 → tt_perf_report-1.0.1}/src/tt_perf_report/perf_report.py RENAMED Viewed

@@ -6,7 +6,7 @@
 import sys
 import argparse
 import re
-from typing import Any, Optional
+from typing import Any, Optional, Union
 from collections import defaultdict
 import pandas as pd
@@ -280,7 +280,7 @@ def analyze_op(row, prev_row):
     op_code = Cell(row["OP CODE"])
     cores = Cell(int(row["CORE COUNT"]) if pd.notna(row["CORE COUNT"]) else None)
     device_time = Cell(
-        row["DEVICE FW DURATION [ns]"] / 1000 if pd.notna(row["DEVICE FW DURATION [ns]"]) else None,
+        row["DEVICE KERNEL DURATION [ns]"] / 1000 if pd.notna(row["DEVICE KERNEL DURATION [ns]"]) else None,
         unit="us",
         decimals=0,
     )
@@ -294,9 +294,12 @@ def analyze_op(row, prev_row):
     else:
         op_to_op_gap = Cell(None, unit="us", decimals=0)
-    output_datatype = row["OUTPUT_0_DATATYPE"]
-    input_0_datatype = row["INPUT_0_DATATYPE"]
-    input_1_datatype = row["INPUT_1_DATATYPE"]
+    def get_entry(k: str) -> Union[str, None]:
+        return row[k] if k in row else None
+    output_datatype = get_entry("OUTPUT_0_DATATYPE")
+    input_0_datatype = get_entry("INPUT_0_DATATYPE")
+    input_1_datatype = get_entry("INPUT_1_DATATYPE")
     output_datatype_cell = Cell(output_datatype)
     input_0_datatype_cell = Cell(input_0_datatype)
     input_1_datatype_cell = Cell(input_1_datatype)
@@ -682,11 +685,11 @@ def merge_device_rows(df):
         if "AllGather" in op_name or "ReduceScatter" in op_name:
             # For collective ops, take the row with minimum duration
-            min_duration_block = min(blocks, key=lambda x: x[1]["DEVICE FW DURATION [ns]"])
+            min_duration_block = min(blocks, key=lambda x: x[1]["DEVICE KERNEL DURATION [ns]"])
             merged_blocks.append(min_duration_block[1])
         else:
             # For non-collective ops, take the row with maximum duration
-            max_duration_block = max(blocks, key=lambda x: x[1]["DEVICE FW DURATION [ns]"])
+            max_duration_block = max(blocks, key=lambda x: x[1]["DEVICE KERNEL DURATION [ns]"])
             merged_blocks.append(max_duration_block[1])
     return pd.DataFrame(merged_blocks)
@@ -730,7 +733,7 @@ def filter_by_id_range(rows, id_range):
 def main():
     args, id_range = parse_args()
     generate_perf_report(
-        args.csv_file, args.signpost, args.ignore_signposts, args.min_percentage, id_range, args.csv, args.no_advice
+        args.csv_file, args.signpost, args.ignore_signposts, args.min_percentage, id_range, args.csv, args.no_advice, args.tracing_mode
     )
@@ -751,6 +754,7 @@ def parse_args():
     parser.add_argument("--no-color", action="store_true", help="Force output without color")
     parser.add_argument("--csv", type=str, help="Output filename for CSV format", metavar="OUTPUT_FILE")
     parser.add_argument("--no-advice", action="store_true", help="Only show the table section of the report")
+    parser.add_argument("--tracing-mode", action="store_true", help="Do not sort when in tracing mode")
     args = parser.parse_args()
     # Set the global color_output variable
@@ -766,14 +770,15 @@ def parse_args():
     return args, id_range
-def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage, id_range, csv_output_file, no_advice):
+def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage, id_range, csv_output_file, no_advice, tracing_mode):
     df = pd.read_csv(csv_file, low_memory=False)
     # Add a column for original row numbers
     df["ORIGINAL_ROW"] = df.index + 2  # +2 to match Excel row numbers (1-based + header)
     # Sort the DataFrame by "HOST START TS" column
-    if "HOST START TS" in df.columns:
+    # Sorting by HOST START TS is incorrect when using tracing mode since the tracing ops timestamps are the ones when captured and not executed
+    if "HOST START TS" in df.columns and not tracing_mode:
         print(colored("Sorting CSV by 'HOST START TS' column...", "cyan"))
         df = df.sort_values(by="HOST START TS")
     else:

{tt_perf_report-1.0.0 → tt_perf_report-1.0.1}/src/tt_perf_report.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: tt-perf-report
-Version: 1.0.0
+Version: 1.0.1
 Summary: This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities.
 License:                                  Apache License
                                    Version 2.0, January 2004