tt-perf-report 1.0.0__tar.gz → 1.0.1__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tt-perf-report might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: tt-perf-report
3
- Version: 1.0.0
3
+ Version: 1.0.1
4
4
  Summary: This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities.
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "tt-perf-report"
7
- version = "1.0.0"
7
+ version = "1.0.1"
8
8
  description = "This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities."
9
9
  license = {file = "LICENSE"}
10
10
  readme = "README.md"
@@ -6,7 +6,7 @@
6
6
  import sys
7
7
  import argparse
8
8
  import re
9
- from typing import Any, Optional
9
+ from typing import Any, Optional, Union
10
10
  from collections import defaultdict
11
11
  import pandas as pd
12
12
 
@@ -280,7 +280,7 @@ def analyze_op(row, prev_row):
280
280
  op_code = Cell(row["OP CODE"])
281
281
  cores = Cell(int(row["CORE COUNT"]) if pd.notna(row["CORE COUNT"]) else None)
282
282
  device_time = Cell(
283
- row["DEVICE FW DURATION [ns]"] / 1000 if pd.notna(row["DEVICE FW DURATION [ns]"]) else None,
283
+ row["DEVICE KERNEL DURATION [ns]"] / 1000 if pd.notna(row["DEVICE KERNEL DURATION [ns]"]) else None,
284
284
  unit="us",
285
285
  decimals=0,
286
286
  )
@@ -294,9 +294,12 @@ def analyze_op(row, prev_row):
294
294
  else:
295
295
  op_to_op_gap = Cell(None, unit="us", decimals=0)
296
296
 
297
- output_datatype = row["OUTPUT_0_DATATYPE"]
298
- input_0_datatype = row["INPUT_0_DATATYPE"]
299
- input_1_datatype = row["INPUT_1_DATATYPE"]
297
+ def get_entry(k: str) -> Union[str, None]:
298
+ return row[k] if k in row else None
299
+
300
+ output_datatype = get_entry("OUTPUT_0_DATATYPE")
301
+ input_0_datatype = get_entry("INPUT_0_DATATYPE")
302
+ input_1_datatype = get_entry("INPUT_1_DATATYPE")
300
303
  output_datatype_cell = Cell(output_datatype)
301
304
  input_0_datatype_cell = Cell(input_0_datatype)
302
305
  input_1_datatype_cell = Cell(input_1_datatype)
@@ -682,11 +685,11 @@ def merge_device_rows(df):
682
685
 
683
686
  if "AllGather" in op_name or "ReduceScatter" in op_name:
684
687
  # For collective ops, take the row with minimum duration
685
- min_duration_block = min(blocks, key=lambda x: x[1]["DEVICE FW DURATION [ns]"])
688
+ min_duration_block = min(blocks, key=lambda x: x[1]["DEVICE KERNEL DURATION [ns]"])
686
689
  merged_blocks.append(min_duration_block[1])
687
690
  else:
688
691
  # For non-collective ops, take the row with maximum duration
689
- max_duration_block = max(blocks, key=lambda x: x[1]["DEVICE FW DURATION [ns]"])
692
+ max_duration_block = max(blocks, key=lambda x: x[1]["DEVICE KERNEL DURATION [ns]"])
690
693
  merged_blocks.append(max_duration_block[1])
691
694
 
692
695
  return pd.DataFrame(merged_blocks)
@@ -730,7 +733,7 @@ def filter_by_id_range(rows, id_range):
730
733
  def main():
731
734
  args, id_range = parse_args()
732
735
  generate_perf_report(
733
- args.csv_file, args.signpost, args.ignore_signposts, args.min_percentage, id_range, args.csv, args.no_advice
736
+ args.csv_file, args.signpost, args.ignore_signposts, args.min_percentage, id_range, args.csv, args.no_advice, args.tracing_mode
734
737
  )
735
738
 
736
739
 
@@ -751,6 +754,7 @@ def parse_args():
751
754
  parser.add_argument("--no-color", action="store_true", help="Force output without color")
752
755
  parser.add_argument("--csv", type=str, help="Output filename for CSV format", metavar="OUTPUT_FILE")
753
756
  parser.add_argument("--no-advice", action="store_true", help="Only show the table section of the report")
757
+ parser.add_argument("--tracing-mode", action="store_true", help="Do not sort when in tracing mode")
754
758
  args = parser.parse_args()
755
759
 
756
760
  # Set the global color_output variable
@@ -766,14 +770,15 @@ def parse_args():
766
770
  return args, id_range
767
771
 
768
772
 
769
- def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage, id_range, csv_output_file, no_advice):
773
+ def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage, id_range, csv_output_file, no_advice, tracing_mode):
770
774
  df = pd.read_csv(csv_file, low_memory=False)
771
775
 
772
776
  # Add a column for original row numbers
773
777
  df["ORIGINAL_ROW"] = df.index + 2 # +2 to match Excel row numbers (1-based + header)
774
778
 
775
779
  # Sort the DataFrame by "HOST START TS" column
776
- if "HOST START TS" in df.columns:
780
+ # Sorting by HOST START TS is incorrect in tracing mode because traced ops carry the timestamps from when they were captured, not from when they were executed
781
+ if "HOST START TS" in df.columns and not tracing_mode:
777
782
  print(colored("Sorting CSV by 'HOST START TS' column...", "cyan"))
778
783
  df = df.sort_values(by="HOST START TS")
779
784
  else:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: tt-perf-report
3
- Version: 1.0.0
3
+ Version: 1.0.1
4
4
  Summary: This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities.
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
File without changes
File without changes
File without changes