tt-perf-report 1.0.1-py3-none-any.whl → 1.0.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tt-perf-report might be problematic.

--- a/tt_perf_report/perf_report.py
+++ b/tt_perf_report/perf_report.py
@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 # SPDX-FileCopyrightText: © 2025 Tenstorrent AI ULC
-
+import csv
 import sys
 import argparse
 import re
@@ -582,88 +582,98 @@ def print_op_to_op_gap_advice(rows, headers, col_widths):
     )
 
 
+def is_matmul_op(op_data):
+    return "Matmul" in op_data["OP Code"].raw_value
+
+
 def print_matmul_advice(rows, headers, col_widths):
-    matmul_ops = [op_data for op_data in rows if "Matmul" in op_data["OP Code"].raw_value]
+    matmul_ops = [op_data for op_data in rows if is_matmul_op(op_data)]
 
     if matmul_ops:
         print("Matmul Optimization\n-------------------")
         for op_data in matmul_ops:
             print_row(op_data, col_widths, headers)
-            advice = []
+            advice = generate_matmul_advice(op_data)
             color = "grey" if op_data["OP Code"].color == "grey" else "white"
 
-            math_fidelity = (
-                op_data["Math Fidelity"].raw_value.split()[0] if op_data["Math Fidelity"].raw_value else None
+            if advice:
+                for item in advice:
+                    print(colored(f"- {item}", color))
+            else:
+                print(colored("✅ Optimized", color))
+            print()  # Add a blank line between matmuls
+
+
+def generate_matmul_advice(op_data):
+    advice = []
+
+    math_fidelity = (
+        op_data["Math Fidelity"].raw_value.split()[0] if op_data["Math Fidelity"].raw_value else None
+    )
+    output_datatype = op_data["Output Datatype"].raw_value
+    input_0_datatype = op_data["Input 0 Datatype"].raw_value
+    input_1_datatype = op_data["Input 1 Datatype"].raw_value
+    cores = op_data["Cores"].raw_value
+    fidelity_evaluation, fidelity_advice = evaluate_fidelity(
+        input_0_datatype, input_1_datatype, output_datatype, math_fidelity
+    )
+
+    if op_data["Bound"].raw_value in ["DRAM", "BOTH"]:
+        if not op_data["DRAM Sharded"].raw_value:
+            advice.append(
+                "Try a DRAM-sharded program config (MatmulMultiCoreReuseMultiCastDRAMShardedProgramConfig) to improve throughput further"
             )
-            output_datatype = op_data["Output Datatype"].raw_value
-            input_0_datatype = op_data["Input 0 Datatype"].raw_value
-            input_1_datatype = op_data["Input 1 Datatype"].raw_value
-            cores = op_data["Cores"].raw_value
-            fidelity_evaluation, fidelity_advice = evaluate_fidelity(
-                input_0_datatype, input_1_datatype, output_datatype, math_fidelity
+        if fidelity_evaluation == "too_low" and op_data["FLOPs %"].raw_value < 40:
+            advice.append(fidelity_advice)
+        if fidelity_evaluation == "too_high":
+            advice.append(fidelity_advice)
+    elif op_data["Bound"].raw_value in ["FLOP", "BOTH"]:
+        if cores < 64:
+            advice.append(f"Increase grid size (currently using {cores})")
+        if fidelity_evaluation == "too_high":
+            advice.append(fidelity_advice)
+    elif op_data["Bound"].raw_value == "SLOW":
+        input_0_memory = op_data["Input 0 Memory"].raw_value
+        if input_0_memory and "L1" not in input_0_memory:
+            advice.append(f"If possible place input 0 in L1 (currently in {input_0_memory})")
+
+        inner_dim_block = op_data["Inner Dim Block Size"].raw_value
+        out_h = op_data["Output Subblock H"].raw_value
+        out_w = op_data["Output Subblock W"].raw_value
+
+        if inner_dim_block is None and out_h is None and out_w is None:
+            advice.append(
+                "No program_config specified, try using one to override in0_block_w and out_subblock_h/w"
             )
+        else:
+            all_good = True
+            if inner_dim_block is not None:
+                if inner_dim_block < 2:
+                    advice.append(f"in0_block_w={inner_dim_block} is small, try in0_block_w=2 or above")
+                    all_good = False
+            else:
+                advice.append("No inner dim block size found")
+                all_good = False
 
-            if op_data["Bound"].raw_value in ["DRAM", "BOTH"]:
-                if not op_data["DRAM Sharded"].raw_value:
+            if out_h is not None and out_w is not None:
+                out_area = out_h * out_w
+                if out_area < 2:
                     advice.append(
-                        "- Try a DRAM-sharded program config (MatmulMultiCoreReuseMultiCastDRAMShardedProgramConfig) to improve throughput further"
+                        f"Output subblock {out_h}x{out_w} is small, try out_subblock_h * out_subblock_w >= 2 if possible"
                     )
-                if fidelity_evaluation == "too_low" and op_data["FLOPs %"].raw_value < 40:
-                    advice.append(f"- {fidelity_advice}")
-                if fidelity_evaluation == "too_high":
-                    advice.append(f"- {fidelity_advice}")
-            elif op_data["Bound"].raw_value in ["FLOP", "BOTH"]:
-                if cores < 64:
-                    advice.append(f"- Increase grid size (currently using {cores})")
-                if fidelity_evaluation == "too_high":
-                    advice.append(f"- {fidelity_advice}")
-            elif op_data["Bound"].raw_value == "SLOW":
-                input_0_memory = op_data["Input 0 Memory"].raw_value
-                if input_0_memory and "L1" not in input_0_memory:
-                    advice.append(f"- If possible place input 0 in L1 (currently in {input_0_memory})")
-
-                inner_dim_block = op_data["Inner Dim Block Size"].raw_value
-                out_h = op_data["Output Subblock H"].raw_value
-                out_w = op_data["Output Subblock W"].raw_value
-
-                if inner_dim_block is None and out_h is None and out_w is None:
-                    advice.append(
-                        "- No program_config specified, try using one to override in0_block_w and out_subblock_h/w"
-                    )
-                else:
-                    all_good = True
-                    if inner_dim_block is not None:
-                        if inner_dim_block < 2:
-                            advice.append(f"- in0_block_w={inner_dim_block} is small, try in0_block_w=2 or above")
-                            all_good = False
-                    else:
-                        advice.append("- No inner dim block size found")
-                        all_good = False
-
-                    if out_h is not None and out_w is not None:
-                        out_area = out_h * out_w
-                        if out_area < 2:
-                            advice.append(
-                                f"- Output subblock {out_h}x{out_w} is small, try out_subblock_h * out_subblock_w >= 2 if possible"
-                            )
-                            all_good = False
-                    else:
-                        advice.append("- No output subblock size found")
-                        all_good = False
-
-                    if all_good:
-                        advice.append(
-                            f"- in0_block_w={inner_dim_block} and output subblock {out_h}x{out_w} look good 🤷"
-                        )
-                if fidelity_advice:
-                    advice.append(f"- {fidelity_advice}")
-
-            if advice:
-                for item in advice:
-                    print(colored(item, color))
+                    all_good = False
             else:
-                print(colored(" Optimized", color))
-            print()  # Add a blank line between matmuls
+                advice.append("No output subblock size found")
+                all_good = False
+
+            if all_good:
+                advice.append(
+                    f"in0_block_w={inner_dim_block} and output subblock {out_h}x{out_w} look good 🤷"
+                )
+        if fidelity_advice:
+            advice.append(fidelity_advice)
+
+    return advice
 
 
 def merge_device_rows(df):
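
The core change in 1.0.3 is that the advice logic is factored out of print_matmul_advice into two standalone helpers, is_matmul_op and generate_matmul_advice, which the CSV writer in the next hunk reuses. Below is a minimal sketch of driving those helpers directly, assuming tt-perf-report 1.0.3 is installed; the Cell stand-in only mimics the raw_value attribute of the real row cells, and the field values are made-up examples rather than data from an actual trace.

# Illustrative only: exercising the helpers that 1.0.3 factors out of print_matmul_advice.
from collections import namedtuple

from tt_perf_report.perf_report import generate_matmul_advice, is_matmul_op

# Stand-in for the package's row cells, which expose a raw_value attribute.
Cell = namedtuple("Cell", ["raw_value"])

# Field names follow the diff above; the values are invented examples.
op_data = {
    "OP Code": Cell("Matmul"),
    "Bound": Cell("DRAM"),
    "DRAM Sharded": Cell(False),
    "Math Fidelity": Cell("HiFi2"),
    "Output Datatype": Cell("BFLOAT16"),
    "Input 0 Datatype": Cell("BFLOAT16"),
    "Input 1 Datatype": Cell("BFLOAT8_B"),
    "Cores": Cell(64),
    "FLOPs %": Cell(35.0),
    "Input 0 Memory": Cell("DEV_0_DRAM_INTERLEAVED"),
    "Inner Dim Block Size": Cell(None),
    "Output Subblock H": Cell(None),
    "Output Subblock W": Cell(None),
}

if is_matmul_op(op_data):
    for item in generate_matmul_advice(op_data):
        print(f"- {item}")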
@@ -844,11 +854,18 @@ def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage, i
             "Output Subblock H",
             "Output Subblock W",
         ]
+        if not no_advice:
+            all_headers.append("Advice")
         print(colored(f"Writing CSV output to {csv_output_file}", "cyan"))
         with open(csv_output_file, "w") as f:
-            f.write(",".join(all_headers) + "\n")
+            csv_writer = csv.DictWriter(f, fieldnames=all_headers)
+            csv_writer.writeheader()
             for op_data in rows:
-                f.write(",".join(str(op_data[header].raw_value) for header in all_headers) + "\n")
+                row = {header: op_data[header].raw_value for header in all_headers if header in op_data}
+                if not no_advice:
+                    advice = generate_matmul_advice(op_data) if is_matmul_op(op_data) else ""
+                    row["Advice"] = " • ".join(advice)
+                csv_writer.writerow(row)
     else:
         col_widths = [
             max(max(visible_length(str(row[header])) for row in rows), visible_length(header))
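
With the switch to csv.DictWriter, the 1.0.3 CSV output gains an optional Advice column: for each Matmul row the strings returned by generate_matmul_advice are joined with " • " into a single cell. Below is a minimal read-back sketch using only the standard library; the output file name and the OP Code column name are assumptions for illustration, not taken from the diff.

# Illustrative only: reading the Advice column that 1.0.3 adds to the CSV output.
import csv

with open("perf_report.csv", newline="") as f:  # output path is an assumption
    for row in csv.DictReader(f):
        advice = row.get("Advice", "")
        if advice:
            # 1.0.3 joins multiple advice strings with " • " into one cell
            for item in advice.split(" • "):
                print(f'{row.get("OP Code", "?")}: {item}')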
--- a/tt_perf_report-1.0.1.dist-info/METADATA
+++ b/tt_perf_report-1.0.3.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: tt-perf-report
-Version: 1.0.1
+Version: 1.0.3
 Summary: This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities.
 License: Apache License
         Version 2.0, January 2004
--- /dev/null
+++ b/tt_perf_report-1.0.3.dist-info/RECORD
@@ -0,0 +1,8 @@
+tt_perf_report/__init__.py,sha256=-j4iFYebIwgdS8uphk8-M6zasRqGBL3CQGnJH9keRuI,92
+tt_perf_report/perf_report.py,sha256=K_AXtz8ZFFkhLLIoHz2jbuw6aFg1qsJbsHF7kQwL2GI,34620
+tt_perf_report-1.0.3.dist-info/LICENSE,sha256=6dZGjPECz_ULS-sf40FLlt6OmQFcrRvmzG5mJRZCQ5I,11825
+tt_perf_report-1.0.3.dist-info/METADATA,sha256=mCbrFtPNT_MbvLO-Vt7ugY6dS-FhhhrtifOM98PbE6g,18305
+tt_perf_report-1.0.3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+tt_perf_report-1.0.3.dist-info/entry_points.txt,sha256=ReAziglcjbAkPbklqheUISkfoEVI5ptlFrBAJTIk5dI,67
+tt_perf_report-1.0.3.dist-info/top_level.txt,sha256=mEQ-BK3rRbmz9QyWitTCLy2xwmC5rmJno_TY_H9s9CE,15
+tt_perf_report-1.0.3.dist-info/RECORD,,
--- a/tt_perf_report-1.0.1.dist-info/RECORD
+++ /dev/null
@@ -1,8 +0,0 @@
-tt_perf_report/__init__.py,sha256=-j4iFYebIwgdS8uphk8-M6zasRqGBL3CQGnJH9keRuI,92
-tt_perf_report/perf_report.py,sha256=q9TkHbLwqSrYQwAZGCvJWhGil2KJAf-z4s5V9II1IjI,34611
-tt_perf_report-1.0.1.dist-info/LICENSE,sha256=6dZGjPECz_ULS-sf40FLlt6OmQFcrRvmzG5mJRZCQ5I,11825
-tt_perf_report-1.0.1.dist-info/METADATA,sha256=DqvSFCBy4O6mNByns9lwwArihTXr3VJhAqSbhq9Bu1w,18305
-tt_perf_report-1.0.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-tt_perf_report-1.0.1.dist-info/entry_points.txt,sha256=ReAziglcjbAkPbklqheUISkfoEVI5ptlFrBAJTIk5dI,67
-tt_perf_report-1.0.1.dist-info/top_level.txt,sha256=mEQ-BK3rRbmz9QyWitTCLy2xwmC5rmJno_TY_H9s9CE,15
-tt_perf_report-1.0.1.dist-info/RECORD,,