tt-perf-report 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tt-perf-report might be problematic. Click here for more details.
- tt_perf_report/perf_report.py +88 -71
- {tt_perf_report-1.0.1.dist-info → tt_perf_report-1.0.3.dist-info}/METADATA +1 -1
- tt_perf_report-1.0.3.dist-info/RECORD +8 -0
- tt_perf_report-1.0.1.dist-info/RECORD +0 -8
- {tt_perf_report-1.0.1.dist-info → tt_perf_report-1.0.3.dist-info}/LICENSE +0 -0
- {tt_perf_report-1.0.1.dist-info → tt_perf_report-1.0.3.dist-info}/WHEEL +0 -0
- {tt_perf_report-1.0.1.dist-info → tt_perf_report-1.0.3.dist-info}/entry_points.txt +0 -0
- {tt_perf_report-1.0.1.dist-info → tt_perf_report-1.0.3.dist-info}/top_level.txt +0 -0
tt_perf_report/perf_report.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
# SPDX-FileCopyrightText: © 2025 Tenstorrent AI ULC
|
|
5
|
-
|
|
5
|
+
import csv
|
|
6
6
|
import sys
|
|
7
7
|
import argparse
|
|
8
8
|
import re
|
|
@@ -582,88 +582,98 @@ def print_op_to_op_gap_advice(rows, headers, col_widths):
|
|
|
582
582
|
)
|
|
583
583
|
|
|
584
584
|
|
|
585
|
+
def is_matmul_op(op_data):
|
|
586
|
+
return "Matmul" in op_data["OP Code"].raw_value
|
|
587
|
+
|
|
588
|
+
|
|
585
589
|
def print_matmul_advice(rows, headers, col_widths):
|
|
586
|
-
matmul_ops = [op_data for op_data in rows if "Matmul" in op_data["OP Code"].raw_value]
|
|
590
|
+
matmul_ops = [op_data for op_data in rows if is_matmul_op(op_data)]
|
|
587
591
|
|
|
588
592
|
if matmul_ops:
|
|
589
593
|
print("Matmul Optimization\n-------------------")
|
|
590
594
|
for op_data in matmul_ops:
|
|
591
595
|
print_row(op_data, col_widths, headers)
|
|
592
|
-
advice = []
|
|
596
|
+
advice = generate_matmul_advice(op_data)
|
|
593
597
|
color = "grey" if op_data["OP Code"].color == "grey" else "white"
|
|
594
598
|
|
|
595
|
-
|
|
596
|
-
|
|
599
|
+
if advice:
|
|
600
|
+
for item in advice:
|
|
601
|
+
print(colored(f"- {item}", color))
|
|
602
|
+
else:
|
|
603
|
+
print(colored("✅ Optimized", color))
|
|
604
|
+
print() # Add a blank line between matmuls
|
|
605
|
+
|
|
606
|
+
|
|
607
|
+
def generate_matmul_advice(op_data):
|
|
608
|
+
advice = []
|
|
609
|
+
|
|
610
|
+
math_fidelity = (
|
|
611
|
+
op_data["Math Fidelity"].raw_value.split()[0] if op_data["Math Fidelity"].raw_value else None
|
|
612
|
+
)
|
|
613
|
+
output_datatype = op_data["Output Datatype"].raw_value
|
|
614
|
+
input_0_datatype = op_data["Input 0 Datatype"].raw_value
|
|
615
|
+
input_1_datatype = op_data["Input 1 Datatype"].raw_value
|
|
616
|
+
cores = op_data["Cores"].raw_value
|
|
617
|
+
fidelity_evaluation, fidelity_advice = evaluate_fidelity(
|
|
618
|
+
input_0_datatype, input_1_datatype, output_datatype, math_fidelity
|
|
619
|
+
)
|
|
620
|
+
|
|
621
|
+
if op_data["Bound"].raw_value in ["DRAM", "BOTH"]:
|
|
622
|
+
if not op_data["DRAM Sharded"].raw_value:
|
|
623
|
+
advice.append(
|
|
624
|
+
"Try a DRAM-sharded program config (MatmulMultiCoreReuseMultiCastDRAMShardedProgramConfig) to improve throughput further"
|
|
597
625
|
)
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
626
|
+
if fidelity_evaluation == "too_low" and op_data["FLOPs %"].raw_value < 40:
|
|
627
|
+
advice.append(fidelity_advice)
|
|
628
|
+
if fidelity_evaluation == "too_high":
|
|
629
|
+
advice.append(fidelity_advice)
|
|
630
|
+
elif op_data["Bound"].raw_value in ["FLOP", "BOTH"]:
|
|
631
|
+
if cores < 64:
|
|
632
|
+
advice.append(f"Increase grid size (currently using {cores})")
|
|
633
|
+
if fidelity_evaluation == "too_high":
|
|
634
|
+
advice.append(fidelity_advice)
|
|
635
|
+
elif op_data["Bound"].raw_value == "SLOW":
|
|
636
|
+
input_0_memory = op_data["Input 0 Memory"].raw_value
|
|
637
|
+
if input_0_memory and "L1" not in input_0_memory:
|
|
638
|
+
advice.append(f"If possible place input 0 in L1 (currently in {input_0_memory})")
|
|
639
|
+
|
|
640
|
+
inner_dim_block = op_data["Inner Dim Block Size"].raw_value
|
|
641
|
+
out_h = op_data["Output Subblock H"].raw_value
|
|
642
|
+
out_w = op_data["Output Subblock W"].raw_value
|
|
643
|
+
|
|
644
|
+
if inner_dim_block is None and out_h is None and out_w is None:
|
|
645
|
+
advice.append(
|
|
646
|
+
"No program_config specified, try using one to override in0_block_w and out_subblock_h/w"
|
|
604
647
|
)
|
|
648
|
+
else:
|
|
649
|
+
all_good = True
|
|
650
|
+
if inner_dim_block is not None:
|
|
651
|
+
if inner_dim_block < 2:
|
|
652
|
+
advice.append(f"in0_block_w={inner_dim_block} is small, try in0_block_w=2 or above")
|
|
653
|
+
all_good = False
|
|
654
|
+
else:
|
|
655
|
+
advice.append("No inner dim block size found")
|
|
656
|
+
all_good = False
|
|
605
657
|
|
|
606
|
-
if op_data["Bound"].raw_value in ["DRAM", "BOTH"]:
|
|
607
|
-
|
|
658
|
+
if out_h is not None and out_w is not None:
|
|
659
|
+
out_area = out_h * out_w
|
|
660
|
+
if out_area < 2:
|
|
608
661
|
advice.append(
|
|
609
|
-
"
|
|
662
|
+
f"Output subblock {out_h}x{out_w} is small, try out_subblock_h * out_subblock_w >= 2 if possible"
|
|
610
663
|
)
|
|
611
|
-
|
|
612
|
-
advice.append(f"- {fidelity_advice}")
|
|
613
|
-
if fidelity_evaluation == "too_high":
|
|
614
|
-
advice.append(f"- {fidelity_advice}")
|
|
615
|
-
elif op_data["Bound"].raw_value in ["FLOP", "BOTH"]:
|
|
616
|
-
if cores < 64:
|
|
617
|
-
advice.append(f"- Increase grid size (currently using {cores})")
|
|
618
|
-
if fidelity_evaluation == "too_high":
|
|
619
|
-
advice.append(f"- {fidelity_advice}")
|
|
620
|
-
elif op_data["Bound"].raw_value == "SLOW":
|
|
621
|
-
input_0_memory = op_data["Input 0 Memory"].raw_value
|
|
622
|
-
if input_0_memory and "L1" not in input_0_memory:
|
|
623
|
-
advice.append(f"- If possible place input 0 in L1 (currently in {input_0_memory})")
|
|
624
|
-
|
|
625
|
-
inner_dim_block = op_data["Inner Dim Block Size"].raw_value
|
|
626
|
-
out_h = op_data["Output Subblock H"].raw_value
|
|
627
|
-
out_w = op_data["Output Subblock W"].raw_value
|
|
628
|
-
|
|
629
|
-
if inner_dim_block is None and out_h is None and out_w is None:
|
|
630
|
-
advice.append(
|
|
631
|
-
"- No program_config specified, try using one to override in0_block_w and out_subblock_h/w"
|
|
632
|
-
)
|
|
633
|
-
else:
|
|
634
|
-
all_good = True
|
|
635
|
-
if inner_dim_block is not None:
|
|
636
|
-
if inner_dim_block < 2:
|
|
637
|
-
advice.append(f"- in0_block_w={inner_dim_block} is small, try in0_block_w=2 or above")
|
|
638
|
-
all_good = False
|
|
639
|
-
else:
|
|
640
|
-
advice.append("- No inner dim block size found")
|
|
641
|
-
all_good = False
|
|
642
|
-
|
|
643
|
-
if out_h is not None and out_w is not None:
|
|
644
|
-
out_area = out_h * out_w
|
|
645
|
-
if out_area < 2:
|
|
646
|
-
advice.append(
|
|
647
|
-
f"- Output subblock {out_h}x{out_w} is small, try out_subblock_h * out_subblock_w >= 2 if possible"
|
|
648
|
-
)
|
|
649
|
-
all_good = False
|
|
650
|
-
else:
|
|
651
|
-
advice.append("- No output subblock size found")
|
|
652
|
-
all_good = False
|
|
653
|
-
|
|
654
|
-
if all_good:
|
|
655
|
-
advice.append(
|
|
656
|
-
f"- in0_block_w={inner_dim_block} and output subblock {out_h}x{out_w} look good 🤷"
|
|
657
|
-
)
|
|
658
|
-
if fidelity_advice:
|
|
659
|
-
advice.append(f"- {fidelity_advice}")
|
|
660
|
-
|
|
661
|
-
if advice:
|
|
662
|
-
for item in advice:
|
|
663
|
-
print(colored(item, color))
|
|
664
|
+
all_good = False
|
|
664
665
|
else:
|
|
665
|
-
|
|
666
|
-
|
|
666
|
+
advice.append("No output subblock size found")
|
|
667
|
+
all_good = False
|
|
668
|
+
|
|
669
|
+
if all_good:
|
|
670
|
+
advice.append(
|
|
671
|
+
f"in0_block_w={inner_dim_block} and output subblock {out_h}x{out_w} look good 🤷"
|
|
672
|
+
)
|
|
673
|
+
if fidelity_advice:
|
|
674
|
+
advice.append(fidelity_advice)
|
|
675
|
+
|
|
676
|
+
return advice
|
|
667
677
|
|
|
668
678
|
|
|
669
679
|
def merge_device_rows(df):
|
|
@@ -844,11 +854,18 @@ def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage, i
|
|
|
844
854
|
"Output Subblock H",
|
|
845
855
|
"Output Subblock W",
|
|
846
856
|
]
|
|
857
|
+
if not no_advice:
|
|
858
|
+
all_headers.append("Advice")
|
|
847
859
|
print(colored(f"Writing CSV output to {csv_output_file}", "cyan"))
|
|
848
860
|
with open(csv_output_file, "w") as f:
|
|
849
|
-
|
|
861
|
+
csv_writer = csv.DictWriter(f, fieldnames=all_headers)
|
|
862
|
+
csv_writer.writeheader()
|
|
850
863
|
for op_data in rows:
|
|
851
|
-
|
|
864
|
+
row = {header: op_data[header].raw_value for header in all_headers if header in op_data}
|
|
865
|
+
if not no_advice:
|
|
866
|
+
advice = generate_matmul_advice(op_data) if is_matmul_op(op_data) else ""
|
|
867
|
+
row["Advice"] = " • ".join(advice)
|
|
868
|
+
csv_writer.writerow(row)
|
|
852
869
|
else:
|
|
853
870
|
col_widths = [
|
|
854
871
|
max(max(visible_length(str(row[header])) for row in rows), visible_length(header))
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: tt-perf-report
|
|
3
|
-
Version: 1.0.1
|
|
3
|
+
Version: 1.0.3
|
|
4
4
|
Summary: This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities.
|
|
5
5
|
License: Apache License
|
|
6
6
|
Version 2.0, January 2004
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
tt_perf_report/__init__.py,sha256=-j4iFYebIwgdS8uphk8-M6zasRqGBL3CQGnJH9keRuI,92
|
|
2
|
+
tt_perf_report/perf_report.py,sha256=K_AXtz8ZFFkhLLIoHz2jbuw6aFg1qsJbsHF7kQwL2GI,34620
|
|
3
|
+
tt_perf_report-1.0.3.dist-info/LICENSE,sha256=6dZGjPECz_ULS-sf40FLlt6OmQFcrRvmzG5mJRZCQ5I,11825
|
|
4
|
+
tt_perf_report-1.0.3.dist-info/METADATA,sha256=mCbrFtPNT_MbvLO-Vt7ugY6dS-FhhhrtifOM98PbE6g,18305
|
|
5
|
+
tt_perf_report-1.0.3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
6
|
+
tt_perf_report-1.0.3.dist-info/entry_points.txt,sha256=ReAziglcjbAkPbklqheUISkfoEVI5ptlFrBAJTIk5dI,67
|
|
7
|
+
tt_perf_report-1.0.3.dist-info/top_level.txt,sha256=mEQ-BK3rRbmz9QyWitTCLy2xwmC5rmJno_TY_H9s9CE,15
|
|
8
|
+
tt_perf_report-1.0.3.dist-info/RECORD,,
|
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
tt_perf_report/__init__.py,sha256=-j4iFYebIwgdS8uphk8-M6zasRqGBL3CQGnJH9keRuI,92
|
|
2
|
-
tt_perf_report/perf_report.py,sha256=q9TkHbLwqSrYQwAZGCvJWhGil2KJAf-z4s5V9II1IjI,34611
|
|
3
|
-
tt_perf_report-1.0.1.dist-info/LICENSE,sha256=6dZGjPECz_ULS-sf40FLlt6OmQFcrRvmzG5mJRZCQ5I,11825
|
|
4
|
-
tt_perf_report-1.0.1.dist-info/METADATA,sha256=DqvSFCBy4O6mNByns9lwwArihTXr3VJhAqSbhq9Bu1w,18305
|
|
5
|
-
tt_perf_report-1.0.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
6
|
-
tt_perf_report-1.0.1.dist-info/entry_points.txt,sha256=ReAziglcjbAkPbklqheUISkfoEVI5ptlFrBAJTIk5dI,67
|
|
7
|
-
tt_perf_report-1.0.1.dist-info/top_level.txt,sha256=mEQ-BK3rRbmz9QyWitTCLy2xwmC5rmJno_TY_H9s9CE,15
|
|
8
|
-
tt_perf_report-1.0.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|