tt-perf-report 1.0.3__py3-none-any.whl → 1.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tt-perf-report might be problematic. Click here for more details.
- tt_perf_report/perf_report.py +72 -15
- {tt_perf_report-1.0.3.dist-info → tt_perf_report-1.0.4.dist-info}/METADATA +1 -1
- tt_perf_report-1.0.4.dist-info/RECORD +8 -0
- {tt_perf_report-1.0.3.dist-info → tt_perf_report-1.0.4.dist-info}/WHEEL +1 -1
- tt_perf_report-1.0.3.dist-info/RECORD +0 -8
- {tt_perf_report-1.0.3.dist-info → tt_perf_report-1.0.4.dist-info}/LICENSE +0 -0
- {tt_perf_report-1.0.3.dist-info → tt_perf_report-1.0.4.dist-info}/entry_points.txt +0 -0
- {tt_perf_report-1.0.3.dist-info → tt_perf_report-1.0.4.dist-info}/top_level.txt +0 -0
tt_perf_report/perf_report.py
CHANGED
|
@@ -38,6 +38,7 @@ def colored(text, color):
|
|
|
38
38
|
"yellow": "\033[38;5;11m",
|
|
39
39
|
"blue": "\033[38;5;12m",
|
|
40
40
|
"magenta": "\033[38;5;13m",
|
|
41
|
+
"orange": "\033[38;5;208m",
|
|
41
42
|
"cyan": "\033[38;5;14m",
|
|
42
43
|
"white": "\033[38;5;15m",
|
|
43
44
|
"end": "\033[0m",
|
|
@@ -70,7 +71,7 @@ class Cell:
|
|
|
70
71
|
if self.raw_value is None or pd.isna(self.raw_value):
|
|
71
72
|
return ""
|
|
72
73
|
|
|
73
|
-
if isinstance(self.raw_value, str) and "Matmul" in self.raw_value:
|
|
74
|
+
if isinstance(self.raw_value, str) and ("Matmul" in self.raw_value or "OptimizedConvNew" in self.raw_value):
|
|
74
75
|
parts = self.raw_value.split(maxsplit=1)
|
|
75
76
|
op_name = parts[0]
|
|
76
77
|
size = parts[1] if len(parts) > 1 else ""
|
|
@@ -275,6 +276,37 @@ def analyze_matmul(row):
|
|
|
275
276
|
core_count, # Return the potentially adjusted core count
|
|
276
277
|
)
|
|
277
278
|
|
|
279
|
+
def analyze_conv(row):
|
|
280
|
+
duration_s = row["DEVICE KERNEL DURATION [ns]"] * 1e-9
|
|
281
|
+
|
|
282
|
+
core_count = 64 # we decided to normalize to the max core count
|
|
283
|
+
math_fidelity = row["MATH FIDELITY"]
|
|
284
|
+
|
|
285
|
+
# Check for DRAM-sharded program config
|
|
286
|
+
attributes = row["ATTRIBUTES"] if pd.notna(row["ATTRIBUTES"]) else ""
|
|
287
|
+
|
|
288
|
+
peak_flops_value = tflops_per_core(math_fidelity) * 1e12 * core_count
|
|
289
|
+
|
|
290
|
+
NHW = int(row["OUTPUT_0_Y"])
|
|
291
|
+
CH_IN = int(row["INPUT_0_X"])
|
|
292
|
+
W = [int(x) for x in (attributes.split("window_hw")[1].split("; ")[0][2:-1].split(";"))]
|
|
293
|
+
CH_OUT = int(row["INPUT_1_X"])
|
|
294
|
+
|
|
295
|
+
M, K, N = NHW, CH_IN * W[0] * W[1], CH_OUT
|
|
296
|
+
flops = (M * K * N * 2) / duration_s
|
|
297
|
+
|
|
298
|
+
size = f"{M} x {K} x {N}"
|
|
299
|
+
memory_info = f"({row['INPUT_0_DATATYPE']} {row['INPUT_0_MEMORY'].replace('DEV_0_', '')} @ {row['INPUT_1_DATATYPE']} {row['INPUT_1_MEMORY'].replace('DEV_0_', '')} => {row['OUTPUT_0_DATATYPE']} {row['OUTPUT_0_MEMORY'].replace('DEV_0_', '')})"
|
|
300
|
+
|
|
301
|
+
flops_percentage = (flops / peak_flops_value) * 100
|
|
302
|
+
|
|
303
|
+
return (
|
|
304
|
+
flops,
|
|
305
|
+
flops_percentage,
|
|
306
|
+
size,
|
|
307
|
+
memory_info,
|
|
308
|
+
math_fidelity
|
|
309
|
+
)
|
|
278
310
|
|
|
279
311
|
def analyze_op(row, prev_row):
|
|
280
312
|
op_code = Cell(row["OP CODE"])
|
|
@@ -305,6 +337,19 @@ def analyze_op(row, prev_row):
|
|
|
305
337
|
input_1_datatype_cell = Cell(input_1_datatype)
|
|
306
338
|
short_name = lambda n: {"BFLOAT16": "BF16", "BFLOAT8_B": "BFP8", "BFLOAT4_B": "BFP4"}.get(n, n)
|
|
307
339
|
|
|
340
|
+
dram_speed = Cell(None, unit="GB/s", decimals=0)
|
|
341
|
+
dram_percentage = Cell(None, unit="%", decimals=1)
|
|
342
|
+
flops = Cell(None, unit="TFLOPs", decimals=1)
|
|
343
|
+
flops_percentage = Cell(None, unit="%", decimals=1)
|
|
344
|
+
|
|
345
|
+
math_fidelity = ""
|
|
346
|
+
math_fidelity += f"{short_name(input_0_datatype)}" if pd.notna(input_0_datatype) else ""
|
|
347
|
+
math_fidelity += f", {short_name(input_1_datatype)}" if pd.notna(input_1_datatype) else ""
|
|
348
|
+
math_fidelity += f" => {short_name(output_datatype)}" if pd.notna(output_datatype) else ""
|
|
349
|
+
math_fidelity_cell = Cell(math_fidelity.strip())
|
|
350
|
+
|
|
351
|
+
is_dram_sharded = False
|
|
352
|
+
|
|
308
353
|
if "Matmul" in op_code.raw_value:
|
|
309
354
|
(
|
|
310
355
|
dram_speed,
|
|
@@ -329,19 +374,24 @@ def analyze_op(row, prev_row):
|
|
|
329
374
|
if math_fidelity
|
|
330
375
|
else None
|
|
331
376
|
)
|
|
332
|
-
|
|
377
|
+
elif "OptimizedConvNew" in op_code.raw_value:
|
|
378
|
+
(
|
|
379
|
+
flops,
|
|
380
|
+
flops_percentage,
|
|
381
|
+
size,
|
|
382
|
+
memory_info,
|
|
383
|
+
math_fidelity,
|
|
384
|
+
) = analyze_conv(row)
|
|
385
|
+
op_code = Cell(f"{op_code.raw_value} {size}")
|
|
333
386
|
dram_speed = Cell(None, unit="GB/s", decimals=0)
|
|
334
387
|
dram_percentage = Cell(None, unit="%", decimals=1)
|
|
335
|
-
flops = Cell(None, unit="TFLOPs", decimals=1)
|
|
336
|
-
flops_percentage = Cell(None, unit="%", decimals=1)
|
|
337
|
-
|
|
338
|
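-
math_fidelity = ""
|
|
339
|
-
math_fidelity += f"{short_name(input_0_datatype)}" if pd.notna(input_0_datatype) else ""
|
|
340
|
-
math_fidelity += f", {short_name(input_1_datatype)}" if pd.notna(input_1_datatype) else ""
|
|
341
|
-
math_fidelity += f" => {short_name(output_datatype)}" if pd.notna(output_datatype) else ""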
|
|
342
|
-
math_fidelity_cell = Cell(math_fidelity.strip())
|
|
343
|
-
|
|
344
|
-
is_dram_sharded = False
|
|
388
|
+
flops = Cell(flops / 1e12 if pd.notna(flops) else None, unit="TFLOPs", decimals=1)
|
|
389
|
+
flops_percentage = Cell(flops_percentage, unit="%", decimals=1)
|
|
390
|
+
math_fidelity_cell = Cell(
|
|
391
|
+
f"{math_fidelity} {short_name(input_0_datatype)} x {short_name(input_1_datatype)} => {short_name(output_datatype)}".strip()
|
|
392
|
+
if math_fidelity
|
|
393
|
+
else None
|
|
394
|
+
)
|
|
345
395
|
|
|
346
396
|
output = {
|
|
347
397
|
"ID": None,
|
|
@@ -434,6 +484,7 @@ def color_row(op_data, percentage, min_percentage):
|
|
|
434
484
|
op_colors = {
|
|
435
485
|
"(torch)": "red",
|
|
436
486
|
"Matmul": "magenta",
|
|
487
|
+
"OptimizedConvNew" : "orange",
|
|
437
488
|
"LayerNorm": "cyan",
|
|
438
489
|
"AllGather": "cyan",
|
|
439
490
|
"AllReduce": "cyan",
|
|
@@ -484,7 +535,8 @@ def color_row(op_data, percentage, min_percentage):
|
|
|
484
535
|
if op_data["Op-to-Op Gap"].raw_value is not None and op_data["Op-to-Op Gap"].raw_value > 6.5:
|
|
485
536
|
op_data["Op-to-Op Gap"].color = "red"
|
|
486
537
|
|
|
487
|
-
if "Matmul" in op_data["OP Code"].raw_value
|
|
538
|
+
if ("Matmul" in op_data["OP Code"].raw_value
|
|
539
|
+
or "OptimizedConvNew" in op_data["OP Code"].raw_value) and op_data["Math Fidelity"].raw_value:
|
|
488
540
|
math_fidelity = op_data["Math Fidelity"].raw_value.split()[0]
|
|
489
541
|
input_0_datatype = op_data["Input 0 Datatype"].raw_value
|
|
490
542
|
input_1_datatype = op_data["Input 1 Datatype"].raw_value
|
|
@@ -743,7 +795,7 @@ def filter_by_id_range(rows, id_range):
|
|
|
743
795
|
def main():
|
|
744
796
|
args, id_range = parse_args()
|
|
745
797
|
generate_perf_report(
|
|
746
|
-
args.csv_file, args.signpost, args.ignore_signposts, args.min_percentage, id_range, args.csv, args.no_advice, args.tracing_mode
|
|
798
|
+
args.csv_file, args.signpost, args.ignore_signposts, args.min_percentage, id_range, args.csv, args.no_advice, args.tracing_mode, args.raw_op_codes,
|
|
747
799
|
)
|
|
748
800
|
|
|
749
801
|
|
|
@@ -765,6 +817,7 @@ def parse_args():
|
|
|
765
817
|
parser.add_argument("--csv", type=str, help="Output filename for CSV format", metavar="OUTPUT_FILE")
|
|
766
818
|
parser.add_argument("--no-advice", action="store_true", help="Only show the table section of the report")
|
|
767
819
|
parser.add_argument("--tracing-mode", action="store_true", help="Do not sort when in tracing mode")
|
|
820
|
+
parser.add_argument("--raw-op-codes", action="store_true", help="Include raw op codes in output")
|
|
768
821
|
args = parser.parse_args()
|
|
769
822
|
|
|
770
823
|
# Set the global color_output variable
|
|
@@ -780,7 +833,7 @@ def parse_args():
|
|
|
780
833
|
return args, id_range
|
|
781
834
|
|
|
782
835
|
|
|
783
|
-
def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage, id_range, csv_output_file, no_advice, tracing_mode):
|
|
836
|
+
def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage, id_range, csv_output_file, no_advice, tracing_mode, raw_op_codes):
|
|
784
837
|
df = pd.read_csv(csv_file, low_memory=False)
|
|
785
838
|
|
|
786
839
|
# Add a column for original row numbers
|
|
@@ -808,6 +861,8 @@ def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage, i
|
|
|
808
861
|
for _, row in df.iterrows():
|
|
809
862
|
op_data, current_gap = analyze_op(row, prev_row)
|
|
810
863
|
op_data["ID"] = Cell(row["ORIGINAL_ROW"]) # Use the original row number
|
|
864
|
+
if raw_op_codes:
|
|
865
|
+
op_data["Raw OP Code"] = Cell(row["OP CODE"])
|
|
811
866
|
rows.append(op_data)
|
|
812
867
|
prev_row = row
|
|
813
868
|
|
|
@@ -856,6 +911,8 @@ def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage, i
|
|
|
856
911
|
]
|
|
857
912
|
if not no_advice:
|
|
858
913
|
all_headers.append("Advice")
|
|
914
|
+
if raw_op_codes:
|
|
915
|
+
all_headers.append("Raw OP Code")
|
|
859
916
|
print(colored(f"Writing CSV output to {csv_output_file}", "cyan"))
|
|
860
917
|
with open(csv_output_file, "w") as f:
|
|
861
918
|
csv_writer = csv.DictWriter(f, fieldnames=all_headers)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: tt-perf-report
|
|
3
|
-
Version: 1.0.3
|
|
3
|
+
Version: 1.0.4
|
|
4
4
|
Summary: This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities.
|
|
5
5
|
License: Apache License
|
|
6
6
|
Version 2.0, January 2004
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
tt_perf_report/__init__.py,sha256=-j4iFYebIwgdS8uphk8-M6zasRqGBL3CQGnJH9keRuI,92
|
|
2
|
+
tt_perf_report/perf_report.py,sha256=MJHgosbw7kD7MrJ6-ZtLhdny_iX9bR_UB-ZaHoyjTsE,36952
|
|
3
|
+
tt_perf_report-1.0.4.dist-info/LICENSE,sha256=6dZGjPECz_ULS-sf40FLlt6OmQFcrRvmzG5mJRZCQ5I,11825
|
|
4
|
+
tt_perf_report-1.0.4.dist-info/METADATA,sha256=fZKE2o9edCU0N6v-_tkUqSZ39j8mimeGBU_SIuuFi84,18305
|
|
5
|
+
tt_perf_report-1.0.4.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
|
6
|
+
tt_perf_report-1.0.4.dist-info/entry_points.txt,sha256=ReAziglcjbAkPbklqheUISkfoEVI5ptlFrBAJTIk5dI,67
|
|
7
|
+
tt_perf_report-1.0.4.dist-info/top_level.txt,sha256=mEQ-BK3rRbmz9QyWitTCLy2xwmC5rmJno_TY_H9s9CE,15
|
|
8
|
+
tt_perf_report-1.0.4.dist-info/RECORD,,
|
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
tt_perf_report/__init__.py,sha256=-j4iFYebIwgdS8uphk8-M6zasRqGBL3CQGnJH9keRuI,92
|
|
2
|
-
tt_perf_report/perf_report.py,sha256=K_AXtz8ZFFkhLLIoHz2jbuw6aFg1qsJbsHF7kQwL2GI,34620
|
|
3
|
-
tt_perf_report-1.0.3.dist-info/LICENSE,sha256=6dZGjPECz_ULS-sf40FLlt6OmQFcrRvmzG5mJRZCQ5I,11825
|
|
4
|
-
tt_perf_report-1.0.3.dist-info/METADATA,sha256=mCbrFtPNT_MbvLO-Vt7ugY6dS-FhhhrtifOM98PbE6g,18305
|
|
5
|
-
tt_perf_report-1.0.3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
6
|
-
tt_perf_report-1.0.3.dist-info/entry_points.txt,sha256=ReAziglcjbAkPbklqheUISkfoEVI5ptlFrBAJTIk5dI,67
|
|
7
|
-
tt_perf_report-1.0.3.dist-info/top_level.txt,sha256=mEQ-BK3rRbmz9QyWitTCLy2xwmC5rmJno_TY_H9s9CE,15
|
|
8
|
-
tt_perf_report-1.0.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|