tt-perf-report 1.0.7__tar.gz → 1.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tt_perf_report-1.0.7/src/tt_perf_report.egg-info → tt_perf_report-1.1.1}/PKG-INFO +2 -1
- {tt_perf_report-1.0.7 → tt_perf_report-1.1.1}/pyproject.toml +2 -2
- {tt_perf_report-1.0.7 → tt_perf_report-1.1.1}/src/tt_perf_report/perf_report.py +179 -28
- {tt_perf_report-1.0.7 → tt_perf_report-1.1.1/src/tt_perf_report.egg-info}/PKG-INFO +2 -1
- tt_perf_report-1.1.1/src/tt_perf_report.egg-info/requires.txt +2 -0
- tt_perf_report-1.0.7/src/tt_perf_report.egg-info/requires.txt +0 -1
- {tt_perf_report-1.0.7 → tt_perf_report-1.1.1}/LICENSE +0 -0
- {tt_perf_report-1.0.7 → tt_perf_report-1.1.1}/LICENSE_understanding.txt +0 -0
- {tt_perf_report-1.0.7 → tt_perf_report-1.1.1}/README.md +0 -0
- {tt_perf_report-1.0.7 → tt_perf_report-1.1.1}/setup.cfg +0 -0
- {tt_perf_report-1.0.7 → tt_perf_report-1.1.1}/src/tt_perf_report/__init__.py +0 -0
- {tt_perf_report-1.0.7 → tt_perf_report-1.1.1}/src/tt_perf_report.egg-info/SOURCES.txt +0 -0
- {tt_perf_report-1.0.7 → tt_perf_report-1.1.1}/src/tt_perf_report.egg-info/dependency_links.txt +0 -0
- {tt_perf_report-1.0.7 → tt_perf_report-1.1.1}/src/tt_perf_report.egg-info/entry_points.txt +0 -0
- {tt_perf_report-1.0.7 → tt_perf_report-1.1.1}/src/tt_perf_report.egg-info/top_level.txt +0 -0
{tt_perf_report-1.0.7/src/tt_perf_report.egg-info → tt_perf_report-1.1.1}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tt-perf-report
-Version: 1.0.7
+Version: 1.1.1
 Summary: This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities.
 License: Apache License
                        Version 2.0, January 2004
@@ -218,6 +218,7 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 License-File: LICENSE_understanding.txt
 Requires-Dist: pandas
+Requires-Dist: matplotlib
 Dynamic: license-file
 
 # Performance Report Analysis Tool
{tt_perf_report-1.0.7 → tt_perf_report-1.1.1}/pyproject.toml

@@ -4,12 +4,12 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "tt-perf-report"
-version = "1.0.7"
+version = "1.1.1"
 description = "This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities."
 license = {file = "LICENSE"}
 readme = "README.md"
 keywords = ["tenstorrent", "tt-metal"]
-dependencies = ["pandas"]
+dependencies = ["pandas", "matplotlib"]
 
 [project.scripts]
 tt-perf-report = "tt_perf_report.perf_report:main"
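The new matplotlib dependency backs the PNG plot that the stacked report can emit (see the plot_stacked_report function added below); upgrading with a plain `pip install --upgrade tt-perf-report` should pull it in automatically alongside pandas.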
{tt_perf_report-1.0.7 → tt_perf_report-1.1.1}/src/tt_perf_report/perf_report.py

@@ -2,18 +2,54 @@
 # SPDX-License-Identifier: Apache-2.0
 
 # SPDX-FileCopyrightText: © 2025 Tenstorrent AI ULC
-import csv
-import sys
 import argparse
+import csv
+from collections import defaultdict
+import os
 import re
+import sys
 from typing import Any, Optional, Union
-
+
+import matplotlib.pyplot as plt
 import pandas as pd
 
 # Global variable to store color preference
 color_output = None  # None means auto-detect, True forces color, False forces no color
 
 
+def get_value_physical_logical(input, is_physical: bool = True):
+    # Handle numeric inputs (old format)
+    if isinstance(input, (int, float)):
+        return int(input)
+
+    # Handle string inputs (new format)
+    if isinstance(input, str) and "[" in input and "]" in input:
+        physical_part = input.split("[")[0]
+        logical_part = input.split("[")[1].split("]")[0]
+
+        if is_physical:
+            return int(physical_part)
+        else:
+            return int(logical_part)
+    else:
+        # backwards compatibility - convert string to int
+        return int(input)
+
+
+def detect_csv_format(df):
+    """Detect if CSV uses v1 (old) or v2 (new) format by checking for _PAD[LOGICAL] columns"""
+    v2_columns = [col for col in df.columns if "_PAD[LOGICAL]" in col]
+    return "v2" if v2_columns else "v1"
+
+
+def get_column_name(base_name, csv_format):
+    """Get the appropriate column name based on CSV format version"""
+    if csv_format == "v2":
+        return f"{base_name}_PAD[LOGICAL]"
+    else:
+        return base_name
+
+
 def set_color_output(force_color, force_no_color):
     global color_output
     if force_no_color:
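As orientation before the call-site changes below, here is a minimal sketch of how these new helpers behave. It assumes they are importable from tt_perf_report.perf_report as released in 1.1.1; the shape values and the sample column are invented for illustration.

```python
import pandas as pd

from tt_perf_report.perf_report import (
    detect_csv_format,
    get_column_name,
    get_value_physical_logical,
)

# v2 traces encode a padded (physical) size with the logical size in brackets;
# v1 traces use plain numbers. The helper accepts both.
print(get_value_physical_logical("4096[4000]"))                      # 4096 (physical part)
print(get_value_physical_logical("4096[4000]", is_physical=False))   # 4000 (logical part)
print(get_value_physical_logical(4096))                              # 4096 (old numeric format)

# Format detection keys off the "_PAD[LOGICAL]" column suffix.
df = pd.DataFrame({"INPUT_0_X_PAD[LOGICAL]": ["4096[4000]"]})        # hypothetical trace excerpt
fmt = detect_csv_format(df)                                          # "v2"
print(get_column_name("INPUT_0_X", fmt))                             # "INPUT_0_X_PAD[LOGICAL]"
print(get_column_name("INPUT_0_X", "v1"))                            # "INPUT_0_X"
```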
@@ -205,35 +241,35 @@ def evaluate_fidelity(input_0_datatype, input_1_datatype, output_datatype, math_
     )
 
 
-def analyze_matmul(row):
+def analyze_matmul(row, csv_format="v2"):
     input_0_from_dram = "DRAM" in row["INPUT_0_MEMORY"]
     input_1_from_dram = "DRAM" in row["INPUT_1_MEMORY"]
 
     total_data_size_bytes = 0
     if input_0_from_dram:
         total_data_size_bytes += (
-            row["INPUT_0_W"]
-            * row["INPUT_0_Y"]
-            * row["INPUT_0_Z"]
-            * row["INPUT_0_X"]
+            get_value_physical_logical(row[get_column_name("INPUT_0_W", csv_format)])
+            * get_value_physical_logical(row[get_column_name("INPUT_0_Y", csv_format)])
+            * get_value_physical_logical(row[get_column_name("INPUT_0_Z", csv_format)])
+            * get_value_physical_logical(row[get_column_name("INPUT_0_X", csv_format)])
             * get_datatype_size(row["INPUT_0_DATATYPE"])
         )
     if input_1_from_dram:
         total_data_size_bytes += (
-            row["INPUT_1_W"]
-            * row["INPUT_1_Y"]
-            * row["INPUT_1_Z"]
-            * row["INPUT_1_X"]
+            get_value_physical_logical(row[get_column_name("INPUT_1_W", csv_format)])
+            * get_value_physical_logical(row[get_column_name("INPUT_1_Y", csv_format)])
+            * get_value_physical_logical(row[get_column_name("INPUT_1_Z", csv_format)])
+            * get_value_physical_logical(row[get_column_name("INPUT_1_X", csv_format)])
             * get_datatype_size(row["INPUT_1_DATATYPE"])
         )
 
     # Always include output if it's written to DRAM
     if "DRAM" in row["OUTPUT_0_MEMORY"]:
         total_data_size_bytes += (
-            row["OUTPUT_0_W"]
-            * row["OUTPUT_0_Y"]
-            * row["OUTPUT_0_Z"]
-            * row["OUTPUT_0_X"]
+            get_value_physical_logical(row[get_column_name("OUTPUT_0_W", csv_format)])
+            * get_value_physical_logical(row[get_column_name("OUTPUT_0_Y", csv_format)])
+            * get_value_physical_logical(row[get_column_name("OUTPUT_0_Z", csv_format)])
+            * get_value_physical_logical(row[get_column_name("OUTPUT_0_X", csv_format)])
             * get_datatype_size(row["OUTPUT_0_DATATYPE"])
         )
 
@@ -253,8 +289,8 @@ def analyze_matmul(row):
 
     peak_flops_value = tflops_per_core(math_fidelity) * 1e12 * core_count
 
-    M, K, N = row["INPUT_0_Y"], row["INPUT_0_X"], row["INPUT_1_X"]
-    W, Z = row["INPUT_0_W"], row["INPUT_0_Z"]
+    M, K, N = get_value_physical_logical(row[get_column_name("INPUT_0_Y", csv_format)]), get_value_physical_logical(row[get_column_name("INPUT_0_X", csv_format)]), get_value_physical_logical(row[get_column_name("INPUT_1_X", csv_format)])
+    W, Z = get_value_physical_logical(row[get_column_name("INPUT_0_W", csv_format)]), get_value_physical_logical(row[get_column_name("INPUT_0_Z", csv_format)])
 
     flops = (M * K * N * W * Z * 2) / duration_s
 
@@ -312,7 +348,7 @@ def analyze_halo(row):
 
     return config
 
-def analyze_conv(row):
+def analyze_conv(row, csv_format="v2"):
     duration_s = row["DEVICE KERNEL DURATION [ns]"] * 1e-9
 
     core_count = 64  # we decided to normalize to the max core count
@@ -323,10 +359,10 @@ def analyze_conv(row):
 
     peak_flops_value = tflops_per_core(math_fidelity) * 1e12 * core_count
 
-    NHW = row["OUTPUT_0_Y"]
-    CH_IN = row["INPUT_0_X"]
+    NHW = get_value_physical_logical(row[get_column_name("OUTPUT_0_Y", csv_format)])
+    CH_IN = get_value_physical_logical(row[get_column_name("INPUT_0_X", csv_format)])
     W = [int(x) for x in (attributes.split("window_hw")[1].split("; ")[0][2:-1].split(";"))]
-    CH_OUT = row["INPUT_1_X"]
+    CH_OUT = get_value_physical_logical(row[get_column_name("INPUT_1_X", csv_format)])
 
     M, K, N = NHW, CH_IN * W[0] * W[1], CH_OUT
     flops = (M * K * N * 2) / duration_s
@@ -372,7 +408,7 @@ def analyze_conv(row):
         config,
     )
 
-def analyze_op(row, prev_row):
+def analyze_op(row, prev_row, csv_format="v2"):
    op_code = Cell(row["OP CODE"])
    cores = Cell(int(row["CORE COUNT"]) if pd.notna(row["CORE COUNT"]) else None)
    device_time = Cell(
@@ -425,7 +461,7 @@ def analyze_op(row, prev_row):
             math_fidelity,
             is_dram_sharded,
             adjusted_core_count,  # Get the potentially adjusted core count
-        ) = analyze_matmul(row)
+        ) = analyze_matmul(row, csv_format)
         op_code = Cell(f"{op_code.raw_value} {size}")
         dram_speed = Cell(dram_speed, unit="GB/s", decimals=0)
         dram_percentage = Cell(dram_percentage, unit="%", decimals=1)
@@ -446,7 +482,7 @@ def analyze_op(row, prev_row):
             memory_info,
             math_fidelity,
             config,
-        ) = analyze_conv(row)
+        ) = analyze_conv(row, csv_format)
         op_code = Cell(f"{op_code.raw_value} {size} {config}")
         dram_speed = Cell(None, unit="GB/s", decimals=0)
         dram_percentage = Cell(None, unit="%", decimals=1)
@@ -800,6 +836,93 @@ def generate_matmul_advice(op_data):
     return advice
 
 
+def generate_stacked_report(rows, visible_headers, stack_by_input0_layout: bool = False):
+    if stack_by_input0_layout:
+        visible_headers.append("Input 0 Memory")
+
+    # Create a pandas DataFrame from rows and headers
+    data = {header: [row[header].raw_value for row in rows] for header in visible_headers}
+    df = pd.DataFrame(data)
+
+    if (stack_by_input0_layout):
+        df["OP Code Joined"] = df["OP Code"].str.split().str[0] \
+            + " (in0:" + df["Input 0 Memory"].str.split('_').str[-2].str.lower() + "_" + df["Input 0 Memory"].str.split('_').str[-1].str.lower() + ")"
+    else:
+        df["OP Code Joined"] = df["OP Code"].str.split().str[0]
+
+    # Group by the joined OP Code and aggregate the data
+    stacked_df = df.groupby("OP Code Joined").agg(
+        Device_Time_Sum_us=("Device Time", "sum"),
+        Ops_Count=("Device Time", "count"),
+        Flops_min=("FLOPs %", "min"),
+        Flops_max=("FLOPs %", "max"),
+        Flops_mean=("FLOPs %", "mean"),
+        Flops_std=("FLOPs %", "std"),
+    ).reset_index()
+
+    # Calculate the percentage of device time
+    total_device_time = stacked_df["Device_Time_Sum_us"].sum()
+    stacked_df["%"] = (stacked_df["Device_Time_Sum_us"] / total_device_time) * 100
+    # Reorder columns to move Device_Time_Percentage to be the 3rd column
+    cols = stacked_df.columns.tolist()
+    cols.insert(0, cols.pop(cols.index("%")))
+    stacked_df = stacked_df[cols]
+    # Sort the stacked dataframe by "Device_Time_Sum_us" in descending order
+    stacked_df = stacked_df.sort_values(by="Device_Time_Sum_us", ascending=False)
+
+    return stacked_df
+
+
+def print_stacked_report(stacked_df: pd.DataFrame):
+    print("\n📊 Stacked report 📊\n============\n")
+    print(stacked_df.to_string(index=False, float_format="%.2f"))
+
+
+def dump_stacked_report(stacked_df: pd.DataFrame, output_file: str):
+    stacked_df.to_csv(output_file, index=False, float_format="%.1f")
+
+
+def plot_stacked_report(stacked_df: pd.DataFrame, output_file: str, threshold: float = 0.02):
+    # Prepare data for the stacked bar plot
+    device_time_sum = stacked_df["Device_Time_Sum_us"]
+    total_sum = device_time_sum.sum()
+
+    # Create a stacked bar plot
+    plt.figure(figsize=(6, 8), dpi=300)
+    width = 0.5
+    bottom = 0
+    colors = plt.cm.tab20.colors + plt.cm.tab20b.colors + plt.cm.tab20c.colors
+
+    for i, row in stacked_df.iterrows():
+        color = colors[i % len(colors)]
+        bar = plt.bar(1, row["Device_Time_Sum_us"], width, label=row["OP Code Joined"], bottom=bottom, color=color)
+
+        text = f"({row['%']:.1f}%) {row['OP Code Joined']} total={row['Device_Time_Sum_us']:.1f}us; {row['Ops_Count']} ops"
+        if not pd.isna(row["Flops_mean"]):
+            text += f"\n Util [{row['Flops_min']:.1f} - {row['Flops_max']:.1f}] {row['Flops_mean']:.1f} ± {row['Flops_std']:.1f} %"
+
+        # Add overlay text if the data is significant
+        if row["Device_Time_Sum_us"] >= total_sum * threshold:
+            plt.text(
+                bar[0].get_x() + bar[0].get_width() / 2,
+                bottom + row["Device_Time_Sum_us"] / 2,
+                text,
+                ha="center",
+                va="center",
+                fontsize=6,
+                color="white"
+            )
+        bottom += row["Device_Time_Sum_us"]
+
+    # Set plot labels and title
+    plt.xlim(1 - width / 2 - 0.05, 1 + width / 2 + 0.05)
+    plt.ylabel("Device Time [us]")
+    plt.title(f"Stacked Device Time (Total: {total_sum:.1f} us)")
+    plt.tight_layout()
+
+    # Save the plot to a file
+    plt.savefig(output_file)
+
 def merge_device_rows(df):
     block_by_device = defaultdict(list)
 
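The core of generate_stacked_report is a plain pandas group-and-aggregate over the per-op rows. Here is a self-contained sketch of that aggregation on a tiny, made-up table (real reports carry more columns and many more ops):

```python
import pandas as pd

# Hypothetical per-op rows; "OP Code" carries the size suffix that gets stripped for grouping.
ops = pd.DataFrame({
    "OP Code": ["Matmul 512x512x512", "Matmul 1024x1024x1024", "Softmax"],
    "Device Time": [120.0, 480.0, 60.0],   # microseconds
    "FLOPs %": [42.0, 55.0, None],
})
ops["OP Code Joined"] = ops["OP Code"].str.split().str[0]

stacked = ops.groupby("OP Code Joined").agg(
    Device_Time_Sum_us=("Device Time", "sum"),
    Ops_Count=("Device Time", "count"),
    Flops_mean=("FLOPs %", "mean"),
).reset_index()
stacked["%"] = stacked["Device_Time_Sum_us"] / stacked["Device_Time_Sum_us"].sum() * 100

# The two Matmul rows collapse into one bucket holding roughly 91% of device time.
print(stacked.sort_values("Device_Time_Sum_us", ascending=False).to_string(index=False))
```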
@@ -901,7 +1024,7 @@ def main():
     args, id_range = parse_args()
     generate_perf_report(
         args.csv_file, args.signpost, args.ignore_signposts, args.min_percentage, id_range, args.csv, args.no_advice,
-        args.tracing_mode, args.raw_op_codes, args.no_host_ops)
+        args.tracing_mode, args.raw_op_codes, args.no_host_ops, args.no_stacked_report, args.no_stack_by_in0, args.stacked_csv)
 
 
 def parse_args():
@@ -924,6 +1047,12 @@ def parse_args():
     parser.add_argument("--tracing-mode", action="store_true", help="Do not sort when in tracing mode")
     parser.add_argument("--raw-op-codes", action="store_true", help="Include raw op codes in output")
     parser.add_argument("--no-host-ops", action="store_true", help="Do not include host ops in output")
+    parser.add_argument("--no-stacked-report", action="store_true", help="Do not generate a stacked report")
+    parser.add_argument("--no-stack-by-in0", action="store_true",
+                        help="Do not group the stacked report by the layout of Input 0 (extracted from the Input 0 Memory column)"
+                        )
+    parser.add_argument("--stacked-csv", type=str,
+                        help="Output filename for the stacked report CSV; Defaults to OUTPUT_FILE_stacked.csv", metavar="STACKED_FILE")
 
     args = parser.parse_args()
 
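In practice the three new flags shape the stacked summary: by default it is grouped by base op code plus the Input 0 memory layout and printed after the per-op table, `--no-stack-by-in0` drops the layout grouping, `--no-stacked-report` suppresses the summary entirely, and `--stacked-csv STACKED_FILE` picks the output filename when the summary is written to disk (for example `tt-perf-report ops_perf_results.csv --stacked-csv stacked.csv`, with the trace filename purely illustrative).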
@@ -942,9 +1071,15 @@
 
 def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage,
                          id_range, csv_output_file, no_advice, tracing_mode,
-                         raw_op_codes, no_host_ops):
+                         raw_op_codes, no_host_ops, no_stacked_report, no_stack_by_in0, stacked_report_file):
     df = pd.read_csv(csv_file, low_memory=False)
 
+    # Detect CSV format version
+    csv_format = detect_csv_format(df)
+
+    if csv_format != "v2":
+        print(colored(f"Detected CSV format: v1 (legacy format)", "cyan"))
+
     # Add a column for original row numbers
     df["ORIGINAL_ROW"] = df.index + 2  # +2 to match Excel row numbers (1-based + header)
 
@@ -968,7 +1103,7 @@ def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage,
     device_ops = 0
     host_ops = 0
     for _, row in df.iterrows():
-        op_data, current_gap = analyze_op(row, prev_row)
+        op_data, current_gap = analyze_op(row, prev_row, csv_format)
         op_data["ID"] = Cell(row["ORIGINAL_ROW"])  # Use the original row number
         op_data["Global Call Count"] = Cell(row["GLOBAL CALL COUNT"])
         if raw_op_codes:
@@ -1046,6 +1181,22 @@ def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage,
     if not no_advice:
         print_advice_section(rows, visible_headers, col_widths)
 
+    # handle stacked report generation
+    if not(no_stacked_report):
+        stacked_report = generate_stacked_report(rows, visible_headers, not(no_stack_by_in0))
+
+        if not(csv_output_file):
+            print_stacked_report(stacked_report)
+        if stacked_report_file or csv_output_file:
+            if not stacked_report_file:
+                base_stacked_report_file = f"{os.path.splitext(csv_output_file)[0]}_stacked"
+            else:
+                base_stacked_report_file = os.path.splitext(stacked_report_file)[0]
+            print(colored(f"Writing CSV stacked report to {base_stacked_report_file}.csv", "cyan"))
+            dump_stacked_report(stacked_report, f"{base_stacked_report_file}.csv")
+            print(colored(f"Plotting PNG stacked report to {base_stacked_report_file}.png", "cyan"))
+            plot_stacked_report(stacked_report, f"{base_stacked_report_file}.png")
+
 
 def is_host_op(op_data):
     return "(torch)" in op_data["OP Code"].raw_value
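A note on where the stacked artifacts land, following the logic above: with neither a CSV output file nor `--stacked-csv`, the summary is only printed; when a CSV output file such as report.csv is requested, the summary is written beside it as report_stacked.csv plus report_stacked.png; and an explicit `--stacked-csv my_stacked.csv` yields my_stacked.csv and my_stacked.png (all filenames here are illustrative).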
{tt_perf_report-1.0.7 → tt_perf_report-1.1.1/src/tt_perf_report.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tt-perf-report
-Version: 1.0.7
+Version: 1.1.1
 Summary: This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities.
 License: Apache License
                        Version 2.0, January 2004
@@ -218,6 +218,7 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 License-File: LICENSE_understanding.txt
 Requires-Dist: pandas
+Requires-Dist: matplotlib
 Dynamic: license-file
 
 # Performance Report Analysis Tool
tt_perf_report-1.0.7/src/tt_perf_report.egg-info/requires.txt

@@ -1 +0,0 @@
-pandas