tt-perf-report 1.1.0__tar.gz → 1.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tt-perf-report might be problematic. Click here for more details.
- {tt_perf_report-1.1.0/src/tt_perf_report.egg-info → tt_perf_report-1.1.1}/PKG-INFO +1 -1
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.1}/pyproject.toml +1 -1
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.1}/src/tt_perf_report/perf_report.py +53 -26
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.1/src/tt_perf_report.egg-info}/PKG-INFO +1 -1
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.1}/LICENSE +0 -0
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.1}/LICENSE_understanding.txt +0 -0
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.1}/README.md +0 -0
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.1}/setup.cfg +0 -0
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.1}/src/tt_perf_report/__init__.py +0 -0
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.1}/src/tt_perf_report.egg-info/SOURCES.txt +0 -0
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.1}/src/tt_perf_report.egg-info/dependency_links.txt +0 -0
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.1}/src/tt_perf_report.egg-info/entry_points.txt +0 -0
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.1}/src/tt_perf_report.egg-info/requires.txt +0 -0
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.1}/src/tt_perf_report.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tt-perf-report
|
|
3
|
-
Version: 1.1.0
|
|
3
|
+
Version: 1.1.1
|
|
4
4
|
Summary: This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities.
|
|
5
5
|
License: Apache License
|
|
6
6
|
Version 2.0, January 2004
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "tt-perf-report"
|
|
7
|
-
version = "1.1.0"
|
|
7
|
+
version = "1.1.1"
|
|
8
8
|
description = "This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities."
|
|
9
9
|
license = {file = "LICENSE"}
|
|
10
10
|
readme = "README.md"
|
|
@@ -16,8 +16,14 @@ import pandas as pd
|
|
|
16
16
|
# Global variable to store color preference
|
|
17
17
|
color_output = None # None means auto-detect, True forces color, False forces no color
|
|
18
18
|
|
|
19
|
-
|
|
20
|
-
|
|
19
|
+
|
|
20
|
+
def get_value_physical_logical(input, is_physical : bool = True):
|
|
21
|
+
# Handle numeric inputs (old format)
|
|
22
|
+
if isinstance(input, (int, float)):
|
|
23
|
+
return int(input)
|
|
24
|
+
|
|
25
|
+
# Handle string inputs (new format)
|
|
26
|
+
if isinstance(input, str) and "[" in input and "]" in input:
|
|
21
27
|
physical_part = input.split("[")[0]
|
|
22
28
|
logical_part = input.split("[")[1].split("]")[0]
|
|
23
29
|
|
|
@@ -26,9 +32,24 @@ def get_value_physical_logical(input : str, is_physical : bool = True):
|
|
|
26
32
|
else:
|
|
27
33
|
return int(logical_part)
|
|
28
34
|
else:
|
|
29
|
-
#
|
|
35
|
+
# backwards compatibility - convert string to int
|
|
30
36
|
return int(input)
|
|
31
37
|
|
|
38
|
+
|
|
39
|
+
def detect_csv_format(df):
|
|
40
|
+
"""Detect if CSV uses v1 (old) or v2 (new) format by checking for _PAD[LOGICAL] columns"""
|
|
41
|
+
v2_columns = [col for col in df.columns if "_PAD[LOGICAL]" in col]
|
|
42
|
+
return "v2" if v2_columns else "v1"
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def get_column_name(base_name, csv_format):
|
|
46
|
+
"""Get the appropriate column name based on CSV format version"""
|
|
47
|
+
if csv_format == "v2":
|
|
48
|
+
return f"{base_name}_PAD[LOGICAL]"
|
|
49
|
+
else:
|
|
50
|
+
return base_name
|
|
51
|
+
|
|
52
|
+
|
|
32
53
|
def set_color_output(force_color, force_no_color):
|
|
33
54
|
global color_output
|
|
34
55
|
if force_no_color:
|
|
@@ -220,35 +241,35 @@ def evaluate_fidelity(input_0_datatype, input_1_datatype, output_datatype, math_
|
|
|
220
241
|
)
|
|
221
242
|
|
|
222
243
|
|
|
223
|
-
def analyze_matmul(row):
|
|
244
|
+
def analyze_matmul(row, csv_format="v2"):
|
|
224
245
|
input_0_from_dram = "DRAM" in row["INPUT_0_MEMORY"]
|
|
225
246
|
input_1_from_dram = "DRAM" in row["INPUT_1_MEMORY"]
|
|
226
247
|
|
|
227
248
|
total_data_size_bytes = 0
|
|
228
249
|
if input_0_from_dram:
|
|
229
250
|
total_data_size_bytes += (
|
|
230
|
-
get_value_physical_logical(row["INPUT_0_W_PAD[LOGICAL]"])
|
|
231
|
-
* get_value_physical_logical(row["INPUT_0_Y_PAD[LOGICAL]"])
|
|
232
|
-
* get_value_physical_logical(row["INPUT_0_Z_PAD[LOGICAL]"])
|
|
233
|
-
* get_value_physical_logical(row["INPUT_0_X_PAD[LOGICAL]"])
|
|
251
|
+
get_value_physical_logical(row[get_column_name("INPUT_0_W", csv_format)])
|
|
252
|
+
* get_value_physical_logical(row[get_column_name("INPUT_0_Y", csv_format)])
|
|
253
|
+
* get_value_physical_logical(row[get_column_name("INPUT_0_Z", csv_format)])
|
|
254
|
+
* get_value_physical_logical(row[get_column_name("INPUT_0_X", csv_format)])
|
|
234
255
|
* get_datatype_size(row["INPUT_0_DATATYPE"])
|
|
235
256
|
)
|
|
236
257
|
if input_1_from_dram:
|
|
237
258
|
total_data_size_bytes += (
|
|
238
|
-
get_value_physical_logical(row["INPUT_1_W_PAD[LOGICAL]"])
|
|
239
|
-
* get_value_physical_logical(row["INPUT_1_Y_PAD[LOGICAL]"])
|
|
240
|
-
* get_value_physical_logical(row["INPUT_1_Z_PAD[LOGICAL]"])
|
|
241
|
-
* get_value_physical_logical(row["INPUT_1_X_PAD[LOGICAL]"])
|
|
259
|
+
get_value_physical_logical(row[get_column_name("INPUT_1_W", csv_format)])
|
|
260
|
+
* get_value_physical_logical(row[get_column_name("INPUT_1_Y", csv_format)])
|
|
261
|
+
* get_value_physical_logical(row[get_column_name("INPUT_1_Z", csv_format)])
|
|
262
|
+
* get_value_physical_logical(row[get_column_name("INPUT_1_X", csv_format)])
|
|
242
263
|
* get_datatype_size(row["INPUT_1_DATATYPE"])
|
|
243
264
|
)
|
|
244
265
|
|
|
245
266
|
# Always include output if it's written to DRAM
|
|
246
267
|
if "DRAM" in row["OUTPUT_0_MEMORY"]:
|
|
247
268
|
total_data_size_bytes += (
|
|
248
|
-
get_value_physical_logical(row["OUTPUT_0_W_PAD[LOGICAL]"])
|
|
249
|
-
* get_value_physical_logical(row["OUTPUT_0_Y_PAD[LOGICAL]"])
|
|
250
|
-
* get_value_physical_logical(row["OUTPUT_0_Z_PAD[LOGICAL]"])
|
|
251
|
-
* get_value_physical_logical(row["OUTPUT_0_X_PAD[LOGICAL]"])
|
|
269
|
+
get_value_physical_logical(row[get_column_name("OUTPUT_0_W", csv_format)])
|
|
270
|
+
* get_value_physical_logical(row[get_column_name("OUTPUT_0_Y", csv_format)])
|
|
271
|
+
* get_value_physical_logical(row[get_column_name("OUTPUT_0_Z", csv_format)])
|
|
272
|
+
* get_value_physical_logical(row[get_column_name("OUTPUT_0_X", csv_format)])
|
|
252
273
|
* get_datatype_size(row["OUTPUT_0_DATATYPE"])
|
|
253
274
|
)
|
|
254
275
|
|
|
@@ -268,8 +289,8 @@ def analyze_matmul(row):
|
|
|
268
289
|
|
|
269
290
|
peak_flops_value = tflops_per_core(math_fidelity) * 1e12 * core_count
|
|
270
291
|
|
|
271
|
-
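M, K, N = get_value_physical_logical(row["INPUT_0_Y_PAD[LOGICAL]"]), get_value_physical_logical(row["INPUT_0_X_PAD[LOGICAL]"]), get_value_physical_logical(row["INPUT_1_X_PAD[LOGICAL]"])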
|
|
272
|
-
W, Z = get_value_physical_logical(row["INPUT_0_W_PAD[LOGICAL]"]), get_value_physical_logical(row["INPUT_0_Z_PAD[LOGICAL]"])
|
|
292
|
+
M, K, N = get_value_physical_logical(row[get_column_name("INPUT_0_Y", csv_format)]), get_value_physical_logical(row[get_column_name("INPUT_0_X", csv_format)]), get_value_physical_logical(row[get_column_name("INPUT_1_X", csv_format)])
|
|
293
|
+
W, Z = get_value_physical_logical(row[get_column_name("INPUT_0_W", csv_format)]), get_value_physical_logical(row[get_column_name("INPUT_0_Z", csv_format)])
|
|
273
294
|
|
|
274
295
|
flops = (M * K * N * W * Z * 2) / duration_s
|
|
275
296
|
|
|
@@ -327,7 +348,7 @@ def analyze_halo(row):
|
|
|
327
348
|
|
|
328
349
|
return config
|
|
329
350
|
|
|
330
|
-
def analyze_conv(row):
|
|
351
|
+
def analyze_conv(row, csv_format="v2"):
|
|
331
352
|
duration_s = row["DEVICE KERNEL DURATION [ns]"] * 1e-9
|
|
332
353
|
|
|
333
354
|
core_count = 64 # we decided to normalize to the max core count
|
|
@@ -338,10 +359,10 @@ def analyze_conv(row):
|
|
|
338
359
|
|
|
339
360
|
peak_flops_value = tflops_per_core(math_fidelity) * 1e12 * core_count
|
|
340
361
|
|
|
341
|
-
NHW = get_value_physical_logical(row["OUTPUT_0_Y_PAD[LOGICAL]"])
|
|
342
|
-
CH_IN = get_value_physical_logical(row["INPUT_0_X_PAD[LOGICAL]"])
|
|
362
|
+
NHW = get_value_physical_logical(row[get_column_name("OUTPUT_0_Y", csv_format)])
|
|
363
|
+
CH_IN = get_value_physical_logical(row[get_column_name("INPUT_0_X", csv_format)])
|
|
343
364
|
W = [int(x) for x in (attributes.split("window_hw")[1].split("; ")[0][2:-1].split(";"))]
|
|
344
|
-
CH_OUT = get_value_physical_logical(row["INPUT_1_X_PAD[LOGICAL]"])
|
|
365
|
+
CH_OUT = get_value_physical_logical(row[get_column_name("INPUT_1_X", csv_format)])
|
|
345
366
|
|
|
346
367
|
M, K, N = NHW, CH_IN * W[0] * W[1], CH_OUT
|
|
347
368
|
flops = (M * K * N * 2) / duration_s
|
|
@@ -387,7 +408,7 @@ def analyze_conv(row):
|
|
|
387
408
|
config,
|
|
388
409
|
)
|
|
389
410
|
|
|
390
|
-
def analyze_op(row, prev_row):
|
|
411
|
+
def analyze_op(row, prev_row, csv_format="v2"):
|
|
391
412
|
op_code = Cell(row["OP CODE"])
|
|
392
413
|
cores = Cell(int(row["CORE COUNT"]) if pd.notna(row["CORE COUNT"]) else None)
|
|
393
414
|
device_time = Cell(
|
|
@@ -440,7 +461,7 @@ def analyze_op(row, prev_row):
|
|
|
440
461
|
math_fidelity,
|
|
441
462
|
is_dram_sharded,
|
|
442
463
|
adjusted_core_count, # Get the potentially adjusted core count
|
|
443
|
-
) = analyze_matmul(row)
|
|
464
|
+
) = analyze_matmul(row, csv_format)
|
|
444
465
|
op_code = Cell(f"{op_code.raw_value} {size}")
|
|
445
466
|
dram_speed = Cell(dram_speed, unit="GB/s", decimals=0)
|
|
446
467
|
dram_percentage = Cell(dram_percentage, unit="%", decimals=1)
|
|
@@ -461,7 +482,7 @@ def analyze_op(row, prev_row):
|
|
|
461
482
|
memory_info,
|
|
462
483
|
math_fidelity,
|
|
463
484
|
config,
|
|
464
|
-
) = analyze_conv(row)
|
|
485
|
+
) = analyze_conv(row, csv_format)
|
|
465
486
|
op_code = Cell(f"{op_code.raw_value} {size} {config}")
|
|
466
487
|
dram_speed = Cell(None, unit="GB/s", decimals=0)
|
|
467
488
|
dram_percentage = Cell(None, unit="%", decimals=1)
|
|
@@ -1053,6 +1074,12 @@ def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage,
|
|
|
1053
1074
|
raw_op_codes, no_host_ops, no_stacked_report, no_stack_by_in0, stacked_report_file):
|
|
1054
1075
|
df = pd.read_csv(csv_file, low_memory=False)
|
|
1055
1076
|
|
|
1077
|
+
# Detect CSV format version
|
|
1078
|
+
csv_format = detect_csv_format(df)
|
|
1079
|
+
|
|
1080
|
+
if csv_format != "v2":
|
|
1081
|
+
print(colored(f"Detected CSV format: v1 (legacy format)", "cyan"))
|
|
1082
|
+
|
|
1056
1083
|
# Add a column for original row numbers
|
|
1057
1084
|
df["ORIGINAL_ROW"] = df.index + 2 # +2 to match Excel row numbers (1-based + header)
|
|
1058
1085
|
|
|
@@ -1076,7 +1103,7 @@ def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage,
|
|
|
1076
1103
|
device_ops = 0
|
|
1077
1104
|
host_ops = 0
|
|
1078
1105
|
for _, row in df.iterrows():
|
|
1079
|
-
op_data, current_gap = analyze_op(row, prev_row)
|
|
1106
|
+
op_data, current_gap = analyze_op(row, prev_row, csv_format)
|
|
1080
1107
|
op_data["ID"] = Cell(row["ORIGINAL_ROW"]) # Use the original row number
|
|
1081
1108
|
op_data["Global Call Count"] = Cell(row["GLOBAL CALL COUNT"])
|
|
1082
1109
|
if raw_op_codes:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tt-perf-report
|
|
3
|
-
Version: 1.1.0
|
|
3
|
+
Version: 1.1.1
|
|
4
4
|
Summary: This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities.
|
|
5
5
|
License: Apache License
|
|
6
6
|
Version 2.0, January 2004
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tt_perf_report-1.1.0 → tt_perf_report-1.1.1}/src/tt_perf_report.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|