tt-perf-report 1.1.0.tar.gz → 1.1.2.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tt-perf-report might be problematic.
- {tt_perf_report-1.1.0/src/tt_perf_report.egg-info → tt_perf_report-1.1.2}/PKG-INFO +1 -1
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.2}/pyproject.toml +1 -1
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.2}/src/tt_perf_report/perf_report.py +85 -32
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.2/src/tt_perf_report.egg-info}/PKG-INFO +1 -1
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.2}/LICENSE +0 -0
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.2}/LICENSE_understanding.txt +0 -0
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.2}/README.md +0 -0
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.2}/setup.cfg +0 -0
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.2}/src/tt_perf_report/__init__.py +0 -0
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.2}/src/tt_perf_report.egg-info/SOURCES.txt +0 -0
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.2}/src/tt_perf_report.egg-info/dependency_links.txt +0 -0
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.2}/src/tt_perf_report.egg-info/entry_points.txt +0 -0
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.2}/src/tt_perf_report.egg-info/requires.txt +0 -0
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.2}/src/tt_perf_report.egg-info/top_level.txt +0 -0
{tt_perf_report-1.1.0/src/tt_perf_report.egg-info → tt_perf_report-1.1.2}/PKG-INFO RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tt-perf-report
-Version: 1.1.0
+Version: 1.1.2
 Summary: This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities.
 License: Apache License
                        Version 2.0, January 2004
{tt_perf_report-1.1.0 → tt_perf_report-1.1.2}/pyproject.toml RENAMED

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "tt-perf-report"
-version = "1.1.0"
+version = "1.1.2"
 description = "This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities."
 license = {file = "LICENSE"}
 readme = "README.md"
{tt_perf_report-1.1.0 → tt_perf_report-1.1.2}/src/tt_perf_report/perf_report.py RENAMED

@@ -16,8 +16,14 @@ import pandas as pd
 # Global variable to store color preference
 color_output = None  # None means auto-detect, True forces color, False forces no color
 
-def get_value_physical_logical(input : str, is_physical : bool = True):
-
+
+def get_value_physical_logical(input, is_physical : bool = True):
+    # Handle numeric inputs (old format)
+    if isinstance(input, (int, float)):
+        return int(input)
+
+    # Handle string inputs (new format)
+    if isinstance(input, str) and "[" in input and "]" in input:
         physical_part = input.split("[")[0]
         logical_part = input.split("[")[1].split("]")[0]
 
@@ -26,9 +32,24 @@ def get_value_physical_logical(input : str, is_physical : bool = True):
         else:
             return int(logical_part)
     else:
-        #
+        # backwards compatibility - convert string to int
         return int(input)
 
+
+def detect_csv_format(df):
+    """Detect if CSV uses v1 (old) or v2 (new) format by checking for _PAD[LOGICAL] columns"""
+    v2_columns = [col for col in df.columns if "_PAD[LOGICAL]" in col]
+    return "v2" if v2_columns else "v1"
+
+
+def get_column_name(base_name, csv_format):
+    """Get the appropriate column name based on CSV format version"""
+    if csv_format == "v2":
+        return f"{base_name}_PAD[LOGICAL]"
+    else:
+        return base_name
+
+
 def set_color_output(force_color, force_no_color):
     global color_output
     if force_no_color:
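These three helpers are the heart of the 1.1.2 compatibility fix: `detect_csv_format` sniffs the column names once, `get_column_name` maps a base name to whichever column the trace actually has, and `get_value_physical_logical` now accepts either plain numbers (v1 traces) or `physical[logical]` strings (v2 traces). A minimal sketch of how they compose, assuming the functions from this diff are in scope; the one-row frames and the `1056[1024]` value are illustrative, not taken from a real trace:

```python
import pandas as pd

# Hypothetical one-row traces: v1 stores plain numbers, v2 stores
# "physical[logical]" strings under *_PAD[LOGICAL] column names.
v1_df = pd.DataFrame({"INPUT_0_X": [1024]})
v2_df = pd.DataFrame({"INPUT_0_X_PAD[LOGICAL]": ["1056[1024]"]})

for df in (v1_df, v2_df):
    fmt = detect_csv_format(df)              # "v1" or "v2"
    col = get_column_name("INPUT_0_X", fmt)  # picks the matching column name
    value = get_value_physical_logical(df.iloc[0][col], is_physical=False)
    print(fmt, col, value)                   # both yield a logical size of 1024
```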
@@ -147,11 +168,32 @@ def pad_string(string, length, align="left"):
     return padding + string if align == "right" else string + padding
 
 
-def evaluate_fidelity(
-
-
-
-
+def evaluate_fidelity(
+    input_0_datatype, input_1_datatype, output_datatype, math_fidelity
+):
+    integer_types = {"UINT8", "UINT16", "INT32", "UINT32"}
+
+    if (
+        input_0_datatype in integer_types
+        or input_1_datatype in integer_types
+        or output_datatype in integer_types
+    ):
+        return (
+            "not_applicable",
+            "Fidelity evaluation is not applicable for integer datatypes (UINT8, UINT16, INT32, UINT32).",
+        )
+
+    mantissa_bits = {"FLOAT32": 23, "BFLOAT16": 8, "BFLOAT8_B": 7, "BFLOAT4_B": 3}
+    try:
+        in0_bits = mantissa_bits[input_0_datatype]  # activations -> srcB (7 bits)
+        in1_bits = mantissa_bits[input_1_datatype]  # weights -> srcA (5 bits)
+        out_bits = mantissa_bits[output_datatype]
+    except KeyError as e:
+        return (
+            "unknown",
+            f"Datatype {e.args[0]} is not supported for fidelity evaluation.",
+        )
+
     if in0_bits == 8 and out_bits >= 7:
         if math_fidelity == "HiFi4":
             return (
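The rewritten `evaluate_fidelity` adds two early exits before the existing bit-width heuristics: integer operands skip fidelity evaluation entirely, and datatypes missing from the `mantissa_bits` table return `"unknown"` instead of failing. A quick sketch of the new paths, assuming the function as defined in this diff; `FLOAT16` below is just a stand-in for any datatype name outside the table:

```python
# Integer operands: fidelity analysis does not apply.
status, note = evaluate_fidelity("INT32", "BFLOAT16", "INT32", "HiFi4")
assert status == "not_applicable"

# A datatype with no mantissa_bits entry now degrades gracefully.
status, note = evaluate_fidelity("FLOAT16", "BFLOAT16", "BFLOAT16", "HiFi2")
assert status == "unknown" and "FLOAT16" in note
```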
@@ -220,35 +262,35 @@ def evaluate_fidelity(input_0_datatype, input_1_datatype, output_datatype, math_fidelity):
     )
 
 
-def analyze_matmul(row):
+def analyze_matmul(row, csv_format="v2"):
     input_0_from_dram = "DRAM" in row["INPUT_0_MEMORY"]
     input_1_from_dram = "DRAM" in row["INPUT_1_MEMORY"]
 
     total_data_size_bytes = 0
     if input_0_from_dram:
         total_data_size_bytes += (
-            get_value_physical_logical(row["
-            * get_value_physical_logical(row["
-            * get_value_physical_logical(row["
-            * get_value_physical_logical(row["
+            get_value_physical_logical(row[get_column_name("INPUT_0_W", csv_format)])
+            * get_value_physical_logical(row[get_column_name("INPUT_0_Y", csv_format)])
+            * get_value_physical_logical(row[get_column_name("INPUT_0_Z", csv_format)])
+            * get_value_physical_logical(row[get_column_name("INPUT_0_X", csv_format)])
             * get_datatype_size(row["INPUT_0_DATATYPE"])
         )
     if input_1_from_dram:
         total_data_size_bytes += (
-            get_value_physical_logical(row["
-            * get_value_physical_logical(row["
-            * get_value_physical_logical(row["
-            * get_value_physical_logical(row["
+            get_value_physical_logical(row[get_column_name("INPUT_1_W", csv_format)])
+            * get_value_physical_logical(row[get_column_name("INPUT_1_Y", csv_format)])
+            * get_value_physical_logical(row[get_column_name("INPUT_1_Z", csv_format)])
+            * get_value_physical_logical(row[get_column_name("INPUT_1_X", csv_format)])
             * get_datatype_size(row["INPUT_1_DATATYPE"])
         )
 
     # Always include output if it's written to DRAM
     if "DRAM" in row["OUTPUT_0_MEMORY"]:
         total_data_size_bytes += (
-            get_value_physical_logical(row["
-            * get_value_physical_logical(row["
-            * get_value_physical_logical(row["
-            * get_value_physical_logical(row["
+            get_value_physical_logical(row[get_column_name("OUTPUT_0_W", csv_format)])
+            * get_value_physical_logical(row[get_column_name("OUTPUT_0_Y", csv_format)])
+            * get_value_physical_logical(row[get_column_name("OUTPUT_0_Z", csv_format)])
+            * get_value_physical_logical(row[get_column_name("OUTPUT_0_X", csv_format)])
             * get_datatype_size(row["OUTPUT_0_DATATYPE"])
         )
 
@@ -268,8 +310,8 @@ def analyze_matmul(row):
 
     peak_flops_value = tflops_per_core(math_fidelity) * 1e12 * core_count
 
-    M, K, N = get_value_physical_logical(row["
-    W, Z = get_value_physical_logical(row["
+    M, K, N = get_value_physical_logical(row[get_column_name("INPUT_0_Y", csv_format)]), get_value_physical_logical(row[get_column_name("INPUT_0_X", csv_format)]), get_value_physical_logical(row[get_column_name("INPUT_1_X", csv_format)])
+    W, Z = get_value_physical_logical(row[get_column_name("INPUT_0_W", csv_format)]), get_value_physical_logical(row[get_column_name("INPUT_0_Z", csv_format)])
 
     flops = (M * K * N * W * Z * 2) / duration_s
 
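For reference, the `flops` line treats the op as `W * Z` independent `M x K x N` matmuls with two floating-point operations (one multiply, one accumulate) per inner-product step. A worked example with made-up numbers:

```python
# Illustrative values only: a single (W = Z = 1) 1024x1024x1024 matmul
# that ran for 100 microseconds.
M = K = N = 1024
W = Z = 1
duration_s = 100e-6

flops = (M * K * N * W * Z * 2) / duration_s  # 2 FLOPs per multiply-accumulate
print(f"{flops / 1e12:.2f} TFLOPs")           # ~21.47 TFLOPs
```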
@@ -327,7 +369,7 @@ def analyze_halo(row):
 
     return config
 
-def analyze_conv(row):
+def analyze_conv(row, csv_format="v2"):
     duration_s = row["DEVICE KERNEL DURATION [ns]"] * 1e-9
 
     core_count = 64  # we decided to normalize to the max core count
@@ -338,10 +380,10 @@ def analyze_conv(row):
 
     peak_flops_value = tflops_per_core(math_fidelity) * 1e12 * core_count
 
-    NHW = get_value_physical_logical(row["
-    CH_IN = get_value_physical_logical(row["
+    NHW = get_value_physical_logical(row[get_column_name("OUTPUT_0_Y", csv_format)])
+    CH_IN = get_value_physical_logical(row[get_column_name("INPUT_0_X", csv_format)])
     W = [int(x) for x in (attributes.split("window_hw")[1].split("; ")[0][2:-1].split(";"))]
-    CH_OUT = get_value_physical_logical(row["
+    CH_OUT = get_value_physical_logical(row[get_column_name("INPUT_1_X", csv_format)])
 
     M, K, N = NHW, CH_IN * W[0] * W[1], CH_OUT
     flops = (M * K * N * 2) / duration_s
@@ -387,7 +429,7 @@ def analyze_conv(row):
         config,
     )
 
-def analyze_op(row, prev_row):
+def analyze_op(row, prev_row, csv_format="v2"):
     op_code = Cell(row["OP CODE"])
     cores = Cell(int(row["CORE COUNT"]) if pd.notna(row["CORE COUNT"]) else None)
     device_time = Cell(
@@ -414,7 +456,12 @@ def analyze_op(row, prev_row):
     output_datatype_cell = Cell(output_datatype)
     input_0_datatype_cell = Cell(input_0_datatype)
     input_1_datatype_cell = Cell(input_1_datatype)
-    short_name = lambda n: {
+    short_name = lambda n: {
+        "FLOAT32": "FP32",
+        "BFLOAT16": "BF16",
+        "BFLOAT8_B": "BFP8",
+        "BFLOAT4_B": "BFP4",
+    }.get(n, n)
 
     dram_speed = Cell(None, unit="GB/s", decimals=0)
     dram_percentage = Cell(None, unit="%", decimals=1)
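The expanded `short_name` lambda maps the four float datatypes to their short labels and falls back to the raw name via `dict.get`, so datatypes outside the table (for example, the integer types this release starts tolerating) render unchanged. A standalone repro:

```python
# Same mapping as in the diff above, runnable on its own.
short_name = lambda n: {
    "FLOAT32": "FP32",
    "BFLOAT16": "BF16",
    "BFLOAT8_B": "BFP8",
    "BFLOAT4_B": "BFP4",
}.get(n, n)

print(short_name("BFLOAT8_B"))  # BFP8
print(short_name("UINT32"))     # UINT32 -- unknown names pass through
```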
@@ -440,7 +487,7 @@ def analyze_op(row, prev_row):
         math_fidelity,
         is_dram_sharded,
         adjusted_core_count,  # Get the potentially adjusted core count
-    ) = analyze_matmul(row)
+    ) = analyze_matmul(row, csv_format)
     op_code = Cell(f"{op_code.raw_value} {size}")
     dram_speed = Cell(dram_speed, unit="GB/s", decimals=0)
     dram_percentage = Cell(dram_percentage, unit="%", decimals=1)
@@ -461,7 +508,7 @@ def analyze_op(row, prev_row):
         memory_info,
         math_fidelity,
         config,
-    ) = analyze_conv(row)
+    ) = analyze_conv(row, csv_format)
     op_code = Cell(f"{op_code.raw_value} {size} {config}")
     dram_speed = Cell(None, unit="GB/s", decimals=0)
     dram_percentage = Cell(None, unit="%", decimals=1)
@@ -1053,6 +1100,12 @@ def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage,
                         raw_op_codes, no_host_ops, no_stacked_report, no_stack_by_in0, stacked_report_file):
     df = pd.read_csv(csv_file, low_memory=False)
 
+    # Detect CSV format version
+    csv_format = detect_csv_format(df)
+
+    if csv_format != "v2":
+        print(colored(f"Detected CSV format: v1 (legacy format)", "cyan"))
+
     # Add a column for original row numbers
     df["ORIGINAL_ROW"] = df.index + 2  # +2 to match Excel row numbers (1-based + header)
 
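Detection happens once per report, right after the CSV is loaded, and the resulting `csv_format` string is threaded through `analyze_op` down to `analyze_matmul` and `analyze_conv` (see the hunk below). A minimal sketch of the detection step on an in-memory frame, assuming `detect_csv_format` from this diff is in scope; the column names are illustrative:

```python
import pandas as pd

# No *_PAD[LOGICAL] columns here, so this frame is classified as legacy v1.
df = pd.DataFrame({"OP CODE": ["Matmul"], "INPUT_0_X": [1024]})
assert detect_csv_format(df) == "v1"

# Adding any *_PAD[LOGICAL] column flips the detection to v2.
df["INPUT_0_X_PAD[LOGICAL]"] = ["1056[1024]"]
assert detect_csv_format(df) == "v2"
```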
@@ -1076,7 +1129,7 @@ def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage,
     device_ops = 0
     host_ops = 0
     for _, row in df.iterrows():
-        op_data, current_gap = analyze_op(row, prev_row)
+        op_data, current_gap = analyze_op(row, prev_row, csv_format)
         op_data["ID"] = Cell(row["ORIGINAL_ROW"])  # Use the original row number
         op_data["Global Call Count"] = Cell(row["GLOBAL CALL COUNT"])
         if raw_op_codes:
{tt_perf_report-1.1.0 → tt_perf_report-1.1.2/src/tt_perf_report.egg-info}/PKG-INFO RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tt-perf-report
-Version: 1.1.0
+Version: 1.1.2
 Summary: This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities.
 License: Apache License
                        Version 2.0, January 2004