tt-perf-report 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tt-perf-report might be problematic. Click here for more details.

@@ -16,8 +16,14 @@ import pandas as pd
16
16
  # Global variable to store color preference
17
17
  color_output = None # None means auto-detect, True forces color, False forces no color
18
18
 
19
- def get_value_physical_logical(input : str, is_physical : bool = True):
20
- if "[" in input and "]" in input:
19
+
20
+ def get_value_physical_logical(input, is_physical : bool = True):
21
+ # Handle numeric inputs (old format)
22
+ if isinstance(input, (int, float)):
23
+ return int(input)
24
+
25
+ # Handle string inputs (new format)
26
+ if isinstance(input, str) and "[" in input and "]" in input:
21
27
  physical_part = input.split("[")[0]
22
28
  logical_part = input.split("[")[1].split("]")[0]
23
29
 
@@ -26,9 +32,24 @@ def get_value_physical_logical(input : str, is_physical : bool = True):
26
32
  else:
27
33
  return int(logical_part)
28
34
  else:
29
- # back compatible
35
+ # backwards compatibility - convert string to int
30
36
  return int(input)
31
37
 
38
+
39
+ def detect_csv_format(df):
40
+ """Detect if CSV uses v1 (old) or v2 (new) format by checking for _PAD[LOGICAL] columns"""
41
+ v2_columns = [col for col in df.columns if "_PAD[LOGICAL]" in col]
42
+ return "v2" if v2_columns else "v1"
43
+
44
+
45
+ def get_column_name(base_name, csv_format):
46
+ """Get the appropriate column name based on CSV format version"""
47
+ if csv_format == "v2":
48
+ return f"{base_name}_PAD[LOGICAL]"
49
+ else:
50
+ return base_name
51
+
52
+
32
53
  def set_color_output(force_color, force_no_color):
33
54
  global color_output
34
55
  if force_no_color:
@@ -147,11 +168,32 @@ def pad_string(string, length, align="left"):
147
168
  return padding + string if align == "right" else string + padding
148
169
 
149
170
 
150
- def evaluate_fidelity(input_0_datatype, input_1_datatype, output_datatype, math_fidelity):
151
- mantissa_bits = {"BFLOAT16": 8, "BFLOAT8_B": 7, "BFLOAT4_B": 3}
152
- in0_bits = mantissa_bits[input_0_datatype] # activations -> srcB (7 bits)
153
- in1_bits = mantissa_bits[input_1_datatype] # weights -> srcA (5 bits)
154
- out_bits = mantissa_bits[output_datatype]
171
+ def evaluate_fidelity(
172
+ input_0_datatype, input_1_datatype, output_datatype, math_fidelity
173
+ ):
174
+ integer_types = {"UINT8", "UINT16", "INT32", "UINT32"}
175
+
176
+ if (
177
+ input_0_datatype in integer_types
178
+ or input_1_datatype in integer_types
179
+ or output_datatype in integer_types
180
+ ):
181
+ return (
182
+ "not_applicable",
183
+ "Fidelity evaluation is not applicable for integer datatypes (UINT8, UINT16, INT32, UINT32).",
184
+ )
185
+
186
+ mantissa_bits = {"FLOAT32": 23, "BFLOAT16": 8, "BFLOAT8_B": 7, "BFLOAT4_B": 3}
187
+ try:
188
+ in0_bits = mantissa_bits[input_0_datatype] # activations -> srcB (7 bits)
189
+ in1_bits = mantissa_bits[input_1_datatype] # weights -> srcA (5 bits)
190
+ out_bits = mantissa_bits[output_datatype]
191
+ except KeyError as e:
192
+ return (
193
+ "unknown",
194
+ f"Datatype {e.args[0]} is not supported for fidelity evaluation.",
195
+ )
196
+
155
197
  if in0_bits == 8 and out_bits >= 7:
156
198
  if math_fidelity == "HiFi4":
157
199
  return (
@@ -220,35 +262,35 @@ def evaluate_fidelity(input_0_datatype, input_1_datatype, output_datatype, math_
220
262
  )
221
263
 
222
264
 
223
- def analyze_matmul(row):
265
+ def analyze_matmul(row, csv_format="v2"):
224
266
  input_0_from_dram = "DRAM" in row["INPUT_0_MEMORY"]
225
267
  input_1_from_dram = "DRAM" in row["INPUT_1_MEMORY"]
226
268
 
227
269
  total_data_size_bytes = 0
228
270
  if input_0_from_dram:
229
271
  total_data_size_bytes += (
230
- get_value_physical_logical(row["INPUT_0_W_PAD[LOGICAL]"])
231
- * get_value_physical_logical(row["INPUT_0_Y_PAD[LOGICAL]"])
232
- * get_value_physical_logical(row["INPUT_0_Z_PAD[LOGICAL]"])
233
- * get_value_physical_logical(row["INPUT_0_X_PAD[LOGICAL]"])
272
+ get_value_physical_logical(row[get_column_name("INPUT_0_W", csv_format)])
273
+ * get_value_physical_logical(row[get_column_name("INPUT_0_Y", csv_format)])
274
+ * get_value_physical_logical(row[get_column_name("INPUT_0_Z", csv_format)])
275
+ * get_value_physical_logical(row[get_column_name("INPUT_0_X", csv_format)])
234
276
  * get_datatype_size(row["INPUT_0_DATATYPE"])
235
277
  )
236
278
  if input_1_from_dram:
237
279
  total_data_size_bytes += (
238
- get_value_physical_logical(row["INPUT_1_W_PAD[LOGICAL]"])
239
- * get_value_physical_logical(row["INPUT_1_Y_PAD[LOGICAL]"])
240
- * get_value_physical_logical(row["INPUT_1_Z_PAD[LOGICAL]"])
241
- * get_value_physical_logical(row["INPUT_1_X_PAD[LOGICAL]"])
280
+ get_value_physical_logical(row[get_column_name("INPUT_1_W", csv_format)])
281
+ * get_value_physical_logical(row[get_column_name("INPUT_1_Y", csv_format)])
282
+ * get_value_physical_logical(row[get_column_name("INPUT_1_Z", csv_format)])
283
+ * get_value_physical_logical(row[get_column_name("INPUT_1_X", csv_format)])
242
284
  * get_datatype_size(row["INPUT_1_DATATYPE"])
243
285
  )
244
286
 
245
287
  # Always include output if it's written to DRAM
246
288
  if "DRAM" in row["OUTPUT_0_MEMORY"]:
247
289
  total_data_size_bytes += (
248
- get_value_physical_logical(row["OUTPUT_0_W_PAD[LOGICAL]"])
249
- * get_value_physical_logical(row["OUTPUT_0_Y_PAD[LOGICAL]"])
250
- * get_value_physical_logical(row["OUTPUT_0_Z_PAD[LOGICAL]"])
251
- * get_value_physical_logical(row["OUTPUT_0_X_PAD[LOGICAL]"])
290
+ get_value_physical_logical(row[get_column_name("OUTPUT_0_W", csv_format)])
291
+ * get_value_physical_logical(row[get_column_name("OUTPUT_0_Y", csv_format)])
292
+ * get_value_physical_logical(row[get_column_name("OUTPUT_0_Z", csv_format)])
293
+ * get_value_physical_logical(row[get_column_name("OUTPUT_0_X", csv_format)])
252
294
  * get_datatype_size(row["OUTPUT_0_DATATYPE"])
253
295
  )
254
296
 
@@ -268,8 +310,8 @@ def analyze_matmul(row):
268
310
 
269
311
  peak_flops_value = tflops_per_core(math_fidelity) * 1e12 * core_count
270
312
 
271
- M, K, N = get_value_physical_logical(row["INPUT_0_Y_PAD[LOGICAL]"]), get_value_physical_logical(row["INPUT_0_X_PAD[LOGICAL]"]), get_value_physical_logical(row["INPUT_1_X_PAD[LOGICAL]"])
272
- W, Z = get_value_physical_logical(row["INPUT_0_W_PAD[LOGICAL]"]), get_value_physical_logical(row["INPUT_0_Z_PAD[LOGICAL]"])
313
+ M, K, N = get_value_physical_logical(row[get_column_name("INPUT_0_Y", csv_format)]), get_value_physical_logical(row[get_column_name("INPUT_0_X", csv_format)]), get_value_physical_logical(row[get_column_name("INPUT_1_X", csv_format)])
314
+ W, Z = get_value_physical_logical(row[get_column_name("INPUT_0_W", csv_format)]), get_value_physical_logical(row[get_column_name("INPUT_0_Z", csv_format)])
273
315
 
274
316
  flops = (M * K * N * W * Z * 2) / duration_s
275
317
 
@@ -327,7 +369,7 @@ def analyze_halo(row):
327
369
 
328
370
  return config
329
371
 
330
- def analyze_conv(row):
372
+ def analyze_conv(row, csv_format="v2"):
331
373
  duration_s = row["DEVICE KERNEL DURATION [ns]"] * 1e-9
332
374
 
333
375
  core_count = 64 # we decided to normalize to the max core count
@@ -338,10 +380,10 @@ def analyze_conv(row):
338
380
 
339
381
  peak_flops_value = tflops_per_core(math_fidelity) * 1e12 * core_count
340
382
 
341
- NHW = get_value_physical_logical(row["OUTPUT_0_Y_PAD[LOGICAL]"])
342
- CH_IN = get_value_physical_logical(row["INPUT_0_X_PAD[LOGICAL]"])
383
+ NHW = get_value_physical_logical(row[get_column_name("OUTPUT_0_Y", csv_format)])
384
+ CH_IN = get_value_physical_logical(row[get_column_name("INPUT_0_X", csv_format)])
343
385
  W = [int(x) for x in (attributes.split("window_hw")[1].split("; ")[0][2:-1].split(";"))]
344
- CH_OUT = get_value_physical_logical(row["INPUT_1_X_PAD[LOGICAL]"])
386
+ CH_OUT = get_value_physical_logical(row[get_column_name("INPUT_1_X", csv_format)])
345
387
 
346
388
  M, K, N = NHW, CH_IN * W[0] * W[1], CH_OUT
347
389
  flops = (M * K * N * 2) / duration_s
@@ -387,7 +429,7 @@ def analyze_conv(row):
387
429
  config,
388
430
  )
389
431
 
390
- def analyze_op(row, prev_row):
432
+ def analyze_op(row, prev_row, csv_format="v2"):
391
433
  op_code = Cell(row["OP CODE"])
392
434
  cores = Cell(int(row["CORE COUNT"]) if pd.notna(row["CORE COUNT"]) else None)
393
435
  device_time = Cell(
@@ -414,7 +456,12 @@ def analyze_op(row, prev_row):
414
456
  output_datatype_cell = Cell(output_datatype)
415
457
  input_0_datatype_cell = Cell(input_0_datatype)
416
458
  input_1_datatype_cell = Cell(input_1_datatype)
417
- short_name = lambda n: {"BFLOAT16": "BF16", "BFLOAT8_B": "BFP8", "BFLOAT4_B": "BFP4"}.get(n, n)
459
+ short_name = lambda n: {
460
+ "FLOAT32": "FP32",
461
+ "BFLOAT16": "BF16",
462
+ "BFLOAT8_B": "BFP8",
463
+ "BFLOAT4_B": "BFP4",
464
+ }.get(n, n)
418
465
 
419
466
  dram_speed = Cell(None, unit="GB/s", decimals=0)
420
467
  dram_percentage = Cell(None, unit="%", decimals=1)
@@ -440,7 +487,7 @@ def analyze_op(row, prev_row):
440
487
  math_fidelity,
441
488
  is_dram_sharded,
442
489
  adjusted_core_count, # Get the potentially adjusted core count
443
- ) = analyze_matmul(row)
490
+ ) = analyze_matmul(row, csv_format)
444
491
  op_code = Cell(f"{op_code.raw_value} {size}")
445
492
  dram_speed = Cell(dram_speed, unit="GB/s", decimals=0)
446
493
  dram_percentage = Cell(dram_percentage, unit="%", decimals=1)
@@ -461,7 +508,7 @@ def analyze_op(row, prev_row):
461
508
  memory_info,
462
509
  math_fidelity,
463
510
  config,
464
- ) = analyze_conv(row)
511
+ ) = analyze_conv(row, csv_format)
465
512
  op_code = Cell(f"{op_code.raw_value} {size} {config}")
466
513
  dram_speed = Cell(None, unit="GB/s", decimals=0)
467
514
  dram_percentage = Cell(None, unit="%", decimals=1)
@@ -1053,6 +1100,12 @@ def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage,
1053
1100
  raw_op_codes, no_host_ops, no_stacked_report, no_stack_by_in0, stacked_report_file):
1054
1101
  df = pd.read_csv(csv_file, low_memory=False)
1055
1102
 
1103
+ # Detect CSV format version
1104
+ csv_format = detect_csv_format(df)
1105
+
1106
+ if csv_format != "v2":
1107
+ print(colored(f"Detected CSV format: v1 (legacy format)", "cyan"))
1108
+
1056
1109
  # Add a column for original row numbers
1057
1110
  df["ORIGINAL_ROW"] = df.index + 2 # +2 to match Excel row numbers (1-based + header)
1058
1111
 
@@ -1076,7 +1129,7 @@ def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage,
1076
1129
  device_ops = 0
1077
1130
  host_ops = 0
1078
1131
  for _, row in df.iterrows():
1079
- op_data, current_gap = analyze_op(row, prev_row)
1132
+ op_data, current_gap = analyze_op(row, prev_row, csv_format)
1080
1133
  op_data["ID"] = Cell(row["ORIGINAL_ROW"]) # Use the original row number
1081
1134
  op_data["Global Call Count"] = Cell(row["GLOBAL CALL COUNT"])
1082
1135
  if raw_op_codes:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tt-perf-report
3
- Version: 1.1.0
3
+ Version: 1.1.2
4
4
  Summary: This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities.
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -0,0 +1,9 @@
1
+ tt_perf_report/__init__.py,sha256=-j4iFYebIwgdS8uphk8-M6zasRqGBL3CQGnJH9keRuI,92
2
+ tt_perf_report/perf_report.py,sha256=jR4qQXbp_EmMPzVLFPCMNdAkQIID86cQWDFi9aaJ7N0,49530
3
+ tt_perf_report-1.1.2.dist-info/licenses/LICENSE,sha256=6dZGjPECz_ULS-sf40FLlt6OmQFcrRvmzG5mJRZCQ5I,11825
4
+ tt_perf_report-1.1.2.dist-info/licenses/LICENSE_understanding.txt,sha256=pymi-yb_RvYM9p2ZA4iSNsImcvhDBBxlGuJCY9dTq7M,233
5
+ tt_perf_report-1.1.2.dist-info/METADATA,sha256=YDhjas8bQEK4in2sWiDMA0K0heB70TxhMwqqKTeHbOk,18393
6
+ tt_perf_report-1.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
7
+ tt_perf_report-1.1.2.dist-info/entry_points.txt,sha256=ReAziglcjbAkPbklqheUISkfoEVI5ptlFrBAJTIk5dI,67
8
+ tt_perf_report-1.1.2.dist-info/top_level.txt,sha256=mEQ-BK3rRbmz9QyWitTCLy2xwmC5rmJno_TY_H9s9CE,15
9
+ tt_perf_report-1.1.2.dist-info/RECORD,,
@@ -1,9 +0,0 @@
1
- tt_perf_report/__init__.py,sha256=-j4iFYebIwgdS8uphk8-M6zasRqGBL3CQGnJH9keRuI,92
2
- tt_perf_report/perf_report.py,sha256=UpXTZHtdbtAtCD8KhI-vUqyWEm-eZu_CTf9swxXpeX4,47661
3
- tt_perf_report-1.1.0.dist-info/licenses/LICENSE,sha256=6dZGjPECz_ULS-sf40FLlt6OmQFcrRvmzG5mJRZCQ5I,11825
4
- tt_perf_report-1.1.0.dist-info/licenses/LICENSE_understanding.txt,sha256=pymi-yb_RvYM9p2ZA4iSNsImcvhDBBxlGuJCY9dTq7M,233
5
- tt_perf_report-1.1.0.dist-info/METADATA,sha256=y2HGZxt8XOk2p2p3MED2jUUWzfgljnp8A4lfOC20bDs,18393
6
- tt_perf_report-1.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
7
- tt_perf_report-1.1.0.dist-info/entry_points.txt,sha256=ReAziglcjbAkPbklqheUISkfoEVI5ptlFrBAJTIk5dI,67
8
- tt_perf_report-1.1.0.dist-info/top_level.txt,sha256=mEQ-BK3rRbmz9QyWitTCLy2xwmC5rmJno_TY_H9s9CE,15
9
- tt_perf_report-1.1.0.dist-info/RECORD,,