tt-perf-report 1.1.0.tar.gz → 1.1.1.tar.gz

This diff compares the contents of two publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.

Potentially problematic release.

This version of tt-perf-report might be problematic.

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tt-perf-report
-Version: 1.1.0
+Version: 1.1.1
 Summary: This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities.
 License: Apache License
         Version 2.0, January 2004
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "tt-perf-report"
-version = "1.1.0"
+version = "1.1.1"
 description = "This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities."
 license = {file = "LICENSE"}
 readme = "README.md"
@@ -16,8 +16,14 @@ import pandas as pd
 # Global variable to store color preference
 color_output = None # None means auto-detect, True forces color, False forces no color
 
-def get_value_physical_logical(input : str, is_physical : bool = True):
-    if "[" in input and "]" in input:
+
+def get_value_physical_logical(input, is_physical : bool = True):
+    # Handle numeric inputs (old format)
+    if isinstance(input, (int, float)):
+        return int(input)
+
+    # Handle string inputs (new format)
+    if isinstance(input, str) and "[" in input and "]" in input:
         physical_part = input.split("[")[0]
         logical_part = input.split("[")[1].split("]")[0]
 
@@ -26,9 +32,24 @@ def get_value_physical_logical(input : str, is_physical : bool = True):
         else:
             return int(logical_part)
     else:
-        # back compatible
+        # backwards compatibility - convert string to int
         return int(input)
 
+
+def detect_csv_format(df):
+    """Detect if CSV uses v1 (old) or v2 (new) format by checking for _PAD[LOGICAL] columns"""
+    v2_columns = [col for col in df.columns if "_PAD[LOGICAL]" in col]
+    return "v2" if v2_columns else "v1"
+
+
+def get_column_name(base_name, csv_format):
+    """Get the appropriate column name based on CSV format version"""
+    if csv_format == "v2":
+        return f"{base_name}_PAD[LOGICAL]"
+    else:
+        return base_name
+
+
 def set_color_output(force_color, force_no_color):
     global color_output
     if force_no_color:
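
In short, 1.1.1 teaches the report reader to handle both trace formats: v2 profiler CSVs use "*_PAD[LOGICAL]" shape columns whose values look like "64[57]" (physical[logical]), while v1 CSVs use plain column names with numeric values. A minimal sketch of how the new helpers behave, with the helper bodies condensed from the diff above and made-up one-row DataFrames standing in for real profiler traces:

    import pandas as pd

    # Condensed from the helpers added in 1.1.1 (see the diff above).
    def get_value_physical_logical(value, is_physical=True):
        if isinstance(value, (int, float)):          # v1 traces: plain numbers
            return int(value)
        if isinstance(value, str) and "[" in value and "]" in value:
            physical = value.split("[")[0]
            logical = value.split("[")[1].split("]")[0]
            return int(physical) if is_physical else int(logical)
        return int(value)                            # numeric strings, etc.

    def detect_csv_format(df):
        return "v2" if [c for c in df.columns if "_PAD[LOGICAL]" in c] else "v1"

    def get_column_name(base_name, csv_format):
        return f"{base_name}_PAD[LOGICAL]" if csv_format == "v2" else base_name

    # Made-up one-row traces; real profiler CSVs carry many more columns.
    df_v2 = pd.DataFrame({"INPUT_0_X_PAD[LOGICAL]": ["64[57]"]})
    df_v1 = pd.DataFrame({"INPUT_0_X": [64]})

    for df in (df_v2, df_v1):
        fmt = detect_csv_format(df)
        col = get_column_name("INPUT_0_X", fmt)
        print(fmt, col, get_value_physical_logical(df.iloc[0][col]))
        # -> v2 INPUT_0_X_PAD[LOGICAL] 64, then v1 INPUT_0_X 64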
@@ -220,35 +241,35 @@ def evaluate_fidelity(input_0_datatype, input_1_datatype, output_datatype, math_
     )
 
 
-def analyze_matmul(row):
+def analyze_matmul(row, csv_format="v2"):
     input_0_from_dram = "DRAM" in row["INPUT_0_MEMORY"]
     input_1_from_dram = "DRAM" in row["INPUT_1_MEMORY"]
 
     total_data_size_bytes = 0
     if input_0_from_dram:
         total_data_size_bytes += (
-            get_value_physical_logical(row["INPUT_0_W_PAD[LOGICAL]"])
-            * get_value_physical_logical(row["INPUT_0_Y_PAD[LOGICAL]"])
-            * get_value_physical_logical(row["INPUT_0_Z_PAD[LOGICAL]"])
-            * get_value_physical_logical(row["INPUT_0_X_PAD[LOGICAL]"])
+            get_value_physical_logical(row[get_column_name("INPUT_0_W", csv_format)])
+            * get_value_physical_logical(row[get_column_name("INPUT_0_Y", csv_format)])
+            * get_value_physical_logical(row[get_column_name("INPUT_0_Z", csv_format)])
+            * get_value_physical_logical(row[get_column_name("INPUT_0_X", csv_format)])
             * get_datatype_size(row["INPUT_0_DATATYPE"])
         )
     if input_1_from_dram:
         total_data_size_bytes += (
-            get_value_physical_logical(row["INPUT_1_W_PAD[LOGICAL]"])
-            * get_value_physical_logical(row["INPUT_1_Y_PAD[LOGICAL]"])
-            * get_value_physical_logical(row["INPUT_1_Z_PAD[LOGICAL]"])
-            * get_value_physical_logical(row["INPUT_1_X_PAD[LOGICAL]"])
+            get_value_physical_logical(row[get_column_name("INPUT_1_W", csv_format)])
+            * get_value_physical_logical(row[get_column_name("INPUT_1_Y", csv_format)])
+            * get_value_physical_logical(row[get_column_name("INPUT_1_Z", csv_format)])
+            * get_value_physical_logical(row[get_column_name("INPUT_1_X", csv_format)])
             * get_datatype_size(row["INPUT_1_DATATYPE"])
         )
 
     # Always include output if it's written to DRAM
     if "DRAM" in row["OUTPUT_0_MEMORY"]:
         total_data_size_bytes += (
-            get_value_physical_logical(row["OUTPUT_0_W_PAD[LOGICAL]"])
-            * get_value_physical_logical(row["OUTPUT_0_Y_PAD[LOGICAL]"])
-            * get_value_physical_logical(row["OUTPUT_0_Z_PAD[LOGICAL]"])
-            * get_value_physical_logical(row["OUTPUT_0_X_PAD[LOGICAL]"])
+            get_value_physical_logical(row[get_column_name("OUTPUT_0_W", csv_format)])
+            * get_value_physical_logical(row[get_column_name("OUTPUT_0_Y", csv_format)])
+            * get_value_physical_logical(row[get_column_name("OUTPUT_0_Z", csv_format)])
+            * get_value_physical_logical(row[get_column_name("OUTPUT_0_X", csv_format)])
             * get_datatype_size(row["OUTPUT_0_DATATYPE"])
         )
 
@@ -268,8 +289,8 @@ def analyze_matmul(row):
 
     peak_flops_value = tflops_per_core(math_fidelity) * 1e12 * core_count
 
-    M, K, N = get_value_physical_logical(row["INPUT_0_Y_PAD[LOGICAL]"]), get_value_physical_logical(row["INPUT_0_X_PAD[LOGICAL]"]), get_value_physical_logical(row["INPUT_1_X_PAD[LOGICAL]"])
-    W, Z = get_value_physical_logical(row["INPUT_0_W_PAD[LOGICAL]"]), get_value_physical_logical(row["INPUT_0_Z_PAD[LOGICAL]"])
+    M, K, N = get_value_physical_logical(row[get_column_name("INPUT_0_Y", csv_format)]), get_value_physical_logical(row[get_column_name("INPUT_0_X", csv_format)]), get_value_physical_logical(row[get_column_name("INPUT_1_X", csv_format)])
+    W, Z = get_value_physical_logical(row[get_column_name("INPUT_0_W", csv_format)]), get_value_physical_logical(row[get_column_name("INPUT_0_Z", csv_format)])
 
     flops = (M * K * N * W * Z * 2) / duration_s
 
@@ -327,7 +348,7 @@ def analyze_halo(row):
 
     return config
 
-def analyze_conv(row):
+def analyze_conv(row, csv_format="v2"):
    duration_s = row["DEVICE KERNEL DURATION [ns]"] * 1e-9
 
    core_count = 64 # we decided to normalize to the max core count
@@ -338,10 +359,10 @@ def analyze_conv(row):
 
     peak_flops_value = tflops_per_core(math_fidelity) * 1e12 * core_count
 
-    NHW = get_value_physical_logical(row["OUTPUT_0_Y_PAD[LOGICAL]"])
-    CH_IN = get_value_physical_logical(row["INPUT_0_X_PAD[LOGICAL]"])
+    NHW = get_value_physical_logical(row[get_column_name("OUTPUT_0_Y", csv_format)])
+    CH_IN = get_value_physical_logical(row[get_column_name("INPUT_0_X", csv_format)])
     W = [int(x) for x in (attributes.split("window_hw")[1].split("; ")[0][2:-1].split(";"))]
-    CH_OUT = get_value_physical_logical(row["INPUT_1_X_PAD[LOGICAL]"])
+    CH_OUT = get_value_physical_logical(row[get_column_name("INPUT_1_X", csv_format)])
 
     M, K, N = NHW, CH_IN * W[0] * W[1], CH_OUT
     flops = (M * K * N * 2) / duration_s
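
For reference, analyze_conv treats the convolution as an implicit matmul: M = NHW (output rows), K = CH_IN * kh * kw, N = CH_OUT, and FLOPs = 2 * M * K * N / duration. A small worked example with made-up shapes (not taken from a real trace):

    # Made-up conv: 1x32x32 output, 3x3 window, 64 input channels, 128 output channels.
    NHW, CH_IN, CH_OUT = 1 * 32 * 32, 64, 128
    kh, kw = 3, 3
    duration_s = 50e-6  # 50 us of device kernel time, also made up

    M, K, N = NHW, CH_IN * kh * kw, CH_OUT
    flops = (M * K * N * 2) / duration_s
    print(f"{flops / 1e12:.2f} TFLOP/s")  # ~3.02 TFLOP/s for this example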
@@ -387,7 +408,7 @@ def analyze_conv(row):
         config,
     )
 
-def analyze_op(row, prev_row):
+def analyze_op(row, prev_row, csv_format="v2"):
     op_code = Cell(row["OP CODE"])
     cores = Cell(int(row["CORE COUNT"]) if pd.notna(row["CORE COUNT"]) else None)
     device_time = Cell(
@@ -440,7 +461,7 @@ def analyze_op(row, prev_row):
             math_fidelity,
             is_dram_sharded,
             adjusted_core_count, # Get the potentially adjusted core count
-        ) = analyze_matmul(row)
+        ) = analyze_matmul(row, csv_format)
         op_code = Cell(f"{op_code.raw_value} {size}")
         dram_speed = Cell(dram_speed, unit="GB/s", decimals=0)
         dram_percentage = Cell(dram_percentage, unit="%", decimals=1)
@@ -461,7 +482,7 @@ def analyze_op(row, prev_row):
             memory_info,
             math_fidelity,
             config,
-        ) = analyze_conv(row)
+        ) = analyze_conv(row, csv_format)
         op_code = Cell(f"{op_code.raw_value} {size} {config}")
         dram_speed = Cell(None, unit="GB/s", decimals=0)
         dram_percentage = Cell(None, unit="%", decimals=1)
@@ -1053,6 +1074,12 @@ def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage,
                          raw_op_codes, no_host_ops, no_stacked_report, no_stack_by_in0, stacked_report_file):
     df = pd.read_csv(csv_file, low_memory=False)
 
+    # Detect CSV format version
+    csv_format = detect_csv_format(df)
+
+    if csv_format != "v2":
+        print(colored(f"Detected CSV format: v1 (legacy format)", "cyan"))
+
     # Add a column for original row numbers
     df["ORIGINAL_ROW"] = df.index + 2 # +2 to match Excel row numbers (1-based + header)
 
@@ -1076,7 +1103,7 @@ def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage,
     device_ops = 0
     host_ops = 0
     for _, row in df.iterrows():
-        op_data, current_gap = analyze_op(row, prev_row)
+        op_data, current_gap = analyze_op(row, prev_row, csv_format)
         op_data["ID"] = Cell(row["ORIGINAL_ROW"]) # Use the original row number
         op_data["Global Call Count"] = Cell(row["GLOBAL CALL COUNT"])
         if raw_op_codes:
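
Taken together, generate_perf_report now detects the trace format once, right after the CSV is loaded, and threads csv_format through analyze_op into analyze_matmul and analyze_conv. A rough sketch of that flow, using a made-up legacy trace and a stub in place of the real per-op analysis:

    import pandas as pd

    def detect_csv_format(df):
        # Same rule as in the diff: any "_PAD[LOGICAL]" column means v2.
        return "v2" if [c for c in df.columns if "_PAD[LOGICAL]" in c] else "v1"

    def analyze_op_stub(row, prev_row, csv_format):
        # Stand-in for analyze_op(); the real function forwards csv_format to
        # analyze_matmul(row, csv_format) / analyze_conv(row, csv_format).
        return {"OP CODE": row["OP CODE"], "format": csv_format}

    # Made-up two-row legacy (v1) trace standing in for pd.read_csv(csv_file).
    df = pd.DataFrame({"OP CODE": ["Matmul", "Conv"], "INPUT_0_X": [64, 32]})

    csv_format = detect_csv_format(df)
    if csv_format != "v2":
        print("Detected CSV format: v1 (legacy format)")

    prev_row = None
    for _, row in df.iterrows():
        print(analyze_op_stub(row, prev_row, csv_format))
        prev_row = row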
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tt-perf-report
-Version: 1.1.0
+Version: 1.1.1
 Summary: This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities.
 License: Apache License
         Version 2.0, January 2004
3 files without changes