tt-perf-report 1.1.0.tar.gz → 1.1.2.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tt-perf-report might be problematic.
- {tt_perf_report-1.1.0/src/tt_perf_report.egg-info → tt_perf_report-1.1.2}/PKG-INFO +1 -1
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.2}/pyproject.toml +1 -1
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.2}/src/tt_perf_report/perf_report.py +85 -32
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.2/src/tt_perf_report.egg-info}/PKG-INFO +1 -1
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.2}/LICENSE +0 -0
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.2}/LICENSE_understanding.txt +0 -0
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.2}/README.md +0 -0
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.2}/setup.cfg +0 -0
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.2}/src/tt_perf_report/__init__.py +0 -0
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.2}/src/tt_perf_report.egg-info/SOURCES.txt +0 -0
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.2}/src/tt_perf_report.egg-info/dependency_links.txt +0 -0
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.2}/src/tt_perf_report.egg-info/entry_points.txt +0 -0
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.2}/src/tt_perf_report.egg-info/requires.txt +0 -0
- {tt_perf_report-1.1.0 → tt_perf_report-1.1.2}/src/tt_perf_report.egg-info/top_level.txt +0 -0
{tt_perf_report-1.1.0/src/tt_perf_report.egg-info → tt_perf_report-1.1.2}/PKG-INFO RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tt-perf-report
-Version: 1.1.0
+Version: 1.1.2
 Summary: This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities.
 License: Apache License
                        Version 2.0, January 2004
{tt_perf_report-1.1.0 → tt_perf_report-1.1.2}/pyproject.toml RENAMED

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "tt-perf-report"
-version = "1.1.0"
+version = "1.1.2"
 description = "This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities."
 license = {file = "LICENSE"}
 readme = "README.md"
{tt_perf_report-1.1.0 → tt_perf_report-1.1.2}/src/tt_perf_report/perf_report.py RENAMED

@@ -16,8 +16,14 @@ import pandas as pd
 # Global variable to store color preference
 color_output = None  # None means auto-detect, True forces color, False forces no color
 
-def get_value_physical_logical(input : str, is_physical : bool = True):
-
+
+def get_value_physical_logical(input, is_physical : bool = True):
+    # Handle numeric inputs (old format)
+    if isinstance(input, (int, float)):
+        return int(input)
+
+    # Handle string inputs (new format)
+    if isinstance(input, str) and "[" in input and "]" in input:
         physical_part = input.split("[")[0]
         logical_part = input.split("[")[1].split("]")[0]
 
@@ -26,9 +32,24 @@ def get_value_physical_logical(input : str, is_physical : bool = True):
         else:
             return int(logical_part)
     else:
-        #
+        # backwards compatibility - convert string to int
         return int(input)
 
+
+def detect_csv_format(df):
+    """Detect if CSV uses v1 (old) or v2 (new) format by checking for _PAD[LOGICAL] columns"""
+    v2_columns = [col for col in df.columns if "_PAD[LOGICAL]" in col]
+    return "v2" if v2_columns else "v1"
+
+
+def get_column_name(base_name, csv_format):
+    """Get the appropriate column name based on CSV format version"""
+    if csv_format == "v2":
+        return f"{base_name}_PAD[LOGICAL]"
+    else:
+        return base_name
+
+
 def set_color_output(force_color, force_no_color):
     global color_output
     if force_no_color:
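These three helpers are the heart of the 1.1.2 compatibility fix: `detect_csv_format` sniffs the column names once, `get_column_name` maps a base name to whichever column the trace actually has, and `get_value_physical_logical` now accepts either plain numbers (v1 traces) or `physical[logical]` strings (v2 traces). A minimal sketch of how they compose, assuming the functions from this diff are in scope; the one-row frames and the `1056[1024]` value are illustrative, not taken from a real trace:

```python
import pandas as pd

# Hypothetical one-row traces: v1 stores plain numbers, v2 stores
# "physical[logical]" strings under *_PAD[LOGICAL] column names.
v1_df = pd.DataFrame({"INPUT_0_X": [1024]})
v2_df = pd.DataFrame({"INPUT_0_X_PAD[LOGICAL]": ["1056[1024]"]})

for df in (v1_df, v2_df):
    fmt = detect_csv_format(df)              # "v1" or "v2"
    col = get_column_name("INPUT_0_X", fmt)  # picks the matching column name
    value = get_value_physical_logical(df.iloc[0][col], is_physical=False)
    print(fmt, col, value)                   # both yield a logical size of 1024
```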
@@ -147,11 +168,32 @@ def pad_string(string, length, align="left"):
     return padding + string if align == "right" else string + padding
 
 
-def evaluate_fidelity(
-
-
-
-
+def evaluate_fidelity(
+    input_0_datatype, input_1_datatype, output_datatype, math_fidelity
+):
+    integer_types = {"UINT8", "UINT16", "INT32", "UINT32"}
+
+    if (
+        input_0_datatype in integer_types
+        or input_1_datatype in integer_types
+        or output_datatype in integer_types
+    ):
+        return (
+            "not_applicable",
+            "Fidelity evaluation is not applicable for integer datatypes (UINT8, UINT16, INT32, UINT32).",
+        )
+
+    mantissa_bits = {"FLOAT32": 23, "BFLOAT16": 8, "BFLOAT8_B": 7, "BFLOAT4_B": 3}
+    try:
+        in0_bits = mantissa_bits[input_0_datatype]  # activations -> srcB (7 bits)
+        in1_bits = mantissa_bits[input_1_datatype]  # weights -> srcA (5 bits)
+        out_bits = mantissa_bits[output_datatype]
+    except KeyError as e:
+        return (
+            "unknown",
+            f"Datatype {e.args[0]} is not supported for fidelity evaluation.",
+        )
+
     if in0_bits == 8 and out_bits >= 7:
         if math_fidelity == "HiFi4":
             return (
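The rewritten `evaluate_fidelity` adds two early exits before the existing bit-width heuristics: integer operands skip fidelity evaluation entirely, and datatypes missing from the `mantissa_bits` table return `"unknown"` instead of failing. A quick sketch of the new paths, assuming the function as defined in this diff; `FLOAT16` below is just a stand-in for any datatype name outside the table:

```python
# Integer operands: fidelity analysis does not apply.
status, note = evaluate_fidelity("INT32", "BFLOAT16", "INT32", "HiFi4")
assert status == "not_applicable"

# A datatype with no mantissa_bits entry now degrades gracefully.
status, note = evaluate_fidelity("FLOAT16", "BFLOAT16", "BFLOAT16", "HiFi2")
assert status == "unknown" and "FLOAT16" in note
```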
@@ -220,35 +262,35 @@ def evaluate_fidelity(input_0_datatype, input_1_datatype, output_datatype, math_fidelity):
     )
 
 
-def analyze_matmul(row):
+def analyze_matmul(row, csv_format="v2"):
     input_0_from_dram = "DRAM" in row["INPUT_0_MEMORY"]
     input_1_from_dram = "DRAM" in row["INPUT_1_MEMORY"]
 
     total_data_size_bytes = 0
     if input_0_from_dram:
         total_data_size_bytes += (
-            get_value_physical_logical(row["
-            * get_value_physical_logical(row["
-            * get_value_physical_logical(row["
-            * get_value_physical_logical(row["
+            get_value_physical_logical(row[get_column_name("INPUT_0_W", csv_format)])
+            * get_value_physical_logical(row[get_column_name("INPUT_0_Y", csv_format)])
+            * get_value_physical_logical(row[get_column_name("INPUT_0_Z", csv_format)])
+            * get_value_physical_logical(row[get_column_name("INPUT_0_X", csv_format)])
             * get_datatype_size(row["INPUT_0_DATATYPE"])
         )
     if input_1_from_dram:
         total_data_size_bytes += (
-            get_value_physical_logical(row["
-            * get_value_physical_logical(row["
-            * get_value_physical_logical(row["
-            * get_value_physical_logical(row["
+            get_value_physical_logical(row[get_column_name("INPUT_1_W", csv_format)])
+            * get_value_physical_logical(row[get_column_name("INPUT_1_Y", csv_format)])
+            * get_value_physical_logical(row[get_column_name("INPUT_1_Z", csv_format)])
+            * get_value_physical_logical(row[get_column_name("INPUT_1_X", csv_format)])
             * get_datatype_size(row["INPUT_1_DATATYPE"])
         )
 
     # Always include output if it's written to DRAM
     if "DRAM" in row["OUTPUT_0_MEMORY"]:
         total_data_size_bytes += (
-            get_value_physical_logical(row["
-            * get_value_physical_logical(row["
-            * get_value_physical_logical(row["
-            * get_value_physical_logical(row["
+            get_value_physical_logical(row[get_column_name("OUTPUT_0_W", csv_format)])
+            * get_value_physical_logical(row[get_column_name("OUTPUT_0_Y", csv_format)])
+            * get_value_physical_logical(row[get_column_name("OUTPUT_0_Z", csv_format)])
+            * get_value_physical_logical(row[get_column_name("OUTPUT_0_X", csv_format)])
             * get_datatype_size(row["OUTPUT_0_DATATYPE"])
         )
 
@@ -268,8 +310,8 @@ def analyze_matmul(row):
 
     peak_flops_value = tflops_per_core(math_fidelity) * 1e12 * core_count
 
-    M, K, N = get_value_physical_logical(row["
-    W, Z = get_value_physical_logical(row["
+    M, K, N = get_value_physical_logical(row[get_column_name("INPUT_0_Y", csv_format)]), get_value_physical_logical(row[get_column_name("INPUT_0_X", csv_format)]), get_value_physical_logical(row[get_column_name("INPUT_1_X", csv_format)])
+    W, Z = get_value_physical_logical(row[get_column_name("INPUT_0_W", csv_format)]), get_value_physical_logical(row[get_column_name("INPUT_0_Z", csv_format)])
 
     flops = (M * K * N * W * Z * 2) / duration_s
 
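For reference, the `flops` line treats the op as `W * Z` independent `M x K x N` matmuls with two floating-point operations (one multiply, one accumulate) per inner-product step. A worked example with made-up numbers:

```python
# Illustrative values only: a single (W = Z = 1) 1024x1024x1024 matmul
# that ran for 100 microseconds.
M = K = N = 1024
W = Z = 1
duration_s = 100e-6

flops = (M * K * N * W * Z * 2) / duration_s  # 2 FLOPs per multiply-accumulate
print(f"{flops / 1e12:.2f} TFLOPs")           # ~21.47 TFLOPs
```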
@@ -327,7 +369,7 @@ def analyze_halo(row):
 
     return config
 
-def analyze_conv(row):
+def analyze_conv(row, csv_format="v2"):
     duration_s = row["DEVICE KERNEL DURATION [ns]"] * 1e-9
 
     core_count = 64  # we decided to normalize to the max core count
@@ -338,10 +380,10 @@ def analyze_conv(row):
 
     peak_flops_value = tflops_per_core(math_fidelity) * 1e12 * core_count
 
-    NHW = get_value_physical_logical(row["
-    CH_IN = get_value_physical_logical(row["
+    NHW = get_value_physical_logical(row[get_column_name("OUTPUT_0_Y", csv_format)])
+    CH_IN = get_value_physical_logical(row[get_column_name("INPUT_0_X", csv_format)])
     W = [int(x) for x in (attributes.split("window_hw")[1].split("; ")[0][2:-1].split(";"))]
-    CH_OUT = get_value_physical_logical(row["
+    CH_OUT = get_value_physical_logical(row[get_column_name("INPUT_1_X", csv_format)])
 
     M, K, N = NHW, CH_IN * W[0] * W[1], CH_OUT
     flops = (M * K * N * 2) / duration_s
@@ -387,7 +429,7 @@ def analyze_conv(row):
         config,
     )
 
-def analyze_op(row, prev_row):
+def analyze_op(row, prev_row, csv_format="v2"):
     op_code = Cell(row["OP CODE"])
     cores = Cell(int(row["CORE COUNT"]) if pd.notna(row["CORE COUNT"]) else None)
     device_time = Cell(
@@ -414,7 +456,12 @@ def analyze_op(row, prev_row):
     output_datatype_cell = Cell(output_datatype)
     input_0_datatype_cell = Cell(input_0_datatype)
     input_1_datatype_cell = Cell(input_1_datatype)
-    short_name = lambda n: {
+    short_name = lambda n: {
+        "FLOAT32": "FP32",
+        "BFLOAT16": "BF16",
+        "BFLOAT8_B": "BFP8",
+        "BFLOAT4_B": "BFP4",
+    }.get(n, n)
 
     dram_speed = Cell(None, unit="GB/s", decimals=0)
     dram_percentage = Cell(None, unit="%", decimals=1)
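The expanded `short_name` lambda maps the four float datatypes to their short labels and falls back to the raw name via `dict.get`, so datatypes outside the table (for example, the integer types this release starts tolerating) render unchanged. A standalone repro:

```python
# Same mapping as in the diff above, runnable on its own.
short_name = lambda n: {
    "FLOAT32": "FP32",
    "BFLOAT16": "BF16",
    "BFLOAT8_B": "BFP8",
    "BFLOAT4_B": "BFP4",
}.get(n, n)

print(short_name("BFLOAT8_B"))  # BFP8
print(short_name("UINT32"))     # UINT32 -- unknown names pass through
```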
@@ -440,7 +487,7 @@ def analyze_op(row, prev_row):
         math_fidelity,
         is_dram_sharded,
         adjusted_core_count,  # Get the potentially adjusted core count
-    ) = analyze_matmul(row)
+    ) = analyze_matmul(row, csv_format)
     op_code = Cell(f"{op_code.raw_value} {size}")
     dram_speed = Cell(dram_speed, unit="GB/s", decimals=0)
     dram_percentage = Cell(dram_percentage, unit="%", decimals=1)
@@ -461,7 +508,7 @@ def analyze_op(row, prev_row):
         memory_info,
         math_fidelity,
         config,
-    ) = analyze_conv(row)
+    ) = analyze_conv(row, csv_format)
     op_code = Cell(f"{op_code.raw_value} {size} {config}")
     dram_speed = Cell(None, unit="GB/s", decimals=0)
     dram_percentage = Cell(None, unit="%", decimals=1)
@@ -1053,6 +1100,12 @@ def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage,
                         raw_op_codes, no_host_ops, no_stacked_report, no_stack_by_in0, stacked_report_file):
     df = pd.read_csv(csv_file, low_memory=False)
 
+    # Detect CSV format version
+    csv_format = detect_csv_format(df)
+
+    if csv_format != "v2":
+        print(colored(f"Detected CSV format: v1 (legacy format)", "cyan"))
+
     # Add a column for original row numbers
     df["ORIGINAL_ROW"] = df.index + 2  # +2 to match Excel row numbers (1-based + header)
 
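Detection happens once per report, right after the CSV is loaded, and the resulting `csv_format` string is threaded through `analyze_op` down to `analyze_matmul` and `analyze_conv` (see the hunk below). A minimal sketch of the detection step on an in-memory frame, assuming `detect_csv_format` from this diff is in scope; the column names are illustrative:

```python
import pandas as pd

# No *_PAD[LOGICAL] columns here, so this frame is classified as legacy v1.
df = pd.DataFrame({"OP CODE": ["Matmul"], "INPUT_0_X": [1024]})
assert detect_csv_format(df) == "v1"

# Adding any *_PAD[LOGICAL] column flips the detection to v2.
df["INPUT_0_X_PAD[LOGICAL]"] = ["1056[1024]"]
assert detect_csv_format(df) == "v2"
```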
@@ -1076,7 +1129,7 @@ def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage,
     device_ops = 0
     host_ops = 0
     for _, row in df.iterrows():
-        op_data, current_gap = analyze_op(row, prev_row)
+        op_data, current_gap = analyze_op(row, prev_row, csv_format)
         op_data["ID"] = Cell(row["ORIGINAL_ROW"])  # Use the original row number
         op_data["Global Call Count"] = Cell(row["GLOBAL CALL COUNT"])
         if raw_op_codes:
{tt_perf_report-1.1.0 → tt_perf_report-1.1.2/src/tt_perf_report.egg-info}/PKG-INFO RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tt-perf-report
-Version: 1.1.0
+Version: 1.1.2
 Summary: This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities.
 License: Apache License
                        Version 2.0, January 2004