tt-perf-report 1.0.4__tar.gz → 1.0.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tt-perf-report might be problematic. Click here for more details.
- tt_perf_report-1.0.6/LICENSE_understanding.txt +3 -0
- {tt_perf_report-1.0.4 → tt_perf_report-1.0.6}/PKG-INFO +2 -1
- {tt_perf_report-1.0.4 → tt_perf_report-1.0.6}/pyproject.toml +1 -1
- {tt_perf_report-1.0.4 → tt_perf_report-1.0.6}/src/tt_perf_report/perf_report.py +43 -6
- {tt_perf_report-1.0.4 → tt_perf_report-1.0.6}/src/tt_perf_report.egg-info/PKG-INFO +2 -1
- {tt_perf_report-1.0.4 → tt_perf_report-1.0.6}/src/tt_perf_report.egg-info/SOURCES.txt +1 -0
- {tt_perf_report-1.0.4 → tt_perf_report-1.0.6}/LICENSE +0 -0
- {tt_perf_report-1.0.4 → tt_perf_report-1.0.6}/README.md +0 -0
- {tt_perf_report-1.0.4 → tt_perf_report-1.0.6}/setup.cfg +0 -0
- {tt_perf_report-1.0.4 → tt_perf_report-1.0.6}/src/tt_perf_report/__init__.py +0 -0
- {tt_perf_report-1.0.4 → tt_perf_report-1.0.6}/src/tt_perf_report.egg-info/dependency_links.txt +0 -0
- {tt_perf_report-1.0.4 → tt_perf_report-1.0.6}/src/tt_perf_report.egg-info/entry_points.txt +0 -0
- {tt_perf_report-1.0.4 → tt_perf_report-1.0.6}/src/tt_perf_report.egg-info/requires.txt +0 -0
- {tt_perf_report-1.0.4 → tt_perf_report-1.0.6}/src/tt_perf_report.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: tt-perf-report
|
|
3
|
-
Version: 1.0.4
|
|
3
|
+
Version: 1.0.6
|
|
4
4
|
Summary: This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities.
|
|
5
5
|
License: Apache License
|
|
6
6
|
Version 2.0, January 2004
|
|
@@ -216,6 +216,7 @@ Project-URL: Repository, https://github.com/tenstorrent/tt-perf-report
|
|
|
216
216
|
Keywords: tenstorrent,tt-metal
|
|
217
217
|
Description-Content-Type: text/markdown
|
|
218
218
|
License-File: LICENSE
|
|
219
|
+
License-File: LICENSE_understanding.txt
|
|
219
220
|
Requires-Dist: pandas
|
|
220
221
|
|
|
221
222
|
# Performance Report Analysis Tool
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "tt-perf-report"
|
|
7
|
-
version = "1.0.4"
|
|
7
|
+
version = "1.0.6"
|
|
8
8
|
description = "This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities."
|
|
9
9
|
license = {file = "LICENSE"}
|
|
10
10
|
readme = "README.md"
|
|
@@ -742,8 +742,28 @@ def merge_device_rows(df):
|
|
|
742
742
|
device_ids = sorted(block_by_device.keys())
|
|
743
743
|
merged_blocks = []
|
|
744
744
|
|
|
745
|
-
|
|
746
|
-
|
|
745
|
+
global_index = 0
|
|
746
|
+
while max(len(block_by_device[device_id]) for device_id in device_ids) > 0:
|
|
747
|
+
blocks = []
|
|
748
|
+
op_name = None
|
|
749
|
+
missing_devices = []
|
|
750
|
+
for device_id in device_ids:
|
|
751
|
+
if not len(block_by_device[device_id]):
|
|
752
|
+
print(colored(f"Warning: Device {device_id} is missing operation {op_name} at index {global_index}", "yellow"))
|
|
753
|
+
continue
|
|
754
|
+
if op_name is None:
|
|
755
|
+
op_name = block_by_device[device_id][0][0]
|
|
756
|
+
elif op_name != block_by_device[device_id][0][0]:
|
|
757
|
+
missing_devices.append(device_id)
|
|
758
|
+
continue
|
|
759
|
+
|
|
760
|
+
blocks.append(block_by_device[device_id].pop(0))
|
|
761
|
+
|
|
762
|
+
if missing_devices:
|
|
763
|
+
print(colored(f"Warning: {op_name} at index {global_index} not present in CSV for {len(missing_devices)} devices {missing_devices} - do not trust data for this op or directly subsequent ops with the same name", "yellow"))
|
|
764
|
+
|
|
765
|
+
if not blocks:
|
|
766
|
+
break
|
|
747
767
|
|
|
748
768
|
if "AllGather" in op_name or "ReduceScatter" in op_name:
|
|
749
769
|
# For collective ops, take the row with minimum duration
|
|
@@ -754,6 +774,8 @@ def merge_device_rows(df):
|
|
|
754
774
|
max_duration_block = max(blocks, key=lambda x: x[1]["DEVICE KERNEL DURATION [ns]"])
|
|
755
775
|
merged_blocks.append(max_duration_block[1])
|
|
756
776
|
|
|
777
|
+
global_index += 1
|
|
778
|
+
|
|
757
779
|
return pd.DataFrame(merged_blocks)
|
|
758
780
|
|
|
759
781
|
|
|
@@ -792,11 +814,15 @@ def filter_by_id_range(rows, id_range):
|
|
|
792
814
|
return rows
|
|
793
815
|
|
|
794
816
|
|
|
817
|
+
def filter_host_ops(rows):
|
|
818
|
+
return [row for row in rows if not is_host_op(row)]
|
|
819
|
+
|
|
820
|
+
|
|
795
821
|
def main():
|
|
796
822
|
args, id_range = parse_args()
|
|
797
823
|
generate_perf_report(
|
|
798
|
-
args.csv_file, args.signpost, args.ignore_signposts, args.min_percentage, id_range, args.csv, args.no_advice,
|
|
799
|
-
|
|
824
|
+
args.csv_file, args.signpost, args.ignore_signposts, args.min_percentage, id_range, args.csv, args.no_advice,
|
|
825
|
+
args.tracing_mode, args.raw_op_codes, args.no_host_ops)
|
|
800
826
|
|
|
801
827
|
|
|
802
828
|
def parse_args():
|
|
@@ -818,6 +844,8 @@ def parse_args():
|
|
|
818
844
|
parser.add_argument("--no-advice", action="store_true", help="Only show the table section of the report")
|
|
819
845
|
parser.add_argument("--tracing-mode", action="store_true", help="Do not sort when in tracing mode")
|
|
820
846
|
parser.add_argument("--raw-op-codes", action="store_true", help="Include raw op codes in output")
|
|
847
|
+
parser.add_argument("--no-host-ops", action="store_true", help="Do not include host ops in output")
|
|
848
|
+
|
|
821
849
|
args = parser.parse_args()
|
|
822
850
|
|
|
823
851
|
# Set the global color_output variable
|
|
@@ -833,7 +861,9 @@ def parse_args():
|
|
|
833
861
|
return args, id_range
|
|
834
862
|
|
|
835
863
|
|
|
836
|
-
def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage,
|
|
864
|
+
def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage,
|
|
865
|
+
id_range, csv_output_file, no_advice, tracing_mode,
|
|
866
|
+
raw_op_codes, no_host_ops):
|
|
837
867
|
df = pd.read_csv(csv_file, low_memory=False)
|
|
838
868
|
|
|
839
869
|
# Add a column for original row numbers
|
|
@@ -867,7 +897,7 @@ def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage, i
|
|
|
867
897
|
prev_row = row
|
|
868
898
|
|
|
869
899
|
# Count device and host ops
|
|
870
|
-
if
|
|
900
|
+
if is_host_op(op_data):
|
|
871
901
|
host_ops += 1
|
|
872
902
|
else:
|
|
873
903
|
device_ops += 1
|
|
@@ -878,6 +908,9 @@ def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage, i
|
|
|
878
908
|
# Filter rows based on id_range
|
|
879
909
|
rows = filter_by_id_range(rows, id_range)
|
|
880
910
|
|
|
911
|
+
if no_host_ops:
|
|
912
|
+
rows = filter_host_ops(rows)
|
|
913
|
+
|
|
881
914
|
# Recalculate derived columns after filtering
|
|
882
915
|
add_derived_columns(rows)
|
|
883
916
|
|
|
@@ -933,5 +966,9 @@ def generate_perf_report(csv_file, signpost, ignore_signposts, min_percentage, i
|
|
|
933
966
|
print_advice_section(rows, visible_headers, col_widths)
|
|
934
967
|
|
|
935
968
|
|
|
969
|
+
def is_host_op(op_data):
|
|
970
|
+
return "(torch)" in op_data["OP Code"].raw_value
|
|
971
|
+
|
|
972
|
+
|
|
936
973
|
if __name__ == "__main__":
|
|
937
974
|
main()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: tt-perf-report
|
|
3
|
-
Version: 1.0.4
|
|
3
|
+
Version: 1.0.6
|
|
4
4
|
Summary: This tool analyzes performance traces from TT-Metal operations, providing insights into throughput, bottlenecks, and optimization opportunities.
|
|
5
5
|
License: Apache License
|
|
6
6
|
Version 2.0, January 2004
|
|
@@ -216,6 +216,7 @@ Project-URL: Repository, https://github.com/tenstorrent/tt-perf-report
|
|
|
216
216
|
Keywords: tenstorrent,tt-metal
|
|
217
217
|
Description-Content-Type: text/markdown
|
|
218
218
|
License-File: LICENSE
|
|
219
|
+
License-File: LICENSE_understanding.txt
|
|
219
220
|
Requires-Dist: pandas
|
|
220
221
|
|
|
221
222
|
# Performance Report Analysis Tool
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tt_perf_report-1.0.4 → tt_perf_report-1.0.6}/src/tt_perf_report.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|